From cad93a7693d9793c50cba4a40e30efd18529e0a5 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Thu, 12 Apr 2007 05:01:29 +0000 Subject: [PATCH] Add more output. Fix some typos, and some small cleanups. This commit was SVN r14327. --- orte/mca/oob/tcp/oob_tcp.c | 9 +++++---- orte/mca/oob/tcp/oob_tcp_msg.c | 31 ++++++++++++++++++------------- orte/mca/oob/tcp/oob_tcp_recv.c | 12 ++++++++++++ orte/mca/oob/tcp/oob_tcp_send.c | 24 ++++++++++++++++-------- 4 files changed, 51 insertions(+), 25 deletions(-) diff --git a/orte/mca/oob/tcp/oob_tcp.c b/orte/mca/oob/tcp/oob_tcp.c index b1c4449523..5e1b6b0c36 100644 --- a/orte/mca/oob/tcp/oob_tcp.c +++ b/orte/mca/oob/tcp/oob_tcp.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -18,9 +18,9 @@ * $HEADER$ * * In windows, many of the socket functions return an EWOULDBLOCK - * instead of \ things like EAGAIN, EINPROGRESS, etc. It has been - * verified that this will \ not conflict with other error codes that - * are returned by these functions \ under UNIX/Linux environments + * instead of things like EAGAIN, EINPROGRESS, etc. It has been + * verified that this will not conflict with other error codes that + * are returned by these functions under UNIX/Linux environments */ #include "orte_config.h" @@ -930,6 +930,7 @@ void mca_oob_tcp_registry_callback( &mca_oob_tcp_component.tcp_peer_names, &addr->addr_name); if(NULL != existing) { /* TSW - need to update existing entry */ + opal_output( 0, "WHY ARE WE RECEIVING THE SAME INFORMATION SEVERAL TIMES ?!?!?!?" ); orte_hash_table_set_proc(&mca_oob_tcp_component.tcp_peer_names, &addr->addr_name, addr); OBJ_RELEASE(addr); continue; diff --git a/orte/mca/oob/tcp/oob_tcp_msg.c b/orte/mca/oob/tcp/oob_tcp_msg.c index da240327df..a73e5c56c3 100644 --- a/orte/mca/oob/tcp/oob_tcp_msg.c +++ b/orte/mca/oob/tcp/oob_tcp_msg.c @@ -269,14 +269,14 @@ bool mca_oob_tcp_msg_send_handler(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_pee return true; } } - } while(msg->msg_rwnum); + } while(1); } } /* * Receives message data. * @param msg the message to be recieved into - * @param peer the peer to recieve from + * @param peer the peer to receive from * @retval true if the whole message was received * @retval false if the whole message was not received */ @@ -307,6 +307,12 @@ bool mca_oob_tcp_msg_recv_handler(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_pee msg->msg_rwiov[1].iov_len = 0; msg->msg_rwnum = 0; } + if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) { + opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_recv_handler*: size %lu\n", + ORTE_NAME_ARGS(orte_process_info.my_name), + ORTE_NAME_ARGS(&(peer->peer_name)), + (unsigned long)(msg->msg_hdr.msg_size) ); + } } /* do the right thing based on the message type */ @@ -343,16 +349,14 @@ static bool mca_oob_tcp_msg_recv(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee under UNIX/Linux environments */ else if (opal_socket_errno == EAGAIN || opal_socket_errno == EWOULDBLOCK) return false; - else { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_msg_recv: readv failed: %s (%d)", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), - strerror(opal_socket_errno), - opal_socket_errno); - mca_oob_tcp_peer_close(peer); - mca_oob_call_exception_handlers(&peer->peer_name, MCA_OOB_PEER_DISCONNECTED); - return false; - } + opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_msg_recv: readv failed: %s (%d)", + ORTE_NAME_ARGS(orte_process_info.my_name), + ORTE_NAME_ARGS(&(peer->peer_name)), + strerror(opal_socket_errno), + opal_socket_errno); + mca_oob_tcp_peer_close(peer); + mca_oob_call_exception_handlers(&peer->peer_name, MCA_OOB_PEER_DISCONNECTED); + return false; } else if (rc == 0) { if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT_FAIL) { opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_msg_recv: peer closed connection", @@ -374,10 +378,11 @@ static bool mca_oob_tcp_msg_recv(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee (msg->msg_rwnum)--; (msg->msg_rwptr)++; if(0 == msg->msg_rwnum) { + assert( 0 == rc ); return true; } } - } while(msg->msg_rwnum); + } while(1); } return true; } diff --git a/orte/mca/oob/tcp/oob_tcp_recv.c b/orte/mca/oob/tcp/oob_tcp_recv.c index b6cea39a32..0e999b8fc1 100644 --- a/orte/mca/oob/tcp/oob_tcp_recv.c +++ b/orte/mca/oob/tcp/oob_tcp_recv.c @@ -64,6 +64,11 @@ int mca_oob_tcp_recv( return msg->msg_rc; } + opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_recv*unexpected*: tag %d size %lu\n", + ORTE_NAME_ARGS(orte_process_info.my_name), + ORTE_NAME_ARGS(peer), + tag, (unsigned long)(msg->msg_hdr.msg_size) ); + /* if we are returning an allocated buffer - just take it from the message */ if(flags & MCA_OOB_ALLOC) { @@ -110,6 +115,13 @@ int mca_oob_tcp_recv( size += iov[i].iov_len; } + if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) { + opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_recv*expected*: tag %d size %lu\n", + ORTE_NAME_ARGS(orte_process_info.my_name), + ORTE_NAME_ARGS(peer), + tag, (unsigned long)(size) ); + } + /* fill in the struct */ msg->msg_hdr.msg_size = size; msg->msg_hdr.msg_tag = tag; diff --git a/orte/mca/oob/tcp/oob_tcp_send.c b/orte/mca/oob/tcp/oob_tcp_send.c index 25f450b11e..bac6c74b36 100644 --- a/orte/mca/oob/tcp/oob_tcp_send.c +++ b/orte/mca/oob/tcp/oob_tcp_send.c @@ -100,11 +100,17 @@ int mca_oob_tcp_send( if(NULL == peer) return ORTE_ERR_UNREACH; + /* calculate the size of the message */ + size = 0; + for(rc = 0; rc < count; rc++) { + size += iov[rc].iov_len; + } + if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_ALL) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_send: tag %d\n", + opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_send: tag %d size %lu\n", ORTE_NAME_ARGS(orte_process_info.my_name), ORTE_NAME_ARGS(&(peer->peer_name)), - tag); + tag, (unsigned long)size ); } MCA_OOB_TCP_MSG_ALLOC(msg, rc); @@ -112,12 +118,6 @@ int mca_oob_tcp_send( return rc; } - /* calculate the size of the message */ - size = 0; - for(rc = 0; rc < count; rc++) { - size += iov[rc].iov_len; - } - /* turn the size to network byte order so there will be no problems */ msg->msg_hdr.msg_type = MCA_OOB_TCP_DATA; msg->msg_hdr.msg_size = size; @@ -210,6 +210,14 @@ int mca_oob_tcp_send_nb( for(rc = 0; rc < count; rc++) { size += iov[rc].iov_len; } + + if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_ALL) { + opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_send_nb: tag %d size %lu\n", + ORTE_NAME_ARGS(orte_process_info.my_name), + ORTE_NAME_ARGS(&(peer->peer_name)), + tag, (unsigned long)size ); + } + /* turn the size to network byte order so there will be no problems */ msg->msg_hdr.msg_type = MCA_OOB_TCP_DATA; msg->msg_hdr.msg_size = size;