From d5360711faf8352fb4376115fc81c5a75e51881e Mon Sep 17 00:00:00 2001
From: Brian Barrett
Date: Thu, 28 Mar 2019 23:10:01 +0000
Subject: [PATCH] btl/tcp: Skip printing error message in racy cleanup path

Avoid printing an error message about ENOTCONN return codes from
getpeername() when handling an incoming connection request. At this
point in the receive state machine, the remote process has been
verified to be a valid OMPI instance. In all-to-all startup at 4k rank
scale, we're seeing this error message when the remote side drops the
connection because it realizes it's the "loser" in the connection
race. We were already doing all the right things, other than printing
a scary error message. So skip the error message and call it good.

Signed-off-by: Brian Barrett
---
 opal/mca/btl/tcp/btl_tcp_component.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/opal/mca/btl/tcp/btl_tcp_component.c b/opal/mca/btl/tcp/btl_tcp_component.c
index 422db48a16..15ea952e84 100644
--- a/opal/mca/btl/tcp/btl_tcp_component.c
+++ b/opal/mca/btl/tcp/btl_tcp_component.c
@@ -1515,11 +1515,13 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user)
 
     /* lookup peer address */
     if(getpeername(sd, (struct sockaddr*)&addr, &addr_len) != 0) {
-        opal_show_help("help-mpi-btl-tcp.txt",
-                       "server getpeername failed",
-                       true, opal_process_info.nodename,
-                       getpid(),
-                       strerror(opal_socket_errno), opal_socket_errno);
+        if (ENOTCONN != opal_socket_errno) {
+            opal_show_help("help-mpi-btl-tcp.txt",
+                           "server getpeername failed",
+                           true, opal_process_info.nodename,
+                           getpid(),
+                           strerror(opal_socket_errno), opal_socket_errno);
+        }
         CLOSE_THE_SOCKET(sd);
         return;
     }