From 0e93d0f6476e5504cb08b2c71edd09bb4d278a1f Mon Sep 17 00:00:00 2001 From: Aurelien Bouteiller Date: Mon, 11 May 2020 01:11:05 -0400 Subject: [PATCH] Bugfix: when a TCP socket is closed in error, it could update the endpoint state without holding the endpoint lock, resulting in a race condition. Signed-off-by: Aurelien Bouteiller --- opal/mca/btl/tcp/btl_tcp_endpoint.c | 3 +++ opal/mca/btl/tcp/btl_tcp_frag.c | 24 +++++++++++++----------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/opal/mca/btl/tcp/btl_tcp_endpoint.c b/opal/mca/btl/tcp/btl_tcp_endpoint.c index 2a381e77dd..737060282b 100644 --- a/opal/mca/btl/tcp/btl_tcp_endpoint.c +++ b/opal/mca/btl/tcp/btl_tcp_endpoint.c @@ -388,6 +388,8 @@ mca_btl_tcp_endpoint_send_blocking(mca_btl_base_endpoint_t* btl_endpoint, { int ret = mca_btl_tcp_send_blocking(btl_endpoint->endpoint_sd, data, size); if (ret < 0) { + /* send-lock not needed because never called when the socket is in the + * event set. */ btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; mca_btl_tcp_endpoint_close(btl_endpoint); } @@ -1077,6 +1079,7 @@ static void mca_btl_tcp_endpoint_send_handler(int sd, short flags, void* user) mca_btl_tcp_frag_t* frag = btl_endpoint->endpoint_send_frag; int btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP); + assert(btl_endpoint->endpoint_state == MCA_BTL_TCP_CONNECTED); if(mca_btl_tcp_frag_send(frag, btl_endpoint->endpoint_sd) == false) { break; } diff --git a/opal/mca/btl/tcp/btl_tcp_frag.c b/opal/mca/btl/tcp/btl_tcp_frag.c index cb5eb4d92c..9b97145e59 100644 --- a/opal/mca/btl/tcp/btl_tcp_frag.c +++ b/opal/mca/btl/tcp/btl_tcp_frag.c @@ -49,6 +49,7 @@ #include "opal/util/show_help.h" #include "btl_tcp_frag.h" +#include "btl_tcp_proc.h" #include "btl_tcp_endpoint.h" #include "btl_tcp_proc.h" @@ -130,6 +131,7 @@ bool mca_btl_tcp_frag_send(mca_btl_tcp_frag_t* frag, int sd) BTL_ERROR(("mca_btl_tcp_frag_send: writev error (%p, %lu)\n\t%s(%lu)\n", frag->iov_ptr[0].iov_base, (unsigned long) frag->iov_ptr[0].iov_len, strerror(opal_socket_errno), (unsigned long) frag->iov_cnt)); + /* send_lock held by caller */ frag->endpoint->endpoint_state = MCA_BTL_TCP_FAILED; mca_btl_tcp_endpoint_close(frag->endpoint); return false; @@ -137,6 +139,7 @@ bool mca_btl_tcp_frag_send(mca_btl_tcp_frag_t* frag, int sd) BTL_ERROR(("mca_btl_tcp_frag_send: writev failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); + /* send_lock held by caller */ frag->endpoint->endpoint_state = MCA_BTL_TCP_FAILED; mca_btl_tcp_endpoint_close(frag->endpoint); return false; @@ -215,9 +218,11 @@ bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd) cnt = readv(sd, frag->iov_ptr, num_vecs); if( 0 < cnt ) goto advance_iov_position; if( cnt == 0 ) { + OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock); if(MCA_BTL_TCP_CONNECTED == btl_endpoint->endpoint_state) btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; mca_btl_tcp_endpoint_close(btl_endpoint); + OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock); return false; } switch(opal_socket_errno) { @@ -229,28 +234,25 @@ bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd) BTL_ERROR(("mca_btl_tcp_frag_recv: readv error (%p, %lu)\n\t%s(%lu)\n", frag->iov_ptr[0].iov_base, (unsigned long) frag->iov_ptr[0].iov_len, strerror(opal_socket_errno), (unsigned long) frag->iov_cnt)); - btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; - mca_btl_tcp_endpoint_close(btl_endpoint); - return false; - + break; case ECONNRESET: errhost = opal_get_proc_hostname(btl_endpoint->endpoint_proc->proc_opal); opal_show_help("help-mpi-btl-tcp.txt", "peer hung up", true, opal_process_info.nodename, getpid(), errhost); free(errhost); - btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; - mca_btl_tcp_endpoint_close(btl_endpoint); - return false; - + break; default: BTL_ERROR(("mca_btl_tcp_frag_recv: readv failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno)); - btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; - mca_btl_tcp_endpoint_close(btl_endpoint); - return false; + break; } + OPAL_THREAD_LOCK(&btl_endpoint->endpoint_send_lock); + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; + mca_btl_tcp_endpoint_close(btl_endpoint); + OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock); + return false; } while( cnt < 0 ); advance_iov_position: