diff --git a/opal/mca/btl/tcp/btl_tcp.c b/opal/mca/btl/tcp/btl_tcp.c index f007565be3..9024802133 100644 --- a/opal/mca/btl/tcp/btl_tcp.c +++ b/opal/mca/btl/tcp/btl_tcp.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -38,6 +38,8 @@ #include "btl_tcp_proc.h" #include "btl_tcp_endpoint.h" +static int mca_btl_tcp_register_error_cb(struct mca_btl_base_module_t* btl, + mca_btl_base_module_error_cb_fn_t cbfunc); mca_btl_tcp_module_t mca_btl_tcp_module = { .super = { @@ -51,11 +53,20 @@ mca_btl_tcp_module_t mca_btl_tcp_module = { .btl_send = mca_btl_tcp_send, .btl_put = mca_btl_tcp_put, .btl_dump = mca_btl_base_dump, + .btl_register_error = mca_btl_tcp_register_error_cb, /* register error */ .btl_ft_event = mca_btl_tcp_ft_event }, .tcp_endpoints_mutex = OPAL_MUTEX_STATIC_INIT }; +static int mca_btl_tcp_register_error_cb(struct mca_btl_base_module_t* btl, + mca_btl_base_module_error_cb_fn_t cbfunc) +{ + mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*)btl; + tcp_btl->tcp_error_cb = cbfunc; + return OPAL_SUCCESS; +} + /** * */ diff --git a/opal/mca/btl/tcp/btl_tcp_endpoint.c b/opal/mca/btl/tcp/btl_tcp_endpoint.c index f8df420ff8..6a4221de8c 100644 --- a/opal/mca/btl/tcp/btl_tcp_endpoint.c +++ b/opal/mca/btl/tcp/btl_tcp_endpoint.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2016 The University of Tennessee and The University + * Copyright (c) 2004-2017 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -385,6 +385,7 @@ mca_btl_tcp_endpoint_send_blocking(mca_btl_base_endpoint_t* btl_endpoint, { int ret = mca_btl_tcp_send_blocking(btl_endpoint->endpoint_sd, data, size); if (ret < 0) { + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; mca_btl_tcp_endpoint_close(btl_endpoint); } return ret; @@ -534,8 +535,9 @@ void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t* btl_endpoint) btl_endpoint->endpoint_sd = -1; /** * If we keep failing to connect to the peer let the caller know about - * this situation by triggering all the pending fragments callback and - * reporting the error. + * this situation by triggering the callback on all pending fragments and + * reporting the error. The upper layer has then the opportunity to + * re-route or re-schedule the fragments. */ if( MCA_BTL_TCP_FAILED == btl_endpoint->endpoint_state ) { mca_btl_tcp_frag_t* frag = btl_endpoint->endpoint_send_frag; @@ -543,11 +545,20 @@ void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t* btl_endpoint) frag = (mca_btl_tcp_frag_t*)opal_list_remove_first(&btl_endpoint->endpoint_frags); while(NULL != frag) { frag->base.des_cbfunc(&frag->btl->super, frag->endpoint, &frag->base, OPAL_ERR_UNREACH); - + if( frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP ) { + MCA_BTL_TCP_FRAG_RETURN(frag); + } frag = (mca_btl_tcp_frag_t*)opal_list_remove_first(&btl_endpoint->endpoint_frags); } + btl_endpoint->endpoint_send_frag = NULL; + /* Let's report the error upstream */ + if(NULL != btl_endpoint->endpoint_btl->tcp_error_cb) { + btl_endpoint->endpoint_btl->tcp_error_cb((mca_btl_base_module_t*)btl_endpoint->endpoint_btl, 0, + btl_endpoint->endpoint_proc->proc_opal, "Socket closed"); + } + } else { + btl_endpoint->endpoint_state = MCA_BTL_TCP_CLOSED; } - btl_endpoint->endpoint_state = MCA_BTL_TCP_CLOSED; } /* @@ -604,7 +615,6 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en 
opal_show_help("help-mpi-btl-tcp.txt", "client handshake fail", true, opal_process_info.nodename, getpid(), "did not receive entire connect ACK from peer"); - return OPAL_ERR_BAD_PARAM; } if (0 != strncmp(hs_msg.magic_id, mca_btl_tcp_magic_id_string, len)) { @@ -624,6 +634,7 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en if (0 != opal_compare_proc(btl_proc->proc_opal->proc_name, guid)) { BTL_ERROR(("received unexpected process identifier %s", OPAL_NAME_PRINT(guid))); + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; mca_btl_tcp_endpoint_close(btl_endpoint); return OPAL_ERR_UNREACH; } @@ -825,6 +836,7 @@ static int mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_en opal_net_get_hostname((struct sockaddr*) &endpoint_addr), ((struct sockaddr_in*) &endpoint_addr)->sin_port, strerror(opal_socket_errno), opal_socket_errno)); + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; mca_btl_tcp_endpoint_close(btl_endpoint); return OPAL_ERROR; } @@ -841,6 +853,7 @@ static int mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_en getpid(), msg, strerror(opal_socket_errno), opal_socket_errno); free(msg); + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; mca_btl_tcp_endpoint_close(btl_endpoint); return OPAL_ERROR; } @@ -912,12 +925,15 @@ static void mca_btl_tcp_endpoint_recv_handler(int sd, short flags, void* user) OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock); MCA_BTL_TCP_ENDPOINT_DUMP(10, btl_endpoint, true, "connected"); } - else if (OPAL_ERR_BAD_PARAM == rc) { + else if (OPAL_ERR_BAD_PARAM == rc + || OPAL_ERROR == rc) { /* If we get a BAD_PARAM, it means that it probably wasn't an OMPI process on the other end of the socket (e.g., - the magic string ID failed). So we can probably just - close the socket and ignore this connection. */ - CLOSE_THE_SOCKET(sd); + the magic string ID failed). recv_connect_ack already cleaned + up the socket. 
*/ + /* If we get OPAL_ERROR, the other end closed the connection + * because it has initiated a symmetrical connection on its end. + * recv_connect_ack already cleaned up the socket. */ } else { /* Otherwise, it probably *was* an OMPI peer process on @@ -1056,6 +1072,8 @@ static void mca_btl_tcp_endpoint_send_handler(int sd, short flags, void* user) opal_event_del(&btl_endpoint->endpoint_send_event); } break; + case MCA_BTL_TCP_FAILED: + break; default: BTL_ERROR(("invalid connection state (%d)", btl_endpoint->endpoint_state)); MCA_BTL_TCP_ENDPOINT_DUMP(1, btl_endpoint, true, "event_del(send) [endpoint_send_handler:error]"); diff --git a/opal/mca/btl/tcp/btl_tcp_frag.c b/opal/mca/btl/tcp/btl_tcp_frag.c index 56775067c9..7a536ced09 100644 --- a/opal/mca/btl/tcp/btl_tcp_frag.c +++ b/opal/mca/btl/tcp/btl_tcp_frag.c @@ -212,7 +212,8 @@ bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd) cnt = readv(sd, frag->iov_ptr, num_vecs); if( 0 < cnt ) goto advance_iov_position; if( cnt == 0 ) { - btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; + if(MCA_BTL_TCP_CONNECTED == btl_endpoint->endpoint_state) + btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED; mca_btl_tcp_endpoint_close(btl_endpoint); return false; }