Merge pull request #7315 from abouteiller/export/tcp_errors_v2
Handle error cases in TCP BTL (v2)
Commit 10f6a77640
@@ -3,7 +3,7 @@
  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
  *                         University Research and Technology
  *                         Corporation.  All rights reserved.
- * Copyright (c) 2004-2018 The University of Tennessee and The University
+ * Copyright (c) 2004-2020 The University of Tennessee and The University
  *                         of Tennessee Research Foundation.  All rights
  *                         reserved.
  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -817,6 +817,14 @@ void mca_pml_ob1_error_handler(
         return;
     }
 #endif /* OPAL_CUDA_SUPPORT */
+    /* Some BTLs report unreachable errors during normal MPI_Finalize
+     * termination. Let's simply ignore such errors, since MPI is not
+     * supposed to be operational anymore at that point.
+     */
+    if(ompi_mpi_state >= OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
+        return;
+    }
+
     ompi_rte_abort(-1, btlinfo);
 }
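For context: with the guard added above, ob1's BTL error handler stops escalating late transport errors into ompi_rte_abort() once finalize has progressed past the destruction of MPI_COMM_SELF. The sketch below is a minimal, hypothetical callback following the same pattern; the four-argument signature mirrors the tcp_error_cb invocation later in this diff, the name example_btl_error_cb is illustrative, and the include paths are indicative only.

```c
/* A minimal, hypothetical BTL error callback following the pattern above.
 * Include paths are indicative of where these symbols live in the tree. */
#include "opal/mca/btl/btl.h"          /* mca_btl_base_module_t */
#include "ompi/runtime/mpiruntime.h"   /* ompi_mpi_state, OMPI_MPI_STATE_* */
#include "ompi/runtime/ompi_rte.h"     /* ompi_rte_abort() */

static void example_btl_error_cb(struct mca_btl_base_module_t *btl,
                                 int32_t flags,
                                 struct opal_proc_t *errproc,
                                 char *btlinfo)
{
    (void)btl; (void)flags; (void)errproc;

    /* Late errors are expected: peers tear their TCP connections down as
     * they finalize, so do not escalate them into a job abort. */
    if (ompi_mpi_state >= OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
        return;
    }
    /* Anything earlier is treated as a fatal transport failure. */
    ompi_rte_abort(-1, btlinfo);
}
```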
@@ -3,7 +3,7 @@
  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
  *                         University Research and Technology
  *                         Corporation.  All rights reserved.
- * Copyright (c) 2004-2014 The University of Tennessee and The University
+ * Copyright (c) 2004-2017 The University of Tennessee and The University
  *                         of Tennessee Research Foundation.  All rights
  *                         reserved.
  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -38,6 +38,8 @@
 #include "btl_tcp_proc.h"
 #include "btl_tcp_endpoint.h"
 
+static int mca_btl_tcp_register_error_cb(struct mca_btl_base_module_t* btl,
+                                         mca_btl_base_module_error_cb_fn_t cbfunc);
 
 mca_btl_tcp_module_t mca_btl_tcp_module = {
     .super = {
@@ -51,11 +53,20 @@ mca_btl_tcp_module_t mca_btl_tcp_module = {
         .btl_send = mca_btl_tcp_send,
         .btl_put = mca_btl_tcp_put,
         .btl_dump = mca_btl_base_dump,
+        .btl_register_error = mca_btl_tcp_register_error_cb, /* register error */
         .btl_ft_event = mca_btl_tcp_ft_event
     },
     .tcp_endpoints_mutex = OPAL_MUTEX_STATIC_INIT
 };
 
+static int mca_btl_tcp_register_error_cb(struct mca_btl_base_module_t* btl,
+                                         mca_btl_base_module_error_cb_fn_t cbfunc)
+{
+    mca_btl_tcp_module_t* tcp_btl = (mca_btl_tcp_module_t*)btl;
+    tcp_btl->tcp_error_cb = cbfunc;
+    return OPAL_SUCCESS;
+}
+
 /**
  *
  */
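The new btl_register_error entry point is the hook an upper layer uses to ask the TCP BTL for failure notifications. A minimal sketch of how a caller might attach a callback through the generic module interface is shown below; attach_error_cb is a hypothetical helper, and the NULL check reflects that error-callback support is optional per BTL.

```c
#include "opal/constants.h"     /* OPAL_SUCCESS, OPAL_ERR_NOT_SUPPORTED */
#include "opal/mca/btl/btl.h"   /* mca_btl_base_module_t, callback typedef */

/* Hypothetical helper: attach an error callback to one BTL module.  Error
 * reporting is optional per BTL, so btl_register_error may be NULL. */
static int attach_error_cb(mca_btl_base_module_t *btl,
                           mca_btl_base_module_error_cb_fn_t cb)
{
    if (NULL == btl->btl_register_error) {
        return OPAL_ERR_NOT_SUPPORTED;
    }
    return btl->btl_register_error(btl, cb);
}
```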
@@ -2,7 +2,7 @@
  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
  *                         University Research and Technology
  *                         Corporation.  All rights reserved.
- * Copyright (c) 2004-2016 The University of Tennessee and The University
+ * Copyright (c) 2004-2020 The University of Tennessee and The University
  *                         of Tennessee Research Foundation.  All rights
  *                         reserved.
  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -388,6 +388,7 @@ mca_btl_tcp_endpoint_send_blocking(mca_btl_base_endpoint_t* btl_endpoint,
 {
     int ret = mca_btl_tcp_send_blocking(btl_endpoint->endpoint_sd, data, size);
     if (ret < 0) {
+        btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
         mca_btl_tcp_endpoint_close(btl_endpoint);
     }
     return ret;
@@ -534,12 +535,26 @@ void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t* btl_endpoint)
     btl_endpoint->endpoint_cache_length = 0;
 #endif /* MCA_BTL_TCP_ENDPOINT_CACHE */
 
+    /* Send a message before closing to differentiate between failures and
+     * a clean disconnect during finalize. */
+    if( MCA_BTL_TCP_CONNECTED == btl_endpoint->endpoint_state ) {
+        mca_btl_tcp_hdr_t fin_msg = {
+            .base.tag = 0,
+            .type = MCA_BTL_TCP_HDR_TYPE_FIN,
+            .count = 0,
+            .size = 0,
+        };
+        mca_btl_tcp_endpoint_send_blocking(btl_endpoint,
+                                           &fin_msg, sizeof(fin_msg));
+    }
+
     CLOSE_THE_SOCKET(btl_endpoint->endpoint_sd);
     btl_endpoint->endpoint_sd = -1;
     /**
      * If we keep failing to connect to the peer let the caller know about
-     * this situation by triggering all the pending fragments callback and
-     * reporting the error.
+     * this situation by triggering the callback on all pending fragments and
+     * reporting the error. The upper layer then has the opportunity to
+     * re-route or re-schedule the fragments.
      */
     if( MCA_BTL_TCP_FAILED == btl_endpoint->endpoint_state ) {
         mca_btl_tcp_frag_t* frag = btl_endpoint->endpoint_send_frag;
@@ -547,11 +562,20 @@ void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t* btl_endpoint)
             frag = (mca_btl_tcp_frag_t*)opal_list_remove_first(&btl_endpoint->endpoint_frags);
         while(NULL != frag) {
             frag->base.des_cbfunc(&frag->btl->super, frag->endpoint, &frag->base, OPAL_ERR_UNREACH);
+            if( frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP ) {
+                MCA_BTL_TCP_FRAG_RETURN(frag);
+            }
             frag = (mca_btl_tcp_frag_t*)opal_list_remove_first(&btl_endpoint->endpoint_frags);
         }
         btl_endpoint->endpoint_send_frag = NULL;
+        /* Let's report the error upstream */
+        if(NULL != btl_endpoint->endpoint_btl->tcp_error_cb) {
+            btl_endpoint->endpoint_btl->tcp_error_cb((mca_btl_base_module_t*)btl_endpoint->endpoint_btl, 0,
+                                                     btl_endpoint->endpoint_proc->proc_opal, "Socket closed");
+        }
+    } else {
+        btl_endpoint->endpoint_state = MCA_BTL_TCP_CLOSED;
     }
-    btl_endpoint->endpoint_state = MCA_BTL_TCP_CLOSED;
 }
 
 /*
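The block above drains the endpoint's queued fragments, completes each with OPAL_ERR_UNREACH, returns the fragments the BTL owns, and finally reports the failure upstream once. A generic, self-contained sketch of that drain-and-report shape, using illustrative types rather than the OMPI ones:

```c
#include <stdlib.h>   /* free */
#include <stddef.h>   /* NULL */

typedef void (*completion_fn)(void *frag, int status);

struct pending_frag {
    struct pending_frag *next;
    completion_fn        complete;    /* upper-layer completion callback */
    int                  btl_owned;   /* nonzero: the transport frees it */
};

/* Complete every queued fragment with an error status, free the ones the
 * transport owns, then report the failure upstream exactly once. */
static void fail_pending(struct pending_frag **queue, int error_status,
                         void (*report_error)(const char *msg))
{
    while (NULL != *queue) {
        struct pending_frag *frag = *queue;
        *queue = frag->next;                 /* unlink first */
        frag->complete(frag, error_status);  /* notify the upper layer */
        if (frag->btl_owned) {
            free(frag);                      /* mirrors MCA_BTL_TCP_FRAG_RETURN */
        }
    }
    if (NULL != report_error) {
        report_error("connection failed");   /* single upstream report */
    }
}
```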
@@ -608,7 +632,6 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_endpoint)
         opal_show_help("help-mpi-btl-tcp.txt", "client handshake fail",
                        true, opal_process_info.nodename,
                        getpid(), "did not receive entire connect ACK from peer");
-
         return OPAL_ERR_BAD_PARAM;
     }
     if (0 != strncmp(hs_msg.magic_id, mca_btl_tcp_magic_id_string, len)) {
@@ -628,6 +651,7 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_endpoint)
     if (0 != opal_compare_proc(btl_proc->proc_opal->proc_name, guid)) {
         BTL_ERROR(("received unexpected process identifier %s",
                    OPAL_NAME_PRINT(guid)));
+        btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
         mca_btl_tcp_endpoint_close(btl_endpoint);
         return OPAL_ERR_UNREACH;
     }
@@ -834,6 +858,7 @@ static int mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_endpoint)
                    opal_net_get_hostname((struct sockaddr*) &endpoint_addr),
                    ((struct sockaddr_in*) &endpoint_addr)->sin_port,
                    strerror(opal_socket_errno), opal_socket_errno));
+        btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
         mca_btl_tcp_endpoint_close(btl_endpoint);
         return OPAL_ERROR;
     }
@@ -850,6 +875,7 @@ static int mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_endpoint)
                        getpid(), msg,
                        strerror(so_error), so_error);
         free(msg);
+        btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
         mca_btl_tcp_endpoint_close(btl_endpoint);
         return OPAL_ERROR;
     }
@@ -921,12 +947,15 @@ static void mca_btl_tcp_endpoint_recv_handler(int sd, short flags, void* user)
             OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
             MCA_BTL_TCP_ENDPOINT_DUMP(10, btl_endpoint, true, "connected");
         }
-        else if (OPAL_ERR_BAD_PARAM == rc) {
+        else if (OPAL_ERR_BAD_PARAM == rc
+                 || OPAL_ERROR == rc) {
             /* If we get a BAD_PARAM, it means that it probably wasn't
                an OMPI process on the other end of the socket (e.g.,
-               the magic string ID failed).  So we can probably just
-               close the socket and ignore this connection. */
-            CLOSE_THE_SOCKET(sd);
+               the magic string ID failed). recv_connect_ack already cleaned
+               up the socket. */
+            /* If we get OPAL_ERROR, the other end closed the connection
+             * because it has initiated a symmetrical connection on its end.
+             * recv_connect_ack already cleaned up the socket. */
         }
         else {
             /* Otherwise, it probably *was* an OMPI peer process on
@@ -1065,6 +1094,10 @@ static void mca_btl_tcp_endpoint_send_handler(int sd, short flags, void* user)
             opal_event_del(&btl_endpoint->endpoint_send_event);
         }
         break;
+    case MCA_BTL_TCP_FAILED:
+        MCA_BTL_TCP_ENDPOINT_DUMP(1, btl_endpoint, true, "event_del(send) [endpoint_send_handler:error]");
+        opal_event_del(&btl_endpoint->endpoint_send_event);
+        break;
     default:
         BTL_ERROR(("invalid connection state (%d)", btl_endpoint->endpoint_state));
         MCA_BTL_TCP_ENDPOINT_DUMP(1, btl_endpoint, true, "event_del(send) [endpoint_send_handler:error]");
@@ -3,7 +3,7 @@
  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
  *                         University Research and Technology
  *                         Corporation.  All rights reserved.
- * Copyright (c) 2004-2016 The University of Tennessee and The University
+ * Copyright (c) 2004-2020 The University of Tennessee and The University
  *                         of Tennessee Research Foundation.  All rights
  *                         reserved.
  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -212,7 +212,8 @@ bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd)
         cnt = readv(sd, frag->iov_ptr, num_vecs);
         if( 0 < cnt ) goto advance_iov_position;
         if( cnt == 0 ) {
-            btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
+            if(MCA_BTL_TCP_CONNECTED == btl_endpoint->endpoint_state)
+                btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
             mca_btl_tcp_endpoint_close(btl_endpoint);
             return false;
         }
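The change above relies on standard socket semantics: readv() returning 0 means the peer performed an orderly shutdown, which is only a failure if the endpoint was still expected to be connected. A small standalone sketch of that distinction with plain POSIX calls and illustrative names:

```c
#include <errno.h>
#include <stdbool.h>
#include <sys/types.h>  /* ssize_t */
#include <sys/uio.h>    /* readv, struct iovec */

/* Returns true while the connection remains usable.  'expected_close' would
 * be set once a FIN-style goodbye message has been received from the peer. */
static bool drain_socket(int sd, struct iovec *iov, int iovcnt,
                         bool expected_close, bool *fatal_error)
{
    ssize_t cnt = readv(sd, iov, iovcnt);
    *fatal_error = false;
    if (cnt > 0) {
        return true;                      /* got data, keep reading */
    }
    if (0 == cnt) {
        /* Orderly shutdown by the peer: an error only if unannounced. */
        *fatal_error = !expected_close;
        return false;
    }
    if (EINTR == errno || EAGAIN == errno || EWOULDBLOCK == errno) {
        return true;                      /* transient, retry later */
    }
    *fatal_error = true;                  /* genuine transport error */
    return false;
}
```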
@@ -272,6 +273,10 @@ bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd)
     if(frag->iov_cnt == 0) {
         if (btl_endpoint->endpoint_nbo && frag->iov_idx == 1) MCA_BTL_TCP_HDR_NTOH(frag->hdr);
         switch(frag->hdr.type) {
+        case MCA_BTL_TCP_HDR_TYPE_FIN:
+            frag->endpoint->endpoint_state = MCA_BTL_TCP_CLOSED;
+            mca_btl_tcp_endpoint_close(frag->endpoint);
+            break;
         case MCA_BTL_TCP_HDR_TYPE_SEND:
             if(frag->iov_idx == 1 && frag->hdr.size) {
                 frag->segments[0].seg_addr.pval = frag+1;
@@ -2,7 +2,7 @@
  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
  *                         University Research and Technology
  *                         Corporation.  All rights reserved.
- * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * Copyright (c) 2004-2020 The University of Tennessee and The University
  *                         of Tennessee Research Foundation.  All rights
  *                         reserved.
  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -33,6 +33,18 @@ BEGIN_C_DECLS
 #define MCA_BTL_TCP_HDR_TYPE_SEND 1
 #define MCA_BTL_TCP_HDR_TYPE_PUT 2
 #define MCA_BTL_TCP_HDR_TYPE_GET 3
+#define MCA_BTL_TCP_HDR_TYPE_FIN 4
+/* The MCA_BTL_TCP_HDR_TYPE_FIN is a special kind of message sent during normal
+ * connection closing. Before the endpoint closes the socket, it performs a
+ * 1-way handshake by sending a FIN message on the socket. This lets the other
+ * end of the connection discriminate between the case in which the peer closed
+ * intentionally (e.g., during MPI_FINALIZE) and the case in which it closed
+ * unintentionally (e.g., as the result of a transmission or process failure).
+ * The process initiating the close sends the FIN message but does not wait
+ * for a 2-way handshake and closes the socket immediately. Thus, the recipient
+ * of a FIN message can simply close the socket and mark the endpoint as closed
+ * without error, and without answering with a FIN message itself.
+ */
 
 struct mca_btl_tcp_hdr_t {
     mca_btl_base_header_t base;
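To make the comment above concrete, here is a minimal, self-contained sketch of a one-way close handshake over a plain TCP socket. The one-byte wire format and the function names are illustrative only; the actual BTL sends a full mca_btl_tcp_hdr_t with type MCA_BTL_TCP_HDR_TYPE_FIN.

```c
#include <stdbool.h>
#include <stdint.h>
#include <unistd.h>     /* write, close */

enum { MSG_TYPE_DATA = 1, MSG_TYPE_FIN = 4 };   /* illustrative tags */

/* Sender side: announce the intentional close, then close immediately.
 * No acknowledgement is expected (1-way handshake). */
static void close_cleanly(int sd)
{
    uint8_t fin = MSG_TYPE_FIN;
    (void)write(sd, &fin, sizeof(fin));   /* best effort; ignore errors */
    close(sd);
}

/* Receiver side: a FIN means the subsequent EOF is expected, so the
 * endpoint can be marked closed without reporting an error upstream. */
static bool handle_message(int sd, uint8_t type, bool *peer_said_goodbye)
{
    if (MSG_TYPE_FIN == type) {
        *peer_said_goodbye = true;
        close(sd);        /* no FIN is sent back */
        return false;     /* stop reading from this endpoint */
    }
    return true;          /* normal message, keep processing */
}
```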