Make the TCP BTL really fail-safe. It now triggers the error callback on
all pending fragments when the destination goes down. This allows the PML to recalibrate its behavior: either find an alternate route or just give up. This commit was SVN r27881.
Этот коммит содержится в:
родитель
6bdd2e063a
Коммит
42753b4690
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2008 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -295,6 +295,7 @@ static int mca_btl_tcp_endpoint_send_blocking(mca_btl_base_endpoint_t* btl_endpo
|
||||
if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
|
||||
BTL_ERROR(("send() failed: %s (%d)",
|
||||
strerror(opal_socket_errno), opal_socket_errno));
|
||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
|
||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||
return -1;
|
||||
}
|
||||
@ -359,6 +360,7 @@ bool mca_btl_tcp_endpoint_accept(mca_btl_base_endpoint_t* btl_endpoint,
|
||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||
btl_endpoint->endpoint_sd = sd;
|
||||
if(mca_btl_tcp_endpoint_send_connect_ack(btl_endpoint) != OMPI_SUCCESS) {
|
||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
|
||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
|
||||
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
|
||||
@ -389,7 +391,6 @@ void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t* btl_endpoint)
|
||||
{
|
||||
if(btl_endpoint->endpoint_sd < 0)
|
||||
return;
|
||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_CLOSED;
|
||||
btl_endpoint->endpoint_retries++;
|
||||
opal_event_del(&btl_endpoint->endpoint_recv_event);
|
||||
opal_event_del(&btl_endpoint->endpoint_send_event);
|
||||
@ -401,6 +402,24 @@ void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t* btl_endpoint)
|
||||
btl_endpoint->endpoint_cache_pos = NULL;
|
||||
btl_endpoint->endpoint_cache_length = 0;
|
||||
#endif /* MCA_BTL_TCP_ENDPOINT_CACHE */
|
||||
/**
|
||||
* If we keep failing to connect to the peer let the caller know about
|
||||
* this situation by triggering all the pending fragments callback and
|
||||
* reporting the error.
|
||||
*/
|
||||
if( MCA_BTL_TCP_FAILED == btl_endpoint->endpoint_state ) {
|
||||
mca_btl_tcp_frag_t* frag = btl_endpoint->endpoint_send_frag;
|
||||
if( NULL == frag )
|
||||
frag = (mca_btl_tcp_frag_t*)opal_list_remove_first(&btl_endpoint->endpoint_frags);
|
||||
while(NULL != frag) {
|
||||
frag->base.des_cbfunc(&frag->btl->super, frag->endpoint, &frag->base, OMPI_ERR_UNREACH);
|
||||
|
||||
frag = (mca_btl_tcp_frag_t*)opal_list_remove_first(&btl_endpoint->endpoint_frags);
|
||||
}
|
||||
} else {
|
||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_CLOSED;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
@ -444,6 +463,7 @@ static int mca_btl_tcp_endpoint_recv_blocking(mca_btl_base_endpoint_t* btl_endpo
|
||||
|
||||
/* remote closed connection */
|
||||
if(retval == 0) {
|
||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
|
||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||
return -1;
|
||||
}
|
||||
@ -453,6 +473,7 @@ static int mca_btl_tcp_endpoint_recv_blocking(mca_btl_base_endpoint_t* btl_endpo
|
||||
if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
|
||||
BTL_ERROR(("recv(%d) failed: %s (%d)",
|
||||
btl_endpoint->endpoint_sd, strerror(opal_socket_errno), opal_socket_errno));
|
||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
|
||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||
return -1;
|
||||
}
|
||||
@ -589,6 +610,7 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpo
|
||||
address,
|
||||
btl_endpoint->endpoint_addr->addr_port, strerror(opal_socket_errno) ) );
|
||||
}
|
||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
|
||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||
btl_endpoint->endpoint_retries++;
|
||||
return OMPI_ERR_UNREACH;
|
||||
@ -599,6 +621,7 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpo
|
||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECT_ACK;
|
||||
opal_event_add(&btl_endpoint->endpoint_recv_event, 0);
|
||||
} else {
|
||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
|
||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||
}
|
||||
return rc;
|
||||
@ -645,6 +668,7 @@ static void mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_e
|
||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECT_ACK;
|
||||
opal_event_add(&btl_endpoint->endpoint_recv_event, 0);
|
||||
} else {
|
||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
|
||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||
}
|
||||
}
|
||||
@ -747,6 +771,7 @@ static void mca_btl_tcp_endpoint_recv_handler(int sd, short flags, void* user)
|
||||
default:
|
||||
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
|
||||
BTL_ERROR(("invalid socket state(%d)", btl_endpoint->endpoint_state));
|
||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
|
||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||
break;
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -115,12 +115,14 @@ bool mca_btl_tcp_frag_send(mca_btl_tcp_frag_t* frag, int sd)
|
||||
BTL_ERROR(("mca_btl_tcp_frag_send: writev error (%p, %lu)\n\t%s(%lu)\n",
|
||||
frag->iov_ptr[0].iov_base, (unsigned long) frag->iov_ptr[0].iov_len,
|
||||
strerror(opal_socket_errno), (unsigned long) frag->iov_cnt));
|
||||
frag->endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
|
||||
mca_btl_tcp_endpoint_close(frag->endpoint);
|
||||
return false;
|
||||
default:
|
||||
BTL_ERROR(("mca_btl_tcp_frag_send: writev failed: %s (%d)",
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno));
|
||||
frag->endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
|
||||
mca_btl_tcp_endpoint_close(frag->endpoint);
|
||||
return false;
|
||||
}
|
||||
@ -195,6 +197,7 @@ bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd)
|
||||
cnt = readv(sd, frag->iov_ptr, num_vecs);
|
||||
if( 0 < cnt ) goto advance_iov_position;
|
||||
if( cnt == 0 ) {
|
||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
|
||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||
return false;
|
||||
}
|
||||
@ -207,12 +210,14 @@ bool mca_btl_tcp_frag_recv(mca_btl_tcp_frag_t* frag, int sd)
|
||||
BTL_ERROR(("mca_btl_tcp_frag_recv: readv error (%p, %lu)\n\t%s(%lu)\n",
|
||||
frag->iov_ptr[0].iov_base, (unsigned long) frag->iov_ptr[0].iov_len,
|
||||
strerror(opal_socket_errno), (unsigned long) frag->iov_cnt));
|
||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
|
||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||
return false;
|
||||
default:
|
||||
BTL_ERROR(("mca_btl_tcp_frag_recv: readv failed: %s (%d)",
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno));
|
||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
|
||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||
return false;
|
||||
}
|
||||
|
@ -680,7 +680,7 @@ int mca_btl_tcp_proc_remove(mca_btl_tcp_proc_t* btl_proc, mca_btl_base_endpoint_
|
||||
{
|
||||
size_t i;
|
||||
OPAL_THREAD_LOCK(&btl_proc->proc_lock);
|
||||
for(i=0; i<btl_proc->proc_endpoint_count; i++) {
|
||||
for(i = 0; i < btl_proc->proc_endpoint_count; i++) {
|
||||
if(btl_proc->proc_endpoints[i] == btl_endpoint) {
|
||||
memmove(btl_proc->proc_endpoints+i, btl_proc->proc_endpoints+i+1,
|
||||
(btl_proc->proc_endpoint_count-i-1)*sizeof(mca_btl_base_endpoint_t*));
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user