Merge pull request #3955 from mohanasudhan/master
Btl tcp: Improved diagnostic output and failure mode
Этот коммит содержится в:
Коммит
c667719a3f
@ -31,12 +31,14 @@
|
|||||||
#include "opal/mca/mpool/base/base.h"
|
#include "opal/mca/mpool/base/base.h"
|
||||||
#include "opal/mca/mpool/mpool.h"
|
#include "opal/mca/mpool/mpool.h"
|
||||||
#include "opal/mca/btl/base/btl_base_error.h"
|
#include "opal/mca/btl/base/btl_base_error.h"
|
||||||
|
#include "opal/opal_socket_errno.h"
|
||||||
|
|
||||||
#include "btl_tcp.h"
|
#include "btl_tcp.h"
|
||||||
#include "btl_tcp_frag.h"
|
#include "btl_tcp_frag.h"
|
||||||
#include "btl_tcp_proc.h"
|
#include "btl_tcp_proc.h"
|
||||||
#include "btl_tcp_endpoint.h"
|
#include "btl_tcp_endpoint.h"
|
||||||
|
|
||||||
|
|
||||||
mca_btl_tcp_module_t mca_btl_tcp_module = {
|
mca_btl_tcp_module_t mca_btl_tcp_module = {
|
||||||
.super = {
|
.super = {
|
||||||
.btl_component = &mca_btl_tcp_component.super,
|
.btl_component = &mca_btl_tcp_component.super,
|
||||||
@ -531,3 +533,68 @@ void mca_btl_tcp_dump(struct mca_btl_base_module_t* base_btl,
|
|||||||
}
|
}
|
||||||
#endif /* OPAL_ENABLE_DEBUG && WANT_PEER_DUMP */
|
#endif /* OPAL_ENABLE_DEBUG && WANT_PEER_DUMP */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A blocking recv for both blocking and non-blocking socket.
|
||||||
|
* Used to receive the small amount of connection information
|
||||||
|
* that identifies the endpoints
|
||||||
|
*
|
||||||
|
* when the socket is blocking (the caller introduces timeout)
|
||||||
|
* which happens during initial handshake otherwise socket is
|
||||||
|
* non-blocking most of the time.
|
||||||
|
*/
|
||||||
|
|
||||||
|
int mca_btl_tcp_recv_blocking(int sd, void* data, size_t size)
|
||||||
|
{
|
||||||
|
unsigned char* ptr = (unsigned char*)data;
|
||||||
|
size_t cnt = 0;
|
||||||
|
while (cnt < size) {
|
||||||
|
int retval = recv(sd, ((char *)ptr) + cnt, size - cnt, 0);
|
||||||
|
/* remote closed connection */
|
||||||
|
if (0 == retval) {
|
||||||
|
BTL_ERROR(("remote peer unexpectedly closed connection while I was waiting for blocking message"));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* socket is non-blocking so handle errors */
|
||||||
|
if (retval < 0) {
|
||||||
|
if (opal_socket_errno != EINTR &&
|
||||||
|
opal_socket_errno != EAGAIN &&
|
||||||
|
opal_socket_errno != EWOULDBLOCK) {
|
||||||
|
BTL_ERROR(("recv(%d) failed: %s (%d)", sd, strerror(opal_socket_errno), opal_socket_errno));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
cnt += retval;
|
||||||
|
}
|
||||||
|
return cnt;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A blocking send on a non-blocking socket. Used to send the small
|
||||||
|
* amount of connection information that identifies the endpoints
|
||||||
|
* endpoint.
|
||||||
|
*/
|
||||||
|
|
||||||
|
int mca_btl_tcp_send_blocking(int sd, const void* data, size_t size)
|
||||||
|
{
|
||||||
|
unsigned char* ptr = (unsigned char*)data;
|
||||||
|
size_t cnt = 0;
|
||||||
|
while(cnt < size) {
|
||||||
|
int retval = send(sd, ((const char *)ptr) + cnt, size - cnt, 0);
|
||||||
|
if (retval < 0) {
|
||||||
|
if (opal_socket_errno != EINTR &&
|
||||||
|
opal_socket_errno != EAGAIN &&
|
||||||
|
opal_socket_errno != EWOULDBLOCK) {
|
||||||
|
BTL_ERROR(("send() failed: %s (%d)", strerror(opal_socket_errno), opal_socket_errno));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
cnt += retval;
|
||||||
|
}
|
||||||
|
return cnt;
|
||||||
|
}
|
||||||
|
@ -351,5 +351,23 @@ mca_btl_tcp_dump(struct mca_btl_base_module_t* btl,
|
|||||||
*/
|
*/
|
||||||
int mca_btl_tcp_ft_event(int state);
|
int mca_btl_tcp_ft_event(int state);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A blocking send on a non-blocking socket. Used to send the small
|
||||||
|
* amount of connection information that identifies the endpoints
|
||||||
|
* endpoint.
|
||||||
|
*/
|
||||||
|
int mca_btl_tcp_send_blocking(int sd, const void* data, size_t size);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A blocking recv for both blocking and non-blocking socket.
|
||||||
|
* Used to receive the small amount of connection information
|
||||||
|
* that identifies the endpoints
|
||||||
|
*
|
||||||
|
* when the socket is blocking (the caller introduces timeout)
|
||||||
|
* which happens during initial handshake otherwise socket is
|
||||||
|
* non-blocking most of the time.
|
||||||
|
*/
|
||||||
|
int mca_btl_tcp_recv_blocking(int sd, void* data, size_t size);
|
||||||
|
|
||||||
END_C_DECLS
|
END_C_DECLS
|
||||||
#endif
|
#endif
|
||||||
|
@ -54,6 +54,9 @@
|
|||||||
#endif
|
#endif
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
|
#ifdef HAVE_SYS_TIME_H
|
||||||
|
#include <sys/time.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "opal/mca/event/event.h"
|
#include "opal/mca/event/event.h"
|
||||||
#include "opal/util/ethtool.h"
|
#include "opal/util/ethtool.h"
|
||||||
@ -729,7 +732,9 @@ static int mca_btl_tcp_component_create_instances(void)
|
|||||||
char* if_name = *argv;
|
char* if_name = *argv;
|
||||||
int if_index = opal_ifnametokindex(if_name);
|
int if_index = opal_ifnametokindex(if_name);
|
||||||
if(if_index < 0) {
|
if(if_index < 0) {
|
||||||
BTL_ERROR(("invalid interface \"%s\"", if_name));
|
opal_show_help("help-mpi-btl-tcp.txt", "invalid if_inexclude",
|
||||||
|
true, "include", opal_process_info.nodename,
|
||||||
|
if_name, "Unknown interface name");
|
||||||
ret = OPAL_ERR_NOT_FOUND;
|
ret = OPAL_ERR_NOT_FOUND;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
@ -856,13 +861,18 @@ static int mca_btl_tcp_component_create_listen(uint16_t af_family)
|
|||||||
freeaddrinfo (res);
|
freeaddrinfo (res);
|
||||||
|
|
||||||
#ifdef IPV6_V6ONLY
|
#ifdef IPV6_V6ONLY
|
||||||
/* in case of AF_INET6, disable v4-mapped addresses */
|
/* If this OS supports the "IPV6_V6ONLY" constant, then set it
|
||||||
|
on this socket. It specifies that *only* V6 connections
|
||||||
|
should be accepted on this socket (vs. allowing incoming
|
||||||
|
both V4 and V6 connections -- which is actually defined
|
||||||
|
behavior for V6<-->V4 interop stuff). See
|
||||||
|
https://github.com/open-mpi/ompi/commit/95d7e08a6617530d57b6700c57738b351bfccbf8 for some
|
||||||
|
more details. */
|
||||||
if (AF_INET6 == af_family) {
|
if (AF_INET6 == af_family) {
|
||||||
int flg = 1;
|
int flg = 1;
|
||||||
if (setsockopt (sd, IPPROTO_IPV6, IPV6_V6ONLY,
|
if (setsockopt (sd, IPPROTO_IPV6, IPV6_V6ONLY,
|
||||||
(char *) &flg, sizeof (flg)) < 0) {
|
(char *) &flg, sizeof (flg)) < 0) {
|
||||||
opal_output(0,
|
BTL_ERROR((0, "mca_btl_tcp_create_listen: unable to set IPV6_V6ONLY\n"));
|
||||||
"mca_btl_tcp_create_listen: unable to disable v4-mapped addresses\n");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif /* IPV6_V6ONLY */
|
#endif /* IPV6_V6ONLY */
|
||||||
@ -904,6 +914,10 @@ static int mca_btl_tcp_component_create_listen(uint16_t af_family)
|
|||||||
#else
|
#else
|
||||||
((struct sockaddr_in*) &inaddr)->sin_port = htons(port + index);
|
((struct sockaddr_in*) &inaddr)->sin_port = htons(port + index);
|
||||||
#endif /* OPAL_ENABLE_IPV6 */
|
#endif /* OPAL_ENABLE_IPV6 */
|
||||||
|
opal_output_verbose(30, opal_btl_base_framework.framework_output,
|
||||||
|
"btl:tcp: Attempting to bind to %s port %d",
|
||||||
|
(AF_INET == af_family) ? "AF_INET" : "AF_INET6",
|
||||||
|
port + index);
|
||||||
if(bind(sd, (struct sockaddr*)&inaddr, addrlen) < 0) {
|
if(bind(sd, (struct sockaddr*)&inaddr, addrlen) < 0) {
|
||||||
if( (EADDRINUSE == opal_socket_errno) || (EADDRNOTAVAIL == opal_socket_errno) ) {
|
if( (EADDRINUSE == opal_socket_errno) || (EADDRNOTAVAIL == opal_socket_errno) ) {
|
||||||
continue;
|
continue;
|
||||||
@ -913,6 +927,10 @@ static int mca_btl_tcp_component_create_listen(uint16_t af_family)
|
|||||||
CLOSE_THE_SOCKET(sd);
|
CLOSE_THE_SOCKET(sd);
|
||||||
return OPAL_ERROR;
|
return OPAL_ERROR;
|
||||||
}
|
}
|
||||||
|
opal_output_verbose(30, opal_btl_base_framework.framework_output,
|
||||||
|
"btl:tcp: Successfully bound to %s port %d",
|
||||||
|
(AF_INET == af_family) ? "AF_INET" : "AF_INET6",
|
||||||
|
port + index);
|
||||||
goto socket_binded;
|
goto socket_binded;
|
||||||
}
|
}
|
||||||
#if OPAL_ENABLE_IPV6
|
#if OPAL_ENABLE_IPV6
|
||||||
@ -943,11 +961,19 @@ static int mca_btl_tcp_component_create_listen(uint16_t af_family)
|
|||||||
if (AF_INET6 == af_family) {
|
if (AF_INET6 == af_family) {
|
||||||
mca_btl_tcp_component.tcp6_listen_port = ((struct sockaddr_in6*) &inaddr)->sin6_port;
|
mca_btl_tcp_component.tcp6_listen_port = ((struct sockaddr_in6*) &inaddr)->sin6_port;
|
||||||
mca_btl_tcp_component.tcp6_listen_sd = sd;
|
mca_btl_tcp_component.tcp6_listen_sd = sd;
|
||||||
|
opal_output_verbose(30, opal_btl_base_framework.framework_output,
|
||||||
|
"btl:tcp: my listening v6 socket port is %d",
|
||||||
|
ntohs(mca_btl_tcp_component.tcp6_listen_port));
|
||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
|
char str[16];
|
||||||
mca_btl_tcp_component.tcp_listen_port = ((struct sockaddr_in*) &inaddr)->sin_port;
|
mca_btl_tcp_component.tcp_listen_port = ((struct sockaddr_in*) &inaddr)->sin_port;
|
||||||
mca_btl_tcp_component.tcp_listen_sd = sd;
|
mca_btl_tcp_component.tcp_listen_sd = sd;
|
||||||
|
inet_ntop(AF_INET, &(((struct sockaddr_in*)&inaddr)->sin_addr), str, sizeof(str));
|
||||||
|
opal_output_verbose(30, opal_btl_base_framework.framework_output,
|
||||||
|
"btl:tcp: my listening v4 socket is %s:%u",
|
||||||
|
str, ntohs(mca_btl_tcp_component.tcp_listen_port));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* setup listen backlog to maximum allowed by kernel */
|
/* setup listen backlog to maximum allowed by kernel */
|
||||||
@ -960,15 +986,20 @@ static int mca_btl_tcp_component_create_listen(uint16_t af_family)
|
|||||||
|
|
||||||
/* set socket up to be non-blocking, otherwise accept could block */
|
/* set socket up to be non-blocking, otherwise accept could block */
|
||||||
if((flags = fcntl(sd, F_GETFL, 0)) < 0) {
|
if((flags = fcntl(sd, F_GETFL, 0)) < 0) {
|
||||||
BTL_ERROR(("fcntl(F_GETFL) failed: %s (%d)",
|
opal_show_help("help-mpi-btl-tcp.txt", "socket flag fail",
|
||||||
strerror(opal_socket_errno), opal_socket_errno));
|
true, opal_process_info.nodename,
|
||||||
|
getpid(), "fcntl(sd, F_GETFL, 0)",
|
||||||
|
strerror(opal_socket_errno), opal_socket_errno);
|
||||||
CLOSE_THE_SOCKET(sd);
|
CLOSE_THE_SOCKET(sd);
|
||||||
return OPAL_ERROR;
|
return OPAL_ERROR;
|
||||||
} else {
|
} else {
|
||||||
flags |= O_NONBLOCK;
|
flags |= O_NONBLOCK;
|
||||||
if(fcntl(sd, F_SETFL, flags) < 0) {
|
if(fcntl(sd, F_SETFL, flags) < 0) {
|
||||||
BTL_ERROR(("fcntl(F_SETFL) failed: %s (%d)",
|
opal_show_help("help-mpi-btl-tcp.txt", "socket flag fail",
|
||||||
strerror(opal_socket_errno), opal_socket_errno));
|
true, opal_process_info.nodename,
|
||||||
|
getpid(),
|
||||||
|
"fcntl(sd, F_SETFL, flags & O_NONBLOCK)",
|
||||||
|
strerror(opal_socket_errno), opal_socket_errno);
|
||||||
CLOSE_THE_SOCKET(sd);
|
CLOSE_THE_SOCKET(sd);
|
||||||
return OPAL_ERROR;
|
return OPAL_ERROR;
|
||||||
}
|
}
|
||||||
@ -1081,6 +1112,7 @@ static int mca_btl_tcp_component_exchange(void)
|
|||||||
size_t current_addr = 0;
|
size_t current_addr = 0;
|
||||||
|
|
||||||
if(mca_btl_tcp_component.tcp_num_btls != 0) {
|
if(mca_btl_tcp_component.tcp_num_btls != 0) {
|
||||||
|
char ifn[32];
|
||||||
mca_btl_tcp_addr_t *addrs = (mca_btl_tcp_addr_t *)malloc(size);
|
mca_btl_tcp_addr_t *addrs = (mca_btl_tcp_addr_t *)malloc(size);
|
||||||
memset(addrs, 0, size);
|
memset(addrs, 0, size);
|
||||||
|
|
||||||
@ -1098,6 +1130,9 @@ static int mca_btl_tcp_component_exchange(void)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
opal_ifindextoname(index, ifn, sizeof(ifn));
|
||||||
|
opal_output_verbose(30, opal_btl_base_framework.framework_output,
|
||||||
|
"btl:tcp: examining interface %s", ifn);
|
||||||
if (OPAL_SUCCESS !=
|
if (OPAL_SUCCESS !=
|
||||||
opal_ifindextoaddr(index, (struct sockaddr*) &my_ss,
|
opal_ifindextoaddr(index, (struct sockaddr*) &my_ss,
|
||||||
sizeof (my_ss))) {
|
sizeof (my_ss))) {
|
||||||
@ -1121,6 +1156,8 @@ static int mca_btl_tcp_component_exchange(void)
|
|||||||
addrs[current_addr].addr_ifkindex =
|
addrs[current_addr].addr_ifkindex =
|
||||||
opal_ifindextokindex (index);
|
opal_ifindextokindex (index);
|
||||||
current_addr++;
|
current_addr++;
|
||||||
|
opal_output_verbose(30, opal_btl_base_framework.framework_output,
|
||||||
|
"btl:tcp: using ipv4 interface %s", ifn);
|
||||||
} else
|
} else
|
||||||
#endif
|
#endif
|
||||||
if ((AF_INET == my_ss.ss_family) &&
|
if ((AF_INET == my_ss.ss_family) &&
|
||||||
@ -1136,6 +1173,8 @@ static int mca_btl_tcp_component_exchange(void)
|
|||||||
addrs[current_addr].addr_ifkindex =
|
addrs[current_addr].addr_ifkindex =
|
||||||
opal_ifindextokindex (index);
|
opal_ifindextokindex (index);
|
||||||
current_addr++;
|
current_addr++;
|
||||||
|
opal_output_verbose(30, opal_btl_base_framework.framework_output,
|
||||||
|
"btl:tcp: using ipv6 interface %s", ifn);
|
||||||
}
|
}
|
||||||
} /* end of for opal_ifbegin() */
|
} /* end of for opal_ifbegin() */
|
||||||
} /* end of for tcp_num_btls */
|
} /* end of for tcp_num_btls */
|
||||||
@ -1299,45 +1338,159 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user)
|
|||||||
struct sockaddr_storage addr;
|
struct sockaddr_storage addr;
|
||||||
opal_socklen_t addr_len = sizeof(addr);
|
opal_socklen_t addr_len = sizeof(addr);
|
||||||
mca_btl_tcp_proc_t* btl_proc;
|
mca_btl_tcp_proc_t* btl_proc;
|
||||||
int retval;
|
bool sockopt = true;
|
||||||
|
size_t retval, len = strlen(mca_btl_tcp_magic_id_string);
|
||||||
|
mca_btl_tcp_endpoint_hs_msg_t hs_msg;
|
||||||
|
struct timeval save, tv;
|
||||||
|
socklen_t rcvtimeo_save_len = sizeof(save);
|
||||||
|
char str[128];
|
||||||
|
|
||||||
|
/* Note, Socket will be in blocking mode during intial handshake
|
||||||
|
* hence setting SO_RCVTIMEO to say 2 seconds here to avoid chance
|
||||||
|
* of spin forever if it tries to connect to old version
|
||||||
|
* as older version will send just process id which won't be long enough
|
||||||
|
* to cross sizeof(str) length + process id struct
|
||||||
|
* or when the remote side isn't OMPI where it's not going to send
|
||||||
|
* any data*/
|
||||||
|
|
||||||
|
/* get the current timeout value so we can reset to it */
|
||||||
|
if (0 != getsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, (void*)&save, &rcvtimeo_save_len)) {
|
||||||
|
if (ENOPROTOOPT == errno) {
|
||||||
|
sockopt = false;
|
||||||
|
} else {
|
||||||
|
opal_output_verbose(20, opal_btl_base_framework.framework_output,
|
||||||
|
"Cannot get current recv timeout value of the socket"
|
||||||
|
"Local_host:%s PID:%d",
|
||||||
|
opal_process_info.nodename, getpid());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
tv.tv_sec = 2;
|
||||||
|
tv.tv_usec = 0;
|
||||||
|
if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) {
|
||||||
|
opal_output_verbose(20, opal_btl_base_framework.framework_output,
|
||||||
|
"Cannot set new recv timeout value of the socket"
|
||||||
|
"Local_host:%s PID:%d",
|
||||||
|
opal_process_info.nodename, getpid());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
OBJ_RELEASE(event);
|
OBJ_RELEASE(event);
|
||||||
|
retval = mca_btl_tcp_recv_blocking(sd, (void *)&hs_msg, sizeof(hs_msg));
|
||||||
|
guid = hs_msg.guid;
|
||||||
|
|
||||||
/* recv the process identifier */
|
/* An unknown process attempted to connect to Open MPI via TCP.
|
||||||
retval = recv(sd, (char *)&guid, sizeof(guid), 0);
|
* Open MPI uses a "magic" string to trivially verify that the connecting
|
||||||
if(retval != sizeof(guid)) {
|
* process is a fellow Open MPI process. An MPI process accepted a TCP
|
||||||
|
* connection but did not receive the correct magic string. This might
|
||||||
|
* indicate an Open MPI version mismatch between different MPI processes
|
||||||
|
* in the same job, or it may indicate that some other agent is
|
||||||
|
* mistakenly attempting to connect to Open MPI's TCP listening sockets.
|
||||||
|
|
||||||
|
* This attempted connection will be ignored; your MPI job may or may not
|
||||||
|
* continue properly.
|
||||||
|
*/
|
||||||
|
if (sizeof(hs_msg) != retval) {
|
||||||
|
opal_output_verbose(20, opal_btl_base_framework.framework_output,
|
||||||
|
"server did not receive entire connect ACK "
|
||||||
|
"Local_host:%s PID:%d Role:%s String_received:%s Test_fail:%s",
|
||||||
|
opal_process_info.nodename,
|
||||||
|
getpid(), "server",
|
||||||
|
(retval > 0) ? hs_msg.magic_id : "<nothing>",
|
||||||
|
"handshake message length");
|
||||||
|
|
||||||
|
/* The other side probably isn't OMPI, so just hang up */
|
||||||
|
CLOSE_THE_SOCKET(sd);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (0 != strncmp(hs_msg.magic_id, mca_btl_tcp_magic_id_string, len)) {
|
||||||
|
opal_output_verbose(20, opal_btl_base_framework.framework_output,
|
||||||
|
"server did not receive right magic string. "
|
||||||
|
"Local_host:%s PID:%d Role:%s String_received:%s Test_fail:%s",
|
||||||
|
opal_process_info.nodename,
|
||||||
|
getpid(), "server", hs_msg.magic_id,
|
||||||
|
"string value");
|
||||||
|
/* The other side probably isn't OMPI, so just hang up */
|
||||||
CLOSE_THE_SOCKET(sd);
|
CLOSE_THE_SOCKET(sd);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (sockopt) {
|
||||||
|
/* reset RECVTIMEO option to its original state */
|
||||||
|
if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &save, sizeof(save))) {
|
||||||
|
opal_output_verbose(20, opal_btl_base_framework.framework_output,
|
||||||
|
"Cannot reset recv timeout value"
|
||||||
|
"Local_host:%s PID:%d",
|
||||||
|
opal_process_info.nodename, getpid());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
OPAL_PROCESS_NAME_NTOH(guid);
|
OPAL_PROCESS_NAME_NTOH(guid);
|
||||||
|
|
||||||
/* now set socket up to be non-blocking */
|
/* now set socket up to be non-blocking */
|
||||||
if((flags = fcntl(sd, F_GETFL, 0)) < 0) {
|
if((flags = fcntl(sd, F_GETFL, 0)) < 0) {
|
||||||
BTL_ERROR(("fcntl(F_GETFL) failed: %s (%d)",
|
opal_show_help("help-mpi-btl-tcp.txt", "socket flag fail",
|
||||||
strerror(opal_socket_errno), opal_socket_errno));
|
true, opal_process_info.nodename,
|
||||||
|
getpid(), "fcntl(sd, F_GETFL, 0)",
|
||||||
|
strerror(opal_socket_errno), opal_socket_errno);
|
||||||
|
CLOSE_THE_SOCKET(sd);
|
||||||
} else {
|
} else {
|
||||||
flags |= O_NONBLOCK;
|
flags |= O_NONBLOCK;
|
||||||
if(fcntl(sd, F_SETFL, flags) < 0) {
|
if(fcntl(sd, F_SETFL, flags) < 0) {
|
||||||
BTL_ERROR(("fcntl(F_SETFL) failed: %s (%d)",
|
opal_show_help("help-mpi-btl-tcp.txt", "socket flag fail",
|
||||||
strerror(opal_socket_errno), opal_socket_errno));
|
true, opal_process_info.nodename,
|
||||||
|
getpid(),
|
||||||
|
"fcntl(sd, F_SETFL, flags & O_NONBLOCK)",
|
||||||
|
strerror(opal_socket_errno), opal_socket_errno);
|
||||||
|
CLOSE_THE_SOCKET(sd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* lookup the corresponding process */
|
/* lookup the corresponding process */
|
||||||
btl_proc = mca_btl_tcp_proc_lookup(&guid);
|
btl_proc = mca_btl_tcp_proc_lookup(&guid);
|
||||||
if(NULL == btl_proc) {
|
if(NULL == btl_proc) {
|
||||||
|
opal_show_help("help-mpi-btl-tcp.txt",
|
||||||
|
"server accept cannot find guid",
|
||||||
|
true, opal_process_info.nodename,
|
||||||
|
getpid());
|
||||||
CLOSE_THE_SOCKET(sd);
|
CLOSE_THE_SOCKET(sd);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* lookup peer address */
|
/* lookup peer address */
|
||||||
if(getpeername(sd, (struct sockaddr*)&addr, &addr_len) != 0) {
|
if(getpeername(sd, (struct sockaddr*)&addr, &addr_len) != 0) {
|
||||||
BTL_ERROR(("getpeername() failed: %s (%d)",
|
opal_show_help("help-mpi-btl-tcp.txt",
|
||||||
strerror(opal_socket_errno), opal_socket_errno));
|
"server getpeername failed",
|
||||||
|
true, opal_process_info.nodename,
|
||||||
|
getpid(),
|
||||||
|
strerror(opal_socket_errno), opal_socket_errno);
|
||||||
CLOSE_THE_SOCKET(sd);
|
CLOSE_THE_SOCKET(sd);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* are there any existing peer instances willing to accept this connection */
|
/* are there any existing peer instances willing to accept this connection */
|
||||||
(void)mca_btl_tcp_proc_accept(btl_proc, (struct sockaddr*)&addr, sd);
|
(void)mca_btl_tcp_proc_accept(btl_proc, (struct sockaddr*)&addr, sd);
|
||||||
|
|
||||||
|
switch (addr.ss_family) {
|
||||||
|
case AF_INET:
|
||||||
|
inet_ntop(AF_INET, &(((struct sockaddr_in*) &addr)->sin_addr), str, sizeof(str));
|
||||||
|
break;
|
||||||
|
|
||||||
|
#if OPAL_ENABLE_IPV6
|
||||||
|
case AF_INET6:
|
||||||
|
inet_ntop(AF_INET6, &(((struct sockaddr_in6*) &addr)->sin6_addr), str, sizeof(str));
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
default:
|
||||||
|
BTL_ERROR(("Got an accept() from an unknown address family -- this shouldn't happen"));
|
||||||
|
CLOSE_THE_SOCKET(sd);
|
||||||
|
return;
|
||||||
|
|
||||||
|
}
|
||||||
|
opal_output_verbose(10, opal_btl_base_framework.framework_output,
|
||||||
|
"btl:tcp: now connected to %s, process %s", str,
|
||||||
|
OPAL_NAME_PRINT(btl_proc->proc_opal->proc_name));
|
||||||
}
|
}
|
||||||
|
@ -62,6 +62,11 @@
|
|||||||
#include "btl_tcp_frag.h"
|
#include "btl_tcp_frag.h"
|
||||||
#include "btl_tcp_addr.h"
|
#include "btl_tcp_addr.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Magic ID string send during connect/accept handshake
|
||||||
|
*/
|
||||||
|
|
||||||
|
const char mca_btl_tcp_magic_id_string[MCA_BTL_TCP_MAGIC_STRING_LENGTH] = "OPAL-TCP-BTL";
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize state of the endpoint instance.
|
* Initialize state of the endpoint instance.
|
||||||
@ -371,48 +376,42 @@ int mca_btl_tcp_endpoint_send(mca_btl_base_endpoint_t* btl_endpoint, mca_btl_tcp
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* A blocking send on a non-blocking socket. Used to send the small amount of connection
|
* A blocking send on a non-blocking socket. Used to send the small
|
||||||
* information that identifies the endpoints endpoint.
|
* amount of connection information that identifies the endpoints endpoint.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
mca_btl_tcp_endpoint_send_blocking(mca_btl_base_endpoint_t* btl_endpoint,
|
mca_btl_tcp_endpoint_send_blocking(mca_btl_base_endpoint_t* btl_endpoint,
|
||||||
void* data, size_t size)
|
const void* data, size_t size)
|
||||||
{
|
{
|
||||||
unsigned char* ptr = (unsigned char*)data;
|
int ret = mca_btl_tcp_send_blocking(btl_endpoint->endpoint_sd, data, size);
|
||||||
size_t cnt = 0;
|
if (ret < 0) {
|
||||||
while(cnt < size) {
|
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||||
int retval = send(btl_endpoint->endpoint_sd, (const char *)ptr+cnt, size-cnt, 0);
|
|
||||||
if(retval < 0) {
|
|
||||||
if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
|
|
||||||
BTL_ERROR(("send(%d, %p, %lu/%lu) failed: %s (%d)",
|
|
||||||
btl_endpoint->endpoint_sd, data, cnt, size,
|
|
||||||
strerror(opal_socket_errno), opal_socket_errno));
|
|
||||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
|
|
||||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
cnt += retval;
|
|
||||||
}
|
}
|
||||||
return cnt;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Send the globally unique identifier for this process to a endpoint on
|
* Send the globally unique identifier for this process to a endpoint on
|
||||||
* a newly connected socket.
|
* a newly connected socket.
|
||||||
*/
|
*/
|
||||||
|
static int
|
||||||
static int mca_btl_tcp_endpoint_send_connect_ack(mca_btl_base_endpoint_t* btl_endpoint)
|
mca_btl_tcp_endpoint_send_connect_ack(mca_btl_base_endpoint_t* btl_endpoint)
|
||||||
{
|
{
|
||||||
/* send process identifier to remote endpoint */
|
|
||||||
opal_process_name_t guid = opal_proc_local_get()->proc_name;
|
opal_process_name_t guid = opal_proc_local_get()->proc_name;
|
||||||
|
|
||||||
OPAL_PROCESS_NAME_HTON(guid);
|
OPAL_PROCESS_NAME_HTON(guid);
|
||||||
if(mca_btl_tcp_endpoint_send_blocking(btl_endpoint, &guid, sizeof(guid)) !=
|
|
||||||
sizeof(guid)) {
|
mca_btl_tcp_endpoint_hs_msg_t hs_msg;
|
||||||
return OPAL_ERR_UNREACH;
|
strcpy(hs_msg.magic_id, mca_btl_tcp_magic_id_string);
|
||||||
|
hs_msg.guid = guid;
|
||||||
|
|
||||||
|
if(sizeof(hs_msg) !=
|
||||||
|
mca_btl_tcp_endpoint_send_blocking(btl_endpoint,
|
||||||
|
&hs_msg, sizeof(hs_msg))) {
|
||||||
|
opal_show_help("help-mpi-btl-tcp.txt", "client handshake fail",
|
||||||
|
true, opal_process_info.nodename,
|
||||||
|
sizeof(hs_msg),
|
||||||
|
"connect ACK failed to send magic-id and guid");
|
||||||
|
return OPAL_ERR_UNREACH;
|
||||||
}
|
}
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -573,31 +572,11 @@ static void mca_btl_tcp_endpoint_connected(mca_btl_base_endpoint_t* btl_endpoint
|
|||||||
*/
|
*/
|
||||||
static int mca_btl_tcp_endpoint_recv_blocking(mca_btl_base_endpoint_t* btl_endpoint, void* data, size_t size)
|
static int mca_btl_tcp_endpoint_recv_blocking(mca_btl_base_endpoint_t* btl_endpoint, void* data, size_t size)
|
||||||
{
|
{
|
||||||
unsigned char* ptr = (unsigned char*)data;
|
int ret = mca_btl_tcp_recv_blocking(btl_endpoint->endpoint_sd, data, size);
|
||||||
size_t cnt = 0;
|
if (ret <= 0) {
|
||||||
while(cnt < size) {
|
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||||
int retval = recv(btl_endpoint->endpoint_sd, (char *)ptr+cnt, size-cnt, 0);
|
|
||||||
|
|
||||||
/* remote closed connection */
|
|
||||||
if(retval == 0) {
|
|
||||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
|
||||||
return cnt;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* socket is non-blocking so handle errors */
|
|
||||||
if(retval < 0) {
|
|
||||||
if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
|
|
||||||
BTL_ERROR(("recv(%d, %lu/%lu) failed: %s (%d)",
|
|
||||||
btl_endpoint->endpoint_sd, cnt, size, strerror(opal_socket_errno), opal_socket_errno));
|
|
||||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
|
|
||||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
cnt += retval;
|
|
||||||
}
|
}
|
||||||
return cnt;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -605,17 +584,22 @@ static int mca_btl_tcp_endpoint_recv_blocking(mca_btl_base_endpoint_t* btl_endpo
|
|||||||
* Receive the endpoints globally unique process identification from a newly
|
* Receive the endpoints globally unique process identification from a newly
|
||||||
* connected socket and verify the expected response. If so, move the
|
* connected socket and verify the expected response. If so, move the
|
||||||
* socket to a connected state.
|
* socket to a connected state.
|
||||||
|
*
|
||||||
|
* NOTE: The return codes from this function are checked in
|
||||||
|
* mca_btl_tcp_endpoint_recv_handler(). Don't change them here
|
||||||
|
* without also changing the handling in _recv_handler()!
|
||||||
*/
|
*/
|
||||||
static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_endpoint)
|
static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_endpoint)
|
||||||
{
|
{
|
||||||
size_t s;
|
size_t retval, len = strlen(mca_btl_tcp_magic_id_string);;
|
||||||
opal_process_name_t guid;
|
|
||||||
mca_btl_tcp_proc_t* btl_proc = btl_endpoint->endpoint_proc;
|
mca_btl_tcp_proc_t* btl_proc = btl_endpoint->endpoint_proc;
|
||||||
|
opal_process_name_t guid;
|
||||||
|
|
||||||
s = mca_btl_tcp_endpoint_recv_blocking(btl_endpoint,
|
mca_btl_tcp_endpoint_hs_msg_t hs_msg;
|
||||||
&guid, sizeof(opal_process_name_t));
|
retval = mca_btl_tcp_endpoint_recv_blocking(btl_endpoint, &hs_msg, sizeof(hs_msg));
|
||||||
if (s != sizeof(opal_process_name_t)) {
|
|
||||||
if (0 == s) {
|
if (sizeof(hs_msg) != retval) {
|
||||||
|
if (0 == retval) {
|
||||||
/* If we get zero bytes, the peer closed the socket. This
|
/* If we get zero bytes, the peer closed the socket. This
|
||||||
can happen when the two peers started the connection
|
can happen when the two peers started the connection
|
||||||
protocol simultaneously. Just report the problem
|
protocol simultaneously. Just report the problem
|
||||||
@ -624,10 +608,19 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en
|
|||||||
}
|
}
|
||||||
opal_show_help("help-mpi-btl-tcp.txt", "client handshake fail",
|
opal_show_help("help-mpi-btl-tcp.txt", "client handshake fail",
|
||||||
true, opal_process_info.nodename,
|
true, opal_process_info.nodename,
|
||||||
getpid(),
|
getpid(), "did not receive entire connect ACK from peer");
|
||||||
"did not receive entire connect ACK from peer");
|
|
||||||
return OPAL_ERR_UNREACH;
|
return OPAL_ERR_BAD_PARAM;
|
||||||
}
|
}
|
||||||
|
if (0 != strncmp(hs_msg.magic_id, mca_btl_tcp_magic_id_string, len)) {
|
||||||
|
opal_show_help("help-mpi-btl-tcp.txt", "server did not receive magic string",
|
||||||
|
true, opal_process_info.nodename,
|
||||||
|
getpid(), "client", hs_msg.magic_id,
|
||||||
|
"string value");
|
||||||
|
return OPAL_ERR_BAD_PARAM;
|
||||||
|
}
|
||||||
|
|
||||||
|
guid = hs_msg.guid;
|
||||||
OPAL_PROCESS_NAME_NTOH(guid);
|
OPAL_PROCESS_NAME_NTOH(guid);
|
||||||
/* compare this to the expected values */
|
/* compare this to the expected values */
|
||||||
/* TODO: this deserve a little bit more thinking as we are not supposed
|
/* TODO: this deserve a little bit more thinking as we are not supposed
|
||||||
@ -708,25 +701,39 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpo
|
|||||||
|
|
||||||
/* setup the socket as non-blocking */
|
/* setup the socket as non-blocking */
|
||||||
if((flags = fcntl(btl_endpoint->endpoint_sd, F_GETFL, 0)) < 0) {
|
if((flags = fcntl(btl_endpoint->endpoint_sd, F_GETFL, 0)) < 0) {
|
||||||
BTL_ERROR(("fcntl(F_GETFL) failed: %s (%d)",
|
opal_show_help("help-mpi-btl-tcp.txt", "socket flag fail",
|
||||||
strerror(opal_socket_errno), opal_socket_errno));
|
true, opal_process_info.nodename,
|
||||||
|
getpid(), "fcntl(sd, F_GETFL, 0)",
|
||||||
|
strerror(opal_socket_errno), opal_socket_errno);
|
||||||
|
/* Upper layer will handler the error */
|
||||||
|
return OPAL_ERR_UNREACH;
|
||||||
} else {
|
} else {
|
||||||
flags |= O_NONBLOCK;
|
flags |= O_NONBLOCK;
|
||||||
if(fcntl(btl_endpoint->endpoint_sd, F_SETFL, flags) < 0)
|
if(fcntl(btl_endpoint->endpoint_sd, F_SETFL, flags) < 0) {
|
||||||
BTL_ERROR(("fcntl(F_SETFL) failed: %s (%d)",
|
opal_show_help("help-mpi-btl-tcp.txt", "socket flag fail",
|
||||||
strerror(opal_socket_errno), opal_socket_errno));
|
true, opal_process_info.nodename,
|
||||||
|
getpid(),
|
||||||
|
"fcntl(sd, F_SETFL, flags & O_NONBLOCK)",
|
||||||
|
strerror(opal_socket_errno), opal_socket_errno);
|
||||||
|
/* Upper layer will handler the error */
|
||||||
|
return OPAL_ERR_UNREACH;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* start the connect - will likely fail with EINPROGRESS */
|
/* start the connect - will likely fail with EINPROGRESS */
|
||||||
mca_btl_tcp_proc_tosocks(btl_endpoint->endpoint_addr, &endpoint_addr);
|
mca_btl_tcp_proc_tosocks(btl_endpoint->endpoint_addr, &endpoint_addr);
|
||||||
|
|
||||||
opal_output_verbose(20, opal_btl_base_framework.framework_output,
|
opal_output_verbose(10, opal_btl_base_framework.framework_output,
|
||||||
"btl: tcp: attempting to connect() to %s address %s on port %d",
|
"btl: tcp: attempting to connect() to %s address %s on port %d",
|
||||||
OPAL_NAME_PRINT(btl_endpoint->endpoint_proc->proc_opal->proc_name),
|
OPAL_NAME_PRINT(btl_endpoint->endpoint_proc->proc_opal->proc_name),
|
||||||
opal_net_get_hostname((struct sockaddr*) &endpoint_addr),
|
opal_net_get_hostname((struct sockaddr*) &endpoint_addr),
|
||||||
ntohs(btl_endpoint->endpoint_addr->addr_port));
|
ntohs(btl_endpoint->endpoint_addr->addr_port));
|
||||||
|
|
||||||
if(0 == connect(btl_endpoint->endpoint_sd, (struct sockaddr*)&endpoint_addr, addrlen)) {
|
if(0 == connect(btl_endpoint->endpoint_sd, (struct sockaddr*)&endpoint_addr, addrlen)) {
|
||||||
|
opal_output_verbose(10, opal_btl_base_framework.framework_output,
|
||||||
|
"btl:tcp: connect() to %s:%d completed",
|
||||||
|
opal_net_get_hostname((struct sockaddr*) &endpoint_addr),
|
||||||
|
ntohs(((struct sockaddr_in*) &endpoint_addr)->sin_port));
|
||||||
/* send our globally unique process identifier to the endpoint */
|
/* send our globally unique process identifier to the endpoint */
|
||||||
if((rc = mca_btl_tcp_endpoint_send_connect_ack(btl_endpoint)) == OPAL_SUCCESS) {
|
if((rc = mca_btl_tcp_endpoint_send_connect_ack(btl_endpoint)) == OPAL_SUCCESS) {
|
||||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECT_ACK;
|
btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECT_ACK;
|
||||||
@ -742,6 +749,8 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpo
|
|||||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECTING;
|
btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECTING;
|
||||||
MCA_BTL_TCP_ENDPOINT_DUMP(10, btl_endpoint, true, "event_add(send) [start_connect]");
|
MCA_BTL_TCP_ENDPOINT_DUMP(10, btl_endpoint, true, "event_add(send) [start_connect]");
|
||||||
MCA_BTL_TCP_ACTIVATE_EVENT(&btl_endpoint->endpoint_send_event, 0);
|
MCA_BTL_TCP_ACTIVATE_EVENT(&btl_endpoint->endpoint_send_event, 0);
|
||||||
|
opal_output_verbose(30, opal_btl_base_framework.framework_output,
|
||||||
|
"btl:tcp: would block, so allowing background progress");
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -765,7 +774,7 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpo
|
|||||||
* later. Otherwise, send this processes identifier to the endpoint on the
|
* later. Otherwise, send this processes identifier to the endpoint on the
|
||||||
* newly connected socket.
|
* newly connected socket.
|
||||||
*/
|
*/
|
||||||
static void mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_endpoint)
|
static int mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_endpoint)
|
||||||
{
|
{
|
||||||
int so_error = 0;
|
int so_error = 0;
|
||||||
opal_socklen_t so_length = sizeof(so_error);
|
opal_socklen_t so_length = sizeof(so_error);
|
||||||
@ -781,32 +790,49 @@ static void mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_e
|
|||||||
|
|
||||||
/* check connect completion status */
|
/* check connect completion status */
|
||||||
if(getsockopt(btl_endpoint->endpoint_sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) {
|
if(getsockopt(btl_endpoint->endpoint_sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) {
|
||||||
BTL_ERROR(("getsockopt() to %s failed: %s (%d)",
|
opal_show_help("help-mpi-btl-tcp.txt", "socket flag fail",
|
||||||
|
true, opal_process_info.nodename,
|
||||||
|
getpid(), "fcntl(sd, F_GETFL, 0)",
|
||||||
|
strerror(opal_socket_errno), opal_socket_errno);
|
||||||
|
BTL_ERROR(("getsockopt() to %s:%d failed: %s (%d)",
|
||||||
opal_net_get_hostname((struct sockaddr*) &endpoint_addr),
|
opal_net_get_hostname((struct sockaddr*) &endpoint_addr),
|
||||||
|
((struct sockaddr_in*) &endpoint_addr)->sin_port,
|
||||||
strerror(opal_socket_errno), opal_socket_errno));
|
strerror(opal_socket_errno), opal_socket_errno));
|
||||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||||
return;
|
return OPAL_ERROR;
|
||||||
}
|
}
|
||||||
if(so_error == EINPROGRESS || so_error == EWOULDBLOCK) {
|
if(so_error == EINPROGRESS || so_error == EWOULDBLOCK) {
|
||||||
return;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
if(so_error != 0) {
|
if(so_error != 0) {
|
||||||
BTL_ERROR(("connect() to %s failed: %s (%d)",
|
char *msg;
|
||||||
opal_net_get_hostname((struct sockaddr*) &endpoint_addr),
|
asprintf(&msg, "connect() to %s:%d failed",
|
||||||
strerror(so_error), so_error));
|
opal_net_get_hostname((struct sockaddr*) &endpoint_addr),
|
||||||
|
ntohs(((struct sockaddr_in*) &endpoint_addr)->sin_port));
|
||||||
|
opal_show_help("help-mpi-btl-tcp.txt", "client connect fail",
|
||||||
|
true, opal_process_info.nodename,
|
||||||
|
getpid(), msg,
|
||||||
|
strerror(opal_socket_errno), opal_socket_errno);
|
||||||
|
free(msg);
|
||||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||||
return;
|
return OPAL_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
opal_output_verbose(10, opal_btl_base_framework.framework_output,
|
||||||
|
"btl:tcp: connect() to %s:%d completed (complete_connect), sending connect ACK",
|
||||||
|
opal_net_get_hostname((struct sockaddr*) &endpoint_addr),
|
||||||
|
ntohs(((struct sockaddr_in*) &endpoint_addr)->sin_port));
|
||||||
|
|
||||||
if(mca_btl_tcp_endpoint_send_connect_ack(btl_endpoint) == OPAL_SUCCESS) {
|
if(mca_btl_tcp_endpoint_send_connect_ack(btl_endpoint) == OPAL_SUCCESS) {
|
||||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECT_ACK;
|
btl_endpoint->endpoint_state = MCA_BTL_TCP_CONNECT_ACK;
|
||||||
opal_event_add(&btl_endpoint->endpoint_recv_event, 0);
|
opal_event_add(&btl_endpoint->endpoint_recv_event, 0);
|
||||||
MCA_BTL_TCP_ENDPOINT_DUMP(10, btl_endpoint, false, "event_add(recv) [complete_connect]");
|
MCA_BTL_TCP_ENDPOINT_DUMP(10, btl_endpoint, false, "event_add(recv) [complete_connect]");
|
||||||
return;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
MCA_BTL_TCP_ENDPOINT_DUMP(1, btl_endpoint, false, " [complete_connect]");
|
MCA_BTL_TCP_ENDPOINT_DUMP(1, btl_endpoint, false, " [complete_connect]");
|
||||||
btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
|
btl_endpoint->endpoint_state = MCA_BTL_TCP_FAILED;
|
||||||
mca_btl_tcp_endpoint_close(btl_endpoint);
|
mca_btl_tcp_endpoint_close(btl_endpoint);
|
||||||
|
return OPAL_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -855,6 +881,26 @@ static void mca_btl_tcp_endpoint_recv_handler(int sd, short flags, void* user)
|
|||||||
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
|
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_send_lock);
|
||||||
MCA_BTL_TCP_ENDPOINT_DUMP(10, btl_endpoint, true, "connected");
|
MCA_BTL_TCP_ENDPOINT_DUMP(10, btl_endpoint, true, "connected");
|
||||||
}
|
}
|
||||||
|
else if (OPAL_ERR_BAD_PARAM == rc) {
|
||||||
|
/* If we get a BAD_PARAM, it means that it probably wasn't
|
||||||
|
an OMPI process on the other end of the socket (e.g.,
|
||||||
|
the magic string ID failed). So we can probably just
|
||||||
|
close the socket and ignore this connection. */
|
||||||
|
CLOSE_THE_SOCKET(sd);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* Otherwise, it probably *was* an OMPI peer process on
|
||||||
|
the other end, and something bad has probably
|
||||||
|
happened. */
|
||||||
|
mca_btl_tcp_module_t *m = btl_endpoint->endpoint_btl;
|
||||||
|
|
||||||
|
/* Fail up to the PML */
|
||||||
|
if (NULL != m->tcp_error_cb) {
|
||||||
|
m->tcp_error_cb((mca_btl_base_module_t*) m, MCA_BTL_ERROR_FLAGS_FATAL,
|
||||||
|
btl_endpoint->endpoint_proc->proc_opal,
|
||||||
|
"TCP ACK is neither SUCCESS nor ERR (something bad has probably happened)");
|
||||||
|
}
|
||||||
|
}
|
||||||
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
|
OPAL_THREAD_UNLOCK(&btl_endpoint->endpoint_recv_lock);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -26,7 +26,7 @@
|
|||||||
BEGIN_C_DECLS
|
BEGIN_C_DECLS
|
||||||
|
|
||||||
#define MCA_BTL_TCP_ENDPOINT_CACHE 1
|
#define MCA_BTL_TCP_ENDPOINT_CACHE 1
|
||||||
|
#define MCA_BTL_TCP_MAGIC_STRING_LENGTH 16
|
||||||
/**
|
/**
|
||||||
* State of TCP endpoint connection.
|
* State of TCP endpoint connection.
|
||||||
*/
|
*/
|
||||||
@ -75,6 +75,14 @@ typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
|
|||||||
typedef mca_btl_base_endpoint_t mca_btl_tcp_endpoint_t;
|
typedef mca_btl_base_endpoint_t mca_btl_tcp_endpoint_t;
|
||||||
OBJ_CLASS_DECLARATION(mca_btl_tcp_endpoint_t);
|
OBJ_CLASS_DECLARATION(mca_btl_tcp_endpoint_t);
|
||||||
|
|
||||||
|
/* Magic socket handshake string */
|
||||||
|
extern const char mca_btl_tcp_magic_id_string[MCA_BTL_TCP_MAGIC_STRING_LENGTH];
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
opal_process_name_t guid;
|
||||||
|
char magic_id[MCA_BTL_TCP_MAGIC_STRING_LENGTH];
|
||||||
|
} mca_btl_tcp_endpoint_hs_msg_t;
|
||||||
|
|
||||||
void mca_btl_tcp_set_socket_options(int sd);
|
void mca_btl_tcp_set_socket_options(int sd);
|
||||||
void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t*);
|
void mca_btl_tcp_endpoint_close(mca_btl_base_endpoint_t*);
|
||||||
int mca_btl_tcp_endpoint_send(mca_btl_base_endpoint_t*, struct mca_btl_tcp_frag_t*);
|
int mca_btl_tcp_endpoint_send(mca_btl_base_endpoint_t*, struct mca_btl_tcp_frag_t*);
|
||||||
|
@ -421,6 +421,7 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc,
|
|||||||
mca_btl_tcp_proc_data_t _proc_data, *proc_data=&_proc_data;
|
mca_btl_tcp_proc_data_t _proc_data, *proc_data=&_proc_data;
|
||||||
size_t max_peer_interfaces;
|
size_t max_peer_interfaces;
|
||||||
memset(proc_data, 0, sizeof(mca_btl_tcp_proc_data_t));
|
memset(proc_data, 0, sizeof(mca_btl_tcp_proc_data_t));
|
||||||
|
char str_local[128], str_remote[128];
|
||||||
|
|
||||||
if (NULL == (proc_hostname = opal_get_proc_hostname(btl_proc->proc_opal))) {
|
if (NULL == (proc_hostname = opal_get_proc_hostname(btl_proc->proc_opal))) {
|
||||||
return OPAL_ERR_UNREACH;
|
return OPAL_ERR_UNREACH;
|
||||||
@ -508,10 +509,7 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc,
|
|||||||
default:
|
default:
|
||||||
opal_output(0, "unknown address family for tcp: %d\n",
|
opal_output(0, "unknown address family for tcp: %d\n",
|
||||||
endpoint_addr_ss.ss_family);
|
endpoint_addr_ss.ss_family);
|
||||||
/*
|
return OPAL_ERR_UNREACH;
|
||||||
* return OPAL_UNREACH or some error, as this is not
|
|
||||||
* good
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -553,14 +551,26 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc,
|
|||||||
if(NULL != proc_data->local_interfaces[i]->ipv4_address &&
|
if(NULL != proc_data->local_interfaces[i]->ipv4_address &&
|
||||||
NULL != peer_interfaces[j]->ipv4_address) {
|
NULL != peer_interfaces[j]->ipv4_address) {
|
||||||
|
|
||||||
|
/* Convert the IPv4 addresses into nicely-printable strings for verbose debugging output */
|
||||||
|
inet_ntop(AF_INET, &(((struct sockaddr_in*) proc_data->local_interfaces[i]->ipv4_address))->sin_addr,
|
||||||
|
str_local, sizeof(str_local));
|
||||||
|
inet_ntop(AF_INET, &(((struct sockaddr_in*) peer_interfaces[j]->ipv4_address))->sin_addr,
|
||||||
|
str_remote, sizeof(str_remote));
|
||||||
|
|
||||||
if(opal_net_addr_isipv4public((struct sockaddr*) local_interface->ipv4_address) &&
|
if(opal_net_addr_isipv4public((struct sockaddr*) local_interface->ipv4_address) &&
|
||||||
opal_net_addr_isipv4public((struct sockaddr*) peer_interfaces[j]->ipv4_address)) {
|
opal_net_addr_isipv4public((struct sockaddr*) peer_interfaces[j]->ipv4_address)) {
|
||||||
if(opal_net_samenetwork((struct sockaddr*) local_interface->ipv4_address,
|
if(opal_net_samenetwork((struct sockaddr*) local_interface->ipv4_address,
|
||||||
(struct sockaddr*) peer_interfaces[j]->ipv4_address,
|
(struct sockaddr*) peer_interfaces[j]->ipv4_address,
|
||||||
local_interface->ipv4_netmask)) {
|
local_interface->ipv4_netmask)) {
|
||||||
proc_data->weights[i][j] = CQ_PUBLIC_SAME_NETWORK;
|
proc_data->weights[i][j] = CQ_PUBLIC_SAME_NETWORK;
|
||||||
|
opal_output_verbose(20, opal_btl_base_framework.framework_output,
|
||||||
|
"btl:tcp: path from %s to %s: IPV4 PUBLIC SAME NETWORK",
|
||||||
|
str_local, str_remote);
|
||||||
} else {
|
} else {
|
||||||
proc_data->weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK;
|
proc_data->weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK;
|
||||||
|
opal_output_verbose(20, opal_btl_base_framework.framework_output,
|
||||||
|
"btl:tcp: path from %s to %s: IPV4 PUBLIC DIFFERENT NETWORK",
|
||||||
|
str_local, str_remote);
|
||||||
}
|
}
|
||||||
proc_data->best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr;
|
proc_data->best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr;
|
||||||
continue;
|
continue;
|
||||||
@ -569,8 +579,14 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc,
|
|||||||
(struct sockaddr*) peer_interfaces[j]->ipv4_address,
|
(struct sockaddr*) peer_interfaces[j]->ipv4_address,
|
||||||
local_interface->ipv4_netmask)) {
|
local_interface->ipv4_netmask)) {
|
||||||
proc_data->weights[i][j] = CQ_PRIVATE_SAME_NETWORK;
|
proc_data->weights[i][j] = CQ_PRIVATE_SAME_NETWORK;
|
||||||
|
opal_output_verbose(20, opal_btl_base_framework.framework_output,
|
||||||
|
"btl:tcp: path from %s to %s: IPV4 PRIVATE SAME NETWORK",
|
||||||
|
str_local, str_remote);
|
||||||
} else {
|
} else {
|
||||||
proc_data->weights[i][j] = CQ_PRIVATE_DIFFERENT_NETWORK;
|
proc_data->weights[i][j] = CQ_PRIVATE_DIFFERENT_NETWORK;
|
||||||
|
opal_output_verbose(20, opal_btl_base_framework.framework_output,
|
||||||
|
"btl:tcp: path from %s to %s: IPV4 PRIVATE DIFFERENT NETWORK",
|
||||||
|
str_local, str_remote);
|
||||||
}
|
}
|
||||||
proc_data->best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr;
|
proc_data->best_addr[i][j] = peer_interfaces[j]->ipv4_endpoint_addr;
|
||||||
continue;
|
continue;
|
||||||
@ -582,12 +598,24 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc,
|
|||||||
if(NULL != local_interface->ipv6_address &&
|
if(NULL != local_interface->ipv6_address &&
|
||||||
NULL != peer_interfaces[j]->ipv6_address) {
|
NULL != peer_interfaces[j]->ipv6_address) {
|
||||||
|
|
||||||
|
/* Convert the IPv6 addresses into nicely-printable strings for verbose debugging output */
|
||||||
|
inet_ntop(AF_INET6, &(((struct sockaddr_in6*) local_interface->ipv6_address))->sin6_addr,
|
||||||
|
str_local, sizeof(str_local));
|
||||||
|
inet_ntop(AF_INET6, &(((struct sockaddr_in6*) peer_interfaces[j]->ipv6_address))->sin6_addr,
|
||||||
|
str_remote, sizeof(str_remote));
|
||||||
|
|
||||||
if(opal_net_samenetwork((struct sockaddr*) local_interface->ipv6_address,
|
if(opal_net_samenetwork((struct sockaddr*) local_interface->ipv6_address,
|
||||||
(struct sockaddr*) peer_interfaces[j]->ipv6_address,
|
(struct sockaddr*) peer_interfaces[j]->ipv6_address,
|
||||||
local_interface->ipv6_netmask)) {
|
local_interface->ipv6_netmask)) {
|
||||||
proc_data->weights[i][j] = CQ_PUBLIC_SAME_NETWORK;
|
proc_data->weights[i][j] = CQ_PUBLIC_SAME_NETWORK;
|
||||||
|
opal_output_verbose(20, opal_btl_base_framework.framework_output,
|
||||||
|
"btl:tcp: path from %s to %s: IPV6 PUBLIC SAME NETWORK",
|
||||||
|
str_local, str_remote);
|
||||||
} else {
|
} else {
|
||||||
proc_data->weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK;
|
proc_data->weights[i][j] = CQ_PUBLIC_DIFFERENT_NETWORK;
|
||||||
|
opal_output_verbose(20, opal_btl_base_framework.framework_output,
|
||||||
|
"btl:tcp: path from %s to %s: IPV6 PUBLIC DIFFERENT NETWORK",
|
||||||
|
str_local, str_remote);
|
||||||
}
|
}
|
||||||
proc_data->best_addr[i][j] = peer_interfaces[j]->ipv6_endpoint_addr;
|
proc_data->best_addr[i][j] = peer_interfaces[j]->ipv6_endpoint_addr;
|
||||||
continue;
|
continue;
|
||||||
@ -660,6 +688,12 @@ int mca_btl_tcp_proc_insert( mca_btl_tcp_proc_t* btl_proc,
|
|||||||
rc = OPAL_SUCCESS;
|
rc = OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (OPAL_ERR_UNREACH == rc) {
|
||||||
|
opal_output_verbose(10, opal_btl_base_framework.framework_output,
|
||||||
|
"btl:tcp: host %s, process %s UNREACHABLE",
|
||||||
|
proc_hostname,
|
||||||
|
OPAL_NAME_PRINT(btl_proc->proc_opal->proc_name));
|
||||||
|
}
|
||||||
|
|
||||||
for(i = 0; i < perm_size; ++i) {
|
for(i = 0; i < perm_size; ++i) {
|
||||||
free(proc_data->weights[i]);
|
free(proc_data->weights[i]);
|
||||||
@ -716,7 +750,7 @@ int mca_btl_tcp_proc_remove(mca_btl_tcp_proc_t* btl_proc, mca_btl_base_endpoint_
|
|||||||
OBJ_RELEASE(btl_proc);
|
OBJ_RELEASE(btl_proc);
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
/* The endpoint_addr may still be NULL if this enpoint is
|
/* The endpoint_addr may still be NULL if this endpoint is
|
||||||
being removed early in the wireup sequence (e.g., if it
|
being removed early in the wireup sequence (e.g., if it
|
||||||
is unreachable by all other procs) */
|
is unreachable by all other procs) */
|
||||||
if (NULL != btl_endpoint->endpoint_addr) {
|
if (NULL != btl_endpoint->endpoint_addr) {
|
||||||
|
@ -100,3 +100,68 @@ hopefully be able to continue).
|
|||||||
Peer hostname: %s (%s)
|
Peer hostname: %s (%s)
|
||||||
Source IP of socket: %s
|
Source IP of socket: %s
|
||||||
Known IPs of peer: %s
|
Known IPs of peer: %s
|
||||||
|
#
|
||||||
|
[socket flag fail]
|
||||||
|
WARNING: Open MPI failed to set flags on a TCP socket. This should
|
||||||
|
not happen. It is likely that your MPI job will now fail.
|
||||||
|
|
||||||
|
Local host: %s
|
||||||
|
PID: %d
|
||||||
|
Flag: %s
|
||||||
|
Error: %s (%d)
|
||||||
|
#
|
||||||
|
[server did not get guid]
|
||||||
|
WARNING: Open MPI accepted a TCP connection from what appears to be a
|
||||||
|
another Open MPI process but the peer process did not complete the
|
||||||
|
initial handshake properly. This should not happen.
|
||||||
|
|
||||||
|
This attempted connection will be ignored; your MPI job may or may not
|
||||||
|
continue properly.
|
||||||
|
|
||||||
|
Local host: %s
|
||||||
|
PID: %d
|
||||||
|
#
|
||||||
|
[server accept cannot find guid]
|
||||||
|
WARNING: Open MPI accepted a TCP connection from what appears to be a
|
||||||
|
another Open MPI process but cannot find a corresponding process
|
||||||
|
entry for that peer.
|
||||||
|
|
||||||
|
This attempted connection will be ignored; your MPI job may or may not
|
||||||
|
continue properly.
|
||||||
|
|
||||||
|
Local host: %s
|
||||||
|
PID: %d
|
||||||
|
#
|
||||||
|
[server getpeername failed]
|
||||||
|
WARNING: Open MPI failed to look up the peer IP address information of
|
||||||
|
a TCP connection that it just accepted. This should not happen.
|
||||||
|
|
||||||
|
This attempted connection will be ignored; your MPI job may or may not
|
||||||
|
continue properly.
|
||||||
|
|
||||||
|
Local host: %s
|
||||||
|
PID: %d
|
||||||
|
Error: %s (%d)
|
||||||
|
#
|
||||||
|
[server cannot find endpoint]
|
||||||
|
WARNING: Open MPI accepted a TCP connection from what appears to be a
|
||||||
|
valid peer Open MPI process but cannot find a corresponding endpoint
|
||||||
|
entry for that peer. This should not happen.
|
||||||
|
|
||||||
|
This attempted connection will be ignored; your MPI job may or may not
|
||||||
|
continue properly.
|
||||||
|
|
||||||
|
Local host: %s
|
||||||
|
PID: %d
|
||||||
|
#
|
||||||
|
[client connect fail]
|
||||||
|
WARNING: Open MPI failed to TCP connect to a peer MPI process via
|
||||||
|
TCP. This should not happen.
|
||||||
|
|
||||||
|
Your Open MPI job may now fail.
|
||||||
|
|
||||||
|
Local host: %s
|
||||||
|
PID: %d
|
||||||
|
Message: %s
|
||||||
|
Error: %s (%d)
|
||||||
|
#
|
Загрузка…
x
Ссылка в новой задаче
Block a user