Merge pull request #4942 from jsquyres/pr/tcp-btl-help-message-updates
TCP help message updates
Этот коммит содержится в:
Коммит
023a4a82d3
@ -10,7 +10,7 @@
|
|||||||
* University of Stuttgart. All rights reserved.
|
* University of Stuttgart. All rights reserved.
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2007-2018 Cisco Systems, Inc. All rights reserved
|
||||||
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
|
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2009 Oak Ridge National Laboratory
|
* Copyright (c) 2009 Oak Ridge National Laboratory
|
||||||
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
|
||||||
@ -1363,7 +1363,6 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user)
|
|||||||
mca_btl_tcp_endpoint_hs_msg_t hs_msg;
|
mca_btl_tcp_endpoint_hs_msg_t hs_msg;
|
||||||
struct timeval save, tv;
|
struct timeval save, tv;
|
||||||
socklen_t rcvtimeo_save_len = sizeof(save);
|
socklen_t rcvtimeo_save_len = sizeof(save);
|
||||||
char str[128];
|
|
||||||
|
|
||||||
/* Note, Socket will be in blocking mode during initial handshake
|
/* Note, Socket will be in blocking mode during initial handshake
|
||||||
* hence setting SO_RCVTIMEO to say 2 seconds here to avoid waiting
|
* hence setting SO_RCVTIMEO to say 2 seconds here to avoid waiting
|
||||||
@ -1376,20 +1375,22 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user)
|
|||||||
if (ENOPROTOOPT == errno) {
|
if (ENOPROTOOPT == errno) {
|
||||||
sockopt = false;
|
sockopt = false;
|
||||||
} else {
|
} else {
|
||||||
opal_output_verbose(20, opal_btl_base_framework.framework_output,
|
opal_show_help("help-mpi-btl-tcp.txt", "socket flag fail",
|
||||||
"Cannot get current recv timeout value of the socket"
|
true, opal_process_info.nodename,
|
||||||
"Local_host:%s PID:%d",
|
getpid(),
|
||||||
opal_process_info.nodename, getpid());
|
"getsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, ...)",
|
||||||
|
strerror(opal_socket_errno), opal_socket_errno);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
tv.tv_sec = 2;
|
tv.tv_sec = 2;
|
||||||
tv.tv_usec = 0;
|
tv.tv_usec = 0;
|
||||||
if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) {
|
if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) {
|
||||||
opal_output_verbose(20, opal_btl_base_framework.framework_output,
|
opal_show_help("help-mpi-btl-tcp.txt", "socket flag fail",
|
||||||
"Cannot set new recv timeout value of the socket"
|
true, opal_process_info.nodename,
|
||||||
"Local_host:%s PID:%d",
|
getpid(),
|
||||||
opal_process_info.nodename, getpid());
|
"setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, ...)",
|
||||||
|
strerror(opal_socket_errno), opal_socket_errno);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1408,14 +1409,16 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user)
|
|||||||
* This attempted connection will be ignored; your MPI job may or may not
|
* This attempted connection will be ignored; your MPI job may or may not
|
||||||
* continue properly.
|
* continue properly.
|
||||||
*/
|
*/
|
||||||
if (sizeof(hs_msg) != retval) {
|
if (sizeof(hs_msg) != retval) {
|
||||||
opal_output_verbose(20, opal_btl_base_framework.framework_output,
|
const char *peer = opal_fd_get_peer_name(sd);
|
||||||
"process did not receive full connect ACK "
|
opal_show_help("help-mpi-btl-tcp.txt",
|
||||||
"Local_host:%s PID:%d String_received:%s Test_fail:%s",
|
"did not receive full magic id string",
|
||||||
opal_process_info.nodename,
|
true,
|
||||||
getpid(),
|
opal_process_info.nodename,
|
||||||
(retval > 0) ? hs_msg.magic_id : "<nothing>",
|
getpid(),
|
||||||
"handshake message length");
|
opal_version_string,
|
||||||
|
peer);
|
||||||
|
free((char*) peer);
|
||||||
|
|
||||||
/* The other side probably isn't OMPI, so just hang up */
|
/* The other side probably isn't OMPI, so just hang up */
|
||||||
CLOSE_THE_SOCKET(sd);
|
CLOSE_THE_SOCKET(sd);
|
||||||
@ -1424,12 +1427,18 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user)
|
|||||||
|
|
||||||
guid = hs_msg.guid;
|
guid = hs_msg.guid;
|
||||||
if (0 != strncmp(hs_msg.magic_id, mca_btl_tcp_magic_id_string, len)) {
|
if (0 != strncmp(hs_msg.magic_id, mca_btl_tcp_magic_id_string, len)) {
|
||||||
opal_output_verbose(20, opal_btl_base_framework.framework_output,
|
const char *peer = opal_fd_get_peer_name(sd);
|
||||||
"process did not receive right magic string. "
|
opal_show_help("help-mpi-btl-tcp.txt",
|
||||||
"Local_host:%s PID:%d String_received:%s Test_fail:%s",
|
"received incorrect magic id string",
|
||||||
opal_process_info.nodename,
|
true,
|
||||||
getpid(), hs_msg.magic_id,
|
opal_process_info.nodename,
|
||||||
"string value");
|
getpid(),
|
||||||
|
opal_version_string,
|
||||||
|
peer,
|
||||||
|
hs_msg.magic_id,
|
||||||
|
mca_btl_tcp_magic_id_string);
|
||||||
|
free((char*) peer);
|
||||||
|
|
||||||
/* The other side probably isn't OMPI, so just hang up */
|
/* The other side probably isn't OMPI, so just hang up */
|
||||||
CLOSE_THE_SOCKET(sd);
|
CLOSE_THE_SOCKET(sd);
|
||||||
return;
|
return;
|
||||||
@ -1438,10 +1447,11 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user)
|
|||||||
if (sockopt) {
|
if (sockopt) {
|
||||||
/* reset RECVTIMEO option to its original state */
|
/* reset RECVTIMEO option to its original state */
|
||||||
if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &save, sizeof(save))) {
|
if (0 != setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &save, sizeof(save))) {
|
||||||
opal_output_verbose(20, opal_btl_base_framework.framework_output,
|
opal_show_help("help-mpi-btl-tcp.txt", "socket flag fail",
|
||||||
"Cannot reset recv timeout value"
|
true, opal_process_info.nodename,
|
||||||
"Local_host:%s PID:%d",
|
getpid(),
|
||||||
opal_process_info.nodename, getpid());
|
"setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, ...)",
|
||||||
|
strerror(opal_socket_errno), opal_socket_errno);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1492,24 +1502,9 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user)
|
|||||||
/* are there any existing peer instances willing to accept this connection */
|
/* are there any existing peer instances willing to accept this connection */
|
||||||
(void)mca_btl_tcp_proc_accept(btl_proc, (struct sockaddr*)&addr, sd);
|
(void)mca_btl_tcp_proc_accept(btl_proc, (struct sockaddr*)&addr, sd);
|
||||||
|
|
||||||
switch (addr.ss_family) {
|
const char *str = opal_fd_get_peer_name(sd);
|
||||||
case AF_INET:
|
|
||||||
inet_ntop(AF_INET, &(((struct sockaddr_in*) &addr)->sin_addr), str, sizeof(str));
|
|
||||||
break;
|
|
||||||
|
|
||||||
#if OPAL_ENABLE_IPV6
|
|
||||||
case AF_INET6:
|
|
||||||
inet_ntop(AF_INET6, &(((struct sockaddr_in6*) &addr)->sin6_addr), str, sizeof(str));
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
default:
|
|
||||||
BTL_ERROR(("Got an accept() from an unknown address family -- this shouldn't happen"));
|
|
||||||
CLOSE_THE_SOCKET(sd);
|
|
||||||
return;
|
|
||||||
|
|
||||||
}
|
|
||||||
opal_output_verbose(10, opal_btl_base_framework.framework_output,
|
opal_output_verbose(10, opal_btl_base_framework.framework_output,
|
||||||
"btl:tcp: now connected to %s, process %s", str,
|
"btl:tcp: now connected to %s, process %s", str,
|
||||||
OPAL_NAME_PRINT(btl_proc->proc_opal->proc_name));
|
OPAL_NAME_PRINT(btl_proc->proc_opal->proc_name));
|
||||||
|
free((char*) str);
|
||||||
}
|
}
|
||||||
|
@ -35,7 +35,7 @@ values are in the range [1 .. 2^16-1]. This value will be ignored
|
|||||||
WARNING: Open MPI failed to TCP connect to a peer MPI process. This
|
WARNING: Open MPI failed to TCP connect to a peer MPI process. This
|
||||||
should not happen.
|
should not happen.
|
||||||
|
|
||||||
Your Open MPI job may now fail.
|
Your Open MPI job may now hang or fail.
|
||||||
|
|
||||||
Local host: %s
|
Local host: %s
|
||||||
PID: %d
|
PID: %d
|
||||||
@ -46,7 +46,7 @@ Your Open MPI job may now fail.
|
|||||||
WARNING: Open MPI failed to handshake with a connecting peer MPI
|
WARNING: Open MPI failed to handshake with a connecting peer MPI
|
||||||
process over TCP. This should not happen.
|
process over TCP. This should not happen.
|
||||||
|
|
||||||
Your Open MPI job may now fail.
|
Your Open MPI job may now hang or fail.
|
||||||
|
|
||||||
Local host: %s
|
Local host: %s
|
||||||
PID: %d
|
PID: %d
|
||||||
@ -102,8 +102,11 @@ hopefully be able to continue).
|
|||||||
Known IPs of peer: %s
|
Known IPs of peer: %s
|
||||||
#
|
#
|
||||||
[socket flag fail]
|
[socket flag fail]
|
||||||
WARNING: Open MPI failed to set flags on a TCP socket. This should
|
WARNING: Open MPI failed to get or set flags on a TCP socket. This
|
||||||
not happen. It is likely that your MPI job will now fail.
|
should not happen.
|
||||||
|
|
||||||
|
This may cause unpredictable behavior, and may end up hanging or
|
||||||
|
aborting your job.
|
||||||
|
|
||||||
Local host: %s
|
Local host: %s
|
||||||
PID: %d
|
PID: %d
|
||||||
@ -164,4 +167,43 @@ Your Open MPI job may now fail.
|
|||||||
PID: %d
|
PID: %d
|
||||||
Message: %s
|
Message: %s
|
||||||
Error: %s (%d)
|
Error: %s (%d)
|
||||||
#
|
#
|
||||||
|
[did not receive full magic id string]
|
||||||
|
The TCP BTL received an inbound socket connection from an unidentified
|
||||||
|
peer. This typically means one of two things:
|
||||||
|
|
||||||
|
1. A non-Open MPI process tried to connect to this Open MPI process.
|
||||||
|
2. An Open MPI process compiled against a different version of Open
|
||||||
|
MPI tried to connect to this Open MPI process.
|
||||||
|
|
||||||
|
Open MPI only supports running exactly the same version between all
|
||||||
|
processes in a single job.
|
||||||
|
|
||||||
|
This may cause unpredictable behavior, and may end up aborting your
|
||||||
|
job.
|
||||||
|
|
||||||
|
Local host: %s
|
||||||
|
Local PID: %d
|
||||||
|
Local Open MPI version: %s
|
||||||
|
Peer IP address: %s
|
||||||
|
#
|
||||||
|
[received incorrect magic id string]
|
||||||
|
The TCP BTL received an inbound socket connection from a peer that did
|
||||||
|
not identify itself correctly as an Open MPI process. This typically
|
||||||
|
means one of two things:
|
||||||
|
|
||||||
|
1. A non-Open MPI process tried to connect to this Open MPI process.
|
||||||
|
2. An Open MPI process compiled against a different version of Open
|
||||||
|
MPI tried to connect to this Open MPI process.
|
||||||
|
|
||||||
|
Open MPI only supports running exactly the same version between all
|
||||||
|
processes in a single job.
|
||||||
|
|
||||||
|
This may cause unpredictable behavior, and may end up hanging or
|
||||||
|
aborting your job.
|
||||||
|
|
||||||
|
Local host: %s
|
||||||
|
Local PID: %d
|
||||||
|
Local Open MPI version: %s
|
||||||
|
Peer IP address: %s
|
||||||
|
Peer identifier: %s (expected %s)
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved
|
||||||
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
|
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
|
||||||
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
|
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
|
||||||
*
|
*
|
||||||
@ -18,13 +18,22 @@
|
|||||||
#ifdef HAVE_SYS_STAT_H
|
#ifdef HAVE_SYS_STAT_H
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef HAVE_SYS_SOCKET_H
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_ARPA_INET_H
|
||||||
|
#include <arpa/inet.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_NETINET_IN_H
|
||||||
|
#include <netinet/in.h>
|
||||||
|
#endif
|
||||||
#ifdef HAVE_UNISTD_H
|
#ifdef HAVE_UNISTD_H
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
#include "opal/util/fd.h"
|
#include "opal/util/fd.h"
|
||||||
#include "opal/constants.h"
|
#include "opal/constants.h"
|
||||||
@ -126,3 +135,49 @@ bool opal_fd_is_blkdev(int fd)
|
|||||||
return S_ISBLK(buf.st_mode);
|
return S_ISBLK(buf.st_mode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Return a printable name for the peer on the other end of an
 * internet socket.
 *
 * @param fd  File descriptor of a connected AF_INET/AF_INET6 socket.
 *
 * @returns A heap-allocated, NUL-terminated string that the caller
 *          must free().  It holds the peer's numeric address on
 *          success, or the literal "Unknown" when the peer cannot be
 *          determined (getpeername() fails, the address family is not
 *          supported, or the address cannot be formatted).  NULL is
 *          returned only when memory allocation fails.
 */
const char *opal_fd_get_peer_name(int fd)
{
    static const char unknown[] = "Unknown";
    /* sockaddr_storage is large enough for every address family; a
       plain struct sockaddr is too small to hold an IPv6 peer address,
       so getpeername() would truncate it. */
    struct sockaddr_storage sa;
    socklen_t slt = (socklen_t) sizeof(sa);
    const char *ret = NULL;
    char *str;

    /* A single buffer sized for the longest textual address form is
       used on every path (it is also big enough for "Unknown"), so no
       path can leak or return a freed buffer. */
    str = malloc(INET6_ADDRSTRLEN);
    if (NULL == str) {
        return NULL;
    }

    memset(&sa, 0, sizeof(sa));
    if (0 == getpeername(fd, (struct sockaddr*) &sa, &slt)) {
        if (AF_INET == sa.ss_family) {
            struct sockaddr_in *si = (struct sockaddr_in*) &sa;
            ret = inet_ntop(AF_INET, &(si->sin_addr), str, INET_ADDRSTRLEN);
        }
#if OPAL_ENABLE_IPV6
        else if (AF_INET6 == sa.ss_family) {
            struct sockaddr_in6 *si6 = (struct sockaddr_in6*) &sa;
            ret = inet_ntop(AF_INET6, &(si6->sin6_addr), str,
                            INET6_ADDRSTRLEN);
        }
#endif
    }

    /* Any failure above leaves ret NULL: report a placeholder instead
       of handing the caller a NULL that would end up in a "%s". */
    if (NULL == ret) {
        memcpy(str, unknown, sizeof(unknown));
    }

    return str;
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved
|
||||||
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
|
* Copyright (c) 2009 Sandia National Laboratories. All rights reserved.
|
||||||
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
|
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
|
||||||
*
|
*
|
||||||
@ -94,6 +94,15 @@ OPAL_DECLSPEC bool opal_fd_is_chardev(int fd);
|
|||||||
*/
|
*/
|
||||||
OPAL_DECLSPEC bool opal_fd_is_blkdev(int fd);
|
OPAL_DECLSPEC bool opal_fd_is_blkdev(int fd);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convenience function to get a string name of the peer on the other
|
||||||
|
* end of this internet socket.
|
||||||
|
*
|
||||||
|
* @param fd File descriptor of an AF_INET/AF_INET6 socket
|
||||||
|
*
|
||||||
|
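* @returns the peer's numeric IP address as a string (e.g., "a.b.c.d"), or "Unknown" if the peer cannot be identified; may be NULL on error. A non-NULL string must be freed by the caller.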
|
||||||
|
*/
|
||||||
|
OPAL_DECLSPEC const char *opal_fd_get_peer_name(int fd);
|
||||||
|
|
||||||
END_C_DECLS
|
END_C_DECLS
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
# Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
# Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||||
# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
# Copyright (c) 2015-2018 Cisco Systems, Inc. All rights reserved
|
||||||
# $COPYRIGHT$
|
# $COPYRIGHT$
|
||||||
#
|
#
|
||||||
# Additional copyrights may follow
|
# Additional copyrights may follow
|
||||||
@ -106,10 +106,29 @@ levels.
|
|||||||
Remote host: %s
|
Remote host: %s
|
||||||
Remote port: %d
|
Remote port: %d
|
||||||
|
|
||||||
|
|
||||||
The connection was rejected.
|
The connection was rejected.
|
||||||
#
|
#
|
||||||
[static-fwd]
|
[static-fwd]
|
||||||
Static ports were requested while orte_fwd_mpirun_port was set.
|
Static ports were requested while orte_fwd_mpirun_port was set.
|
||||||
Both options cannot be simultaneously set. Please either set
|
Both options cannot be simultaneously set. Please either set
|
||||||
orte_fwd_mpirun_port=false or remove any static port directives.
|
orte_fwd_mpirun_port=false or remove any static port directives.
|
||||||
|
#
|
||||||
|
[version mismatch]
|
||||||
|
Open MPI detected a mismatch in versions between two processes. This
|
||||||
|
typically means that you executed "mpirun" (or "mpiexec") from one
|
||||||
|
version of Open MPI on one node, but your default path on one of the
|
||||||
|
other nodes upon which you launched found a different version of Open
|
||||||
|
MPI.
|
||||||
|
|
||||||
|
Open MPI only supports running exactly the same version between all
|
||||||
|
processes in a single job.
|
||||||
|
|
||||||
|
This will almost certainly cause unpredictable behavior, and may end
|
||||||
|
up aborting your job.
|
||||||
|
|
||||||
|
Local host: %s
|
||||||
|
Local process name: %s
|
||||||
|
Local Open MPI version: %s
|
||||||
|
Peer host: %s
|
||||||
|
Peer process name: %s
|
||||||
|
Peer Open MPI version: %s
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved
|
||||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||||
@ -58,6 +58,7 @@
|
|||||||
#include "opal/util/net.h"
|
#include "opal/util/net.h"
|
||||||
#include "opal/util/fd.h"
|
#include "opal/util/fd.h"
|
||||||
#include "opal/util/error.h"
|
#include "opal/util/error.h"
|
||||||
|
#include "opal/util/show_help.h"
|
||||||
#include "opal/class/opal_hash_table.h"
|
#include "opal/class/opal_hash_table.h"
|
||||||
#include "opal/mca/event/event.h"
|
#include "opal/mca/event/event.h"
|
||||||
|
|
||||||
@ -701,6 +702,7 @@ static bool retry(mca_oob_tcp_peer_t* peer, int sd, bool fatal)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* pr,
|
int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* pr,
|
||||||
int sd, mca_oob_tcp_hdr_t *dhdr)
|
int sd, mca_oob_tcp_hdr_t *dhdr)
|
||||||
{
|
{
|
||||||
@ -890,11 +892,15 @@ int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* pr,
|
|||||||
version = (char*)((char*)msg + offset);
|
version = (char*)((char*)msg + offset);
|
||||||
offset += strlen(version) + 1;
|
offset += strlen(version) + 1;
|
||||||
if (0 != strcmp(version, orte_version_string)) {
|
if (0 != strcmp(version, orte_version_string)) {
|
||||||
opal_output(0, "%s tcp_peer_recv_connect_ack: "
|
opal_show_help("help-oob-tcp.txt", "version mismatch",
|
||||||
"received different version from %s: %s instead of %s\n",
|
true,
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
opal_process_info.nodename,
|
||||||
ORTE_NAME_PRINT(&(peer->name)),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
version, orte_version_string);
|
orte_version_string,
|
||||||
|
opal_fd_get_peer_name(peer->sd),
|
||||||
|
ORTE_NAME_PRINT(&(peer->name)),
|
||||||
|
version);
|
||||||
|
|
||||||
peer->state = MCA_OOB_TCP_FAILED;
|
peer->state = MCA_OOB_TCP_FAILED;
|
||||||
mca_oob_tcp_peer_close(peer);
|
mca_oob_tcp_peer_close(peer);
|
||||||
free(msg);
|
free(msg);
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user