oob/tcp: add show_help message about version mismatch
Be more explicit about version mismatch between ORTE processes. Signed-off-by: Jeff Squyres <jsquyres@cisco.com>
Этот коммит содержится в:
родитель
40afd525f8
Коммит
0f8077ace6
@ -24,6 +24,9 @@
|
||||
#ifdef HAVE_ARPA_INET_H
|
||||
#include <arpa/inet.h>
|
||||
#endif
|
||||
#ifdef HAVE_NETINET_IN_H
|
||||
#include <netinet/in.h>
|
||||
#endif
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
@ -11,7 +11,7 @@
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2015-2018 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -106,10 +106,29 @@ levels.
|
||||
Remote host: %s
|
||||
Remote port: %d
|
||||
|
||||
|
||||
The connection was rejected.
|
||||
#
|
||||
[static-fwd]
|
||||
Static ports were requested while orte_fwd_mpirun_port was set.
|
||||
Both options cannot be simultaneously set. Please either set
|
||||
orte_fwd_mpirun_port=false or remove any static port directives.
|
||||
#
|
||||
[version mismatch]
|
||||
Open MPI detected a mismatch in versions between two processes. This
|
||||
typically means that you executed "mpirun" (or "mpiexec") from one
|
||||
version of Open MPI on one node, but your default path on one of the
|
||||
other nodes upon which you launched found a different version of Open
|
||||
MPI.
|
||||
|
||||
Open MPI only supports running exactly the same version between all
|
||||
processes in a single job.
|
||||
|
||||
This will almost certainly cause unpredictable behavior, and may end
|
||||
up aborting your job.
|
||||
|
||||
Local host: %s
|
||||
Local process name: %s
|
||||
Local Open MPI version: %s
|
||||
Peer host: %s
|
||||
Peer process name: %s
|
||||
Peer Open MPI version: %s
|
||||
|
@ -11,7 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
@ -58,6 +58,7 @@
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/util/fd.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
@ -701,6 +702,7 @@ static bool retry(mca_oob_tcp_peer_t* peer, int sd, bool fatal)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* pr,
|
||||
int sd, mca_oob_tcp_hdr_t *dhdr)
|
||||
{
|
||||
@ -890,11 +892,15 @@ int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* pr,
|
||||
version = (char*)((char*)msg + offset);
|
||||
offset += strlen(version) + 1;
|
||||
if (0 != strcmp(version, orte_version_string)) {
|
||||
opal_output(0, "%s tcp_peer_recv_connect_ack: "
|
||||
"received different version from %s: %s instead of %s\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
version, orte_version_string);
|
||||
opal_show_help("help-oob-tcp.txt", "version mismatch",
|
||||
true,
|
||||
opal_process_info.nodename,
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_version_string,
|
||||
opal_fd_get_peer_name(peer->sd),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
version);
|
||||
|
||||
peer->state = MCA_OOB_TCP_FAILED;
|
||||
mca_oob_tcp_peer_close(peer);
|
||||
free(msg);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user