Detect when the system contains a mix of big-endian (BE) and little-endian (LE) nodes, warn that OMPI does not currently support this environment, and error out.
Fixes #2817
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
d1c5955b73
Коммит
2753f53e6d
@ -588,7 +588,7 @@ AC_CACHE_SAVE
|
|||||||
opal_show_title "Header file tests"
|
opal_show_title "Header file tests"
|
||||||
|
|
||||||
AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h dirent.h \
|
AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h dirent.h \
|
||||||
dlfcn.h execinfo.h err.h fcntl.h grp.h libgen.h \
|
dlfcn.h endian.h execinfo.h err.h fcntl.h grp.h libgen.h \
|
||||||
libutil.h memory.h netdb.h netinet/in.h netinet/tcp.h \
|
libutil.h memory.h netdb.h netinet/in.h netinet/tcp.h \
|
||||||
poll.h pthread.h pty.h pwd.h sched.h \
|
poll.h pthread.h pty.h pwd.h sched.h \
|
||||||
strings.h stropts.h linux/ethtool.h linux/sockios.h \
|
strings.h stropts.h linux/ethtool.h linux/sockios.h \
|
||||||
|
@ -32,6 +32,9 @@
|
|||||||
#ifdef HAVE_UNISTD_H
|
#ifdef HAVE_UNISTD_H
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef HAVE_ENDIAN_H
|
||||||
|
#include <endian.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "opal/runtime/opal.h"
|
#include "opal/runtime/opal.h"
|
||||||
#include "opal/constants.h"
|
#include "opal/constants.h"
|
||||||
@ -2155,7 +2158,7 @@ int opal_hwloc_get_sorted_numa_list(hwloc_topology_t topo, char* device_name, op
|
|||||||
char* opal_hwloc_base_get_topo_signature(hwloc_topology_t topo)
|
char* opal_hwloc_base_get_topo_signature(hwloc_topology_t topo)
|
||||||
{
|
{
|
||||||
int nnuma, nsocket, nl3, nl2, nl1, ncore, nhwt;
|
int nnuma, nsocket, nl3, nl2, nl1, ncore, nhwt;
|
||||||
char *sig=NULL, *arch=NULL;
|
char *sig=NULL, *arch = NULL, *endian;
|
||||||
hwloc_obj_t obj;
|
hwloc_obj_t obj;
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
@ -2175,14 +2178,22 @@ char* opal_hwloc_base_get_topo_signature(hwloc_topology_t topo)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (NULL == arch) {
|
if (NULL == arch) {
|
||||||
asprintf(&sig, "%dN:%dS:%dL3:%dL2:%dL1:%dC:%dH",
|
arch = "unknown";
|
||||||
nnuma, nsocket, nl3, nl2, nl1, ncore, nhwt);
|
|
||||||
} else {
|
|
||||||
asprintf(&sig, "%dN:%dS:%dL3:%dL2:%dL1:%dC:%dH:%s",
|
|
||||||
nnuma, nsocket, nl3, nl2, nl1, ncore, nhwt, arch);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __BYTE_ORDER
|
||||||
|
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||||
|
endian = "le";
|
||||||
|
#else
|
||||||
|
endian = "be";
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
endian = "unknown";
|
||||||
|
#endif
|
||||||
|
|
||||||
|
asprintf(&sig, "%dN:%dS:%dL3:%dL2:%dL1:%dC:%dH:%s:%s",
|
||||||
|
nnuma, nsocket, nl3, nl2, nl1, ncore, nhwt, arch, endian);
|
||||||
return sig;
|
return sig;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
# University of Stuttgart. All rights reserved.
|
# University of Stuttgart. All rights reserved.
|
||||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
# Copyright (c) 2015 Intel, Inc. All rights reserved.
|
# Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
|
||||||
# $COPYRIGHT$
|
# $COPYRIGHT$
|
||||||
#
|
#
|
||||||
# Additional copyrights may follow
|
# Additional copyrights may follow
|
||||||
@ -162,3 +162,14 @@ A call was made to launch additional processes, but this process has
|
|||||||
no active out-of-band transports and therefore cannot execute this call.
|
no active out-of-band transports and therefore cannot execute this call.
|
||||||
Please check to see if you have the "oob" MCA parameter set and ensure
|
Please check to see if you have the "oob" MCA parameter set and ensure
|
||||||
that it is either unset or at least includes the tcp transport.
|
that it is either unset or at least includes the tcp transport.
|
||||||
|
#
|
||||||
|
[multi-endian]
|
||||||
|
Open MPI does not currently support multi-endian operations. We have
|
||||||
|
detected that the following node differs in endianness:
|
||||||
|
|
||||||
|
|
||||||
|
Nodename: %s
|
||||||
|
Endian: %s
|
||||||
|
Local endian: %s
|
||||||
|
|
||||||
|
Please correct the situation and try again.
|
||||||
|
@ -1058,12 +1058,23 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
|
|||||||
orte_daemon_cmd_flag_t cmd;
|
orte_daemon_cmd_flag_t cmd;
|
||||||
int32_t flag;
|
int32_t flag;
|
||||||
opal_value_t *kv;
|
opal_value_t *kv;
|
||||||
|
char *myendian;
|
||||||
|
|
||||||
/* get the daemon job, if necessary */
|
/* get the daemon job, if necessary */
|
||||||
if (NULL == jdatorted) {
|
if (NULL == jdatorted) {
|
||||||
jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
|
jdatorted = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* get my endianness */
|
||||||
|
t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0);
|
||||||
|
if (NULL == t) {
|
||||||
|
/* should never happen */
|
||||||
|
myendian = "unknown";
|
||||||
|
} else {
|
||||||
|
myendian = strrchr(t->sig, ':');
|
||||||
|
++myendian;
|
||||||
|
}
|
||||||
|
|
||||||
/* multiple daemons could be in this buffer, so unpack until we exhaust the data */
|
/* multiple daemons could be in this buffer, so unpack until we exhaust the data */
|
||||||
idx = 1;
|
idx = 1;
|
||||||
while (OPAL_SUCCESS == (rc = opal_dss.unpack(buffer, &dname, &idx, ORTE_NAME))) {
|
while (OPAL_SUCCESS == (rc = opal_dss.unpack(buffer, &dname, &idx, ORTE_NAME))) {
|
||||||
@ -1263,8 +1274,24 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
|
|||||||
}
|
}
|
||||||
free(sig);
|
free(sig);
|
||||||
break;
|
break;
|
||||||
|
} else {
|
||||||
|
/* check if the difference is due to the endianness */
|
||||||
|
ptr = strrchr(sig, ':');
|
||||||
|
++ptr;
|
||||||
|
if (0 != strcmp(ptr, myendian)) {
|
||||||
|
/* we don't currently handle multi-endian operations in the
|
||||||
|
* MPI support */
|
||||||
|
orte_show_help("help-plm-base", "multi-endian", true,
|
||||||
|
nodename, ptr, myendian);
|
||||||
|
orted_failed_launch = true;
|
||||||
|
if (NULL != topo) {
|
||||||
|
hwloc_topology_destroy(topo);
|
||||||
|
}
|
||||||
|
goto CLEANUP;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!found) {
|
if (!found) {
|
||||||
/* nope - save the signature and request the complete topology from that node */
|
/* nope - save the signature and request the complete topology from that node */
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user