If we get OMPI_ERR_UNREACH from the PML, print a slightly more
specific error. Suggested by Nick Edmonds (http://www.open-mpi.org/community/lists/users/2010/03/12339.php). This commit was SVN r22828.
Этот коммит содержится в:
родитель
f6e4694d67
Коммит
bb314911b3
@ -10,7 +10,7 @@
|
|||||||
# University of Stuttgart. All rights reserved.
|
# University of Stuttgart. All rights reserved.
|
||||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
# Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
# Copyright (c) 2007-2010 Cisco Systems, Inc. All rights reserved.
|
||||||
# $COPYRIGHT$
|
# $COPYRIGHT$
|
||||||
#
|
#
|
||||||
# Additional copyrights may follow
|
# Additional copyrights may follow
|
||||||
@ -29,6 +29,23 @@ developer):
|
|||||||
|
|
||||||
%s
|
%s
|
||||||
--> Returned "%s" (%d) instead of "Success" (0)
|
--> Returned "%s" (%d) instead of "Success" (0)
|
||||||
|
#
|
||||||
|
[mpi_init:startup:pml-add-procs-fail]
|
||||||
|
|
||||||
|
MPI_INIT has failed because at least one MPI process is unreachable
|
||||||
|
from another. This *usually* means that an underlying communication
|
||||||
|
plugin -- such as a BTL or an MTL -- has either not loaded or not
|
||||||
|
allowed itself to be used. Your MPI job will now abort.
|
||||||
|
|
||||||
|
You may wish to try to narrow down the problem:
|
||||||
|
|
||||||
|
* Check the output of ompi_info to see which BTL/MTL plugins are
|
||||||
|
available.
|
||||||
|
* Run your application with MPI_THREAD_SINGLE.
|
||||||
|
* Set the MCA parameter btl_base_verbose to 100 (or mtl_base_verbose,
|
||||||
|
if using MTL-based communications) to see exactly which
|
||||||
|
communication plugins were considered and/or discarded.
|
||||||
|
#
|
||||||
[mpi-param-check-enabled-but-compiled-out]
|
[mpi-param-check-enabled-but-compiled-out]
|
||||||
WARNING: The MCA parameter mpi_param_check has been set to true, but
|
WARNING: The MCA parameter mpi_param_check has been set to true, but
|
||||||
parameter checking has been compiled out of Open MPI. The
|
parameter checking has been compiled out of Open MPI. The
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
* University of Stuttgart. All rights reserved.
|
* University of Stuttgart. All rights reserved.
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2006-2009 University of Houston. All rights reserved.
|
* Copyright (c) 2006-2009 University of Houston. All rights reserved.
|
||||||
@ -723,7 +723,15 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
|||||||
}
|
}
|
||||||
ret = MCA_PML_CALL(add_procs(procs, nprocs));
|
ret = MCA_PML_CALL(add_procs(procs, nprocs));
|
||||||
free(procs);
|
free(procs);
|
||||||
if( OMPI_SUCCESS != ret ) {
|
/* If we got "unreachable", then print a specific error message.
|
||||||
|
Otherwise, if we got some other failure, fall through to print
|
||||||
|
a generic message. */
|
||||||
|
if (OMPI_ERR_UNREACH == ret) {
|
||||||
|
orte_show_help("help-mpi-runtime",
|
||||||
|
"mpi_init:startup:pml-add-procs-fail", true);
|
||||||
|
error = NULL;
|
||||||
|
goto error;
|
||||||
|
} else if (OMPI_SUCCESS != ret) {
|
||||||
error = "PML add procs failed";
|
error = "PML add procs failed";
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
@ -892,6 +900,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
|||||||
|
|
||||||
error:
|
error:
|
||||||
if (ret != OMPI_SUCCESS) {
|
if (ret != OMPI_SUCCESS) {
|
||||||
|
/* Only print a message if one was not already printed */
|
||||||
|
if (NULL != error) {
|
||||||
const char *err_msg = opal_strerror(ret);
|
const char *err_msg = opal_strerror(ret);
|
||||||
/* If ORTE was not setup yet, don't use orte_show_help */
|
/* If ORTE was not setup yet, don't use orte_show_help */
|
||||||
if (orte_setup) {
|
if (orte_setup) {
|
||||||
@ -903,6 +913,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
|||||||
"mpi_init:startup:internal-failure", true,
|
"mpi_init:startup:internal-failure", true,
|
||||||
"MPI_INIT", "MPI_INIT", error, err_msg, ret);
|
"MPI_INIT", "MPI_INIT", error, err_msg, ret);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user