Fix singletons and ensure adequate PMIx version
OMPI can only support PMIx v3 and above. PRRTE requires at least PMIx v4, so protect against the case where OMPI is built against an external PMIx v3.

Fix the check of the PMIx_Init return code for singleton operations. Ensure that the PMIx framework gets properly opened.

Signed-off-by: Ralph Castain <rhc@pmix.org>
Этот коммит содержится в:
родитель
973d10159a
Коммит
95dacd2086
@ -46,6 +46,15 @@ AC_DEFUN([OMPI_SETUP_PRRTE],[
|
|||||||
[AC_HELP_STRING([--enable-prte-prefix-by-default],
|
[AC_HELP_STRING([--enable-prte-prefix-by-default],
|
||||||
[Make "mpirun ..." behave exactly the same as "mpirun --prefix \$prefix" (where \$prefix is the value given to --prefix in configure) (default:enabled)])])
|
[Make "mpirun ..." behave exactly the same as "mpirun --prefix \$prefix" (where \$prefix is the value given to --prefix in configure) (default:enabled)])])
|
||||||
|
|
||||||
|
AS_IF([test "$opal_external_pmix_happy" = "yes" && test $opal_numerical_pmix_version -lt 4 && test "$enable_internal_rte" != "no"],
|
||||||
|
[AC_MSG_WARN([OMPI's internal runtime environment "PRRTE" does not support])
|
||||||
|
AC_MSG_WARN([PMIx versions less than v4.x as they lack adequate tool])
|
||||||
|
AC_MSG_WARN([support. You can, if desired, build OMPI against an earlier])
|
||||||
|
AC_MSG_WARN([version of PMIx for strictly direct-launch purposes - e.g., using)])
|
||||||
|
AC_MSG_WARN([Slurm's srun to launch the job - by configuring with the])
|
||||||
|
AC_MSG_WARN([--disable-internal-rte option.])
|
||||||
|
AC_MSG_ERROR([Cannot continue])])
|
||||||
|
|
||||||
AC_MSG_CHECKING([if RTE support is enabled])
|
AC_MSG_CHECKING([if RTE support is enabled])
|
||||||
if test "$enable_internal_rte" != "no"; then
|
if test "$enable_internal_rte" != "no"; then
|
||||||
AC_MSG_RESULT([yes])
|
AC_MSG_RESULT([yes])
|
||||||
@ -81,7 +90,7 @@ AC_DEFUN([OMPI_SETUP_PRRTE],[
|
|||||||
opal_prrte_prefix_arg=
|
opal_prrte_prefix_arg=
|
||||||
fi
|
fi
|
||||||
|
|
||||||
opal_prrte_args="--prefix=$prefix --disable-dlopen $opal_prrte_prefix_arg $opal_prrte_libevent_arg $opal_prrte_hwloc_arg $opal_prrte_pmix_arg"
|
opal_prrte_args="--prefix=$prefix $opal_prrte_prefix_arg $opal_prrte_libevent_arg $opal_prrte_hwloc_arg $opal_prrte_pmix_arg"
|
||||||
AS_IF([test "$enable_debug" = "yes"],
|
AS_IF([test "$enable_debug" = "yes"],
|
||||||
[opal_prrte_args="--enable-debug $opal_prrte_args"
|
[opal_prrte_args="--enable-debug $opal_prrte_args"
|
||||||
CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"],
|
CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"],
|
||||||
|
@ -125,6 +125,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
|
|||||||
], [])],
|
], [])],
|
||||||
[AC_MSG_RESULT([found])
|
[AC_MSG_RESULT([found])
|
||||||
opal_external_pmix_version=4x
|
opal_external_pmix_version=4x
|
||||||
|
opal_numerical_pmix_version=4
|
||||||
opal_external_pmix_version_found=1
|
opal_external_pmix_version_found=1
|
||||||
opal_external_pmix_happy=yes],
|
opal_external_pmix_happy=yes],
|
||||||
[AC_MSG_RESULT([not found])])])
|
[AC_MSG_RESULT([not found])])])
|
||||||
@ -139,6 +140,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
|
|||||||
], [])],
|
], [])],
|
||||||
[AC_MSG_RESULT([found])
|
[AC_MSG_RESULT([found])
|
||||||
opal_external_pmix_version=3x
|
opal_external_pmix_version=3x
|
||||||
|
opal_numerical_pmix_version=3
|
||||||
opal_external_pmix_version_found=1
|
opal_external_pmix_version_found=1
|
||||||
opal_external_pmix_happy=yes],
|
opal_external_pmix_happy=yes],
|
||||||
[AC_MSG_RESULT([not found])])])
|
[AC_MSG_RESULT([not found])])])
|
||||||
@ -153,6 +155,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
|
|||||||
], [])],
|
], [])],
|
||||||
[AC_MSG_RESULT([found])
|
[AC_MSG_RESULT([found])
|
||||||
opal_external_pmix_version=2x
|
opal_external_pmix_version=2x
|
||||||
|
opal_numerical_pmix_version=2
|
||||||
opal_external_pmix_version_found=1
|
opal_external_pmix_version_found=1
|
||||||
opal_external_pmix_happy=yes],
|
opal_external_pmix_happy=yes],
|
||||||
[AC_MSG_RESULT([not found])])])
|
[AC_MSG_RESULT([not found])])])
|
||||||
@ -167,6 +170,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
|
|||||||
], [])],
|
], [])],
|
||||||
[AC_MSG_RESULT([found])
|
[AC_MSG_RESULT([found])
|
||||||
opal_external_pmix_version=1x
|
opal_external_pmix_version=1x
|
||||||
|
opal_numerical_pmix_version=1
|
||||||
opal_external_pmix_version_found=1
|
opal_external_pmix_version_found=1
|
||||||
opal_external_have_pmix1=1
|
opal_external_have_pmix1=1
|
||||||
opal_external_pmix_happy=yes],
|
opal_external_pmix_happy=yes],
|
||||||
@ -179,6 +183,12 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
|
|||||||
opal_external_pmix_happy=no])
|
opal_external_pmix_happy=no])
|
||||||
|
|
||||||
])
|
])
|
||||||
|
AS_IF([test "$opal_external_pmix_happy" = "yes" && test $opal_numerical_pmix_version -lt 3],
|
||||||
|
[AC_MSG_WARN([OMPI no longer supports PMIx versions prior to v3])
|
||||||
|
AC_MSG_WARN([Please direct us to a more current PMIx release or])
|
||||||
|
AC_MSG_WARN([use the internally provided one])
|
||||||
|
AC_MSG_ERROR([Cannot continue])])
|
||||||
|
|
||||||
AS_IF([test "$opal_external_pmix_happy" = "yes"],
|
AS_IF([test "$opal_external_pmix_happy" = "yes"],
|
||||||
[$3
|
[$3
|
||||||
# add the new flags to our wrapper compilers
|
# add the new flags to our wrapper compilers
|
||||||
|
@ -121,6 +121,10 @@ int ompi_interlib_declare(int threadlevel, char *version)
|
|||||||
PMIX_INFO_DESTRUCT(&info[3]);
|
PMIX_INFO_DESTRUCT(&info[3]);
|
||||||
/* account for our refcount on pmix_init */
|
/* account for our refcount on pmix_init */
|
||||||
PMIx_Finalize(NULL, 0);
|
PMIx_Finalize(NULL, 0);
|
||||||
ret = opal_pmix_convert_status(rc);
|
if (ompi_singleton && PMIX_ERR_UNREACH == rc) {
|
||||||
|
ret = OMPI_SUCCESS;
|
||||||
|
} else {
|
||||||
|
ret = opal_pmix_convert_status(rc);
|
||||||
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -85,8 +85,8 @@ static void try_kill_peers(ompi_communicator_t *comm,
|
|||||||
|
|
||||||
procs = (ompi_process_name_t*) calloc(nprocs, sizeof(ompi_process_name_t));
|
procs = (ompi_process_name_t*) calloc(nprocs, sizeof(ompi_process_name_t));
|
||||||
if (NULL == procs) {
|
if (NULL == procs) {
|
||||||
/* quick clean orte and get out */
|
/* quick clean RTE and get out */
|
||||||
ompi_rte_abort(errno, "Abort: unable to alloc memory to kill procs");
|
ompi_rte_abort(errcode, "Abort: unable to alloc memory to kill procs");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* put all the local group procs in the abort list */
|
/* put all the local group procs in the abort list */
|
||||||
|
@ -61,7 +61,27 @@
|
|||||||
opal_process_name_t pmix_name_wildcard = {UINT32_MAX-1, UINT32_MAX-1};
|
opal_process_name_t pmix_name_wildcard = {UINT32_MAX-1, UINT32_MAX-1};
|
||||||
opal_process_name_t pmix_name_invalid = {UINT32_MAX, UINT32_MAX};
|
opal_process_name_t pmix_name_invalid = {UINT32_MAX, UINT32_MAX};
|
||||||
hwloc_cpuset_t ompi_proc_applied_binding = NULL;
|
hwloc_cpuset_t ompi_proc_applied_binding = NULL;
|
||||||
pmix_process_info_t pmix_process_info = {0};
|
pmix_process_info_t pmix_process_info = {
|
||||||
|
.my_name = {OPAL_JOBID_INVALID, OPAL_VPID_INVALID},
|
||||||
|
.nodename = NULL,
|
||||||
|
.pid = 0,
|
||||||
|
.top_session_dir = NULL,
|
||||||
|
.job_session_dir = NULL,
|
||||||
|
.proc_session_dir = NULL,
|
||||||
|
.my_local_rank = 0,
|
||||||
|
.my_node_rank = 0,
|
||||||
|
.num_local_peers = 0,
|
||||||
|
.num_procs = 0,
|
||||||
|
.app_num = 0,
|
||||||
|
.univ_size = 0,
|
||||||
|
.app_sizes = NULL,
|
||||||
|
.app_ldrs = NULL,
|
||||||
|
.cpuset = NULL,
|
||||||
|
.command = NULL,
|
||||||
|
.num_apps = 0,
|
||||||
|
.initial_wdir = NULL,
|
||||||
|
.reincarnation = 0
|
||||||
|
};
|
||||||
bool pmix_proc_is_bound = false;
|
bool pmix_proc_is_bound = false;
|
||||||
bool ompi_singleton = false;
|
bool ompi_singleton = false;
|
||||||
|
|
||||||
|
@ -54,6 +54,7 @@
|
|||||||
#include "opal/mca/installdirs/base/base.h"
|
#include "opal/mca/installdirs/base/base.h"
|
||||||
#include "opal/mca/memory/base/base.h"
|
#include "opal/mca/memory/base/base.h"
|
||||||
#include "opal/mca/patcher/base/base.h"
|
#include "opal/mca/patcher/base/base.h"
|
||||||
|
#include "opal/mca/pmix/base/base.h"
|
||||||
#include "opal/mca/memcpy/base/base.h"
|
#include "opal/mca/memcpy/base/base.h"
|
||||||
#include "opal/mca/hwloc/base/base.h"
|
#include "opal/mca/hwloc/base/base.h"
|
||||||
#include "opal/mca/reachable/base/base.h"
|
#include "opal/mca/reachable/base/base.h"
|
||||||
@ -630,7 +631,7 @@ opal_init_util(int* pargc, char*** pargv)
|
|||||||
static mca_base_framework_t *opal_init_frameworks[] = {
|
static mca_base_framework_t *opal_init_frameworks[] = {
|
||||||
&opal_hwloc_base_framework, &opal_memcpy_base_framework, &opal_memchecker_base_framework,
|
&opal_hwloc_base_framework, &opal_memcpy_base_framework, &opal_memchecker_base_framework,
|
||||||
&opal_backtrace_base_framework, &opal_timer_base_framework, &opal_event_base_framework,
|
&opal_backtrace_base_framework, &opal_timer_base_framework, &opal_event_base_framework,
|
||||||
&opal_shmem_base_framework, &opal_reachable_base_framework,
|
&opal_shmem_base_framework, &opal_reachable_base_framework, &opal_pmix_base_framework,
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user