diff --git a/config/ompi_setup_prrte.m4 b/config/ompi_setup_prrte.m4 index 3871440a0f..a83bd618f7 100644 --- a/config/ompi_setup_prrte.m4 +++ b/config/ompi_setup_prrte.m4 @@ -46,6 +46,15 @@ AC_DEFUN([OMPI_SETUP_PRRTE],[ [AC_HELP_STRING([--enable-prte-prefix-by-default], [Make "mpirun ..." behave exactly the same as "mpirun --prefix \$prefix" (where \$prefix is the value given to --prefix in configure) (default:enabled)])]) + AS_IF([test "$opal_external_pmix_happy" = "yes" && test $opal_numerical_pmix_version -lt 4 && test "$enable_internal_rte" != "no"], + [AC_MSG_WARN([OMPI's internal runtime environment "PRRTE" does not support]) + AC_MSG_WARN([PMIx versions less than v4.x as they lack adequate tool]) + AC_MSG_WARN([support. You can, if desired, build OMPI against an earlier]) + AC_MSG_WARN([version of PMIx for strictly direct-launch purposes - e.g., using]) + AC_MSG_WARN([Slurm's srun to launch the job - by configuring with the]) + AC_MSG_WARN([--disable-internal-rte option.]) + AC_MSG_ERROR([Cannot continue])]) + AC_MSG_CHECKING([if RTE support is enabled]) if test "$enable_internal_rte" != "no"; then AC_MSG_RESULT([yes]) @@ -81,7 +90,7 @@ AC_DEFUN([OMPI_SETUP_PRRTE],[ opal_prrte_prefix_arg= fi - opal_prrte_args="--prefix=$prefix --disable-dlopen $opal_prrte_prefix_arg $opal_prrte_libevent_arg $opal_prrte_hwloc_arg $opal_prrte_pmix_arg" + opal_prrte_args="--prefix=$prefix $opal_prrte_prefix_arg $opal_prrte_libevent_arg $opal_prrte_hwloc_arg $opal_prrte_pmix_arg" AS_IF([test "$enable_debug" = "yes"], [opal_prrte_args="--enable-debug $opal_prrte_args" CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"], diff --git a/config/opal_check_pmi.m4 b/config/opal_check_pmi.m4 index c5706f4bff..6f18c02cd1 100644 --- a/config/opal_check_pmi.m4 +++ b/config/opal_check_pmi.m4 @@ -125,6 +125,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[ ], [])], [AC_MSG_RESULT([found]) opal_external_pmix_version=4x + opal_numerical_pmix_version=4 opal_external_pmix_version_found=1 
opal_external_pmix_happy=yes], [AC_MSG_RESULT([not found])])]) @@ -139,6 +140,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[ ], [])], [AC_MSG_RESULT([found]) opal_external_pmix_version=3x + opal_numerical_pmix_version=3 opal_external_pmix_version_found=1 opal_external_pmix_happy=yes], [AC_MSG_RESULT([not found])])]) @@ -153,6 +155,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[ ], [])], [AC_MSG_RESULT([found]) opal_external_pmix_version=2x + opal_numerical_pmix_version=2 opal_external_pmix_version_found=1 opal_external_pmix_happy=yes], [AC_MSG_RESULT([not found])])]) @@ -167,6 +170,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[ ], [])], [AC_MSG_RESULT([found]) opal_external_pmix_version=1x + opal_numerical_pmix_version=1 opal_external_pmix_version_found=1 opal_external_have_pmix1=1 opal_external_pmix_happy=yes], @@ -179,6 +183,12 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[ opal_external_pmix_happy=no]) ]) + AS_IF([test "$opal_external_pmix_happy" = "yes" && test $opal_numerical_pmix_version -lt 3], + [AC_MSG_WARN([OMPI no longer supports PMIx versions prior to v3]) + AC_MSG_WARN([Please direct us to a more current PMIx release or]) + AC_MSG_WARN([use the internally provided one]) + AC_MSG_ERROR([Cannot continue])]) + AS_IF([test "$opal_external_pmix_happy" = "yes"], [$3 # add the new flags to our wrapper compilers diff --git a/ompi/interlib/interlib.c b/ompi/interlib/interlib.c index 69062c5de3..da18b69419 100644 --- a/ompi/interlib/interlib.c +++ b/ompi/interlib/interlib.c @@ -121,6 +121,10 @@ int ompi_interlib_declare(int threadlevel, char *version) PMIX_INFO_DESTRUCT(&info[3]); /* account for our refcount on pmix_init */ PMIx_Finalize(NULL, 0); - ret = opal_pmix_convert_status(rc); + if (ompi_singleton && PMIX_ERR_UNREACH == rc) { + ret = OMPI_SUCCESS; + } else { + ret = opal_pmix_convert_status(rc); + } return ret; } diff --git a/ompi/runtime/ompi_mpi_abort.c b/ompi/runtime/ompi_mpi_abort.c index a550e7b9f6..a42109b5de 100644 --- a/ompi/runtime/ompi_mpi_abort.c +++ b/ompi/runtime/ompi_mpi_abort.c 
@@ -85,8 +85,8 @@ static void try_kill_peers(ompi_communicator_t *comm, procs = (ompi_process_name_t*) calloc(nprocs, sizeof(ompi_process_name_t)); if (NULL == procs) { - /* quick clean orte and get out */ - ompi_rte_abort(errno, "Abort: unable to alloc memory to kill procs"); + /* quick clean RTE and get out */ + ompi_rte_abort(errcode, "Abort: unable to alloc memory to kill procs"); } /* put all the local group procs in the abort list */ diff --git a/ompi/runtime/ompi_rte.c b/ompi/runtime/ompi_rte.c index 66a2db21ed..d459024231 100644 --- a/ompi/runtime/ompi_rte.c +++ b/ompi/runtime/ompi_rte.c @@ -61,7 +61,27 @@ opal_process_name_t pmix_name_wildcard = {UINT32_MAX-1, UINT32_MAX-1}; opal_process_name_t pmix_name_invalid = {UINT32_MAX, UINT32_MAX}; hwloc_cpuset_t ompi_proc_applied_binding = NULL; -pmix_process_info_t pmix_process_info = {0}; +pmix_process_info_t pmix_process_info = { + .my_name = {OPAL_JOBID_INVALID, OPAL_VPID_INVALID}, + .nodename = NULL, + .pid = 0, + .top_session_dir = NULL, + .job_session_dir = NULL, + .proc_session_dir = NULL, + .my_local_rank = 0, + .my_node_rank = 0, + .num_local_peers = 0, + .num_procs = 0, + .app_num = 0, + .univ_size = 0, + .app_sizes = NULL, + .app_ldrs = NULL, + .cpuset = NULL, + .command = NULL, + .num_apps = 0, + .initial_wdir = NULL, + .reincarnation = 0 +}; bool pmix_proc_is_bound = false; bool ompi_singleton = false; diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index ab17e8e9bb..09e10a4fd5 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -54,6 +54,7 @@ #include "opal/mca/installdirs/base/base.h" #include "opal/mca/memory/base/base.h" #include "opal/mca/patcher/base/base.h" +#include "opal/mca/pmix/base/base.h" #include "opal/mca/memcpy/base/base.h" #include "opal/mca/hwloc/base/base.h" #include "opal/mca/reachable/base/base.h" @@ -630,7 +631,7 @@ opal_init_util(int* pargc, char*** pargv) static mca_base_framework_t *opal_init_frameworks[] = { &opal_hwloc_base_framework, 
&opal_memcpy_base_framework, &opal_memchecker_base_framework, &opal_backtrace_base_framework, &opal_timer_base_framework, &opal_event_base_framework, - &opal_shmem_base_framework, &opal_reachable_base_framework, + &opal_shmem_base_framework, &opal_reachable_base_framework, &opal_pmix_base_framework, NULL, };