Merge pull request #7559 from rhc54/topic/fixes
Bunch of fixes plus PMIx/PRRTE updates
Этот коммит содержится в:
Коммит
9bb06d0077
@ -46,6 +46,15 @@ AC_DEFUN([OMPI_SETUP_PRRTE],[
|
||||
[AC_HELP_STRING([--enable-prte-prefix-by-default],
|
||||
[Make "mpirun ..." behave exactly the same as "mpirun --prefix \$prefix" (where \$prefix is the value given to --prefix in configure) (default:enabled)])])
|
||||
|
||||
AS_IF([test "$opal_external_pmix_happy" = "yes" && test $opal_numerical_pmix_version -lt 4 && test "$enable_internal_rte" != "no"],
|
||||
[AC_MSG_WARN([OMPI's internal runtime environment "PRRTE" does not support])
|
||||
AC_MSG_WARN([PMIx versions less than v4.x as they lack adequate tool])
|
||||
AC_MSG_WARN([support. You can, if desired, build OMPI against an earlier])
|
||||
AC_MSG_WARN([version of PMIx for strictly direct-launch purposes - e.g., using])
|
||||
AC_MSG_WARN([Slurm's srun to launch the job - by configuring with the])
|
||||
AC_MSG_WARN([--disable-internal-rte option.])
|
||||
AC_MSG_ERROR([Cannot continue])])
|
||||
|
||||
AC_MSG_CHECKING([if RTE support is enabled])
|
||||
if test "$enable_internal_rte" != "no"; then
|
||||
AC_MSG_RESULT([yes])
|
||||
@ -81,7 +90,7 @@ AC_DEFUN([OMPI_SETUP_PRRTE],[
|
||||
opal_prrte_prefix_arg=
|
||||
fi
|
||||
|
||||
opal_prrte_args="--prefix=$prefix --disable-dlopen $opal_prrte_prefix_arg $opal_prrte_libevent_arg $opal_prrte_hwloc_arg $opal_prrte_pmix_arg"
|
||||
opal_prrte_args="--prefix=$prefix $opal_prrte_prefix_arg $opal_prrte_libevent_arg $opal_prrte_hwloc_arg $opal_prrte_pmix_arg"
|
||||
AS_IF([test "$enable_debug" = "yes"],
|
||||
[opal_prrte_args="--enable-debug $opal_prrte_args"
|
||||
CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"],
|
||||
|
@ -125,6 +125,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
|
||||
], [])],
|
||||
[AC_MSG_RESULT([found])
|
||||
opal_external_pmix_version=4x
|
||||
opal_numerical_pmix_version=4
|
||||
opal_external_pmix_version_found=1
|
||||
opal_external_pmix_happy=yes],
|
||||
[AC_MSG_RESULT([not found])])])
|
||||
@ -139,6 +140,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
|
||||
], [])],
|
||||
[AC_MSG_RESULT([found])
|
||||
opal_external_pmix_version=3x
|
||||
opal_numerical_pmix_version=3
|
||||
opal_external_pmix_version_found=1
|
||||
opal_external_pmix_happy=yes],
|
||||
[AC_MSG_RESULT([not found])])])
|
||||
@ -153,6 +155,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
|
||||
], [])],
|
||||
[AC_MSG_RESULT([found])
|
||||
opal_external_pmix_version=2x
|
||||
opal_numerical_pmix_version=2
|
||||
opal_external_pmix_version_found=1
|
||||
opal_external_pmix_happy=yes],
|
||||
[AC_MSG_RESULT([not found])])])
|
||||
@ -167,6 +170,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
|
||||
], [])],
|
||||
[AC_MSG_RESULT([found])
|
||||
opal_external_pmix_version=1x
|
||||
opal_numerical_pmix_version=1
|
||||
opal_external_pmix_version_found=1
|
||||
opal_external_have_pmix1=1
|
||||
opal_external_pmix_happy=yes],
|
||||
@ -179,6 +183,12 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
|
||||
opal_external_pmix_happy=no])
|
||||
|
||||
])
|
||||
AS_IF([test "$opal_external_pmix_happy" = "yes" && test $opal_numerical_pmix_version -lt 3],
|
||||
[AC_MSG_WARN([OMPI no longer supports PMIx versions prior to v3])
|
||||
AC_MSG_WARN([Please direct us to a more current PMIx release or])
|
||||
AC_MSG_WARN([use the internally provided one])
|
||||
AC_MSG_ERROR([Cannot continue])])
|
||||
|
||||
AS_IF([test "$opal_external_pmix_happy" = "yes"],
|
||||
[$3
|
||||
# add the new flags to our wrapper compilers
|
||||
|
@ -121,6 +121,10 @@ int ompi_interlib_declare(int threadlevel, char *version)
|
||||
PMIX_INFO_DESTRUCT(&info[3]);
|
||||
/* account for our refcount on pmix_init */
|
||||
PMIx_Finalize(NULL, 0);
|
||||
ret = opal_pmix_convert_status(rc);
|
||||
if (ompi_singleton && PMIX_ERR_UNREACH == rc) {
|
||||
ret = OMPI_SUCCESS;
|
||||
} else {
|
||||
ret = opal_pmix_convert_status(rc);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
@ -85,8 +85,8 @@ static void try_kill_peers(ompi_communicator_t *comm,
|
||||
|
||||
procs = (ompi_process_name_t*) calloc(nprocs, sizeof(ompi_process_name_t));
|
||||
if (NULL == procs) {
|
||||
/* quick clean orte and get out */
|
||||
ompi_rte_abort(errno, "Abort: unable to alloc memory to kill procs");
|
||||
/* quick clean RTE and get out */
|
||||
ompi_rte_abort(errcode, "Abort: unable to alloc memory to kill procs");
|
||||
}
|
||||
|
||||
/* put all the local group procs in the abort list */
|
||||
|
@ -61,7 +61,27 @@
|
||||
opal_process_name_t pmix_name_wildcard = {UINT32_MAX-1, UINT32_MAX-1};
|
||||
opal_process_name_t pmix_name_invalid = {UINT32_MAX, UINT32_MAX};
|
||||
hwloc_cpuset_t ompi_proc_applied_binding = NULL;
|
||||
pmix_process_info_t pmix_process_info = {0};
|
||||
pmix_process_info_t pmix_process_info = {
|
||||
.my_name = {OPAL_JOBID_INVALID, OPAL_VPID_INVALID},
|
||||
.nodename = NULL,
|
||||
.pid = 0,
|
||||
.top_session_dir = NULL,
|
||||
.job_session_dir = NULL,
|
||||
.proc_session_dir = NULL,
|
||||
.my_local_rank = 0,
|
||||
.my_node_rank = 0,
|
||||
.num_local_peers = 0,
|
||||
.num_procs = 0,
|
||||
.app_num = 0,
|
||||
.univ_size = 0,
|
||||
.app_sizes = NULL,
|
||||
.app_ldrs = NULL,
|
||||
.cpuset = NULL,
|
||||
.command = NULL,
|
||||
.num_apps = 0,
|
||||
.initial_wdir = NULL,
|
||||
.reincarnation = 0
|
||||
};
|
||||
bool pmix_proc_is_bound = false;
|
||||
bool ompi_singleton = false;
|
||||
|
||||
|
@ -1 +1 @@
|
||||
Subproject commit 98d14d55f8d4bd27fe6eb1e508c336702e1fbf76
|
||||
Subproject commit a18e53138298d61a01fec4471518140304539e8c
|
@ -187,7 +187,7 @@ static inline mca_rcache_base_registration_t *mca_rcache_grdma_remove_lru_head(m
|
||||
/* registration has been selected for removal and is no longer in the LRU. mark it
|
||||
* as such. */
|
||||
new_flags = (old_flags & ~MCA_RCACHE_GRDMA_REG_FLAG_IN_LRU) | MCA_RCACHE_FLAGS_INVALID;
|
||||
if (opal_atomic_compare_exchange_strong_32(&old_reg->flags, &old_flags, new_flags)) {
|
||||
if (opal_atomic_compare_exchange_strong_32((opal_atomic_int32_t*)&old_reg->flags, &old_flags, new_flags)) {
|
||||
break;
|
||||
}
|
||||
} while (1);
|
||||
|
@ -54,6 +54,7 @@
|
||||
#include "opal/mca/installdirs/base/base.h"
|
||||
#include "opal/mca/memory/base/base.h"
|
||||
#include "opal/mca/patcher/base/base.h"
|
||||
#include "opal/mca/pmix/base/base.h"
|
||||
#include "opal/mca/memcpy/base/base.h"
|
||||
#include "opal/mca/hwloc/base/base.h"
|
||||
#include "opal/mca/reachable/base/base.h"
|
||||
@ -630,7 +631,7 @@ opal_init_util(int* pargc, char*** pargv)
|
||||
static mca_base_framework_t *opal_init_frameworks[] = {
|
||||
&opal_hwloc_base_framework, &opal_memcpy_base_framework, &opal_memchecker_base_framework,
|
||||
&opal_backtrace_base_framework, &opal_timer_base_framework, &opal_event_base_framework,
|
||||
&opal_shmem_base_framework, &opal_reachable_base_framework,
|
||||
&opal_shmem_base_framework, &opal_reachable_base_framework, &opal_pmix_base_framework,
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
2
prrte
2
prrte
@ -1 +1 @@
|
||||
Subproject commit 9add90bcfe88af1994914a78544d6236327be10e
|
||||
Subproject commit cdea5231171b2fdea11269033de9e265fc7f3a63
|
Загрузка…
Ссылка в новой задаче
Block a user