1
1

Merge pull request #7559 from rhc54/topic/fixes

Bunch of fixes plus PMIx/PRRTE updates
Этот коммит содержится в:
Ralph Castain 2020-03-23 12:49:18 -07:00 коммит произвёл GitHub
родитель 48b52478ef 43f79be2e3
Коммит 9bb06d0077
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
9 изменённых файлов: 53 добавлений и 9 удалений

Просмотреть файл

@ -46,6 +46,15 @@ AC_DEFUN([OMPI_SETUP_PRRTE],[
[AC_HELP_STRING([--enable-prte-prefix-by-default],
[Make "mpirun ..." behave exactly the same as "mpirun --prefix \$prefix" (where \$prefix is the value given to --prefix in configure) (default:enabled)])])
# Refuse to configure the internal runtime against an external PMIx whose
# numerical version is below 4. The warnings explain the restriction and
# point at --disable-internal-rte as the way to skip this check.
AS_IF([test "$opal_external_pmix_happy" = "yes" && test $opal_numerical_pmix_version -lt 4 && test "$enable_internal_rte" != "no"],
[AC_MSG_WARN([OMPI's internal runtime environment "PRRTE" does not support])
AC_MSG_WARN([PMIx versions less than v4.x as they lack adequate tool])
AC_MSG_WARN([support. You can, if desired, build OMPI against an earlier])
AC_MSG_WARN([version of PMIx for strictly direct-launch purposes - e.g., using])
AC_MSG_WARN([Slurm's srun to launch the job - by configuring with the])
AC_MSG_WARN([--disable-internal-rte option.])
AC_MSG_ERROR([Cannot continue])])
AC_MSG_CHECKING([if RTE support is enabled])
if test "$enable_internal_rte" != "no"; then
AC_MSG_RESULT([yes])
@ -81,7 +90,7 @@ AC_DEFUN([OMPI_SETUP_PRRTE],[
opal_prrte_prefix_arg=
fi
opal_prrte_args="--prefix=$prefix --disable-dlopen $opal_prrte_prefix_arg $opal_prrte_libevent_arg $opal_prrte_hwloc_arg $opal_prrte_pmix_arg"
opal_prrte_args="--prefix=$prefix $opal_prrte_prefix_arg $opal_prrte_libevent_arg $opal_prrte_hwloc_arg $opal_prrte_pmix_arg"
AS_IF([test "$enable_debug" = "yes"],
[opal_prrte_args="--enable-debug $opal_prrte_args"
CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"],

Просмотреть файл

@ -125,6 +125,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
], [])],
[AC_MSG_RESULT([found])
opal_external_pmix_version=4x
opal_numerical_pmix_version=4
opal_external_pmix_version_found=1
opal_external_pmix_happy=yes],
[AC_MSG_RESULT([not found])])])
@ -139,6 +140,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
], [])],
[AC_MSG_RESULT([found])
opal_external_pmix_version=3x
opal_numerical_pmix_version=3
opal_external_pmix_version_found=1
opal_external_pmix_happy=yes],
[AC_MSG_RESULT([not found])])])
@ -153,6 +155,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
], [])],
[AC_MSG_RESULT([found])
opal_external_pmix_version=2x
opal_numerical_pmix_version=2
opal_external_pmix_version_found=1
opal_external_pmix_happy=yes],
[AC_MSG_RESULT([not found])])])
@ -167,6 +170,7 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
], [])],
[AC_MSG_RESULT([found])
opal_external_pmix_version=1x
opal_numerical_pmix_version=1
opal_external_pmix_version_found=1
opal_external_have_pmix1=1
opal_external_pmix_happy=yes],
@ -179,6 +183,12 @@ AC_DEFUN([OPAL_CHECK_PMIX_LIB],[
opal_external_pmix_happy=no])
])
# An external PMIx was found, but its numerical version is below 3:
# print the explanation and stop configure with an error.
AS_IF([test "$opal_external_pmix_happy" = "yes" && test $opal_numerical_pmix_version -lt 3],
[AC_MSG_WARN([OMPI no longer supports PMIx versions prior to v3])
AC_MSG_WARN([Please direct us to a more current PMIx release or])
AC_MSG_WARN([use the internally provided one])
AC_MSG_ERROR([Cannot continue])])
AS_IF([test "$opal_external_pmix_happy" = "yes"],
[$3
# add the new flags to our wrapper compilers

Просмотреть файл

@ -121,6 +121,10 @@ int ompi_interlib_declare(int threadlevel, char *version)
PMIX_INFO_DESTRUCT(&info[3]);
/* account for our refcount on pmix_init */
PMIx_Finalize(NULL, 0);
ret = opal_pmix_convert_status(rc);
if (ompi_singleton && PMIX_ERR_UNREACH == rc) {
ret = OMPI_SUCCESS;
} else {
ret = opal_pmix_convert_status(rc);
}
return ret;
}

Просмотреть файл

@ -85,8 +85,8 @@ static void try_kill_peers(ompi_communicator_t *comm,
procs = (ompi_process_name_t*) calloc(nprocs, sizeof(ompi_process_name_t));
if (NULL == procs) {
/* quick clean orte and get out */
ompi_rte_abort(errno, "Abort: unable to alloc memory to kill procs");
/* quick clean RTE and get out */
ompi_rte_abort(errcode, "Abort: unable to alloc memory to kill procs");
}
/* put all the local group procs in the abort list */

Просмотреть файл

@ -61,7 +61,27 @@
opal_process_name_t pmix_name_wildcard = {UINT32_MAX-1, UINT32_MAX-1};
opal_process_name_t pmix_name_invalid = {UINT32_MAX, UINT32_MAX};
hwloc_cpuset_t ompi_proc_applied_binding = NULL;
pmix_process_info_t pmix_process_info = {0};
/*
 * Global process-info record with an explicit initial state: the name
 * starts as the invalid jobid/vpid pair, every listed pointer member
 * starts as NULL, and every listed counter/rank member starts at zero.
 */
pmix_process_info_t pmix_process_info = {
.my_name = {OPAL_JOBID_INVALID, OPAL_VPID_INVALID},
.nodename = NULL,
.pid = 0,
.top_session_dir = NULL,
.job_session_dir = NULL,
.proc_session_dir = NULL,
.my_local_rank = 0,
.my_node_rank = 0,
.num_local_peers = 0,
.num_procs = 0,
.app_num = 0,
.univ_size = 0,
.app_sizes = NULL,
.app_ldrs = NULL,
.cpuset = NULL,
.command = NULL,
.num_apps = 0,
.initial_wdir = NULL,
.reincarnation = 0
};
bool pmix_proc_is_bound = false;
bool ompi_singleton = false;

@ -1 +1 @@
Subproject commit 98d14d55f8d4bd27fe6eb1e508c336702e1fbf76
Subproject commit a18e53138298d61a01fec4471518140304539e8c

Просмотреть файл

@ -187,7 +187,7 @@ static inline mca_rcache_base_registration_t *mca_rcache_grdma_remove_lru_head(m
/* registration has been selected for removal and is no longer in the LRU. mark it
* as such. */
new_flags = (old_flags & ~MCA_RCACHE_GRDMA_REG_FLAG_IN_LRU) | MCA_RCACHE_FLAGS_INVALID;
if (opal_atomic_compare_exchange_strong_32(&old_reg->flags, &old_flags, new_flags)) {
if (opal_atomic_compare_exchange_strong_32((opal_atomic_int32_t*)&old_reg->flags, &old_flags, new_flags)) {
break;
}
} while (1);

Просмотреть файл

@ -54,6 +54,7 @@
#include "opal/mca/installdirs/base/base.h"
#include "opal/mca/memory/base/base.h"
#include "opal/mca/patcher/base/base.h"
#include "opal/mca/pmix/base/base.h"
#include "opal/mca/memcpy/base/base.h"
#include "opal/mca/hwloc/base/base.h"
#include "opal/mca/reachable/base/base.h"
@ -630,7 +631,7 @@ opal_init_util(int* pargc, char*** pargv)
static mca_base_framework_t *opal_init_frameworks[] = {
&opal_hwloc_base_framework, &opal_memcpy_base_framework, &opal_memchecker_base_framework,
&opal_backtrace_base_framework, &opal_timer_base_framework, &opal_event_base_framework,
&opal_shmem_base_framework, &opal_reachable_base_framework,
&opal_shmem_base_framework, &opal_reachable_base_framework, &opal_pmix_base_framework,
NULL,
};

2
prrte

@ -1 +1 @@
Subproject commit 9add90bcfe88af1994914a78544d6236327be10e
Subproject commit cdea5231171b2fdea11269033de9e265fc7f3a63