1
1

Resolve the PMIx v3 incompatibility

Fix a couple of spots in OMPI to resolve compiler warnings. The spot in
comm_cid in particular may be responsible for some or all of the
comm_spawn issues, as it was passing an incorrect pointer to a macro,
thus causing memory corruption.

Update PRRTE and PMIx to deal with v3/v4 differences.

Signed-off-by: Ralph Castain <rhc@pmix.org>
Этот коммит содержится в:
Ralph Castain 2020-02-14 20:37:14 -08:00
родитель cb0bc201f3
Коммит 133e8eba22
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B63B630167D26BB5
5 изменённых файлов: 41 добавлений и 11 удалений

Просмотреть файл

@ -17,7 +17,7 @@
* Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016 IBM Corporation. All rights reserved.
@ -923,7 +923,7 @@ static int ompi_comm_allreduce_pmix_reduce_complete (ompi_comm_request_t *reques
cid_context->port_string,
cid_context->pmix_tag,
cid_context->iter);
PMIX_LOAD_KEY(&info, key);
PMIX_LOAD_KEY(info.key, key);
free(key);
if (bytes_written == -1) {
opal_output_verbose (verbosity_level, output_id, "writing info.key failed\n");
@ -934,7 +934,7 @@ static int ompi_comm_allreduce_pmix_reduce_complete (ompi_comm_request_t *reques
cid_context->port_string,
cid_context->pmix_tag,
cid_context->iter);
PMIX_LOAD_KEY(&pdat.key, key);
PMIX_LOAD_KEY((char*)pdat.key, key);
free(key);
if (bytes_written == -1) {
opal_output_verbose (verbosity_level, output_id, "writing pdat.value.key failed\n");

Просмотреть файл

@ -14,7 +14,7 @@
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
@ -68,12 +68,23 @@ static void model_callback(size_t refid, pmix_status_t status,
}
}
/*
 * Completion callback for an event handler registration request.
 *
 * cbdata is treated as the opal_pmix_lock_t handed in by the code that
 * issued the registration: the reported status is stored in that lock
 * and the thread waiting on it is then woken up.
 */
static void evhandler_reg_callbk(pmix_status_t status,
                                 size_t evhandler_ref,
                                 void *cbdata)
{
    opal_pmix_lock_t *reg_lock = cbdata;

    (void)evhandler_ref;  /* the handler reference is not used here */

    reg_lock->status = status;
    OPAL_PMIX_WAKEUP_THREAD(reg_lock);
}
int ompi_interlib_declare(int threadlevel, char *version)
{
pmix_info_t info[4], directives;
int ret;
pmix_status_t rc;
pmix_status_t code = PMIX_MODEL_DECLARED;
opal_pmix_lock_t mylock;
/* Register an event handler for library model declarations */
/* give it a name so we can distinguish it */
@ -82,10 +93,13 @@ int ompi_interlib_declare(int threadlevel, char *version)
* isn't required so long as the code that generates
* the event stipulates its range as proc_local. We rely
* on that here */
rc = PMIx_Register_event_handler(&code, 1, &directives, 1, model_callback, NULL, NULL);
OPAL_PMIX_CONSTRUCT_LOCK(&mylock);
PMIx_Register_event_handler(&code, 1, &directives, 1, model_callback, evhandler_reg_callbk, (void*)&mylock);
OPAL_PMIX_WAIT_THREAD(&mylock);
PMIX_INFO_DESTRUCT(&directives);
if (rc < 0) {
PMIX_INFO_DESTRUCT(&directives);
rc = mylock.status;
OPAL_PMIX_DESTRUCT_LOCK(&mylock);
if (PMIX_SUCCESS != rc) {
return OMPI_ERROR;
}

Просмотреть файл

@ -370,6 +370,17 @@ static void fence_release(pmix_status_t status, void *cbdata)
OPAL_POST_OBJECT(active);
}
/*
 * Invoked when an event handler registration has completed.
 *
 * Stores the resulting status in the opal_pmix_lock_t passed through
 * cbdata, then releases whoever is blocked waiting on that lock.
 */
static void evhandler_reg_callbk(pmix_status_t status,
                                 size_t evhandler_ref,
                                 void *cbdata)
{
    opal_pmix_lock_t *waiter = cbdata;

    (void)evhandler_ref;  /* registration reference is intentionally ignored */

    waiter->status = status;
    OPAL_PMIX_WAKEUP_THREAD(waiter);
}
int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
bool reinit_ok)
{
@ -382,6 +393,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
pmix_info_t info[2];
pmix_status_t rc;
OMPI_TIMING_INIT(64);
opal_pmix_lock_t mylock;
ompi_hook_base_mpi_init_top(argc, argv, requested, provided);
@ -523,10 +535,14 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
PMIX_INFO_LOAD(&info[0], PMIX_EVENT_HDLR_PREPEND, NULL, PMIX_BOOL);
/* give it a name so we can distinguish it */
PMIX_INFO_LOAD(&info[1], PMIX_EVENT_HDLR_NAME, "MPI-Default", PMIX_STRING);
rc = PMIx_Register_event_handler(NULL, 0, info, 2, ompi_errhandler_callback, NULL, NULL);
OPAL_PMIX_CONSTRUCT_LOCK(&mylock);
PMIx_Register_event_handler(NULL, 0, info, 2, ompi_errhandler_callback, evhandler_reg_callbk, (void*)&mylock);
OPAL_PMIX_WAIT_THREAD(&mylock);
rc = mylock.status;
OPAL_PMIX_DESTRUCT_LOCK(&mylock);
PMIX_INFO_DESTRUCT(&info[0]);
PMIX_INFO_DESTRUCT(&info[1]);
if (0 > rc) {
if (PMIX_SUCCESS != rc) {
error = "Error handler registration";
ret = opal_pmix_convert_status(rc);
goto error;

@ -1 +1 @@
Subproject commit c883ebee78b1d88f3a1fb145d31ede74b3c14fa5
Subproject commit bc8da6cf72f7d6c44f353ac23b3cbf703aa82827

2
prrte

@ -1 +1 @@
Subproject commit b571937ec4286c4e04a20882f5e02a63176e8190
Subproject commit d2d32607233f62caad880eba36eb238a3d80e1f5