Per discussion on the PMIx side, do a better job of detecting mismatches between location directives for OPAL and PMIx. Provide a more helpful error message and error out if we find a mismatch. If any OPAL values are set and the PMIx equivalent is not, then transfer it.
Do not clear PMIX_INSTALL_PREFIX from the daemon's launch environment.
Fixes #3980
Closes #4007
Refs #3985
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
e79eb85690
Коммит
a239b4c3c3
@ -12,6 +12,8 @@
|
||||
|
||||
EXTRA_DIST = autogen.subdirs
|
||||
|
||||
dist_opaldata_DATA = help-pmix-pmix2x.txt
|
||||
|
||||
SUBDIRS = pmix
|
||||
|
||||
sources = \
|
||||
|
32
opal/mca/pmix/pmix2x/help-pmix-pmix2x.txt
Обычный файл
32
opal/mca/pmix/pmix2x/help-pmix-pmix2x.txt
Обычный файл
@ -0,0 +1,32 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2017 Intel, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the US/English help file for Open MPI MCA error messages.
|
||||
#
|
||||
[evars]
|
||||
We found conflicting directives regarding the location of OPAL vs PMIx
|
||||
installation directories:
|
||||
|
||||
%s
|
||||
|
||||
This usually indicates that OMPI was configured to use its internal copy
|
||||
of PMIx, but another installation of PMIx is also in use on this system
|
||||
and could potentially cause confusion between the two sets of plugins.
|
||||
Please either unset the indicated environment variables, or configure
|
||||
OMPI to use the external PMIx installation.
|
@ -34,6 +34,7 @@
|
||||
#include "opal/threads/threads.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/util/opal_environ.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/proc.h"
|
||||
#include "opal/util/show_help.h"
|
||||
@ -1391,6 +1392,113 @@ opal_pmix_alloc_directive_t pmix2x_convert_allocdir(pmix_alloc_directive_t dir)
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
char *opalname;
|
||||
char *opalvalue;
|
||||
char *pmixname;
|
||||
char *pmixvalue;
|
||||
bool mismatched;
|
||||
} opal_pmix_evar_t;
|
||||
static void econ(opal_pmix_evar_t *p)
|
||||
{
|
||||
p->opalname = NULL;
|
||||
p->opalvalue = NULL;
|
||||
p->pmixname = NULL;
|
||||
p->pmixvalue = NULL;
|
||||
p->mismatched = false;
|
||||
}
|
||||
static OBJ_CLASS_INSTANCE(opal_pmix_evar_t,
|
||||
opal_list_item_t,
|
||||
econ, NULL);
|
||||
/* One OPAL installation-location environment variable and the name of the
 * PMIx variable that corresponds to it. */
struct known_value {
    char *opalname;
    char *pmixname;
};

/* Table of every OPAL/PMIx variable pair that is cross-checked.  The table
 * is only ever read, so it is declared const to keep it out of writable
 * data and to let the compiler reject accidental modification. */
static const struct known_value known_values[] = {
    {"OPAL_PREFIX", "PMIX_INSTALL_PREFIX"},
    {"OPAL_EXEC_PREFIX", "PMIX_EXEC_PREFIX"},
    {"OPAL_BINDIR", "PMIX_BINDIR"},
    {"OPAL_SBINDIR", "PMIX_SBINDIR"},
    {"OPAL_LIBEXECDIR", "PMIX_LIBEXECDIR"},
    {"OPAL_DATAROOTDIR", "PMIX_DATAROOTDIR"},
    {"OPAL_DATADIR", "PMIX_DATADIR"},
    {"OPAL_SYSCONFDIR", "PMIX_SYSCONFDIR"},
    {"OPAL_SHAREDSTATEDIR", "PMIX_SHAREDSTATEDIR"},
    {"OPAL_LOCALSTATEDIR", "PMIX_LOCALSTATEDIR"},
    {"OPAL_LIBDIR", "PMIX_LIBDIR"},
    {"OPAL_INCLUDEDIR", "PMIX_INCLUDEDIR"},
    {"OPAL_INFODIR", "PMIX_INFODIR"},
    {"OPAL_MANDIR", "PMIX_MANDIR"},
    {"OPAL_PKGDATADIR", "PMIX_PKGDATADIR"},
    {"OPAL_PKGLIBDIR", "PMIX_PKGLIBDIR"},
    {"OPAL_PKGINCLUDEDIR", "PMIX_PKGINCLUDEDIR"}
};
|
||||
|
||||
|
||||
int opal_pmix_pmix2x_check_evars(void)
|
||||
{
|
||||
opal_list_t values;
|
||||
int nvals, i;
|
||||
opal_pmix_evar_t *evar;
|
||||
bool mismatched = false;
|
||||
char *tmp=NULL, *tmp2;
|
||||
|
||||
OBJ_CONSTRUCT(&values, opal_list_t);
|
||||
nvals = sizeof(known_values) / sizeof(struct known_value);
|
||||
for (i=0; i < nvals; i++) {
|
||||
evar = OBJ_NEW(opal_pmix_evar_t);
|
||||
evar->opalname = known_values[i].opalname;
|
||||
evar->opalvalue = getenv(evar->opalname);
|
||||
evar->pmixname = known_values[i].pmixname;
|
||||
evar->pmixvalue = getenv(evar->pmixname);
|
||||
/* if the OPAL value is not set and the PMIx value is,
|
||||
* then that is a problem. Likewise, if both are set
|
||||
* and are different, then that is also a problem. Note that
|
||||
* it is okay for the OPAL value to be set and the PMIx
|
||||
* value to not be set */
|
||||
if ((NULL == evar->opalvalue && NULL != evar->pmixvalue) ||
|
||||
(NULL != evar->opalvalue && NULL != evar->pmixvalue &&
|
||||
0 != strcmp(evar->opalvalue, evar->pmixvalue))) {
|
||||
evar->mismatched = true;
|
||||
mismatched = true;
|
||||
}
|
||||
opal_list_append(&values, &evar->super);
|
||||
}
|
||||
if (!mismatched) {
|
||||
/* transfer any OPAL values that were set - we already verified
|
||||
* that the equivalent PMIx value, if present, matches, so
|
||||
* don't overwrite it */
|
||||
OPAL_LIST_FOREACH(evar, &values, opal_pmix_evar_t) {
|
||||
if (NULL != evar->opalvalue && NULL == evar->pmixvalue) {
|
||||
opal_setenv(evar->pmixname, evar->opalvalue, true, &environ);
|
||||
}
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&values);
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
/* we have at least one mismatch somewhere, so print out the table */
|
||||
OPAL_LIST_FOREACH(evar, &values, opal_pmix_evar_t) {
|
||||
if (evar->mismatched) {
|
||||
if (NULL == tmp) {
|
||||
asprintf(&tmp, " %s: %s\n %s: %s",
|
||||
evar->opalname, (NULL == evar->opalvalue) ? "NULL" : evar->opalvalue,
|
||||
evar->pmixname, (NULL == evar->pmixvalue) ? "NULL" : evar->pmixvalue);
|
||||
} else {
|
||||
asprintf(&tmp2, "%s\n\n %s: %s\n %s: %s", tmp,
|
||||
evar->opalname, (NULL == evar->opalvalue) ? "NULL" : evar->opalvalue,
|
||||
evar->pmixname, (NULL == evar->pmixvalue) ? "NULL" : evar->pmixvalue);
|
||||
free(tmp);
|
||||
tmp = tmp2;
|
||||
}
|
||||
}
|
||||
}
|
||||
opal_show_help("help-pmix-pmix2x.txt", "evars", true, tmp);
|
||||
free(tmp);
|
||||
return OPAL_ERR_SILENT;
|
||||
}
|
||||
|
||||
/**** INSTANTIATE INTERNAL CLASSES ****/
|
||||
OBJ_CLASS_INSTANCE(opal_pmix2x_jobid_trkr_t,
|
||||
opal_list_item_t,
|
||||
|
@ -46,6 +46,7 @@ typedef struct {
|
||||
int cache_size;
|
||||
opal_list_t cache;
|
||||
opal_list_t dmdx;
|
||||
bool silence_warning;
|
||||
} mca_pmix_pmix2x_component_t;
|
||||
|
||||
OPAL_DECLSPEC extern mca_pmix_pmix2x_component_t mca_pmix_pmix2x_component;
|
||||
@ -290,6 +291,8 @@ OPAL_MODULE_DECLSPEC int pmix2x_server_notify_event(int status,
|
||||
|
||||
|
||||
/**** COMPONENT UTILITY FUNCTIONS ****/
|
||||
OPAL_MODULE_DECLSPEC int opal_pmix_pmix2x_check_evars(void);
|
||||
|
||||
OPAL_MODULE_DECLSPEC void pmix2x_event_hdlr(size_t evhdlr_registration_id,
|
||||
pmix_status_t status, const pmix_proc_t *source,
|
||||
pmix_info_t info[], size_t ninfo,
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/opal_environ.h"
|
||||
#include "opal/util/proc.h"
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "opal/mca/pmix/base/base.h"
|
||||
#include "pmix2x.h"
|
||||
@ -66,7 +67,6 @@ int pmix2x_client_init(opal_list_t *ilist)
|
||||
pmix_info_t *pinfo;
|
||||
size_t ninfo, n;
|
||||
opal_value_t *ival;
|
||||
char *evar;
|
||||
|
||||
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
|
||||
"PMIx_client init");
|
||||
@ -78,9 +78,9 @@ int pmix2x_client_init(opal_list_t *ilist)
|
||||
asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg);
|
||||
putenv(dbgvalue);
|
||||
}
|
||||
if ((NULL != (evar = getenv("OPAL_PREFIX"))) &&
|
||||
(NULL == getenv("PMIX_INSTALL_PREFIX"))) {
|
||||
opal_setenv("PMIX_INSTALL_PREFIX", evar, false, &environ);
|
||||
/* check the evars for a mismatch */
|
||||
if (OPAL_SUCCESS != (dbg = opal_pmix_pmix2x_check_evars())) {
|
||||
return dbg;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -33,6 +33,7 @@ const char *opal_pmix_pmix2x_component_version_string =
|
||||
/*
|
||||
* Local function
|
||||
*/
|
||||
static int external_register(void);
|
||||
static int external_open(void);
|
||||
static int external_close(void);
|
||||
static int external_component_query(mca_base_module_t **module, int *priority);
|
||||
@ -65,6 +66,7 @@ mca_pmix_pmix2x_component_t mca_pmix_pmix2x_component = {
|
||||
.mca_open_component = external_open,
|
||||
.mca_close_component = external_close,
|
||||
.mca_query_component = external_component_query,
|
||||
.mca_register_component_params = external_register
|
||||
},
|
||||
/* Next the MCA v1.0.0 component meta data */
|
||||
.base_data = {
|
||||
@ -75,6 +77,21 @@ mca_pmix_pmix2x_component_t mca_pmix_pmix2x_component = {
|
||||
.native_launch = false
|
||||
};
|
||||
|
||||
static int external_register(void)
|
||||
{
|
||||
mca_base_component_t *component = &mca_pmix_pmix2x_component.super.base_version;
|
||||
|
||||
mca_pmix_pmix2x_component.silence_warning = false;
|
||||
(void) mca_base_component_var_register (component, "silence_warning",
|
||||
"Silence warning about PMIX_INSTALL_PREFIX",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_4,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_pmix_pmix2x_component.silence_warning);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static int external_open(void)
|
||||
{
|
||||
mca_pmix_pmix2x_component.evindex = 0;
|
||||
|
@ -100,7 +100,6 @@ int pmix2x_server_init(opal_pmix_server_module_t *module,
|
||||
opal_pmix2x_event_t *event;
|
||||
opal_pmix2x_jobid_trkr_t *job;
|
||||
opal_pmix_lock_t lk;
|
||||
char *evar;
|
||||
|
||||
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
|
||||
|
||||
@ -109,9 +108,9 @@ int pmix2x_server_init(opal_pmix_server_module_t *module,
|
||||
asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg);
|
||||
putenv(dbgvalue);
|
||||
}
|
||||
if ((NULL != (evar = getenv("OPAL_PREFIX"))) &&
|
||||
(NULL == getenv("PMIX_INSTALL_PREFIX"))) {
|
||||
opal_setenv("PMIX_INSTALL_PREFIX", evar, false, &environ);
|
||||
/* check the evars for a mismatch */
|
||||
if (OPAL_SUCCESS != (dbg = opal_pmix_pmix2x_check_evars())) {
|
||||
return dbg;
|
||||
}
|
||||
}
|
||||
++opal_pmix_base.initialized;
|
||||
|
@ -285,8 +285,9 @@ int orte_daemon(int argc, char *argv[])
|
||||
*/
|
||||
orte_launch_environ = opal_argv_copy(environ);
|
||||
|
||||
/* purge any ess flag set in the environ when we were launched */
|
||||
/* purge any ess/pmix flags set in the environ when we were launched */
|
||||
opal_unsetenv(OPAL_MCA_PREFIX"ess", &orte_launch_environ);
|
||||
opal_unsetenv(OPAL_MCA_PREFIX"pmix", &orte_launch_environ);
|
||||
|
||||
/* if orte_daemon_debug is set, let someone know we are alive right
|
||||
* away just in case we have a problem along the way
|
||||
|
@ -537,11 +537,6 @@ int orte_submit_init(int argc, char *argv[],
|
||||
*/
|
||||
opal_finalize();
|
||||
|
||||
/* clear params from the environment so our children
|
||||
* don't pick them up */
|
||||
opal_unsetenv(OPAL_MCA_PREFIX"ess", &environ);
|
||||
opal_unsetenv(OPAL_MCA_PREFIX"pmix", &environ);
|
||||
|
||||
if (ORTE_PROC_IS_TOOL) {
|
||||
opal_value_t val;
|
||||
/* extract the name */
|
||||
@ -589,6 +584,10 @@ int orte_submit_init(int argc, char *argv[],
|
||||
* orterun
|
||||
*/
|
||||
orte_launch_environ = opal_argv_copy(environ);
|
||||
/* clear params from the environment so our children
|
||||
* don't pick them up */
|
||||
opal_unsetenv(OPAL_MCA_PREFIX"ess", &orte_launch_environ);
|
||||
opal_unsetenv(OPAL_MCA_PREFIX"pmix", &orte_launch_environ);
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user