1
1

Per discussion on the PMIx side, do a better job of detecting mismatches between location directives for OPAL and PMIx. Provide a more helpful error message and error out if we find a mismatch. If an OPAL value is set and its PMIx equivalent is not, then transfer the OPAL value to the PMIx variable.

Do not clear PMIX_INSTALL_PREFIX from the daemon's launch environment

Fixes #3980
Closes #4007
Refs #3985

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2017-08-02 19:46:38 -06:00
родитель e79eb85690
Коммит a239b4c3c3
9 изменённых файлов: 175 добавлений и 14 удалений

Просмотреть файл

@ -12,6 +12,8 @@
EXTRA_DIST = autogen.subdirs
dist_opaldata_DATA = help-pmix-pmix2x.txt
SUBDIRS = pmix
sources = \

32
opal/mca/pmix/pmix2x/help-pmix-pmix2x.txt Обычный файл
Просмотреть файл

@ -0,0 +1,32 @@
# -*- text -*-
#
# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2017 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English help file for Open MPI MCA error messages.
#
[evars]
We found conflicting directives regarding the location of OPAL vs PMIx
installation directories:
%s
This usually indicates that OMPI was configured to use its internal copy
of PMIx, but another installation of PMIx is also in use on this system
and could potentially cause confusion between the two sets of plugins.
Please either unset the indicated environment variables, or configure
OMPI to use the external PMIx installation.

Просмотреть файл

@ -34,6 +34,7 @@
#include "opal/threads/threads.h"
#include "opal/util/argv.h"
#include "opal/util/error.h"
#include "opal/util/opal_environ.h"
#include "opal/util/output.h"
#include "opal/util/proc.h"
#include "opal/util/show_help.h"
@ -1391,6 +1392,113 @@ opal_pmix_alloc_directive_t pmix2x_convert_allocdir(pmix_alloc_directive_t dir)
}
}
/* List item recording one OPAL/PMIx environment-variable pair together
 * with the values found in the environment by
 * opal_pmix_pmix2x_check_evars(). */
typedef struct {
/* list linkage; the object is appended to an opal_list_t */
opal_list_item_t super;
/* name of the OPAL environment variable (taken from known_values) */
char *opalname;
/* result of getenv(opalname); NULL when the variable is not set */
char *opalvalue;
/* name of the PMIx environment variable (taken from known_values) */
char *pmixname;
/* result of getenv(pmixname); NULL when the variable is not set */
char *pmixvalue;
/* true when this pair was flagged as conflicting */
bool mismatched;
} opal_pmix_evar_t;
/* Constructor for opal_pmix_evar_t: start with no names, no values,
 * and the pair not flagged as mismatched. */
static void econ(opal_pmix_evar_t *ev)
{
    ev->mismatched = false;
    ev->pmixvalue = NULL;
    ev->pmixname = NULL;
    ev->opalvalue = NULL;
    ev->opalname = NULL;
}
/* File-local class instance for opal_pmix_evar_t, derived from
 * opal_list_item_t. econ is supplied as the constructor; NULL is passed
 * in the remaining (destructor) slot, so nothing held by the object is
 * freed on release. */
static OBJ_CLASS_INSTANCE(opal_pmix_evar_t,
opal_list_item_t,
econ, NULL);
/* One OPAL install-location environment variable and the PMIx variable
 * that is treated as its counterpart. */
struct known_value {
    char *opalname;     /* OPAL-side variable name */
    char *pmixname;     /* PMIx-side variable name */
};

/* The OPAL/PMIx variable pairs that are compared against each other. */
static struct known_value known_values[] = {
    { .opalname = "OPAL_PREFIX",         .pmixname = "PMIX_INSTALL_PREFIX" },
    { .opalname = "OPAL_EXEC_PREFIX",    .pmixname = "PMIX_EXEC_PREFIX" },
    { .opalname = "OPAL_BINDIR",         .pmixname = "PMIX_BINDIR" },
    { .opalname = "OPAL_SBINDIR",        .pmixname = "PMIX_SBINDIR" },
    { .opalname = "OPAL_LIBEXECDIR",     .pmixname = "PMIX_LIBEXECDIR" },
    { .opalname = "OPAL_DATAROOTDIR",    .pmixname = "PMIX_DATAROOTDIR" },
    { .opalname = "OPAL_DATADIR",        .pmixname = "PMIX_DATADIR" },
    { .opalname = "OPAL_SYSCONFDIR",     .pmixname = "PMIX_SYSCONFDIR" },
    { .opalname = "OPAL_SHAREDSTATEDIR", .pmixname = "PMIX_SHAREDSTATEDIR" },
    { .opalname = "OPAL_LOCALSTATEDIR",  .pmixname = "PMIX_LOCALSTATEDIR" },
    { .opalname = "OPAL_LIBDIR",         .pmixname = "PMIX_LIBDIR" },
    { .opalname = "OPAL_INCLUDEDIR",     .pmixname = "PMIX_INCLUDEDIR" },
    { .opalname = "OPAL_INFODIR",        .pmixname = "PMIX_INFODIR" },
    { .opalname = "OPAL_MANDIR",         .pmixname = "PMIX_MANDIR" },
    { .opalname = "OPAL_PKGDATADIR",     .pmixname = "PMIX_PKGDATADIR" },
    { .opalname = "OPAL_PKGLIBDIR",      .pmixname = "PMIX_PKGLIBDIR" },
    { .opalname = "OPAL_PKGINCLUDEDIR",  .pmixname = "PMIX_PKGINCLUDEDIR" }
};
/**
 * Compare the OPAL install-location environment variables against their
 * PMIx counterparts (the pairs listed in known_values).
 *
 * A pair is a mismatch when the PMIx variable is set while the OPAL one
 * is not, or when both are set to different strings.
 *
 * If no pair mismatches, every OPAL value that is set while its PMIx
 * counterpart is unset is copied into the PMIx variable in the process
 * environment, and OPAL_SUCCESS is returned.
 *
 * If at least one pair mismatches, the offending pairs are reported via
 * the "evars" topic of help-pmix-pmix2x.txt and OPAL_ERR_SILENT is
 * returned; the environment is left untouched.
 *
 * @return OPAL_SUCCESS or OPAL_ERR_SILENT as described above.
 */
int opal_pmix_pmix2x_check_evars(void)
{
    opal_list_t values;
    int nvals, i;
    opal_pmix_evar_t *evar;
    bool mismatched = false;
    char *tmp = NULL, *tmp2 = NULL;

    OBJ_CONSTRUCT(&values, opal_list_t);
    nvals = sizeof(known_values) / sizeof(struct known_value);
    for (i = 0; i < nvals; i++) {
        evar = OBJ_NEW(opal_pmix_evar_t);
        evar->opalname = known_values[i].opalname;
        evar->opalvalue = getenv(evar->opalname);
        evar->pmixname = known_values[i].pmixname;
        evar->pmixvalue = getenv(evar->pmixname);
        /* if the OPAL value is not set and the PMIx value is,
         * then that is a problem. Likewise, if both are set
         * and are different, then that is also a problem. Note that
         * it is okay for the OPAL value to be set and the PMIx
         * value to not be set */
        if ((NULL == evar->opalvalue && NULL != evar->pmixvalue) ||
            (NULL != evar->opalvalue && NULL != evar->pmixvalue &&
             0 != strcmp(evar->opalvalue, evar->pmixvalue))) {
            evar->mismatched = true;
            mismatched = true;
        }
        opal_list_append(&values, &evar->super);
    }

    if (!mismatched) {
        /* transfer any OPAL values that were set - we already verified
         * that the equivalent PMIx value, if present, matches, so
         * don't overwrite it */
        OPAL_LIST_FOREACH(evar, &values, opal_pmix_evar_t) {
            if (NULL != evar->opalvalue && NULL == evar->pmixvalue) {
                opal_setenv(evar->pmixname, evar->opalvalue, true, &environ);
            }
        }
        OPAL_LIST_DESTRUCT(&values);
        return OPAL_SUCCESS;
    }

    /* we have at least one mismatch somewhere, so print out the table */
    OPAL_LIST_FOREACH(evar, &values, opal_pmix_evar_t) {
        if (!evar->mismatched) {
            continue;
        }
        if (NULL == tmp) {
            if (0 > asprintf(&tmp, " %s: %s\n %s: %s",
                             evar->opalname, (NULL == evar->opalvalue) ? "NULL" : evar->opalvalue,
                             evar->pmixname, (NULL == evar->pmixvalue) ? "NULL" : evar->pmixvalue)) {
                /* the pointer is indeterminate after a failed asprintf,
                 * so reset it rather than print or free it */
                tmp = NULL;
                break;
            }
        } else {
            if (0 > asprintf(&tmp2, "%s\n\n %s: %s\n %s: %s", tmp,
                             evar->opalname, (NULL == evar->opalvalue) ? "NULL" : evar->opalvalue,
                             evar->pmixname, (NULL == evar->pmixvalue) ? "NULL" : evar->pmixvalue)) {
                /* out of memory: report what was collected so far */
                break;
            }
            free(tmp);
            tmp = tmp2;
        }
    }
    opal_show_help("help-pmix-pmix2x.txt", "evars", true,
                   (NULL == tmp) ? "(unable to list the conflicting variables)" : tmp);
    free(tmp);
    /* release the tracking objects on the error path too - previously
     * they were only released when no mismatch was found */
    OPAL_LIST_DESTRUCT(&values);
    return OPAL_ERR_SILENT;
}
/**** INSTANTIATE INTERNAL CLASSES ****/
OBJ_CLASS_INSTANCE(opal_pmix2x_jobid_trkr_t,
opal_list_item_t,

Просмотреть файл

@ -46,6 +46,7 @@ typedef struct {
int cache_size;
opal_list_t cache;
opal_list_t dmdx;
bool silence_warning;
} mca_pmix_pmix2x_component_t;
OPAL_DECLSPEC extern mca_pmix_pmix2x_component_t mca_pmix_pmix2x_component;
@ -290,6 +291,8 @@ OPAL_MODULE_DECLSPEC int pmix2x_server_notify_event(int status,
/**** COMPONENT UTILITY FUNCTIONS ****/
OPAL_MODULE_DECLSPEC int opal_pmix_pmix2x_check_evars(void);
OPAL_MODULE_DECLSPEC void pmix2x_event_hdlr(size_t evhdlr_registration_id,
pmix_status_t status, const pmix_proc_t *source,
pmix_info_t info[], size_t ninfo,

Просмотреть файл

@ -31,6 +31,7 @@
#include "opal/util/argv.h"
#include "opal/util/opal_environ.h"
#include "opal/util/proc.h"
#include "opal/util/show_help.h"
#include "opal/mca/pmix/base/base.h"
#include "pmix2x.h"
@ -66,7 +67,6 @@ int pmix2x_client_init(opal_list_t *ilist)
pmix_info_t *pinfo;
size_t ninfo, n;
opal_value_t *ival;
char *evar;
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
"PMIx_client init");
@ -78,9 +78,9 @@ int pmix2x_client_init(opal_list_t *ilist)
asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg);
putenv(dbgvalue);
}
if ((NULL != (evar = getenv("OPAL_PREFIX"))) &&
(NULL == getenv("PMIX_INSTALL_PREFIX"))) {
opal_setenv("PMIX_INSTALL_PREFIX", evar, false, &environ);
/* check the evars for a mismatch */
if (OPAL_SUCCESS != (dbg = opal_pmix_pmix2x_check_evars())) {
return dbg;
}
}

Просмотреть файл

@ -33,6 +33,7 @@ const char *opal_pmix_pmix2x_component_version_string =
/*
* Local function
*/
static int external_register(void);
static int external_open(void);
static int external_close(void);
static int external_component_query(mca_base_module_t **module, int *priority);
@ -65,6 +66,7 @@ mca_pmix_pmix2x_component_t mca_pmix_pmix2x_component = {
.mca_open_component = external_open,
.mca_close_component = external_close,
.mca_query_component = external_component_query,
.mca_register_component_params = external_register
},
/* Next the MCA v1.0.0 component meta data */
.base_data = {
@ -75,6 +77,21 @@ mca_pmix_pmix2x_component_t mca_pmix_pmix2x_component = {
.native_launch = false
};
/*
 * Register this component's MCA variables: a single boolean,
 * "silence_warning", stored in mca_pmix_pmix2x_component.silence_warning.
 * Always returns OPAL_SUCCESS; the result of the registration call is
 * deliberately discarded.
 */
static int external_register(void)
{
    bool *silence = &mca_pmix_pmix2x_component.silence_warning;

    /* start from "false"; the registration call is handed this storage */
    *silence = false;
    (void) mca_base_component_var_register(&mca_pmix_pmix2x_component.super.base_version,
                                           "silence_warning",
                                           "Silence warning about PMIX_INSTALL_PREFIX",
                                           MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                           OPAL_INFO_LVL_4,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           silence);

    return OPAL_SUCCESS;
}
static int external_open(void)
{
mca_pmix_pmix2x_component.evindex = 0;

Просмотреть файл

@ -100,7 +100,6 @@ int pmix2x_server_init(opal_pmix_server_module_t *module,
opal_pmix2x_event_t *event;
opal_pmix2x_jobid_trkr_t *job;
opal_pmix_lock_t lk;
char *evar;
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
@ -109,9 +108,9 @@ int pmix2x_server_init(opal_pmix_server_module_t *module,
asprintf(&dbgvalue, "PMIX_DEBUG=%d", dbg);
putenv(dbgvalue);
}
if ((NULL != (evar = getenv("OPAL_PREFIX"))) &&
(NULL == getenv("PMIX_INSTALL_PREFIX"))) {
opal_setenv("PMIX_INSTALL_PREFIX", evar, false, &environ);
/* check the evars for a mismatch */
if (OPAL_SUCCESS != (dbg = opal_pmix_pmix2x_check_evars())) {
return dbg;
}
}
++opal_pmix_base.initialized;

Просмотреть файл

@ -285,8 +285,9 @@ int orte_daemon(int argc, char *argv[])
*/
orte_launch_environ = opal_argv_copy(environ);
/* purge any ess flag set in the environ when we were launched */
/* purge any ess/pmix flags set in the environ when we were launched */
opal_unsetenv(OPAL_MCA_PREFIX"ess", &orte_launch_environ);
opal_unsetenv(OPAL_MCA_PREFIX"pmix", &orte_launch_environ);
/* if orte_daemon_debug is set, let someone know we are alive right
* away just in case we have a problem along the way

Просмотреть файл

@ -537,11 +537,6 @@ int orte_submit_init(int argc, char *argv[],
*/
opal_finalize();
/* clear params from the environment so our children
* don't pick them up */
opal_unsetenv(OPAL_MCA_PREFIX"ess", &environ);
opal_unsetenv(OPAL_MCA_PREFIX"pmix", &environ);
if (ORTE_PROC_IS_TOOL) {
opal_value_t val;
/* extract the name */
@ -589,6 +584,10 @@ int orte_submit_init(int argc, char *argv[],
* orterun
*/
orte_launch_environ = opal_argv_copy(environ);
/* clear params from the environment so our children
* don't pick them up */
opal_unsetenv(OPAL_MCA_PREFIX"ess", &orte_launch_environ);
opal_unsetenv(OPAL_MCA_PREFIX"pmix", &orte_launch_environ);
}
return ORTE_SUCCESS;