
The ess pmi module was not handling aprun-launched daemons: all daemons thought they were vpid 1. Also, it turns out that on Cray systems using MOM nodes for launched jobs, just detecting whether or not a process is in a PAGG container is not sufficient. Crank up the priority of the alps PLM component in the event that configure detected the presence of both slurm and alps. Have the ESS pmi component open the pmix framework and select a pmix component. This commit was SVN r32773.
102 строки
2.6 KiB
C
102 строки
2.6 KiB
C
/*
|
|
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
|
|
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All
|
|
* rights reserved.
|
|
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*
|
|
* These symbols are in a file by themselves to provide nice linker
|
|
* semantics. Since linkers generally pull in symbols by object
|
|
* files, keeping these symbols as the only symbols in this file
|
|
* prevents utility programs such as "ompi_info" from having to import
|
|
* entire components just to query their version and parameters.
|
|
*/
|
|
|
|
#include "orte_config.h"
|
|
#include "orte/constants.h"
|
|
|
|
#include "opal/runtime/opal_params.h"
|
|
#include "opal/mca/pmix/pmix.h"
|
|
#include "opal/mca/pmix/base/base.h"
|
|
|
|
#include "orte/util/proc_info.h"
|
|
|
|
#include "orte/mca/ess/ess.h"
|
|
#include "orte/mca/ess/pmi/ess_pmi.h"
|
|
|
|
extern orte_ess_base_module_t orte_ess_pmi_module;
|
|
|
|
static int pmi_component_open(void);
|
|
static int pmi_component_close(void);
|
|
static int pmi_component_query(mca_base_module_t **module, int *priority);
|
|
|
|
/*
|
|
* Instantiate the public struct with all of our public information
|
|
* and pointers to our public functions in it
|
|
*/
|
|
orte_ess_base_component_t mca_ess_pmi_component = {
|
|
{
|
|
ORTE_ESS_BASE_VERSION_3_0_0,
|
|
|
|
/* Component name and version */
|
|
"pmi",
|
|
ORTE_MAJOR_VERSION,
|
|
ORTE_MINOR_VERSION,
|
|
ORTE_RELEASE_VERSION,
|
|
|
|
/* Component open and close functions */
|
|
pmi_component_open,
|
|
pmi_component_close,
|
|
pmi_component_query
|
|
},
|
|
{
|
|
/* The component is checkpoint ready */
|
|
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
|
}
|
|
};
|
|
|
|
static int pmi_component_open(void)
|
|
{
|
|
|
|
if (OPAL_SUCCESS != mca_base_framework_open(&opal_pmix_base_framework, 0)) {
|
|
return ORTE_ERROR;
|
|
}
|
|
|
|
if (OPAL_SUCCESS != opal_pmix_base_select()) {
|
|
return ORTE_ERROR;
|
|
}
|
|
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
static int pmi_component_query(mca_base_module_t **module, int *priority)
|
|
{
|
|
/* we are available anywhere PMI is available, but not for HNP itself */
|
|
if (!ORTE_PROC_IS_HNP && NULL != opal_pmix.init &&
|
|
OPAL_SUCCESS == opal_pmix.init()) {
|
|
/* if PMI is available, use it */
|
|
*priority = 35;
|
|
*module = (mca_base_module_t *)&orte_ess_pmi_module;
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
/* we can't run */
|
|
*priority = -1;
|
|
*module = NULL;
|
|
return ORTE_ERROR;
|
|
}
|
|
|
|
|
|
static int pmi_component_close(void)
|
|
{
|
|
if (NULL != opal_pmix.finalize) {
|
|
opal_pmix.finalize(); // balances query
|
|
}
|
|
return mca_base_framework_close(&opal_pmix_base_framework);
|
|
}
|
|
|