215d6290e0
Fine tuning of flux component Fix a few minor issues with the initial cut: * Job id could be obtained from the PMI kvsname like SLURM, but simpler to getenv (FLUX_JOB_ID) * Flux pmi-1 doesn't define PMI_BOOL, PMI_TRUE, PMI_FALSE * Flux pmi-1 maps the deprecated PMI_Get_kvs_domain_id() to PMI_KVS_Get_my_name() internally, so just call that instead. * Drop residual slurm references. Add wrappers for PMI functions so that if HAVE_FLUX_PMI_LIBRARY is not defined, the component can dlopen libpmi.so at location specified by the FLUX_PMI_LIBRARY_PATH env variable, which adds flexibility. If HAVE_FLUX_PMI_LIBRARY is defined, link with libpmi.so at build time in the usual way. Update configury for flux component Update m4 so the configure options work as follows: --with-flux-pmi Build Flux PMI support (default: yes) --with-flux-pmi-library Link Flux PMI support with PMI library at build time. Otherwise the library is opened at runtime at location specified by FLUX_PMI_LIBRARY_PATH environment variable. Use this option to enable Flux support when building statically or without dlopen support (default: no) If the latter option is provided, the library/header is located at build time using the pkg-config module 'flux-pmi'. Otherwise there is no library/header dependency. Handle the case where ompi is configured with --disable-dlopen or --enable-static. In those cases, don't build the component unless --with-flux-pmi-library is provided. It is fatal if the user explicitly requests --with-flux-pmi but it cannot be built (e.g. due to --disable-dlopen). Add a schizo/flux component Update schizo/flux component Eliminate slurm-specific usage cases. Since the module is only loaded if FLUX_JOB_ID is set, there are only two cases to handle: 1) App was launched indirectly through mpirun. This is not yet supported with Flux, but hook remains in case this mode is supported in the future. 2) App was launched directly by Flux, with Flux providing CPU binding, if any. 
Fix up white space in pmix/flux component Drop non-blocking fence from pmix:flux component The flux PMI-1 library is not thread safe, therefore register a regular blocking fence callback instead of the thread-shifting fencenb(). pmix/flux component avoids extra PMI_KVS_Gets Keys stored into the base cache under the wildcard rank are not intended to be part of the global key namespace. These keys therefore should not trigger a PMI_KVS_Get() if they are not found in the cache. Minor pmix/flux component cleanup pmix/flux: drop code for fetching unused pmix_id pmix/flux: err_exit must return error Problem: in flux_init(), although 'ret' (variable holding err_exit return code) is initialized to OPAL_ERROR, the variable is reused as a temporary result code, so if there are some successes followed by a failure that doesn't set 'ret', flux_init() could return success with PMI not initialized. Ensure that a "goto err_exit" returns OPAL_ERROR if 'ret' is not set to some other error code. pmix/flux: don't mix OPAL_ and PMI_ return codes Problem: flux_init() can return both PMI_ and OPAL_ return codes. Although OPAL_SUCCESS and PMI_SUCCESS are both defined as 0, other codes are not compatible. Ensure that flux_init() consistently uses 'rc' for PMI_ return codes and 'ret' for OPAL_ return codes. pmix/flux: factor out repeated code for cache put Signed-off-by: Ralph Castain <rhc@open-mpi.org>
105 строки
3.2 KiB
C
105 строки
3.2 KiB
C
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
|
/*
|
|
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
|
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
|
* reserved.
|
|
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*
|
|
* These symbols are in a file by themselves to provide nice linker
|
|
* semantics. Since linkers generally pull in symbols by object
|
|
* files, keeping these symbols as the only symbols in this file
|
|
* prevents utility programs such as "ompi_info" from having to import
|
|
* entire components just to query their version and parameters.
|
|
*/
|
|
|
|
#include "opal_config.h"
|
|
|
|
#include "opal/constants.h"
|
|
#include "opal/mca/pmix/pmix.h"
|
|
#include "pmix_flux.h"
|
|
|
|
/*
|
|
* Public string showing the pmix flux component version number
|
|
*/
|
|
const char *opal_pmix_flux_component_version_string =
|
|
"OPAL flux pmix MCA component version " OPAL_VERSION;
|
|
|
|
/*
|
|
* Local function
|
|
*/
|
|
static int pmix_flux_component_query(mca_base_module_t **module, int *priority);
|
|
static int pmix_flux_component_register(void);
|
|
|
|
|
|
/*
|
|
* Instantiate the public struct with all of our public information
|
|
* and pointers to our public functions in it
|
|
*/
|
|
|
|
opal_pmix_base_component_t mca_pmix_flux_component = {
|
|
|
|
/* First, the mca_component_t struct containing meta information
|
|
about the component itself */
|
|
|
|
.base_version = {
|
|
/* Indicate that we are a pmix v1.1.0 component (which also
|
|
implies a specific MCA version) */
|
|
|
|
OPAL_PMIX_BASE_VERSION_2_0_0,
|
|
|
|
/* Component name and version */
|
|
|
|
.mca_component_name = "flux",
|
|
MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
|
|
OPAL_RELEASE_VERSION),
|
|
|
|
/* Component open and close functions */
|
|
.mca_query_component = pmix_flux_component_query,
|
|
.mca_register_component_params = pmix_flux_component_register,
|
|
},
|
|
/* Next the MCA v1.0.0 component meta data */
|
|
.base_data = {
|
|
/* The component is checkpoint ready */
|
|
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
|
},
|
|
.priority = 10,
|
|
};
|
|
|
|
static int pmix_flux_component_register(void)
|
|
{
|
|
int ret;
|
|
mca_base_component_t *component = &mca_pmix_flux_component.base_version;
|
|
|
|
mca_pmix_flux_component.priority = 20;
|
|
ret = mca_base_component_var_register(component, "priority",
|
|
"Priority of the pmix flux component (default: 20)",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
|
|
&mca_pmix_flux_component.priority);
|
|
if (0 > ret) {
|
|
return ret;
|
|
}
|
|
|
|
return OPAL_SUCCESS;
|
|
}
|
|
|
|
/*
 * Decide whether this component may be selected.
 *
 * module   - out: set to the flux pmix module when the FLUX_JOB_ID
 *            environment variable is present, NULL otherwise
 * priority - out: set to the component's configured priority when
 *            FLUX_JOB_ID is present, 0 otherwise
 *
 * Returns OPAL_SUCCESS when FLUX_JOB_ID is set, OPAL_ERROR when it is
 * not.
 */
static int pmix_flux_component_query(mca_base_module_t **module, int *priority)
{
    const char *job_id = getenv("FLUX_JOB_ID");

    if (NULL != job_id) {
        /* running under Flux: offer our module at the configured priority */
        *priority = mca_pmix_flux_component.priority;
        *module = (mca_base_module_t *)&opal_pmix_flux_module;
        return OPAL_SUCCESS;
    }

    /* not under Flux: take ourselves out of consideration */
    *priority = 0;
    *module = NULL;
    return OPAL_ERROR;
}
|