Modify the internal logic for resolve nodes/peers
The current code path for PMIx_Resolve_peers and PMIx_Resolve_nodes executes a threadshift in the preg components themselves. This is done to ensure thread safety when called from the user level. However, it causes thread-stall when someone attempts to call the regex functions from _inside_ the PMIx code base should the call occur from within an event. Accordingly, move the threadshift to the client-level functions and make the preg components just execute their algorithms. Create a new pnet/test component to verify that the prge code can be safely accessed - set that component to be selected only when the user directly specifies it. The new component will be used to validate various logical extensions during development, and can then be discarded. Signed-off-by: Ralph Castain <rhc@open-mpi.org> (cherry picked from commit 456ac7f7af3d9ba09888e3c899eb001daaa24aef)
Этот коммит содержится в:
родитель
17c40f4cea
Коммит
7241043809
@ -1175,11 +1175,27 @@ static void _commitfn(int sd, short args, void *cbdata)
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void _resolve_peers(int sd, short args, void *cbdata)
|
||||
{
|
||||
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
|
||||
pmix_status_t rc;
|
||||
|
||||
cb->status = pmix_preg.resolve_peers(cb->key, cb->pname.nspace,
|
||||
&cb->procs, &cb->nprocs);
|
||||
/* post the data so the receiving thread can acquire it */
|
||||
PMIX_POST_OBJECT(cb);
|
||||
PMIX_WAKEUP_THREAD(&cb->lock);
|
||||
}
|
||||
|
||||
/* need to thread-shift this request */
|
||||
PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename,
|
||||
const char *nspace,
|
||||
pmix_proc_t **procs, size_t *nprocs)
|
||||
{
|
||||
pmix_cb_t *cb;
|
||||
pmix_status_t rc;
|
||||
pmix_proc_t proc;
|
||||
|
||||
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
|
||||
if (pmix_globals.init_cntr <= 0) {
|
||||
PMIX_RELEASE_THREAD(&pmix_global_lock);
|
||||
@ -1187,16 +1203,71 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename,
|
||||
}
|
||||
PMIX_RELEASE_THREAD(&pmix_global_lock);
|
||||
|
||||
/* set default */
|
||||
*procs = NULL;
|
||||
*nprocs = 0;
|
||||
|
||||
return pmix_preg.resolve_peers(nodename, nspace, procs, nprocs);
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
cb->key = (char*)nodename;
|
||||
cb->pname.nspace = strdup(nspace);
|
||||
|
||||
PMIX_THREADSHIFT(cb, _resolve_peers);
|
||||
|
||||
/* wait for the result */
|
||||
PMIX_WAIT_THREAD(&cb->lock);
|
||||
|
||||
/* if the nspace wasn't found, then we need to
|
||||
* ask the server for that info */
|
||||
if (PMIX_ERR_INVALID_NAMESPACE == cb->status) {
|
||||
(void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN);
|
||||
proc.rank = PMIX_RANK_WILDCARD;
|
||||
/* any key will suffice as it will bring down
|
||||
* the entire data blob */
|
||||
rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, NULL);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
PMIX_RELEASE(cb);
|
||||
return rc;
|
||||
}
|
||||
/* retry the fetch */
|
||||
cb->lock.active = true;
|
||||
PMIX_THREADSHIFT(cb, _resolve_peers);
|
||||
PMIX_WAIT_THREAD(&cb->lock);
|
||||
}
|
||||
*procs = cb->procs;
|
||||
*nprocs = cb->nprocs;
|
||||
|
||||
rc = cb->status;
|
||||
PMIX_RELEASE(cb);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void _resolve_nodes(int fd, short args, void *cbdata)
|
||||
{
|
||||
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
|
||||
char *regex, **names;
|
||||
|
||||
/* get a regular expression describing the PMIX_NODE_MAP */
|
||||
cb->status = pmix_preg.resolve_nodes(cb->pname.nspace, ®ex);
|
||||
if (PMIX_SUCCESS == cb->status) {
|
||||
/* parse it into an argv array of names */
|
||||
cb->status = pmix_preg.parse_nodes(regex, &names);
|
||||
if (PMIX_SUCCESS == cb->status) {
|
||||
/* assemble it into a comma-delimited list */
|
||||
cb->key = pmix_argv_join(names, ',');
|
||||
pmix_argv_free(names);
|
||||
} else {
|
||||
free(regex);
|
||||
}
|
||||
}
|
||||
/* post the data so the receiving thread can acquire it */
|
||||
PMIX_POST_OBJECT(cb);
|
||||
PMIX_WAKEUP_THREAD(&cb->lock);
|
||||
}
|
||||
|
||||
/* need to thread-shift this request */
|
||||
PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist)
|
||||
{
|
||||
pmix_cb_t *cb;
|
||||
pmix_status_t rc;
|
||||
pmix_proc_t proc;
|
||||
|
||||
PMIX_ACQUIRE_THREAD(&pmix_global_lock);
|
||||
if (pmix_globals.init_cntr <= 0) {
|
||||
PMIX_RELEASE_THREAD(&pmix_global_lock);
|
||||
@ -1204,8 +1275,35 @@ PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const char *nspace, char **nodelist
|
||||
}
|
||||
PMIX_RELEASE_THREAD(&pmix_global_lock);
|
||||
|
||||
/* set default */
|
||||
*nodelist = NULL;
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
cb->pname.nspace = strdup(nspace);
|
||||
|
||||
return pmix_preg.resolve_nodes(nspace, nodelist);
|
||||
PMIX_THREADSHIFT(cb, _resolve_nodes);
|
||||
|
||||
/* wait for the result */
|
||||
PMIX_WAIT_THREAD(&cb->lock);
|
||||
|
||||
/* if the nspace wasn't found, then we need to
|
||||
* ask the server for that info */
|
||||
if (PMIX_ERR_INVALID_NAMESPACE == cb->status) {
|
||||
(void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN);
|
||||
proc.rank = PMIX_RANK_WILDCARD;
|
||||
/* any key will suffice as it will bring down
|
||||
* the entire data blob */
|
||||
rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, NULL);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
PMIX_RELEASE(cb);
|
||||
return rc;
|
||||
}
|
||||
/* retry the fetch */
|
||||
cb->lock.active = true;
|
||||
PMIX_THREADSHIFT(cb, _resolve_nodes);
|
||||
PMIX_WAIT_THREAD(&cb->lock);
|
||||
}
|
||||
/* the string we want is in the key field */
|
||||
*nodelist = cb->key;
|
||||
|
||||
rc = cb->status;
|
||||
PMIX_RELEASE(cb);
|
||||
return rc;
|
||||
}
|
||||
|
@ -333,7 +333,8 @@ static pmix_status_t store_map(pmix_hash_table_t *ht,
|
||||
}
|
||||
|
||||
/* store the comma-delimited list of nodes hosting
|
||||
* procs in this nspace */
|
||||
* procs in this nspace in case someone using PMIx v2
|
||||
* requests it */
|
||||
kp2 = PMIX_NEW(pmix_kval_t);
|
||||
kp2->key = strdup(PMIX_NODE_LIST);
|
||||
kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t));
|
||||
@ -397,6 +398,19 @@ pmix_status_t hash_cache_job_info(struct pmix_nspace_t *ns,
|
||||
ht = &trk->internal;
|
||||
for (n=0; n < ninfo; n++) {
|
||||
if (0 == strcmp(info[n].key, PMIX_NODE_MAP)) {
|
||||
/* store the node map itself since that is
|
||||
* what v3 uses */
|
||||
kp2 = PMIX_NEW(pmix_kval_t);
|
||||
kp2->key = strdup(PMIX_NODE_MAP);
|
||||
kp2->value = (pmix_value_t*)malloc(sizeof(pmix_value_t));
|
||||
kp2->value->type = PMIX_STRING;
|
||||
kp2->value->data.string = strdup(info[n].value.data.string);
|
||||
if (PMIX_SUCCESS != (rc = pmix_hash_store(ht, PMIX_RANK_WILDCARD, kp2))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(kp2);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* parse the regex to get the argv array of node names */
|
||||
if (PMIX_SUCCESS != (rc = pmix_preg.parse_nodes(info[n].value.data.string, &nodes))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include "src/util/error.h"
|
||||
#include "src/util/output.h"
|
||||
#include "src/util/pmix_environ.h"
|
||||
#include "src/mca/preg/preg.h"
|
||||
|
||||
#include "src/mca/pnet/pnet.h"
|
||||
#include "src/mca/pnet/base/base.h"
|
||||
@ -298,6 +299,10 @@ static pmix_status_t setup_local_network(pmix_nspace_t *nptr,
|
||||
size_t n;
|
||||
pmix_status_t rc;
|
||||
pmix_kval_t *kv;
|
||||
char *nodestring, **nodes;
|
||||
pmix_proc_t *procs;
|
||||
size_t nprocs;
|
||||
|
||||
|
||||
if (NULL != info) {
|
||||
for (n=0; n < ninfo; n++) {
|
||||
@ -321,6 +326,7 @@ static pmix_status_t setup_local_network(pmix_nspace_t *nptr,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
|
52
opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/Makefile.am
Обычный файл
52
opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/Makefile.am
Обычный файл
@ -0,0 +1,52 @@
|
||||
# -*- makefile -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
|
||||
# Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
||||
# Copyright (c) 2017 Research Organization for Information Science
|
||||
# and Technology (RIST). All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
headers = pnet_test.h
|
||||
sources = \
|
||||
pnet_test_component.c \
|
||||
pnet_test.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_pmix_pnet_test_DSO
|
||||
lib =
|
||||
lib_sources =
|
||||
component = mca_pnet_test.la
|
||||
component_sources = $(headers) $(sources)
|
||||
else
|
||||
lib = libmca_pnet_test.la
|
||||
lib_sources = $(headers) $(sources)
|
||||
component =
|
||||
component_sources =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pmixlibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component)
|
||||
mca_pnet_test_la_SOURCES = $(component_sources)
|
||||
mca_pnet_test_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(lib)
|
||||
libmca_pnet_test_la_SOURCES = $(lib_sources)
|
||||
libmca_pnet_test_la_LDFLAGS = -module -avoid-version
|
221
opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test.c
Обычный файл
221
opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test.c
Обычный файл
@ -0,0 +1,221 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 IBM Corporation. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include <src/include/pmix_config.h>
|
||||
|
||||
#include <string.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_STAT_H
|
||||
#include <sys/stat.h>
|
||||
#endif
|
||||
#ifdef HAVE_FCNTL_H
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
#include <time.h>
|
||||
|
||||
#include <pmix_common.h>
|
||||
|
||||
#include "src/mca/base/pmix_mca_base_var.h"
|
||||
#include "src/include/pmix_socket_errno.h"
|
||||
#include "src/include/pmix_globals.h"
|
||||
#include "src/class/pmix_list.h"
|
||||
#include "src/util/alfg.h"
|
||||
#include "src/util/argv.h"
|
||||
#include "src/util/error.h"
|
||||
#include "src/util/output.h"
|
||||
#include "src/util/pmix_environ.h"
|
||||
#include "src/mca/preg/preg.h"
|
||||
|
||||
#include "src/mca/pnet/pnet.h"
|
||||
#include "src/mca/pnet/base/base.h"
|
||||
#include "pnet_test.h"
|
||||
|
||||
static pmix_status_t test_init(void);
|
||||
static void test_finalize(void);
|
||||
static pmix_status_t setup_app(pmix_nspace_t *nptr,
|
||||
pmix_info_t info[], size_t ninfo,
|
||||
pmix_list_t *ilist);
|
||||
static pmix_status_t setup_local_network(pmix_nspace_t *nptr,
|
||||
pmix_info_t info[],
|
||||
size_t ninfo);
|
||||
static pmix_status_t setup_fork(pmix_nspace_t *nptr, char ***env);
|
||||
static void child_finalized(pmix_peer_t *peer);
|
||||
static void local_app_finalized(char *nspace);
|
||||
|
||||
pmix_pnet_module_t pmix_test_module = {
|
||||
.init = test_init,
|
||||
.finalize = test_finalize,
|
||||
.setup_app = setup_app,
|
||||
.setup_local_network = setup_local_network,
|
||||
.setup_fork = setup_fork,
|
||||
.child_finalized = child_finalized,
|
||||
.local_app_finalized = local_app_finalized
|
||||
};
|
||||
|
||||
static pmix_status_t test_init(void)
|
||||
{
|
||||
pmix_output_verbose(2, pmix_pnet_base_framework.framework_output,
|
||||
"pnet: test init");
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
static void test_finalize(void)
|
||||
{
|
||||
pmix_output_verbose(2, pmix_pnet_base_framework.framework_output,
|
||||
"pnet: test finalize");
|
||||
}
|
||||
|
||||
/* NOTE: if there is any binary data to be transferred, then
|
||||
* this function MUST pack it for transport as the host will
|
||||
* not know how to do so */
|
||||
static pmix_status_t setup_app(pmix_nspace_t *nptr,
|
||||
pmix_info_t info[], size_t ninfo,
|
||||
pmix_list_t *ilist)
|
||||
{
|
||||
uint64_t unique_key[2];
|
||||
char *string_key, *cs_env;
|
||||
int fd_rand;
|
||||
size_t n, bytes_read, len;
|
||||
pmix_kval_t *kv, *next;
|
||||
int i, j;
|
||||
bool envars, seckeys;
|
||||
|
||||
if (NULL == info) {
|
||||
envars = true;
|
||||
seckeys = true;
|
||||
} else {
|
||||
envars = false;
|
||||
seckeys = false;
|
||||
for (n=0; n < ninfo; n++) {
|
||||
if (0 == strncmp(info[n].key, PMIX_SETUP_APP_ENVARS, PMIX_MAX_KEYLEN)) {
|
||||
envars = PMIX_INFO_TRUE(&info[n]);
|
||||
} else if (0 == strncmp(info[n].key, PMIX_SETUP_APP_ALL, PMIX_MAX_KEYLEN)) {
|
||||
envars = PMIX_INFO_TRUE(&info[n]);
|
||||
seckeys = PMIX_INFO_TRUE(&info[n]);
|
||||
} else if (0 == strncmp(info[n].key, PMIX_SETUP_APP_NONENVARS, PMIX_MAX_KEYLEN)) {
|
||||
seckeys = PMIX_INFO_TRUE(&info[n]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (seckeys) {
|
||||
kv = PMIX_NEW(pmix_kval_t);
|
||||
if (NULL == kv) {
|
||||
return PMIX_ERR_NOMEM;
|
||||
}
|
||||
kv->key = strdup(PMIX_SET_ENVAR);
|
||||
kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t));
|
||||
if (NULL == kv->value) {
|
||||
PMIX_RELEASE(kv);
|
||||
return PMIX_ERR_NOMEM;
|
||||
}
|
||||
kv->value->type = PMIX_ENVAR;
|
||||
PMIX_ENVAR_LOAD(&kv->value->data.envar, "PMIX_TEST_SECKEY", "1", ':');
|
||||
pmix_list_append(ilist, &kv->super);
|
||||
}
|
||||
|
||||
if (envars) {
|
||||
kv = PMIX_NEW(pmix_kval_t);
|
||||
if (NULL == kv) {
|
||||
return PMIX_ERR_NOMEM;
|
||||
}
|
||||
kv->key = strdup(PMIX_SET_ENVAR);
|
||||
kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t));
|
||||
if (NULL == kv->value) {
|
||||
PMIX_RELEASE(kv);
|
||||
return PMIX_ERR_NOMEM;
|
||||
}
|
||||
kv->value->type = PMIX_ENVAR;
|
||||
PMIX_ENVAR_LOAD(&kv->value->data.envar, "PMIX_TEST_ENVAR", "1", ':');
|
||||
pmix_list_append(ilist, &kv->super);
|
||||
}
|
||||
|
||||
/* provide a blob so setup_local_network will get called */
|
||||
kv = PMIX_NEW(pmix_kval_t);
|
||||
if (NULL == kv) {
|
||||
return PMIX_ERR_NOMEM;
|
||||
}
|
||||
kv->key = strdup("pmix-pnet-test-blob");
|
||||
kv->value = (pmix_value_t*)malloc(sizeof(pmix_value_t));
|
||||
if (NULL == kv->value) {
|
||||
PMIX_RELEASE(kv);
|
||||
return PMIX_ERR_NOMEM;
|
||||
}
|
||||
kv->value->type = PMIX_STRING;
|
||||
kv->value->data.string = strdup("foobar");
|
||||
pmix_list_append(ilist, &kv->super);
|
||||
|
||||
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
static pmix_status_t setup_local_network(pmix_nspace_t *nptr,
|
||||
pmix_info_t info[],
|
||||
size_t ninfo)
|
||||
{
|
||||
size_t n, m;
|
||||
pmix_status_t rc;
|
||||
pmix_kval_t *kv;
|
||||
char *nodestring, **nodes;
|
||||
pmix_proc_t *procs;
|
||||
size_t nprocs;
|
||||
|
||||
/* get the list of nodes in this job - returns a regex */
|
||||
pmix_output(0, "pnet:setup_local_network NSPACE %s", (NULL == nptr) ? "NULL" : nptr->nspace);
|
||||
pmix_preg.resolve_nodes(nptr->nspace, &nodestring);
|
||||
if (NULL == nodestring) {
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
pmix_preg.parse_nodes(nodestring, &nodes); // get an argv array of node names
|
||||
pmix_output(0, "pnet:setup_local_network NODES %s", (NULL == nodes) ? "NULL" : "NON-NULL");
|
||||
if (NULL == nodes) {
|
||||
free(nodestring);
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
for (n=0; NULL != nodes[n]; n++) {
|
||||
pmix_output(0, "pnet:setup_local_network NODE: %s", nodes[n]);
|
||||
}
|
||||
|
||||
for (n=0; NULL != nodes[n]; n++) {
|
||||
/* get an array of pmix_proc_t containing the names of the procs on that node */
|
||||
pmix_preg.resolve_peers(nodes[n], nptr->nspace, &procs, &nprocs);
|
||||
if (NULL == procs) {
|
||||
continue;
|
||||
}
|
||||
for (m=0; m < nprocs; m++) {
|
||||
pmix_output(0, "pnet:setup_local_network NODE %s: peer %s:%d", nodes[n], procs[m].nspace, procs[m].rank);
|
||||
}
|
||||
/* do stuff */
|
||||
free(procs);
|
||||
}
|
||||
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
static pmix_status_t setup_fork(pmix_nspace_t *nptr, char ***env)
|
||||
{
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
static void child_finalized(pmix_peer_t *peer)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
static void local_app_finalized(char *nspace)
|
||||
{
|
||||
|
||||
}
|
36
opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test.h
Обычный файл
36
opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test.h
Обычный файл
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef PMIX_PNET_test_H
|
||||
#define PMIX_PNET_test_H
|
||||
|
||||
#include <src/include/pmix_config.h>
|
||||
|
||||
|
||||
#include "src/mca/pnet/pnet.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
typedef struct {
|
||||
pmix_pnet_base_component_t super;
|
||||
char **include;
|
||||
char **exclude;
|
||||
} pmix_pnet_test_component_t;
|
||||
|
||||
/* the component must be visible data for the linker to find it */
|
||||
PMIX_EXPORT extern pmix_pnet_test_component_t mca_pnet_test_component;
|
||||
extern pmix_pnet_module_t pmix_test_module;
|
||||
|
||||
/* define a key for any blob we need to send in a launch msg */
|
||||
#define PMIX_PNET_TEST_BLOB "pmix.pnet.test.blob"
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
101
opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test_component.c
Обычный файл
101
opal/mca/pmix/pmix3x/pmix/src/mca/pnet/test/pnet_test_component.c
Обычный файл
@ -0,0 +1,101 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
* These symbols are in a file by themselves to provide nice linker
|
||||
* semantics. Since linkers generally pull in symbols by object
|
||||
* files, keeping these symbols as the only symbols in this file
|
||||
* prevents utility programs such as "ompi_info" from having to import
|
||||
* entire components just to query their version and parameters.
|
||||
*/
|
||||
|
||||
#include <src/include/pmix_config.h>
|
||||
#include "pmix_common.h"
|
||||
|
||||
#include "src/util/argv.h"
|
||||
#include "src/mca/pnet/pnet.h"
|
||||
#include "pnet_test.h"
|
||||
|
||||
static pmix_status_t component_open(void);
|
||||
static pmix_status_t component_close(void);
|
||||
static pmix_status_t component_query(pmix_mca_base_module_t **module, int *priority);
|
||||
|
||||
/*
|
||||
* Instantiate the public struct with all of our public information
|
||||
* and pointers to our public functions in it
|
||||
*/
|
||||
pmix_pnet_test_component_t mca_pnet_test_component = {
|
||||
.super = {
|
||||
.base = {
|
||||
PMIX_PNET_BASE_VERSION_1_0_0,
|
||||
|
||||
/* Component name and version */
|
||||
.pmix_mca_component_name = "test",
|
||||
PMIX_MCA_BASE_MAKE_VERSION(component,
|
||||
PMIX_MAJOR_VERSION,
|
||||
PMIX_MINOR_VERSION,
|
||||
PMIX_RELEASE_VERSION),
|
||||
|
||||
/* Component open and close functions */
|
||||
.pmix_mca_open_component = component_open,
|
||||
.pmix_mca_close_component = component_close,
|
||||
.pmix_mca_query_component = component_query,
|
||||
},
|
||||
.data = {
|
||||
/* The component is checkpoint ready */
|
||||
PMIX_MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
}
|
||||
},
|
||||
.include = NULL,
|
||||
.exclude = NULL
|
||||
};
|
||||
|
||||
static pmix_status_t component_open(void)
|
||||
{
|
||||
int index;
|
||||
const pmix_mca_base_var_storage_t *value=NULL;
|
||||
|
||||
/* we only allow ourselves to be considered IF the user
|
||||
* specifically requested so */
|
||||
if (0 > (index = pmix_mca_base_var_find("pmix", "pnet", NULL, NULL))) {
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
pmix_mca_base_var_get_value(index, &value, NULL, NULL);
|
||||
if (NULL != value && NULL != value->stringval && '\0' != value->stringval[0]) {
|
||||
if (NULL != strstr(value->stringval, "test")) {
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
}
|
||||
return PMIX_ERROR;
|
||||
}
|
||||
|
||||
|
||||
static pmix_status_t component_query(pmix_mca_base_module_t **module, int *priority)
|
||||
{
|
||||
*priority = 0;
|
||||
*module = (pmix_mca_base_module_t *)&pmix_test_module;
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static pmix_status_t component_close(void)
|
||||
{
|
||||
return PMIX_SUCCESS;
|
||||
}
|
@ -501,27 +501,32 @@ static pmix_status_t parse_procs(const char *regexp,
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void _resolve_peers(int sd, short args, void *cbdata)
|
||||
static pmix_status_t resolve_peers(const char *nodename,
|
||||
const char *nspace,
|
||||
pmix_proc_t **procs, size_t *nprocs)
|
||||
{
|
||||
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
|
||||
pmix_cb_t cb;
|
||||
pmix_status_t rc;
|
||||
pmix_kval_t *kv;
|
||||
pmix_proc_t proc;
|
||||
char **ptr;
|
||||
pmix_info_t *info;
|
||||
pmix_proc_t *procs;
|
||||
size_t ninfo, nprocs, n, j;
|
||||
pmix_proc_t *p=NULL;
|
||||
size_t ninfo, np=0, n, j;
|
||||
|
||||
PMIX_CONSTRUCT(&cb, pmix_cb_t);
|
||||
|
||||
cb.key = strdup(nodename);
|
||||
/* this data isn't going anywhere, so we don't require a copy */
|
||||
cb->copy = false;
|
||||
cb.copy = false;
|
||||
/* scope is irrelevant as the info we seek must be local */
|
||||
cb->scope = PMIX_SCOPE_UNDEF;
|
||||
cb.scope = PMIX_SCOPE_UNDEF;
|
||||
/* let the proc point to the nspace */
|
||||
(void)strncpy(proc.nspace, cb->pname.nspace, PMIX_MAX_NSLEN);
|
||||
(void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN);
|
||||
proc.rank = PMIX_RANK_WILDCARD;
|
||||
cb->proc = &proc;
|
||||
cb.proc = &proc;
|
||||
|
||||
PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb);
|
||||
PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, &cb);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
if (PMIX_ERR_INVALID_NAMESPACE != rc) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
@ -529,12 +534,12 @@ static void _resolve_peers(int sd, short args, void *cbdata)
|
||||
goto complete;
|
||||
}
|
||||
/* should just be the one value on the list */
|
||||
if (1 != pmix_list_get_size(&cb->kvs)) {
|
||||
if (1 != pmix_list_get_size(&cb.kvs)) {
|
||||
PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto complete;
|
||||
}
|
||||
kv = (pmix_kval_t*)pmix_list_get_first(&cb->kvs);
|
||||
kv = (pmix_kval_t*)pmix_list_get_first(&cb.kvs);
|
||||
/* the hostname used as a key with wildcard rank will return
|
||||
* a pmix_data_array_t of pmix_info_t structs */
|
||||
if (NULL == kv->value ||
|
||||
@ -552,184 +557,103 @@ static void _resolve_peers(int sd, short args, void *cbdata)
|
||||
if (0 == strncmp(info[n].key, PMIX_LOCAL_PEERS, PMIX_MAX_KEYLEN)) {
|
||||
/* split the string */
|
||||
ptr = pmix_argv_split(info[n].value.data.string, ',');
|
||||
nprocs = pmix_argv_count(ptr);
|
||||
PMIX_PROC_CREATE(procs, nprocs);
|
||||
if (NULL == procs) {
|
||||
np = pmix_argv_count(ptr);
|
||||
PMIX_PROC_CREATE(p, np);
|
||||
if (NULL == p) {
|
||||
rc = PMIX_ERR_NOMEM;
|
||||
pmix_argv_free(ptr);
|
||||
goto complete;
|
||||
}
|
||||
for (j=0; j < nprocs; j++) {
|
||||
(void)strncpy(procs[j].nspace, cb->pname.nspace, PMIX_MAX_NSLEN);
|
||||
procs[j].rank = strtoul(ptr[j], NULL, 10);
|
||||
for (j=0; j < np; j++) {
|
||||
(void)strncpy(p[j].nspace, nspace, PMIX_MAX_NSLEN);
|
||||
p[j].rank = strtoul(ptr[j], NULL, 10);
|
||||
}
|
||||
cb->procs = procs;
|
||||
cb->nprocs = nprocs;
|
||||
rc = PMIX_SUCCESS;
|
||||
pmix_argv_free(ptr);
|
||||
goto complete;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
complete:
|
||||
cb->status = rc;
|
||||
if (NULL != cb->info) {
|
||||
PMIX_INFO_FREE(cb->info, cb->ninfo);
|
||||
if (NULL != cb.info) {
|
||||
PMIX_INFO_FREE(cb.info, cb.ninfo);
|
||||
}
|
||||
cb->pstatus = rc;
|
||||
/* post the data so the receiving thread can acquire it */
|
||||
PMIX_POST_OBJECT(cb);
|
||||
PMIX_WAKEUP_THREAD(&cb->lock);
|
||||
return;
|
||||
}
|
||||
|
||||
static pmix_status_t resolve_peers(const char *nodename,
|
||||
const char *nspace,
|
||||
pmix_proc_t **procs, size_t *nprocs)
|
||||
{
|
||||
pmix_cb_t *cb;
|
||||
pmix_status_t rc;
|
||||
pmix_proc_t proc;
|
||||
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
cb->key = (char*)nodename;
|
||||
cb->pname.nspace = strdup(nspace);
|
||||
|
||||
PMIX_THREADSHIFT(cb, _resolve_peers);
|
||||
|
||||
/* wait for the result */
|
||||
PMIX_WAIT_THREAD(&cb->lock);
|
||||
|
||||
/* if the nspace wasn't found, then we need to
|
||||
* ask the server for that info */
|
||||
if (PMIX_ERR_INVALID_NAMESPACE == cb->status) {
|
||||
(void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN);
|
||||
proc.rank = PMIX_RANK_WILDCARD;
|
||||
/* any key will suffice as it will bring down
|
||||
* the entire data blob */
|
||||
rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, NULL);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
PMIX_RELEASE(cb);
|
||||
return rc;
|
||||
}
|
||||
/* retry the fetch */
|
||||
cb->lock.active = true;
|
||||
PMIX_THREADSHIFT(cb, _resolve_peers);
|
||||
PMIX_WAIT_THREAD(&cb->lock);
|
||||
if (NULL != cb.key) {
|
||||
free(cb.key);
|
||||
cb.key = NULL;
|
||||
}
|
||||
*procs = cb->procs;
|
||||
*nprocs = cb->nprocs;
|
||||
PMIX_DESTRUCT(&cb);
|
||||
*procs = p;
|
||||
*nprocs = np;
|
||||
|
||||
rc = cb->status;
|
||||
PMIX_RELEASE(cb);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void _resolve_nodes(int sd, short args, void *cbdata)
|
||||
{
|
||||
pmix_cb_t *cb = (pmix_cb_t*)cbdata;
|
||||
pmix_status_t rc;
|
||||
pmix_kval_t *kv;
|
||||
pmix_proc_t proc;
|
||||
|
||||
/* create a pmix_info_t so we can pass the nspace
|
||||
* into the fetch as a qualifier */
|
||||
PMIX_INFO_CREATE(cb->info, 1);
|
||||
if (NULL == cb->info) {
|
||||
cb->status = PMIX_ERR_NOMEM;
|
||||
PMIX_POST_OBJECT(cb);
|
||||
PMIX_WAKEUP_THREAD(&cb->lock);
|
||||
return;
|
||||
}
|
||||
cb->ninfo = 1;
|
||||
PMIX_INFO_LOAD(&cb->info[0], PMIX_NSPACE, cb->pname.nspace, PMIX_STRING);
|
||||
/* tell the GDS what we want */
|
||||
cb->key = PMIX_NODE_LIST;
|
||||
/* this data isn't going anywhere, so we don't require a copy */
|
||||
cb->copy = false;
|
||||
/* scope is irrelevant as the info we seek must be local */
|
||||
cb->scope = PMIX_SCOPE_UNDEF;
|
||||
/* put the nspace in the proc field */
|
||||
(void)strncpy(proc.nspace, cb->pname.nspace, PMIX_MAX_NSLEN);
|
||||
/* the info will be associated with PMIX_RANK_WILDCARD */
|
||||
proc.rank = PMIX_RANK_WILDCARD;
|
||||
cb->proc = &proc;
|
||||
|
||||
PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, cb);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
goto complete;
|
||||
}
|
||||
/* should just be the one value on the list */
|
||||
if (1 != pmix_list_get_size(&cb->kvs)) {
|
||||
PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto complete;
|
||||
}
|
||||
kv = (pmix_kval_t*)pmix_list_get_first(&cb->kvs);
|
||||
/* the PMIX_NODE_LIST key is supposed to return a comma-delimited
|
||||
* string of nodes in this - check that it did */
|
||||
if (NULL == kv->value ||
|
||||
PMIX_STRING != kv->value->type) {
|
||||
PMIX_ERROR_LOG(PMIX_ERR_DATA_VALUE_NOT_FOUND);
|
||||
rc = PMIX_ERR_DATA_VALUE_NOT_FOUND;
|
||||
goto complete;
|
||||
}
|
||||
/* return the string */
|
||||
if (NULL != kv->value->data.string) {
|
||||
cb->key = strdup(kv->value->data.string);
|
||||
}
|
||||
|
||||
complete:
|
||||
cb->status = rc;
|
||||
if (NULL != cb->info) {
|
||||
PMIX_INFO_FREE(cb->info, cb->ninfo);
|
||||
}
|
||||
/* post the data so the receiving thread can acquire it */
|
||||
PMIX_POST_OBJECT(cb);
|
||||
PMIX_WAKEUP_THREAD(&cb->lock);
|
||||
return;
|
||||
}
|
||||
|
||||
static pmix_status_t resolve_nodes(const char *nspace,
|
||||
char **nodelist)
|
||||
{
|
||||
pmix_cb_t *cb;
|
||||
pmix_cb_t cb;
|
||||
pmix_status_t rc;
|
||||
pmix_kval_t *kv;
|
||||
pmix_proc_t proc;
|
||||
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
cb->pname.nspace = strdup(nspace);
|
||||
PMIX_CONSTRUCT(&cb, pmix_cb_t);
|
||||
|
||||
PMIX_THREADSHIFT(cb, _resolve_nodes);
|
||||
/* setup default answer */
|
||||
*nodelist = NULL;
|
||||
|
||||
/* wait for the result */
|
||||
PMIX_WAIT_THREAD(&cb->lock);
|
||||
|
||||
/* if the nspace wasn't found, then we need to
|
||||
* ask the server for that info */
|
||||
if (PMIX_ERR_INVALID_NAMESPACE == cb->status) {
|
||||
(void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN);
|
||||
proc.rank = PMIX_RANK_WILDCARD;
|
||||
/* any key will suffice as it will bring down
|
||||
* the entire data blob */
|
||||
rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, NULL);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
PMIX_RELEASE(cb);
|
||||
return rc;
|
||||
}
|
||||
/* retry the fetch */
|
||||
cb->lock.active = true;
|
||||
PMIX_THREADSHIFT(cb, _resolve_nodes);
|
||||
PMIX_WAIT_THREAD(&cb->lock);
|
||||
/* create a pmix_info_t so we can pass the nspace
|
||||
* into the fetch as a qualifier */
|
||||
PMIX_INFO_CREATE(cb.info, 1);
|
||||
if (NULL == cb.info) {
|
||||
PMIX_DESTRUCT(&cb);
|
||||
return PMIX_ERR_NOMEM;
|
||||
}
|
||||
/* the string we want is in the key field */
|
||||
*nodelist = cb->key;
|
||||
cb.ninfo = 1;
|
||||
PMIX_INFO_LOAD(&cb.info[0], PMIX_NSPACE, nspace, PMIX_STRING);
|
||||
|
||||
rc = cb->status;
|
||||
PMIX_RELEASE(cb);
|
||||
/* tell the GDS what we want */
|
||||
cb.key = PMIX_NODE_MAP;
|
||||
/* this data isn't going anywhere, so we don't require a copy */
|
||||
cb.copy = false;
|
||||
/* scope is irrelevant as the info we seek must be local */
|
||||
cb.scope = PMIX_SCOPE_UNDEF;
|
||||
/* put the nspace in the proc field */
|
||||
(void)strncpy(proc.nspace, nspace, PMIX_MAX_NSLEN);
|
||||
/* the info will be associated with PMIX_RANK_WILDCARD */
|
||||
proc.rank = PMIX_RANK_WILDCARD;
|
||||
cb.proc = &proc;
|
||||
|
||||
PMIX_GDS_FETCH_KV(rc, pmix_client_globals.myserver, &cb);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
goto complete;
|
||||
}
|
||||
/* should just be the one value on the list */
|
||||
if (1 != pmix_list_get_size(&cb.kvs)) {
|
||||
PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM);
|
||||
rc = PMIX_ERR_BAD_PARAM;
|
||||
goto complete;
|
||||
}
|
||||
kv = (pmix_kval_t*)pmix_list_get_first(&cb.kvs);
|
||||
/* the PMIX_NODE_MAP key is supposed to return
|
||||
* a regex string - check that it did */
|
||||
if (NULL == kv->value ||
|
||||
PMIX_STRING != kv->value->type) {
|
||||
PMIX_ERROR_LOG(PMIX_ERR_DATA_VALUE_NOT_FOUND);
|
||||
rc = PMIX_ERR_DATA_VALUE_NOT_FOUND;
|
||||
goto complete;
|
||||
}
|
||||
/* return the string */
|
||||
if (NULL != kv->value->data.string) {
|
||||
*nodelist = strdup(kv->value->data.string);
|
||||
}
|
||||
|
||||
complete:
|
||||
if (NULL != cb.info) {
|
||||
PMIX_INFO_FREE(cb.info, cb.ninfo);
|
||||
}
|
||||
return rc;
|
||||
|
||||
}
|
||||
|
||||
static pmix_status_t pmix_regex_extract_nodes(char *regexp, char ***names)
|
||||
|
@ -646,16 +646,6 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&cache);
|
||||
}
|
||||
if (0 < opal_list_get_size(&local_support) &&
|
||||
NULL != opal_pmix.server_setup_local_support) {
|
||||
if (OPAL_SUCCESS != (rc = opal_pmix.server_setup_local_support(jdata->jobid, &local_support,
|
||||
ls_cbunc, &lock))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto REPORT_ERROR;
|
||||
}
|
||||
} else {
|
||||
lock.active = false; // we won't get a callback
|
||||
}
|
||||
|
||||
/* now that the node array in the job map and jdata are completely filled out,.
|
||||
* we need to "wireup" the procs to their nodes so other utilities can
|
||||
@ -751,6 +741,27 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
|
||||
orte_rmaps_base_display_map(jdata);
|
||||
}
|
||||
|
||||
/* register this job with the PMIx server - need to wait until after we
|
||||
* have computed the #local_procs before calling the function */
|
||||
if (ORTE_SUCCESS != (rc = orte_pmix_server_register_nspace(jdata, false))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto REPORT_ERROR;
|
||||
}
|
||||
|
||||
/* if we have local support setup info, then execute it here - we
|
||||
* have to do so AFTER we register the nspace so the PMIx server
|
||||
* has the nspace info it needs */
|
||||
if (0 < opal_list_get_size(&local_support) &&
|
||||
NULL != opal_pmix.server_setup_local_support) {
|
||||
if (OPAL_SUCCESS != (rc = opal_pmix.server_setup_local_support(jdata->jobid, &local_support,
|
||||
ls_cbunc, &lock))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto REPORT_ERROR;
|
||||
}
|
||||
} else {
|
||||
lock.active = false; // we won't get a callback
|
||||
}
|
||||
|
||||
/* if we have a file map, then we need to load it */
|
||||
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FILE_MAPS, (void**)&bptr, OPAL_BUFFER)) {
|
||||
if (NULL != orte_dfs.load_file_maps) {
|
||||
@ -763,13 +774,6 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
|
||||
/* load any controls into the job */
|
||||
orte_rtc.assign(jdata);
|
||||
|
||||
/* register this job with the PMIx server - need to wait until after we
|
||||
* have computed the #local_procs before calling the function */
|
||||
if (ORTE_SUCCESS != (rc = orte_pmix_server_register_nspace(jdata, false))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto REPORT_ERROR;
|
||||
}
|
||||
|
||||
/* spin up the spawn threads */
|
||||
orte_odls_base_start_threads(jdata);
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user