Sync with PMIx master 43e45c3. Get multi-node publish/lookup/unpublish working
Этот коммит содержится в:
родитель
c9710660af
Коммит
f6948c2bb4
@ -73,17 +73,21 @@ static OBJ_CLASS_INSTANCE(ompi_dpm_proct_caddy_t,
|
||||
NULL, NULL);
|
||||
|
||||
struct lookup_caddy_t {
|
||||
bool active;
|
||||
volatile bool active;
|
||||
int status;
|
||||
opal_pmix_pdata_t *pdat;
|
||||
};
|
||||
|
||||
static void lookup_cbfunc(int status, opal_list_t *data, void *cbdata)
|
||||
{
|
||||
struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata;
|
||||
opal_pmix_pdata_t *p = (opal_pmix_pdata_t*)opal_list_get_first(data);
|
||||
if (NULL != p && OPAL_STRING == p->value.type &&
|
||||
NULL != p->value.data.string) {
|
||||
cd->pdat->value.data.string = strdup(p->value.data.string);
|
||||
cd->status = status;
|
||||
if (OPAL_SUCCESS == status && NULL != data) {
|
||||
opal_pmix_pdata_t *p = (opal_pmix_pdata_t*)opal_list_get_first(data);
|
||||
if (NULL != p && OPAL_STRING == p->value.type &&
|
||||
NULL != p->value.data.string) {
|
||||
cd->pdat->value.data.string = strdup(p->value.data.string);
|
||||
}
|
||||
}
|
||||
cd->active = false;
|
||||
}
|
||||
@ -197,17 +201,13 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
||||
|
||||
if (send_first) {
|
||||
(void)asprintf(&info->key, "%s:connect", port_string);
|
||||
info->type = OPAL_STRING;
|
||||
info->data.string = opal_argv_join(members, ':');
|
||||
} else {
|
||||
(void)asprintf(&info->key, "%s:accept", port_string);
|
||||
info->type = OPAL_STRING;
|
||||
info->data.string = opal_argv_join(members, ':');
|
||||
}
|
||||
info->type = OPAL_STRING;
|
||||
info->data.string = opal_argv_join(members, ':');
|
||||
/* publish it with "session" scope */
|
||||
rc = opal_pmix.publish(OPAL_PMIX_SESSION,
|
||||
OPAL_PMIX_PERSIST_APP,
|
||||
&ilist);
|
||||
rc = opal_pmix.publish(&ilist);
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
opal_argv_free(members);
|
||||
@ -228,7 +228,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
||||
}
|
||||
opal_list_append(&ilist, &pdat->super);
|
||||
if (NULL == opal_pmix.lookup_nb) {
|
||||
rc = opal_pmix.lookup(OPAL_PMIX_SESSION, &ilist);
|
||||
rc = opal_pmix.lookup(&ilist, NULL);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
opal_argv_free(members);
|
||||
@ -242,8 +242,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
||||
opal_argv_append_nosize(&keys, pdat->value.key);
|
||||
caddy.active = true;
|
||||
caddy.pdat = pdat;
|
||||
rc = opal_pmix.lookup_nb(OPAL_PMIX_SESSION, true, keys,
|
||||
lookup_cbfunc, &caddy);
|
||||
rc = opal_pmix.lookup_nb(keys, NULL, lookup_cbfunc, &caddy);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
opal_argv_free(keys);
|
||||
@ -252,6 +251,11 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
||||
}
|
||||
OMPI_WAIT_FOR_COMPLETION(caddy.active);
|
||||
opal_argv_free(keys);
|
||||
if (OPAL_SUCCESS != caddy.status) {
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
opal_argv_free(members);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
}
|
||||
/* initiate a list of participants for the connect,
|
||||
* starting with our own members, remembering to
|
||||
|
@ -30,7 +30,7 @@ int vprotocol_pessimist_event_logger_connect(int el_rank, ompi_communicator_t **
|
||||
asprintf(&pdat->value.key, VPROTOCOL_EVENT_LOGGER_NAME_FMT, el_rank);
|
||||
opal_list_append(&results, &pdat->super);
|
||||
|
||||
rc = opal_pmix.lookup(OPAL_PMIX_NAMESPACE, &results);
|
||||
rc = opal_pmix.lookup(&results, NULL);
|
||||
if (OPAL_SUCCESS != rc ||
|
||||
OPAL_STRING != pdat->value.type ||
|
||||
NULL == pdat->value.data.string) {
|
||||
|
@ -46,9 +46,8 @@ int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name)
|
||||
{
|
||||
char range[OPAL_MAX_INFO_VAL];
|
||||
int flag=0, ret;
|
||||
opal_pmix_data_range_t rng;
|
||||
bool range_given = false;
|
||||
opal_list_t results;
|
||||
opal_value_t *rng;
|
||||
opal_list_t results, pinfo;
|
||||
opal_pmix_pdata_t *pdat;
|
||||
|
||||
if ( MPI_PARAM_CHECK ) {
|
||||
@ -70,27 +69,33 @@ int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name)
|
||||
|
||||
OPAL_CR_ENTER_LIBRARY();
|
||||
|
||||
OBJ_CONSTRUCT(&pinfo, opal_list_t);
|
||||
|
||||
/* OMPI supports info keys to pass the range to
|
||||
* be searched for the given key */
|
||||
if (MPI_INFO_NULL != info) {
|
||||
ompi_info_get (info, "range", sizeof(range) - 1, range, &flag);
|
||||
if (flag) {
|
||||
range_given = true;
|
||||
if (0 == strcmp(range, "nspace")) {
|
||||
rng = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace
|
||||
rng = OBJ_NEW(opal_value_t);
|
||||
rng->key = strdup(OPAL_PMIX_RANGE);
|
||||
rng->type = OPAL_INT;
|
||||
rng->data.integer = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace
|
||||
opal_list_append(&pinfo, &rng->super);
|
||||
} else if (0 == strcmp(range, "session")) {
|
||||
rng = OPAL_PMIX_SESSION; // share only with procs in same session
|
||||
rng = OBJ_NEW(opal_value_t);
|
||||
rng->key = strdup(OPAL_PMIX_RANGE);
|
||||
rng->type = OPAL_INT;
|
||||
rng->data.integer = OPAL_PMIX_SESSION; // share only with procs in same session
|
||||
opal_list_append(&pinfo, &rng->super);
|
||||
} else {
|
||||
/* unrecognized scope */
|
||||
OPAL_LIST_DESTRUCT(&pinfo);
|
||||
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
|
||||
FUNC_NAME);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!range_given) {
|
||||
/* default to nspace */
|
||||
rng = OPAL_PMIX_NAMESPACE;
|
||||
}
|
||||
|
||||
/* collect the findings */
|
||||
OBJ_CONSTRUCT(&results, opal_list_t);
|
||||
@ -98,7 +103,8 @@ int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name)
|
||||
pdat->value.key = strdup(service_name);
|
||||
opal_list_append(&results, &pdat->super);
|
||||
|
||||
ret = opal_pmix.lookup(rng, &results);
|
||||
ret = opal_pmix.lookup(&results, &pinfo);
|
||||
OPAL_LIST_DESTRUCT(&pinfo);
|
||||
if (OPAL_SUCCESS != ret ||
|
||||
OPAL_STRING != pdat->value.type ||
|
||||
NULL == pdat->value.data.string) {
|
||||
|
@ -48,12 +48,8 @@ int MPI_Publish_name(const char *service_name, MPI_Info info,
|
||||
int rc;
|
||||
char range[OPAL_MAX_INFO_VAL];
|
||||
int flag=0;
|
||||
opal_pmix_data_range_t rng;
|
||||
bool range_given = false;
|
||||
opal_pmix_persistence_t persist;
|
||||
bool persistence_given = false;
|
||||
opal_value_t *rng;
|
||||
opal_list_t values;
|
||||
opal_value_t *pinfo;
|
||||
|
||||
if ( MPI_PARAM_CHECK ) {
|
||||
OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
|
||||
@ -73,58 +69,75 @@ int MPI_Publish_name(const char *service_name, MPI_Info info,
|
||||
}
|
||||
|
||||
OPAL_CR_ENTER_LIBRARY();
|
||||
OBJ_CONSTRUCT(&values, opal_list_t);
|
||||
|
||||
/* OMPI supports info keys to pass the range and persistence to
|
||||
* be used for the given key */
|
||||
if (MPI_INFO_NULL != info) {
|
||||
ompi_info_get (info, "range", sizeof(range) - 1, range, &flag);
|
||||
if (flag) {
|
||||
range_given = true;
|
||||
if (0 == strcmp(range, "nspace")) {
|
||||
rng = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace
|
||||
rng = OBJ_NEW(opal_value_t);
|
||||
rng->key = strdup(OPAL_PMIX_RANGE);
|
||||
rng->type = OPAL_INT;
|
||||
rng->data.integer = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace
|
||||
opal_list_append(&values, &rng->super);
|
||||
} else if (0 == strcmp(range, "session")) {
|
||||
rng = OPAL_PMIX_SESSION; // share only with procs in same session
|
||||
rng = OBJ_NEW(opal_value_t);
|
||||
rng->key = strdup(OPAL_PMIX_RANGE);
|
||||
rng->type = OPAL_INT;
|
||||
rng->data.integer = OPAL_PMIX_SESSION; // share only with procs in same session
|
||||
opal_list_append(&values, &rng->super);
|
||||
} else {
|
||||
/* unrecognized range */
|
||||
/* unrecognized scope */
|
||||
OPAL_LIST_DESTRUCT(&values);
|
||||
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
|
||||
FUNC_NAME);
|
||||
}
|
||||
}
|
||||
ompi_info_get (info, "persistence", sizeof(range) - 1, range, &flag);
|
||||
if (flag) {
|
||||
persistence_given = true;
|
||||
if (0 == strcmp(range, "indef")) {
|
||||
persist = OPAL_PMIX_PERSIST_INDEF; // retain until specifically deleted
|
||||
rng = OBJ_NEW(opal_value_t);
|
||||
rng->key = strdup(OPAL_PMIX_PERSISTENCE);
|
||||
rng->type = OPAL_INT;
|
||||
rng->data.integer = OPAL_PMIX_PERSIST_INDEF; // retain until specifically deleted
|
||||
opal_list_append(&values, &rng->super);
|
||||
} else if (0 == strcmp(range, "proc")) {
|
||||
persist = OPAL_PMIX_PERSIST_PROC; // retain until publishing process terminates
|
||||
rng = OBJ_NEW(opal_value_t);
|
||||
rng->key = strdup(OPAL_PMIX_PERSISTENCE);
|
||||
rng->type = OPAL_INT;
|
||||
rng->data.integer = OPAL_PMIX_PERSIST_PROC; // retain until publishing process terminates
|
||||
opal_list_append(&values, &rng->super);
|
||||
} else if (0 == strcmp(range, "app")) {
|
||||
persist = OPAL_PMIX_PERSIST_APP; // retain until application terminates
|
||||
rng = OBJ_NEW(opal_value_t);
|
||||
rng->key = strdup(OPAL_PMIX_PERSISTENCE);
|
||||
rng->type = OPAL_INT;
|
||||
rng->data.integer = OPAL_PMIX_PERSIST_APP; // retain until application terminates
|
||||
opal_list_append(&values, &rng->super);
|
||||
} else if (0 == strcmp(range, "session")) {
|
||||
persist = OPAL_PMIX_PERSIST_SESSION; // retain until session/allocation terminates
|
||||
rng = OBJ_NEW(opal_value_t);
|
||||
rng->key = strdup(OPAL_PMIX_PERSISTENCE);
|
||||
rng->type = OPAL_INT;
|
||||
rng->data.integer = OPAL_PMIX_PERSIST_SESSION; // retain until session/allocation terminates
|
||||
opal_list_append(&values, &rng->super);
|
||||
} else {
|
||||
/* unrecognized persistence */
|
||||
OPAL_LIST_DESTRUCT(&values);
|
||||
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
|
||||
FUNC_NAME);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!range_given) {
|
||||
/* default to nspace */
|
||||
rng = OPAL_PMIX_NAMESPACE;
|
||||
}
|
||||
if (!persistence_given) {
|
||||
persist = OPAL_PMIX_PERSIST_APP;
|
||||
}
|
||||
|
||||
/* publish the values */
|
||||
OBJ_CONSTRUCT(&values, opal_list_t);
|
||||
pinfo = OBJ_NEW(opal_value_t);
|
||||
pinfo->key = strdup(service_name);
|
||||
pinfo->type = OPAL_STRING;
|
||||
pinfo->data.string = strdup(port_name);
|
||||
opal_list_append(&values, &pinfo->super);
|
||||
/* publish the service name */
|
||||
rng = OBJ_NEW(opal_value_t);
|
||||
rng->key = strdup(service_name);
|
||||
rng->type = OPAL_STRING;
|
||||
rng->data.string = strdup(port_name);
|
||||
opal_list_append(&values, &rng->super);
|
||||
|
||||
rc = opal_pmix.publish(rng, persist, &values);
|
||||
rc = opal_pmix.publish(&values);
|
||||
OPAL_LIST_DESTRUCT(&values);
|
||||
|
||||
OPAL_CR_EXIT_LIBRARY();
|
||||
|
@ -49,8 +49,8 @@ int MPI_Unpublish_name(const char *service_name, MPI_Info info,
|
||||
int rc;
|
||||
char range[OPAL_MAX_INFO_VAL];
|
||||
int flag=0;
|
||||
opal_pmix_data_range_t rng;
|
||||
bool range_given = false;
|
||||
opal_list_t pinfo;
|
||||
opal_value_t *rng;
|
||||
char **keys = NULL;
|
||||
|
||||
if ( MPI_PARAM_CHECK ) {
|
||||
@ -71,34 +71,40 @@ int MPI_Unpublish_name(const char *service_name, MPI_Info info,
|
||||
}
|
||||
|
||||
OPAL_CR_ENTER_LIBRARY();
|
||||
OBJ_CONSTRUCT(&pinfo, opal_list_t);
|
||||
|
||||
/* OMPI supports info keys to pass the range to
|
||||
* be searched for the given key */
|
||||
if (MPI_INFO_NULL != info) {
|
||||
ompi_info_get (info, "range", sizeof(range) - 1, range, &flag);
|
||||
if (flag) {
|
||||
range_given = true;
|
||||
if (0 == strcmp(range, "nspace")) {
|
||||
rng = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace
|
||||
rng = OBJ_NEW(opal_value_t);
|
||||
rng->key = strdup(OPAL_PMIX_RANGE);
|
||||
rng->type = OPAL_INT;
|
||||
rng->data.integer = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace
|
||||
opal_list_append(&pinfo, &rng->super);
|
||||
} else if (0 == strcmp(range, "session")) {
|
||||
rng = OPAL_PMIX_SESSION; // share only with procs in same session
|
||||
rng = OBJ_NEW(opal_value_t);
|
||||
rng->key = strdup(OPAL_PMIX_RANGE);
|
||||
rng->type = OPAL_INT;
|
||||
rng->data.integer = OPAL_PMIX_SESSION; // share only with procs in same session
|
||||
opal_list_append(&pinfo, &rng->super);
|
||||
} else {
|
||||
/* unrecognized range */
|
||||
/* unrecognized scope */
|
||||
OPAL_LIST_DESTRUCT(&pinfo);
|
||||
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
|
||||
FUNC_NAME);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!range_given) {
|
||||
/* default to nspace */
|
||||
rng = OPAL_PMIX_NAMESPACE;
|
||||
}
|
||||
|
||||
/* unpublish the service_name */
|
||||
opal_argv_append_nosize(&keys, service_name);
|
||||
|
||||
rc = opal_pmix.unpublish(rng, keys);
|
||||
rc = opal_pmix.unpublish(keys, &pinfo);
|
||||
opal_argv_free(keys);
|
||||
OPAL_LIST_DESTRUCT(&pinfo);
|
||||
|
||||
if ( OPAL_SUCCESS != rc ) {
|
||||
if (OPAL_ERR_NOT_FOUND == rc) {
|
||||
|
@ -325,12 +325,8 @@ typedef int (*opal_pmix_base_module_get_nb_fn_t)(const opal_process_name_t *proc
|
||||
* data has been posted and is available. The non-blocking form will
|
||||
* return immediately, executing the callback when the server confirms
|
||||
* availability of the data */
|
||||
typedef int (*opal_pmix_base_module_publish_fn_t)(opal_pmix_data_range_t scope,
|
||||
opal_pmix_persistence_t persist,
|
||||
opal_list_t *info);
|
||||
typedef int (*opal_pmix_base_module_publish_nb_fn_t)(opal_pmix_data_range_t scope,
|
||||
opal_pmix_persistence_t persist,
|
||||
opal_list_t *info,
|
||||
typedef int (*opal_pmix_base_module_publish_fn_t)(opal_list_t *info);
|
||||
typedef int (*opal_pmix_base_module_publish_nb_fn_t)(opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
/* Lookup information published by another process within the
|
||||
@ -352,8 +348,8 @@ typedef int (*opal_pmix_base_module_publish_nb_fn_t)(opal_pmix_data_range_t scop
|
||||
* and return any found items. Thus, the caller is responsible for
|
||||
* ensuring that data is published prior to executing a lookup, or
|
||||
* for retrying until the requested data is found */
|
||||
typedef int (*opal_pmix_base_module_lookup_fn_t)(opal_pmix_data_range_t scope,
|
||||
opal_list_t *data);
|
||||
typedef int (*opal_pmix_base_module_lookup_fn_t)(opal_list_t *data,
|
||||
opal_list_t *info);
|
||||
|
||||
/* Non-blocking form of the _PMIx_Lookup_ function. Data for
|
||||
* the provided NULL-terminated keys array will be returned
|
||||
@ -362,7 +358,7 @@ typedef int (*opal_pmix_base_module_lookup_fn_t)(opal_pmix_data_range_t scope,
|
||||
* wait for _all_ requested data before executing the callback
|
||||
* (_true_), or to callback once the server returns whatever
|
||||
* data is immediately available (_false_) */
|
||||
typedef int (*opal_pmix_base_module_lookup_nb_fn_t)(opal_pmix_data_range_t scope, int wait, char **keys,
|
||||
typedef int (*opal_pmix_base_module_lookup_nb_fn_t)(char **keys, opal_list_t *info,
|
||||
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
/* Unpublish data posted by this process using the given keys
|
||||
@ -370,14 +366,14 @@ typedef int (*opal_pmix_base_module_lookup_nb_fn_t)(opal_pmix_data_range_t scope
|
||||
* the data has been removed by the server. A value of _NULL_
|
||||
* for the keys parameter instructs the server to remove
|
||||
* _all_ data published by this process within the given scope */
|
||||
typedef int (*opal_pmix_base_module_unpublish_fn_t)(opal_pmix_data_range_t scope, char **keys);
|
||||
typedef int (*opal_pmix_base_module_unpublish_fn_t)(char **keys, opal_list_t *info);
|
||||
|
||||
/* Non-blocking form of the _PMIx_Unpublish_ function. The
|
||||
* callback function will be executed once the server confirms
|
||||
* removal of the specified data. A value of _NULL_
|
||||
* for the keys parameter instructs the server to remove
|
||||
* _all_ data published by this process within the given scope */
|
||||
typedef int (*opal_pmix_base_module_unpublish_nb_fn_t)(opal_pmix_data_range_t scope, char **keys,
|
||||
typedef int (*opal_pmix_base_module_unpublish_nb_fn_t)(char **keys, opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
/* Spawn a new job. The spawned applications are automatically
|
||||
|
@ -30,7 +30,7 @@ greek=a1
|
||||
# command, or with the date (if "git describe" fails) in the form of
|
||||
# "date<date>".
|
||||
|
||||
repo_rev=git51479b0
|
||||
repo_rev=git6afbc98
|
||||
|
||||
# If tarball_version is not empty, it is used as the version string in
|
||||
# the tarball filename, regardless of all other versions listed in
|
||||
@ -44,7 +44,7 @@ tarball_version=
|
||||
|
||||
# The date when this release was created
|
||||
|
||||
date="Sep 01, 2015"
|
||||
date="Sep 04, 2015"
|
||||
|
||||
# The shared library version of each of PMIx's public libraries.
|
||||
# These versions are maintained in accordance with the "Library
|
||||
|
@ -77,7 +77,7 @@ int main(int argc, char **argv)
|
||||
(void)strncpy(info[1].key, "PANDA", PMIX_MAX_KEYLEN);
|
||||
info[1].value.type = PMIX_SIZE;
|
||||
info[1].value.data.size = 123456;
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Publish(PMIX_NAMESPACE, PMIX_PERSIST_APP, info, 2))) {
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Publish(info, 2))) {
|
||||
fprintf(stderr, "Client ns %s rank %d: PMIx_Publish failed: %d\n", myproc.nspace, myproc.rank, rc);
|
||||
goto done;
|
||||
}
|
||||
@ -95,7 +95,7 @@ int main(int argc, char **argv)
|
||||
if (0 != myproc.rank) {
|
||||
PMIX_PDATA_CREATE(pdata, 1);
|
||||
(void)strncpy(pdata[0].key, "FOOBAR", PMIX_MAX_KEYLEN);
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Lookup(PMIX_NAMESPACE, NULL, 0, pdata, 1))) {
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Lookup(pdata, 1, NULL, 0))) {
|
||||
fprintf(stderr, "Client ns %s rank %d: PMIx_Lookup failed: %d\n", myproc.nspace, myproc.rank, rc);
|
||||
goto done;
|
||||
}
|
||||
@ -137,7 +137,7 @@ int main(int argc, char **argv)
|
||||
keys[1] = "PANDA";
|
||||
keys[2] = NULL;
|
||||
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Unpublish(PMIX_NAMESPACE, keys))) {
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Unpublish(keys, NULL, 0))) {
|
||||
fprintf(stderr, "Client ns %s rank %d: PMIx_Unpublish failed: %d\n", myproc.nspace, myproc.rank, rc);
|
||||
free(keys);
|
||||
goto done;
|
||||
|
@ -52,15 +52,13 @@ static int dmodex_fn(const pmix_proc_t *proc,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_modex_cbfunc_t cbfunc, void *cbdata);
|
||||
static int publish_fn(const pmix_proc_t *proc,
|
||||
pmix_data_range_t scope, pmix_persistence_t persist,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
static int lookup_fn(const pmix_proc_t *proc,
|
||||
pmix_data_range_t scope,
|
||||
const pmix_info_t info[], size_t ninfo, char **keys,
|
||||
static int lookup_fn(const pmix_proc_t *proc, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_lookup_cbfunc_t cbfunc, void *cbdata);
|
||||
static int unpublish_fn(const pmix_proc_t *proc,
|
||||
pmix_data_range_t scope, char **keys,
|
||||
static int unpublish_fn(const pmix_proc_t *proc, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
static int spawn_fn(const pmix_proc_t *proc,
|
||||
const pmix_info_t job_info[], size_t ninfo,
|
||||
@ -443,7 +441,6 @@ static int dmodex_fn(const pmix_proc_t *proc,
|
||||
|
||||
|
||||
static int publish_fn(const pmix_proc_t *proc,
|
||||
pmix_data_range_t scope, pmix_persistence_t persist,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
@ -467,9 +464,8 @@ static int publish_fn(const pmix_proc_t *proc,
|
||||
}
|
||||
|
||||
|
||||
static int lookup_fn(const pmix_proc_t *proc,
|
||||
pmix_data_range_t scope,
|
||||
const pmix_info_t info[], size_t ninfo, char **keys,
|
||||
static int lookup_fn(const pmix_proc_t *proc, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_lookup_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
pmix_locdat_t *p, *p2;
|
||||
@ -517,8 +513,8 @@ static int lookup_fn(const pmix_proc_t *proc,
|
||||
}
|
||||
|
||||
|
||||
static int unpublish_fn(const pmix_proc_t *proc,
|
||||
pmix_data_range_t scope, char **keys,
|
||||
static int unpublish_fn(const pmix_proc_t *proc, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
pmix_locdat_t *p, *p2;
|
||||
|
@ -65,27 +65,24 @@ BEGIN_C_DECLS
|
||||
|
||||
/**** PMIX API ****/
|
||||
|
||||
/* NOTE: calls to these APIs must be thread-protected as there
|
||||
* currently is NO internal thread safety. */
|
||||
|
||||
/* Initialize the PMIx client, returning the namespace assigned
|
||||
* to this client's application in the provided character array
|
||||
* (must be of size PMIX_MAX_NSLEN or greater). Passing a parameter
|
||||
* of _NULL_ for either or both parameters is allowed if the user
|
||||
/* Initialize the PMIx client, returning the process identifier assigned
|
||||
* to this client's application in the provided pmix_proc_t struct.
|
||||
* Passing a parameter of _NULL_ for this parameter is allowed if the user
|
||||
* wishes solely to initialize the PMIx system and does not require
|
||||
* return of the NULL parameter(s) at that time.
|
||||
* return of the identifier at that time.
|
||||
*
|
||||
* When called the PMIx client will check for the required connection
|
||||
* information of the local PMIx server and will establish the connection.
|
||||
* If the information is not found, or the server connection fails, then
|
||||
* an appropriate error constant will be returned.
|
||||
*
|
||||
* If successful, the function will return PMIX_SUCCESS, will fill the
|
||||
* provided namespace array with the server-assigned namespace, and return
|
||||
* the rank of the process within the application. Note that the PMIx
|
||||
* client library is reference counted, and so multiple calls to PMIx_Init
|
||||
* are allowed. Thus, one way to obtain the namespace and rank of the
|
||||
* process is to simply call PMIx_Init with non-NULL parameters. */
|
||||
* If successful, the function will return PMIX_SUCCESS and will fill the
|
||||
* provided structure with the server-assigned namespace and rank of the
|
||||
* process within the application.
|
||||
*
|
||||
* Note that the PMIx client library is reference counted, and so multiple
|
||||
* calls to PMIx_Init are allowed. Thus, one way to obtain the namespace and
|
||||
* rank of the process is to simply call PMIx_Init with a non-NULL parameter. */
|
||||
pmix_status_t PMIx_Init(pmix_proc_t *proc);
|
||||
|
||||
/* Finalize the PMIx client, closing the connection to the local server.
|
||||
@ -116,12 +113,18 @@ int PMIx_Initialized(void);
|
||||
* Passing a _NULL_ msg parameter is allowed. Note that race conditions
|
||||
* caused by multiple processes calling PMIx_Abort are left to the
|
||||
* server implementation to resolve with regard to which status is
|
||||
* returned and what messages (if any) are printed.
|
||||
*/
|
||||
* returned and what messages (if any) are printed. */
|
||||
pmix_status_t PMIx_Abort(int status, const char msg[],
|
||||
pmix_proc_t procs[], size_t nprocs);
|
||||
|
||||
|
||||
/* Push a value into the client's namespace. The client library will cache
|
||||
* the information locally until _PMIx_Commit_ is called. The provided scope
|
||||
* value is passed to the local PMIx server, which will distribute the data
|
||||
* as directed. */
|
||||
pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_value_t *val);
|
||||
|
||||
|
||||
/* Push all previously _PMIx_Put_ values to the local PMIx server.
|
||||
* This is an asynchronous operation - the library will immediately
|
||||
* return to the caller while the data is transmitted to the local
|
||||
@ -132,7 +135,7 @@ pmix_status_t PMIx_Commit(void);
|
||||
/* Execute a blocking barrier across the processes identified in the
|
||||
* specified array. Passing a _NULL_ pointer as the _procs_ parameter
|
||||
* indicates that the barrier is to span all processes in the client's
|
||||
* namespace. Each provided proc struct can pass PMIX_RANK_WILDCARD to
|
||||
* namespace. Each provided pmix_proc_t struct can pass PMIX_RANK_WILDCARD to
|
||||
* indicate that all processes in the given namespace are
|
||||
* participating.
|
||||
*
|
||||
@ -144,19 +147,22 @@ pmix_status_t PMIx_Commit(void);
|
||||
* A value of _false_ indicates that the callback is just used as a release
|
||||
* and no data is to be returned at that time. A value of _true_ indicates
|
||||
* that all _put_ data is to be collected by the barrier. Returned data is
|
||||
* locally cached so that subsequent calls to _PMIx_Get_ can be serviced
|
||||
* without communicating to/from the server, but at the cost of increased
|
||||
* memory footprint
|
||||
* cached at the server to reduce memory footprint, and can be retrieved
|
||||
* as needed by calls to PMIx_Get(nb).
|
||||
*
|
||||
* Note that for scalability reasons, the default behavior for PMIx_Fence
|
||||
* is to _not_ collect the data.
|
||||
*
|
||||
* (b) PMIX_COLLECTIVE_ALGO - a comma-delimited string indicating the algos
|
||||
* to be used for executing the barrier, in priority order. The _mandatory_
|
||||
* flag can instruct the host RM that it should return an error if none
|
||||
* of the provided algos are available. Otherwise, the RM is to use one
|
||||
* of the algos if possible, but is otherwise free to use any of its
|
||||
* available methods to execute the operation.
|
||||
* to be used for executing the barrier, in priority order.
|
||||
*
|
||||
* (c) PMIX_TIMEOUT - maximum time for the fence to execute before declaring
|
||||
* an error. The RM shall terminate the operation and notify participants
|
||||
* (c) PMIX_COLLECTIVE_ALGO_REQD - instructs the host RM that it should return
|
||||
* an error if none of the specified algos are available. Otherwise, the RM
|
||||
* is to use one of the algos if possible, but is otherwise free to use any
|
||||
* of its available methods to execute the operation.
|
||||
*
|
||||
* (d) PMIX_TIMEOUT - maximum time for the fence to execute before declaring
|
||||
* an error. By default, the RM shall terminate the operation and notify participants
|
||||
* if one or more of the indicated procs fails during the fence. However,
|
||||
* the timeout parameter can help avoid "hangs" due to programming errors
|
||||
* that prevent one or more procs from reaching the "fence".
|
||||
@ -164,7 +170,6 @@ pmix_status_t PMIx_Commit(void);
|
||||
pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs,
|
||||
const pmix_info_t info[], size_t ninfo);
|
||||
|
||||
/* Fence_nb */
|
||||
/* Non-blocking version of PMIx_Fence. Note that the function will return
|
||||
* an error if a _NULL_ callback function is given. */
|
||||
pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs,
|
||||
@ -172,16 +177,9 @@ pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
|
||||
/* Push a value into the client's namespace. The client library will cache
|
||||
* the information locally until _PMIx_Commit_ is called. The provided scope
|
||||
* value is passed to the local PMIx server, which will distribute the data
|
||||
* as directed. */
|
||||
pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_value_t *val);
|
||||
|
||||
/* Retrieve information for the specified _key_ as published by the given _rank_
|
||||
* within the provided _namespace_, returning a pointer to the value in the
|
||||
* given address. A _NULL_ value for the namespace indicates that the rank
|
||||
* is within the caller's namespace.
|
||||
/* Retrieve information for the specified _key_ as published by the process
|
||||
* identified in the given pmix_proc_t, returning a pointer to the value in the
|
||||
* given address.
|
||||
*
|
||||
* This is a blocking operation - the caller will block until
|
||||
* the specified data has been _PMIx_Put_ by the specified rank. The caller is
|
||||
@ -199,55 +197,48 @@ pmix_status_t PMIx_Get(const pmix_proc_t *proc, const char key[],
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_value_t **val);
|
||||
|
||||
/* Retrieve information for the specified _key_ as _PMIx_Put_ by the given _rank_
|
||||
* within the provided _namespace_. This is a non-blocking operation - the
|
||||
* callback function will be executed once the specified data has been _PMIx_Put_
|
||||
* by the specified rank and retrieved by the local server. The info
|
||||
/* A non-blocking version of PMIx_Get - the callback function will
|
||||
* be executed once the specified data has been _PMIx_Put_
|
||||
* by the identified process and retrieved by the local server. The info
|
||||
* array is used as described above for the blocking form of this call. */
|
||||
pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char key[],
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_value_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
|
||||
/* Publish the data in the info array for lookup subject to the provided
|
||||
* data range. Note that the keys must be unique within the specified
|
||||
/* Publish the data in the info array for lookup. By default,
|
||||
* the data will be published into the PMIX_SESSION range and
|
||||
* with PMIX_PERSIST_APP persistence. Changes to those values,
|
||||
* and any additional directives, can be included in the pmix_info_t
|
||||
* array.
|
||||
*
|
||||
* Note that the keys must be unique within the specified
|
||||
* data range or else an error will be returned (first published
|
||||
* wins). Attempts to access the data by procs outside of
|
||||
* the provided data range will be rejected.
|
||||
*
|
||||
* Note: Some host environments may support user/group level
|
||||
* access controls on the information in addition to the data range.
|
||||
* These can be specified in the info array using the appropriately
|
||||
* defined keys.
|
||||
*
|
||||
* The persistence parameter instructs the server as to how long
|
||||
* the data is to be retained, within the context of the range.
|
||||
* For example, data published within _PMIX_NAMESPACE_ will be
|
||||
* deleted along with the namespace regardless of the persistence.
|
||||
* However, data published within PMIX_USER would be retained if
|
||||
* the persistence was set to _PMIX_PERSIST_SESSION_ until the
|
||||
* allocation terminates.
|
||||
* the data is to be retained.
|
||||
*
|
||||
* The blocking form will block until the server confirms that the
|
||||
* data has been posted and is available. The non-blocking form will
|
||||
* return immediately, executing the callback when the server confirms
|
||||
* availability of the data.
|
||||
*/
|
||||
pmix_status_t PMIx_Publish(pmix_data_range_t range,
|
||||
pmix_persistence_t persist,
|
||||
const pmix_info_t info[],
|
||||
size_t ninfo);
|
||||
pmix_status_t PMIx_Publish_nb(pmix_data_range_t range,
|
||||
pmix_persistence_t persist,
|
||||
const pmix_info_t info[],
|
||||
size_t ninfo,
|
||||
pmix_status_t PMIx_Publish(const pmix_info_t info[], size_t ninfo);
|
||||
pmix_status_t PMIx_Publish_nb(const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
|
||||
/* Lookup information published by another process within the
|
||||
* specified range. A range of _PMIX_DATA_RANGE_UNDEF_ requests that
|
||||
* the search be conducted across _all_ namespaces accessible by this
|
||||
* user.
|
||||
/* Lookup information published by this or another process. By default,
|
||||
* the search will be conducted across the PMIX_SESSION range. Changes
|
||||
* to the range, and any additional directives, can be provided
|
||||
* in the pmix_info_t array. Note that the search is also constrained
|
||||
* to only data published by the current user ID - i.e., the search
|
||||
* will not return data published by an application being executed
|
||||
* by another user. There currently is no option to override this
|
||||
* behavior - such an option may become available later via an
|
||||
* appropriate pmix_info_t directive.
|
||||
*
|
||||
* The "data" parameter consists of an array of pmix_pdata_t struct with the
|
||||
* keys specifying the requested information. Data will be returned
|
||||
@ -270,18 +261,13 @@ pmix_status_t PMIx_Publish_nb(pmix_data_range_t range,
|
||||
* by including:
|
||||
*
|
||||
* (a) PMIX_WAIT - wait for the requested data to be published. The
|
||||
* _mandatory_ flag indicates that the server is to wait until
|
||||
* all data has become available. Otherwise, the function will
|
||||
* return as soon as the specified number of values have been
|
||||
* collected. A value of -1 indicates that all values must be
|
||||
* obtained.
|
||||
* server is to wait until all data has become available.
|
||||
*
|
||||
* (b) PMIX_TIMEOUT - max time to wait for data to become available.
|
||||
*
|
||||
*/
|
||||
pmix_status_t PMIx_Lookup(pmix_data_range_t range,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_pdata_t data[], size_t ndata);
|
||||
pmix_status_t PMIx_Lookup(pmix_pdata_t data[], size_t ndata,
|
||||
const pmix_info_t info[], size_t ninfo);
|
||||
|
||||
/* Non-blocking form of the _PMIx_Lookup_ function. Data for
|
||||
* the provided NULL-terminated keys array will be returned
|
||||
@ -289,44 +275,58 @@ pmix_status_t PMIx_Lookup(pmix_data_range_t range,
|
||||
* behavior is to _not_ wait for data to be published. The
|
||||
* info keys can be used to modify the behavior as previously
|
||||
* described */
|
||||
pmix_status_t PMIx_Lookup_nb(pmix_data_range_t range, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_status_t PMIx_Lookup_nb(char **keys, const pmix_info_t info[], size_t ninfo,
|
||||
pmix_lookup_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
|
||||
/* Unpublish data posted by this process using the given keys
|
||||
* within the specified data range. The function will block until
|
||||
* the data has been removed by the server. A value of _NULL_
|
||||
* for the keys parameter instructs the server to remove
|
||||
* _all_ data published by this process within the given range */
|
||||
pmix_status_t PMIx_Unpublish(pmix_data_range_t range, char **keys);
|
||||
/* Unpublish data posted by this process using the given keys.
|
||||
* The function will block until the data has been removed by
|
||||
* the server. A value of _NULL_ for the keys parameter instructs
|
||||
* the server to remove _all_ data published by this process.
|
||||
*
|
||||
* By default, the range is assumed to be PMIX_SESSION. Changes
|
||||
* to the range, and any additional directives, can be provided
|
||||
* in the pmix_info_t array */
|
||||
pmix_status_t PMIx_Unpublish(char **keys,
|
||||
const pmix_info_t info[], size_t ninfo);
|
||||
|
||||
/* Non-blocking form of the _PMIx_Unpublish_ function. The
|
||||
* callback function will be executed once the server confirms
|
||||
* removal of the specified data. A value of _NULL_
|
||||
* for the keys parameter instructs the server to remove
|
||||
* _all_ data published by this process within the given range */
|
||||
pmix_status_t PMIx_Unpublish_nb(pmix_data_range_t range, char **keys,
|
||||
* removal of the specified data. */
|
||||
pmix_status_t PMIx_Unpublish_nb(char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
|
||||
/* Spawn a new job. The assigned namespace of the spawned applications
|
||||
* is returned in the nspace parameter - a _NULL_ value in that
|
||||
* location indicates that the caller doesn't wish to have the
|
||||
* namespace returned. Behavior of individual resource managers
|
||||
* namespace returned. The nspace array must be at least of size
|
||||
* PMIX_MAX_NSLEN+1. Behavior of individual resource managers
|
||||
* may differ, but it is expected that failure of any application
|
||||
* process to start will result in termination/cleanup of _all_
|
||||
* processes in the newly spawned job and return of an error
|
||||
* code to the caller.
|
||||
*
|
||||
* By default, the spawned processes will be PMIx "connected" to
|
||||
* the parent process upon successful launch (see PMIx_Connect
|
||||
* description for details). Note that this only means that the
|
||||
* parent process (a) will be given a copy of the new job's
|
||||
* information so it can query job-level info without
|
||||
* incurring any communication penalties, and (b) will receive
|
||||
* notification of errors from processes in the child job.
|
||||
*
|
||||
* Job-level directives can be specified in the job_info array. This
|
||||
* can include:
|
||||
*
|
||||
* (a) PMIX_NON_MPI - the spawned job is not an MPI job and the procs will
|
||||
* (a) PMIX_NON_PMI - processes in the spawned job will
|
||||
* not be calling PMIx_Init
|
||||
*
|
||||
* (b) PMIX_TIMEOUT - declare the spawn as having failed if the launched
|
||||
* procs do not call PMIx_Init within the specified time
|
||||
*
|
||||
* (c) PMIX_NOTIFY_COMPLETION - notify the parent process when the
|
||||
* child job terminates, either normally or with error
|
||||
*/
|
||||
pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo,
|
||||
const pmix_app_t apps[], size_t napps,
|
||||
@ -353,8 +353,8 @@ pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t ninfo,
|
||||
* the job-level info from those nspaces other than their own.
|
||||
*
|
||||
* Note: a process can only engage in _one_ connect operation involving the identical
|
||||
* set of ranges at a time. However, a process _can_ be simultaneously engaged
|
||||
* in multiple connect operations, each involving a different set of ranges
|
||||
* set of processes at a time. However, a process _can_ be simultaneously engaged
|
||||
* in multiple connect operations, each involving a different set of processes
|
||||
*
|
||||
* As in the case of the fence operation, the info array can be used to pass
|
||||
* user-level directives regarding the algorithm to be used for the collective
|
||||
|
@ -163,6 +163,9 @@ BEGIN_C_DECLS
|
||||
#define PMIX_WAIT "pmix.wait" // (int) caller requests that the server wait until the specified #values are found
|
||||
#define PMIX_COLLECTIVE_ALGO "pmix.calgo" // (char*) comma-delimited list of algorithms to use for collective
|
||||
#define PMIX_COLLECTIVE_ALGO_REQD "pmix.calreqd" // (bool) if true, indicates that the requested choice of algo is mandatory
|
||||
#define PMIX_NOTIFY_COMPLETION "pmix.notecomp" // (bool) notify parent process upon termination of child job
|
||||
#define PMIX_RANGE "pmix.range" // (int) pmix_data_range_t value for calls to publish/lookup/unpublish
|
||||
#define PMIX_PERSISTENCE "pmix.persist" // (int) pmix_persistence_t value for calls to publish
|
||||
|
||||
/* attributes used by host server to pass data to the server convenience library - the
|
||||
* data will then be parsed and provided to the local clients */
|
||||
|
@ -103,7 +103,7 @@ BEGIN_C_DECLS
|
||||
typedef int (*pmix_server_client_connected_fn_t)(const pmix_proc_t *proc,
|
||||
void* server_object);
|
||||
|
||||
/* Notify the host server that a client called PMIx_Finalize- note
|
||||
/* Notify the host server that a client called PMIx_Finalize - note
|
||||
* that the client will be in a blocked state until the host server
|
||||
* executes the callback function, thus allowing the PMIx server support
|
||||
* library to release the client */
|
||||
@ -158,49 +158,46 @@ typedef pmix_status_t (*pmix_server_dmodex_req_fn_t)(const pmix_proc_t *proc,
|
||||
|
||||
|
||||
/* Publish data per the PMIx API specification. The callback is to be executed
|
||||
* upon completion of the operation. The host server is not required to guarantee
|
||||
* support for the requested range - i.e., the server does not need to return an
|
||||
* error if the data store doesn't support range-based isolation. However, the
|
||||
* server must return an error (a) if the key is duplicative within the storage
|
||||
* range, and (b) if the server does not allow overwriting of published info by
|
||||
* the original publisher - it is left to the discretion of the host server to
|
||||
* allow info-key-based flags to modify this behavior. The persist flag indicates
|
||||
* how long the server should retain the data. The nspace/rank of the publishing
|
||||
* process is also provided and is expected to be returned on any subsequent
|
||||
* lookup request */
|
||||
* upon completion of the operation. The default data range is expected to be
|
||||
* PMIX_SESSION, and the default persistence PMIX_PERSIST_SESSION. These values
|
||||
* can be modified by including the respective pmix_info_t struct in the
|
||||
* provided array.
|
||||
*
|
||||
* Note that the host server is not required to guarantee support for any specific
|
||||
* range - i.e., the server does not need to return an error if the data store
|
||||
* doesn't support range-based isolation. However, the server must return an error
|
||||
* (a) if the key is duplicative within the storage range, and (b) if the server
|
||||
* does not allow overwriting of published info by the original publisher - it is
|
||||
* left to the discretion of the host server to allow info-key-based flags to modify
|
||||
* this behavior.
|
||||
*
|
||||
* The persistence indicates how long the server should retain the data.
|
||||
*
|
||||
* The identifier of the publishing process is also provided and is expected to
|
||||
* be returned on any subsequent lookup request */
|
||||
typedef pmix_status_t (*pmix_server_publish_fn_t)(const pmix_proc_t *proc,
|
||||
pmix_data_range_t range, pmix_persistence_t persist,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
/* Lookup published data. The host server will be passed a NULL-terminated array
|
||||
* of string keys along with the range within which the data is expected to have
|
||||
* been published. The host server is not required to guarantee support for all
|
||||
* PMIx-defined ranges, but should only search data stores within the specified
|
||||
* range within the context of the corresponding "publish" API.
|
||||
* of string keys.
|
||||
*
|
||||
* The array of info structs is used to pass user-requested options to the server.
|
||||
* This can include a wait flag to indicate that the server should wait for all
|
||||
* data to become available before executing the callback function, or should
|
||||
* immediately callback with whatever data is available. In addition, a timeout
|
||||
* can be specified on the wait to preclude an indefinite wait for data that
|
||||
* may never be published. The directives are optional _unless_ the _mandatory_ flag
|
||||
* has been set - in such cases, the host RM is required to return an error
|
||||
* if the directive cannot be met. */
|
||||
typedef pmix_status_t (*pmix_server_lookup_fn_t)(const pmix_proc_t *proc,
|
||||
pmix_data_range_t range,
|
||||
* may never be published. */
|
||||
typedef pmix_status_t (*pmix_server_lookup_fn_t)(const pmix_proc_t *proc, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
char **keys,
|
||||
pmix_lookup_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
/* Delete data from the data store. The host server will be passed a NULL-terminated array
|
||||
* of string keys along with the range within which the data is expected to have
|
||||
* been published. The callback is to be executed upon completion of the delete
|
||||
* of string keys, plus potential directives such as the data range within which the
|
||||
* keys should be deleted. The callback is to be executed upon completion of the delete
|
||||
* procedure */
|
||||
typedef pmix_status_t (*pmix_server_unpublish_fn_t)(const pmix_proc_t *proc,
|
||||
pmix_data_range_t range,
|
||||
typedef pmix_status_t (*pmix_server_unpublish_fn_t)(const pmix_proc_t *proc, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
char **keys,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
/* Spawn a set of applications/processes as per the PMIx API. Note that
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -190,7 +190,7 @@ int PMI_Publish_name(const char service_name[], const char port[])
|
||||
|
||||
/* publish the info - PMI-1 doesn't support
|
||||
* any scope other than inside our own nspace */
|
||||
rc = PMIx_Publish(PMIX_NAMESPACE, PMIX_PERSIST_APP, &info, 1);
|
||||
rc = PMIx_Publish(&info, 1);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
@ -204,7 +204,7 @@ int PMI_Unpublish_name(const char service_name[])
|
||||
keys[0] = (char*)service_name;
|
||||
keys[1] = NULL;
|
||||
|
||||
rc = PMIx_Unpublish(PMIX_NAMESPACE, keys);
|
||||
rc = PMIx_Unpublish(keys, NULL, 0);
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
@ -219,7 +219,7 @@ int PMI_Lookup_name(const char service_name[], char port[])
|
||||
(void)strncpy(pdata.key, service_name, PMIX_MAX_KEYLEN);
|
||||
|
||||
/* PMI-1 doesn't want the nspace back */
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Lookup(PMIX_NAMESPACE, NULL, 0, &pdata, 1))) {
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Lookup(&pdata, 1, NULL, 0))) {
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
|
@ -240,7 +240,7 @@ int PMI2_Nameserv_publish(const char service_name[], const PMI_keyval_t *info_pt
|
||||
}
|
||||
/* publish the info - PMI-2 doesn't support
|
||||
* any scope other than inside our own nspace */
|
||||
rc = PMIx_Publish(PMIX_NAMESPACE, PMIX_PERSIST_APP, info, nvals);
|
||||
rc = PMIx_Publish(info, nvals);
|
||||
|
||||
return convert_err(rc);
|
||||
}
|
||||
@ -261,7 +261,7 @@ int PMI2_Nameserv_unpublish(const char service_name[],
|
||||
keys[1] = info_ptr->key;
|
||||
}
|
||||
|
||||
rc = PMIx_Unpublish(PMIX_NAMESPACE, keys);
|
||||
rc = PMIx_Unpublish(keys, NULL, 0);
|
||||
return convert_err(rc);
|
||||
}
|
||||
|
||||
@ -288,7 +288,7 @@ int PMI2_Nameserv_lookup(const char service_name[], const PMI_keyval_t *info_ptr
|
||||
}
|
||||
|
||||
/* lookup the info */
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Lookup(PMIX_NAMESPACE, NULL, 0, pdata, nvals))) {
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Lookup(pdata, nvals, NULL, 0))) {
|
||||
PMIX_PDATA_DESTRUCT(&pdata[0]);
|
||||
PMIX_PDATA_DESTRUCT(&pdata[1]);
|
||||
return convert_err(rc);
|
||||
|
@ -149,6 +149,7 @@ static void wait_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
|
||||
cb->active = false;
|
||||
}
|
||||
|
||||
/* callback to receive job info */
|
||||
static void job_data(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
|
||||
pmix_buffer_t *buf, void *cbdata)
|
||||
{
|
||||
@ -165,6 +166,7 @@ static void job_data(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
|
||||
}
|
||||
/* decode it */
|
||||
pmix_client_process_nspace_blob(pmix_globals.myid.nspace, buf);
|
||||
cb->status = PMIX_SUCCESS;
|
||||
cb->active = false;
|
||||
}
|
||||
|
||||
@ -247,6 +249,8 @@ int PMIx_Init(pmix_proc_t *proc)
|
||||
/* get our effective id's */
|
||||
pmix_globals.uid = geteuid();
|
||||
pmix_globals.gid = getegid();
|
||||
/* default to our internal errhandler */
|
||||
pmix_globals.errhandler = myerrhandler;
|
||||
|
||||
/* initialize the output system */
|
||||
if (!pmix_output_init()) {
|
||||
@ -323,17 +327,19 @@ int PMIx_Init(pmix_proc_t *proc)
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* connect to the server - returns job info if successful */
|
||||
/* setup an object to track server connection */
|
||||
PMIX_CONSTRUCT(&cb, pmix_cb_t);
|
||||
cb.active = true;
|
||||
/* connect to the server - returns job info if successful */
|
||||
if (PMIX_SUCCESS != (rc = connect_to_server(&address, &cb))){
|
||||
PMIX_DESTRUCT(&cb);
|
||||
return rc;
|
||||
}
|
||||
PMIX_WAIT_FOR_COMPLETION(cb.active);
|
||||
rc = cb.status;
|
||||
PMIX_DESTRUCT(&cb);
|
||||
|
||||
return PMIX_SUCCESS;
|
||||
return rc;
|
||||
}
|
||||
|
||||
int PMIx_Initialized(void)
|
||||
@ -769,10 +775,21 @@ static int recv_connect_ack(int sd)
|
||||
{
|
||||
int reply;
|
||||
int rc;
|
||||
struct timeval tv, save;
|
||||
pmix_socklen_t sz;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"pmix: RECV CONNECT ACK FROM SERVER");
|
||||
|
||||
/* get the current timeout value so we can reset to it */
|
||||
sz = sizeof(save);
|
||||
getsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, (void*)&save, &sz);
|
||||
|
||||
/* set a timeout on the blocking recv so we don't hang */
|
||||
tv.tv_sec = 2;
|
||||
tv.tv_usec = 0;
|
||||
setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
|
||||
|
||||
/* receive the status reply */
|
||||
rc = pmix_usock_recv_blocking(sd, (char*)&reply, sizeof(int));
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
@ -802,6 +819,9 @@ static int recv_connect_ack(int sd)
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* return the socket to normal */
|
||||
setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &save, sz);
|
||||
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -61,9 +61,7 @@ static void wait_lookup_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
|
||||
static void lookup_cbfunc(int status, pmix_pdata_t pdata[], size_t ndata,
|
||||
void *cbdata);
|
||||
|
||||
int PMIx_Publish(pmix_data_range_t scope,
|
||||
pmix_persistence_t persist,
|
||||
const pmix_info_t info[],
|
||||
int PMIx_Publish(const pmix_info_t info[],
|
||||
size_t ninfo)
|
||||
{
|
||||
int rc;
|
||||
@ -85,7 +83,7 @@ int PMIx_Publish(pmix_data_range_t scope,
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
cb->active = true;
|
||||
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Publish_nb(scope, persist, info, ninfo, op_cbfunc, cb))) {
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Publish_nb(info, ninfo, op_cbfunc, cb))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(cb);
|
||||
return rc;
|
||||
@ -99,10 +97,7 @@ int PMIx_Publish(pmix_data_range_t scope,
|
||||
return rc;
|
||||
}
|
||||
|
||||
int PMIx_Publish_nb(pmix_data_range_t scope,
|
||||
pmix_persistence_t persist,
|
||||
const pmix_info_t info[],
|
||||
size_t ninfo,
|
||||
int PMIx_Publish_nb(const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
pmix_buffer_t *msg;
|
||||
@ -143,24 +138,14 @@ int PMIx_Publish_nb(pmix_data_range_t scope,
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
/* pack the data range */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &scope, 1, PMIX_DATA_RANGE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
/* pack the persistence */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &persist, 1, PMIX_PERSIST))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
/* pack the info keys that were given */
|
||||
/* pass the number of info structs - needed on remote end so
|
||||
* space can be malloc'd for the values */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
/* pack the info structs */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(msg);
|
||||
@ -181,9 +166,8 @@ int PMIx_Publish_nb(pmix_data_range_t scope,
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
int PMIx_Lookup(pmix_data_range_t scope,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_pdata_t pdata[], size_t ndata)
|
||||
int PMIx_Lookup(pmix_pdata_t pdata[], size_t ndata,
|
||||
const pmix_info_t info[], size_t ninfo)
|
||||
{
|
||||
int rc;
|
||||
pmix_cb_t *cb;
|
||||
@ -213,8 +197,7 @@ int PMIx_Lookup(pmix_data_range_t scope,
|
||||
cb->nvals = ndata;
|
||||
cb->active = true;
|
||||
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Lookup_nb(scope, keys,
|
||||
info, ninfo,
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Lookup_nb(keys, info, ninfo,
|
||||
lookup_cbfunc, cb))) {
|
||||
PMIX_RELEASE(cb);
|
||||
pmix_argv_free(keys);
|
||||
@ -231,8 +214,7 @@ int PMIx_Lookup(pmix_data_range_t scope,
|
||||
return rc;
|
||||
}
|
||||
|
||||
int PMIx_Lookup_nb(pmix_data_range_t range, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
int PMIx_Lookup_nb(char **keys, const pmix_info_t info[], size_t ninfo,
|
||||
pmix_lookup_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
pmix_buffer_t *msg;
|
||||
@ -267,25 +249,6 @@ int PMIx_Lookup_nb(pmix_data_range_t range, char **keys,
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
/* pack the range */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &range, 1, PMIX_DATA_RANGE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
/* pack the info structs */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
if (0 < ninfo) {
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
/* pack the keys */
|
||||
nkeys = pmix_argv_count(keys);
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &nkeys, 1, PMIX_SIZE))) {
|
||||
@ -302,6 +265,19 @@ int PMIx_Lookup_nb(pmix_data_range_t range, char **keys,
|
||||
}
|
||||
}
|
||||
}
|
||||
/* pass the number of info structs - needed on remote end so
|
||||
* space can be malloc'd for the values */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
/* pack the info structs */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* create a callback object as we need to pass it to the
|
||||
* recv routine so we know which callback to use when
|
||||
@ -316,7 +292,7 @@ int PMIx_Lookup_nb(pmix_data_range_t range, char **keys,
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
int PMIx_Unpublish(pmix_data_range_t scope, char **keys)
|
||||
int PMIx_Unpublish(char **keys, const pmix_info_t info[], size_t ninfo)
|
||||
{
|
||||
int rc;
|
||||
pmix_cb_t *cb;
|
||||
@ -331,7 +307,7 @@ int PMIx_Unpublish(pmix_data_range_t scope, char **keys)
|
||||
cb->active = true;
|
||||
|
||||
/* push the message into our event base to send to the server */
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Unpublish_nb(scope, keys, op_cbfunc, cb))) {
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Unpublish_nb(keys, info, ninfo, op_cbfunc, cb))) {
|
||||
PMIX_RELEASE(cb);
|
||||
return rc;
|
||||
}
|
||||
@ -344,7 +320,7 @@ int PMIx_Unpublish(pmix_data_range_t scope, char **keys)
|
||||
return rc;
|
||||
}
|
||||
|
||||
int PMIx_Unpublish_nb(pmix_data_range_t range, char **keys,
|
||||
int PMIx_Unpublish_nb(char **keys, const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
pmix_buffer_t *msg;
|
||||
@ -374,12 +350,6 @@ int PMIx_Unpublish_nb(pmix_data_range_t range, char **keys,
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
/* pack the range */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &range, 1, PMIX_DATA_RANGE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
/* pack the number of keys */
|
||||
i = pmix_argv_count(keys);
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &i, 1, PMIX_SIZE))) {
|
||||
@ -396,6 +366,19 @@ int PMIx_Unpublish_nb(pmix_data_range_t range, char **keys,
|
||||
}
|
||||
}
|
||||
}
|
||||
/* pass the number of info structs - needed on remote end so
|
||||
* space can be malloc'd for the values */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
/* pack the info structs */
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
PMIX_RELEASE(msg);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* create a callback object */
|
||||
cb = PMIX_NEW(pmix_cb_t);
|
||||
|
@ -246,13 +246,16 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
|
||||
security_mode = strdup(pmix_sec.name);
|
||||
|
||||
/* find the temp dir */
|
||||
if (NULL == (tdir = getenv("TMPDIR"))) {
|
||||
if (NULL == (tdir = getenv("TEMP"))) {
|
||||
if (NULL == (tdir = getenv("TMP"))) {
|
||||
tdir = "/tmp";
|
||||
if (NULL == (tdir = getenv("PMIX_SERVER_TMPDIR"))) {
|
||||
if (NULL == (tdir = getenv("TMPDIR"))) {
|
||||
if (NULL == (tdir = getenv("TEMP"))) {
|
||||
if (NULL == (tdir = getenv("TMP"))) {
|
||||
tdir = "/tmp";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* now set the address - we use the pid here to reduce collisions */
|
||||
memset(&myaddress, 0, sizeof(struct sockaddr_un));
|
||||
myaddress.sun_family = AF_UNIX;
|
||||
@ -1879,7 +1882,7 @@ static void cnct_cbfunc(int status, void *cbdata)
|
||||
scd = PMIX_NEW(pmix_shift_caddy_t);
|
||||
scd->status = status;
|
||||
scd->tracker = tracker;
|
||||
PMIX_THREADSHIFT(scd, _mdxcbfunc);
|
||||
PMIX_THREADSHIFT(scd, _cnct);
|
||||
}
|
||||
|
||||
|
||||
|
@ -979,9 +979,7 @@ pmix_status_t pmix_server_publish(pmix_peer_t *peer,
|
||||
{
|
||||
pmix_status_t rc;
|
||||
int32_t cnt;
|
||||
pmix_data_range_t range;
|
||||
pmix_persistence_t persist;
|
||||
size_t i, ninfo, einfo;
|
||||
size_t ninfo, einfo;
|
||||
pmix_info_t *info = NULL;
|
||||
pmix_proc_t proc;
|
||||
uint32_t uid;
|
||||
@ -999,18 +997,6 @@ pmix_status_t pmix_server_publish(pmix_peer_t *peer,
|
||||
PMIX_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* unpack the scope */
|
||||
cnt=1;
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &range, &cnt, PMIX_DATA_RANGE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* unpack the persistence */
|
||||
cnt=1;
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &persist, &cnt, PMIX_PERSIST))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* unpack the number of info objects */
|
||||
cnt=1;
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) {
|
||||
@ -1035,7 +1021,8 @@ pmix_status_t pmix_server_publish(pmix_peer_t *peer,
|
||||
/* call the local server */
|
||||
(void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN);
|
||||
proc.rank = peer->info->rank;
|
||||
rc = pmix_host_server.publish(&proc, range, persist, info, einfo, cbfunc, cbdata);
|
||||
pmix_output(0, "server passing %d values up", (int)einfo);
|
||||
rc = pmix_host_server.publish(&proc, info, einfo, cbfunc, cbdata);
|
||||
|
||||
cleanup:
|
||||
PMIX_INFO_FREE(info, einfo);
|
||||
@ -1048,8 +1035,6 @@ pmix_status_t pmix_server_lookup(pmix_peer_t *peer,
|
||||
{
|
||||
int32_t cnt;
|
||||
pmix_status_t rc;
|
||||
int wait;
|
||||
pmix_data_range_t range;
|
||||
size_t nkeys, i;
|
||||
char **keys=NULL, *sptr;
|
||||
pmix_info_t *info = NULL;
|
||||
@ -1070,12 +1055,22 @@ pmix_status_t pmix_server_lookup(pmix_peer_t *peer,
|
||||
PMIX_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* unpack the range */
|
||||
/* unpack the number of keys */
|
||||
cnt=1;
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &range, &cnt, PMIX_DATA_RANGE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nkeys, &cnt, PMIX_SIZE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* unpack the array of keys */
|
||||
for (i=0; i < nkeys; i++) {
|
||||
cnt=1;
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &sptr, &cnt, PMIX_STRING))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
pmix_argv_append_nosize(&keys, sptr);
|
||||
free(sptr);
|
||||
}
|
||||
/* unpack the number of info objects */
|
||||
cnt=1;
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) {
|
||||
@ -1098,27 +1093,10 @@ pmix_status_t pmix_server_lookup(pmix_peer_t *peer,
|
||||
info[einfo-1].value.type = PMIX_UINT32;
|
||||
info[einfo-1].value.data.uint32 = uid;
|
||||
|
||||
/* unpack the number of keys */
|
||||
cnt=1;
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nkeys, &cnt, PMIX_SIZE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* unpack the array of keys */
|
||||
for (i=0; i < nkeys; i++) {
|
||||
cnt=1;
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &sptr, &cnt, PMIX_STRING))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
pmix_argv_append_nosize(&keys, sptr);
|
||||
free(sptr);
|
||||
}
|
||||
|
||||
/* call the local server */
|
||||
(void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN);
|
||||
proc.rank = peer->info->rank;
|
||||
rc = pmix_host_server.lookup(&proc, range, info, einfo, keys, cbfunc, cbdata);
|
||||
rc = pmix_host_server.lookup(&proc, keys, info, einfo, cbfunc, cbdata);
|
||||
|
||||
cleanup:
|
||||
PMIX_INFO_FREE(info, einfo);
|
||||
@ -1132,12 +1110,11 @@ pmix_status_t pmix_server_unpublish(pmix_peer_t *peer,
|
||||
{
|
||||
int32_t cnt;
|
||||
pmix_status_t rc;
|
||||
pmix_data_range_t range;
|
||||
size_t i, nkeys;
|
||||
size_t i, nkeys, ninfo, einfo;
|
||||
char **keys=NULL, *sptr;
|
||||
pmix_proc_t proc;
|
||||
uint32_t uid;
|
||||
pmix_info_t info;
|
||||
pmix_info_t *info;
|
||||
|
||||
pmix_output_verbose(2, pmix_globals.debug_output,
|
||||
"recvd UNPUBLISH");
|
||||
@ -1152,12 +1129,6 @@ pmix_status_t pmix_server_unpublish(pmix_peer_t *peer,
|
||||
PMIX_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* unpack the range */
|
||||
cnt=1;
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &range, &cnt, PMIX_DATA_RANGE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* unpack the number of keys */
|
||||
cnt=1;
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nkeys, &cnt, PMIX_SIZE))) {
|
||||
@ -1174,16 +1145,32 @@ pmix_status_t pmix_server_unpublish(pmix_peer_t *peer,
|
||||
pmix_argv_append_nosize(&keys, sptr);
|
||||
free(sptr);
|
||||
}
|
||||
/* setup the info key */
|
||||
PMIX_INFO_CONSTRUCT(&info);
|
||||
(void)strncpy(info.key, PMIX_USERID, PMIX_MAX_KEYLEN);
|
||||
info.value.type = PMIX_UINT32;
|
||||
info.value.data.uint32 = uid;
|
||||
/* unpack the number of info objects */
|
||||
cnt=1;
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* we will be adding one for the user id */
|
||||
einfo = ninfo + 1;
|
||||
PMIX_INFO_CREATE(info, einfo);
|
||||
/* unpack the array of info objects */
|
||||
if (0 < ninfo) {
|
||||
PMIX_INFO_CREATE(info, ninfo);
|
||||
cnt=ninfo;
|
||||
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, info, &cnt, PMIX_INFO))) {
|
||||
PMIX_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
(void)strncpy(info[einfo-1].key, PMIX_USERID, PMIX_MAX_KEYLEN);
|
||||
info[einfo-1].value.type = PMIX_UINT32;
|
||||
info[einfo-1].value.data.uint32 = uid;
|
||||
|
||||
/* call the local server */
|
||||
(void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN);
|
||||
proc.rank = peer->info->rank;
|
||||
rc = pmix_host_server.unpublish(&proc, range, &info, 1, keys, cbfunc, cbdata);
|
||||
rc = pmix_host_server.unpublish(&proc, keys, info, einfo, cbfunc, cbdata);
|
||||
|
||||
cleanup:
|
||||
pmix_argv_free(keys);
|
||||
|
@ -151,7 +151,6 @@ int dmodex_fn(const pmix_proc_t *proc,
|
||||
}
|
||||
|
||||
int publish_fn(const pmix_proc_t *proc,
|
||||
pmix_data_range_t scope, pmix_persistence_t persist,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
@ -184,8 +183,8 @@ int publish_fn(const pmix_proc_t *proc,
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
int lookup_fn(const pmix_proc_t *proc, pmix_data_range_t scope,
|
||||
const pmix_info_t info[], size_t ninfo, char **keys,
|
||||
int lookup_fn(const pmix_proc_t *proc, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_lookup_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
size_t i, ndata, ret;
|
||||
@ -216,26 +215,26 @@ int lookup_fn(const pmix_proc_t *proc, pmix_data_range_t scope,
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
int unpublish_fn(const pmix_proc_t *proc,
|
||||
pmix_data_range_t scope, char **keys,
|
||||
int unpublish_fn(const pmix_proc_t *proc, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
size_t i, ninfo;
|
||||
pmix_test_info_t *info, *next;
|
||||
size_t i;
|
||||
pmix_test_info_t *iptr, *next;
|
||||
if (NULL == pmix_test_published_list) {
|
||||
return PMIX_ERR_NOT_FOUND;
|
||||
}
|
||||
PMIX_LIST_FOREACH_SAFE(info, next, pmix_test_published_list, pmix_test_info_t) {
|
||||
if (1) {// if data posted by this process
|
||||
PMIX_LIST_FOREACH_SAFE(iptr, next, pmix_test_published_list, pmix_test_info_t) {
|
||||
if (1) { // if data posted by this process
|
||||
if (NULL == keys) {
|
||||
pmix_list_remove_item(pmix_test_published_list, &info->super);
|
||||
PMIX_RELEASE(info);
|
||||
pmix_list_remove_item(pmix_test_published_list, &iptr->super);
|
||||
PMIX_RELEASE(iptr);
|
||||
} else {
|
||||
ninfo = pmix_argv_count(keys);
|
||||
for (i = 0; i < ninfo; i++) {
|
||||
if (!strcmp(info->data.key, keys[i])) {
|
||||
pmix_list_remove_item(pmix_test_published_list, &info->super);
|
||||
PMIX_RELEASE(info);
|
||||
if (!strcmp(iptr->data.key, keys[i])) {
|
||||
pmix_list_remove_item(pmix_test_published_list, &iptr->super);
|
||||
PMIX_RELEASE(iptr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -29,14 +29,13 @@ pmix_status_t dmodex_fn(const pmix_proc_t *proc,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_modex_cbfunc_t cbfunc, void *cbdata);
|
||||
pmix_status_t publish_fn(const pmix_proc_t *proc,
|
||||
pmix_data_range_t scope, pmix_persistence_t persist,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
pmix_status_t lookup_fn(const pmix_proc_t *proc, pmix_data_range_t scope,
|
||||
const pmix_info_t info[], size_t ninfo, char **keys,
|
||||
pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_lookup_cbfunc_t cbfunc, void *cbdata);
|
||||
pmix_status_t unpublish_fn(const pmix_proc_t *proc,
|
||||
pmix_data_range_t scope, char **keys,
|
||||
pmix_status_t unpublish_fn(const pmix_proc_t *proc, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
pmix_status_t spawn_fn(const pmix_proc_t *proc,
|
||||
const pmix_info_t job_info[], size_t ninfo,
|
||||
|
@ -44,11 +44,11 @@ int main(int argc, char **argv)
|
||||
char *tmp;
|
||||
pmix_proc_t proc, myproc;
|
||||
uint32_t nprocs, n;
|
||||
|
||||
|
||||
/* init us */
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc))) {
|
||||
pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %d", myproc.nspace, myproc.rank, rc);
|
||||
exit(0);
|
||||
exit(rc);
|
||||
}
|
||||
pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank);
|
||||
|
||||
@ -60,7 +60,7 @@ int main(int argc, char **argv)
|
||||
nprocs = val->data.uint32;
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs);
|
||||
|
||||
|
||||
/* put a few values */
|
||||
(void)asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank);
|
||||
value.type = PMIX_UINT32;
|
||||
@ -99,7 +99,7 @@ int main(int argc, char **argv)
|
||||
pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc);
|
||||
goto done;
|
||||
}
|
||||
|
||||
|
||||
/* check the returned data */
|
||||
(void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
|
||||
for (n=0; n < nprocs; n++) {
|
||||
@ -156,5 +156,5 @@ int main(int argc, char **argv)
|
||||
fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank);
|
||||
}
|
||||
fflush(stderr);
|
||||
return(0);
|
||||
return(rc);
|
||||
}
|
||||
|
@ -63,7 +63,7 @@ int main(int argc, char **argv)
|
||||
nprocs = val->data.uint32;
|
||||
PMIX_VALUE_RELEASE(val);
|
||||
pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs);
|
||||
|
||||
|
||||
/* call fence to ensure the data is received */
|
||||
PMIX_PROC_CONSTRUCT(&proc);
|
||||
(void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
|
||||
@ -72,7 +72,7 @@ int main(int argc, char **argv)
|
||||
pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc);
|
||||
goto done;
|
||||
}
|
||||
|
||||
|
||||
/* publish something */
|
||||
if (0 == myproc.rank) {
|
||||
PMIX_INFO_CREATE(info, 2);
|
||||
@ -82,7 +82,7 @@ int main(int argc, char **argv)
|
||||
(void)strncpy(info[1].key, "PANDA", PMIX_MAX_KEYLEN);
|
||||
info[1].value.type = PMIX_SIZE;
|
||||
info[1].value.data.size = 123456;
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Publish(PMIX_GLOBAL, PMIX_PERSIST_APP, info, 2))) {
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Publish(info, 2))) {
|
||||
pmix_output(0, "Client ns %s rank %d: PMIx_Publish failed: %d", myproc.nspace, myproc.rank, rc);
|
||||
goto done;
|
||||
}
|
||||
@ -100,7 +100,7 @@ int main(int argc, char **argv)
|
||||
if (0 != myproc.rank) {
|
||||
PMIX_PDATA_CREATE(pdata, 1);
|
||||
(void)strncpy(pdata[0].key, "FOOBAR", PMIX_MAX_KEYLEN);
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Lookup(PMIX_GLOBAL, NULL, 0, pdata, 1))) {
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Lookup(pdata, 1, NULL, 0))) {
|
||||
pmix_output(0, "Client ns %s rank %d: PMIx_Lookup failed: %d", myproc.nspace, myproc.rank, rc);
|
||||
goto done;
|
||||
}
|
||||
@ -140,7 +140,7 @@ int main(int argc, char **argv)
|
||||
pmix_argv_append_nosize(&keys, "FOOBAR");
|
||||
pmix_argv_append_nosize(&keys, "PANDA");
|
||||
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Unpublish(PMIX_GLOBAL, keys))) {
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Unpublish(keys, NULL, 0))) {
|
||||
pmix_output(0, "Client ns %s rank %d: PMIx_Unpublish failed: %d", myproc.nspace, myproc.rank, rc);
|
||||
goto done;
|
||||
}
|
||||
|
@ -24,17 +24,24 @@
|
||||
|
||||
#include <private/autogen/config.h>
|
||||
#include <pmix_server.h>
|
||||
#include <private/types.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <time.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <errno.h>
|
||||
#include <signal.h>
|
||||
#include PMIX_EVENT_HEADER
|
||||
|
||||
#include "src/util/pmix_environ.h"
|
||||
#include "src/util/output.h"
|
||||
#include "src/util/printf.h"
|
||||
#include "src/util/argv.h"
|
||||
#include "src/buffer_ops/buffer_ops.h"
|
||||
#include "src/usock/usock.h"
|
||||
|
||||
static pmix_status_t connected(const pmix_proc_t *proc, void *server_object);
|
||||
static pmix_status_t finalized(const pmix_proc_t *proc, void *server_object,
|
||||
@ -51,14 +58,13 @@ static pmix_status_t dmodex_fn(const pmix_proc_t *proc,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_modex_cbfunc_t cbfunc, void *cbdata);
|
||||
static pmix_status_t publish_fn(const pmix_proc_t *proc,
|
||||
pmix_data_range_t scope, pmix_persistence_t persist,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
static pmix_status_t lookup_fn(const pmix_proc_t *proc, pmix_data_range_t scope,
|
||||
const pmix_info_t info[], size_t ninfo, char **keys,
|
||||
static pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_lookup_cbfunc_t cbfunc, void *cbdata);
|
||||
static pmix_status_t unpublish_fn(const pmix_proc_t *proc,
|
||||
pmix_data_range_t scope, char **keys,
|
||||
static pmix_status_t unpublish_fn(const pmix_proc_t *proc, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
static pmix_status_t spawn_fn(const pmix_proc_t *proc,
|
||||
const pmix_info_t job_info[], size_t ninfo,
|
||||
@ -72,8 +78,6 @@ static pmix_status_t disconnect_fn(const pmix_proc_t procs[], size_t nprocs,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
static pmix_status_t register_event_fn(const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
static pmix_status_t listener_fn(int listening_sd,
|
||||
pmix_connection_cbfunc_t cbfunc);
|
||||
|
||||
static pmix_server_module_t mymodule = {
|
||||
connected,
|
||||
@ -88,7 +92,7 @@ static pmix_server_module_t mymodule = {
|
||||
connect_fn,
|
||||
disconnect_fn,
|
||||
register_event_fn,
|
||||
listener_fn
|
||||
NULL
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
@ -101,7 +105,7 @@ PMIX_CLASS_INSTANCE(pmix_locdat_t,
|
||||
|
||||
typedef struct {
|
||||
pmix_object_t super;
|
||||
volatile bool completed;
|
||||
volatile bool active;
|
||||
pmix_proc_t caller;
|
||||
pmix_info_t *info;
|
||||
size_t ninfo;
|
||||
@ -113,7 +117,7 @@ static void xfcon(myxfer_t *p)
|
||||
{
|
||||
p->info = NULL;
|
||||
p->ninfo = 0;
|
||||
p->completed = false;
|
||||
p->active = true;
|
||||
p->cbfunc = NULL;
|
||||
p->spcbfunc = NULL;
|
||||
p->cbdata = NULL;
|
||||
@ -128,26 +132,35 @@ PMIX_CLASS_INSTANCE(myxfer_t,
|
||||
pmix_object_t,
|
||||
xfcon, xfdes);
|
||||
|
||||
typedef struct {
|
||||
pmix_list_item_t super;
|
||||
pid_t pid;
|
||||
} wait_tracker_t;
|
||||
PMIX_CLASS_INSTANCE(wait_tracker_t,
|
||||
pmix_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
static volatile int wakeup;
|
||||
static pmix_list_t pubdata;
|
||||
static pmix_event_t handler;
|
||||
static pmix_list_t children;
|
||||
|
||||
static void set_namespace(int nprocs, char *ranks, char *nspace,
|
||||
pmix_op_cbfunc_t cbfunc, myxfer_t *x);
|
||||
static void errhandler(pmix_status_t status,
|
||||
pmix_proc_t procs[], size_t nprocs,
|
||||
pmix_info_t info[], size_t ninfo);
|
||||
static void wait_signal_callback(int fd, short event, void *arg);
|
||||
|
||||
static void opcbfunc(pmix_status_t status, void *cbdata)
|
||||
{
|
||||
myxfer_t *x = (myxfer_t*)cbdata;
|
||||
|
||||
x->completed = true;
|
||||
/* release the caller, if necessary - note that
|
||||
* this may result in release of x, so this must
|
||||
* be the last thing we do with it here */
|
||||
/* release the caller, if necessary */
|
||||
if (NULL != x->cbfunc) {
|
||||
x->cbfunc(PMIX_SUCCESS, x->cbdata);
|
||||
}
|
||||
x->active = false;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
@ -161,6 +174,7 @@ int main(int argc, char **argv)
|
||||
pid_t pid;
|
||||
myxfer_t *x;
|
||||
pmix_proc_t proc;
|
||||
wait_tracker_t *child;
|
||||
|
||||
/* smoke test */
|
||||
if (PMIX_SUCCESS != 0) {
|
||||
@ -181,6 +195,12 @@ int main(int argc, char **argv)
|
||||
/* setup the pub data, in case it is used */
|
||||
PMIX_CONSTRUCT(&pubdata, pmix_list_t);
|
||||
|
||||
/* setup to see sigchld on the forked tests */
|
||||
PMIX_CONSTRUCT(&children, pmix_list_t);
|
||||
event_assign(&handler, pmix_globals.evbase, SIGCHLD,
|
||||
EV_SIGNAL|EV_PERSIST,wait_signal_callback, &handler);
|
||||
event_add(&handler, NULL);
|
||||
|
||||
/* see if we were passed the number of procs to run or
|
||||
* the executable to use */
|
||||
for (n=1; n < (argc-1); n++) {
|
||||
@ -208,7 +228,6 @@ int main(int argc, char **argv)
|
||||
tmp = pmix_argv_join(atmp, ',');
|
||||
x = PMIX_NEW(myxfer_t);
|
||||
set_namespace(nprocs, tmp, "foobar", opcbfunc, x);
|
||||
free(tmp);
|
||||
|
||||
/* set common argv and env */
|
||||
client_env = pmix_argv_copy(environ);
|
||||
@ -220,12 +239,8 @@ int main(int argc, char **argv)
|
||||
|
||||
/* if the nspace registration hasn't completed yet,
|
||||
* wait for it here */
|
||||
while (!x->completed) {
|
||||
struct timespec ts;
|
||||
ts.tv_sec = 0;
|
||||
ts.tv_nsec = 100000;
|
||||
nanosleep(&ts, NULL);
|
||||
}
|
||||
PMIX_WAIT_FOR_COMPLETION(x->active);
|
||||
free(tmp);
|
||||
PMIX_RELEASE(x);
|
||||
|
||||
/* fork/exec the test */
|
||||
@ -246,12 +261,7 @@ int main(int argc, char **argv)
|
||||
}
|
||||
/* don't fork/exec the client until we know it is registered
|
||||
* so we avoid a potential race condition in the server */
|
||||
while (!x->completed) {
|
||||
struct timespec ts;
|
||||
ts.tv_sec = 0;
|
||||
ts.tv_nsec = 100000;
|
||||
nanosleep(&ts, NULL);
|
||||
}
|
||||
PMIX_WAIT_FOR_COMPLETION(x->active);
|
||||
PMIX_RELEASE(x);
|
||||
pid = fork();
|
||||
if (pid < 0) {
|
||||
@ -259,6 +269,9 @@ int main(int argc, char **argv)
|
||||
PMIx_server_finalize();
|
||||
return -1;
|
||||
}
|
||||
child = PMIX_NEW(wait_tracker_t);
|
||||
child->pid = pid;
|
||||
pmix_list_append(&children, &child->super);
|
||||
|
||||
if (pid == 0) {
|
||||
execve(executable, client_argv, client_env);
|
||||
@ -438,7 +451,6 @@ static int dmodex_fn(const pmix_proc_t *proc,
|
||||
|
||||
|
||||
static int publish_fn(const pmix_proc_t *proc,
|
||||
pmix_data_range_t scope, pmix_persistence_t persist,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
@ -462,9 +474,8 @@ static int publish_fn(const pmix_proc_t *proc,
|
||||
}
|
||||
|
||||
|
||||
static int lookup_fn(const pmix_proc_t *proc,
|
||||
pmix_data_range_t scope,
|
||||
const pmix_info_t info[], size_t ninfo, char **keys,
|
||||
static int lookup_fn(const pmix_proc_t *proc, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_lookup_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
pmix_locdat_t *p, *p2;
|
||||
@ -512,8 +523,8 @@ static int lookup_fn(const pmix_proc_t *proc,
|
||||
}
|
||||
|
||||
|
||||
static int unpublish_fn(const pmix_proc_t *proc,
|
||||
pmix_data_range_t scope, char **keys,
|
||||
static int unpublish_fn(const pmix_proc_t *proc, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
pmix_locdat_t *p, *p2;
|
||||
@ -610,10 +621,39 @@ static pmix_status_t register_event_fn(const pmix_info_t info[], size_t ninfo,
|
||||
return PMIX_SUCCESS;
|
||||
}
|
||||
|
||||
static int listener_fn(int listening_sd,
|
||||
pmix_connection_cbfunc_t cbfunc)
|
||||
static void wait_signal_callback(int fd, short event, void *arg)
|
||||
{
|
||||
return PMIX_SUCCESS;
|
||||
pmix_event_t *sig = (pmix_event_t*) arg;
|
||||
int status;
|
||||
pid_t pid;
|
||||
wait_tracker_t *t2;
|
||||
|
||||
if (SIGCHLD != event_get_signal(sig)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* we can have multiple children leave but only get one
|
||||
* sigchild callback, so reap all the waitpids until we
|
||||
* don't get anything valid back */
|
||||
while (1) {
|
||||
pid = waitpid(-1, &status, WNOHANG);
|
||||
if (-1 == pid && EINTR == errno) {
|
||||
/* try it again */
|
||||
continue;
|
||||
}
|
||||
/* if we got garbage, then nothing we can do */
|
||||
if (pid <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* we are already in an event, so it is safe to access the list */
|
||||
PMIX_LIST_FOREACH(t2, &children, wait_tracker_t) {
|
||||
if (pid == t2->pid) {
|
||||
/* found it! */
|
||||
--wakeup;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -86,11 +86,11 @@ static void add_noise(char *noise_param, char *my_nspace, int my_rank)
|
||||
SET_KEY(key, fence_num, ind, use_same_keys); \
|
||||
(void)strncpy(foobar.nspace, ns, PMIX_MAX_NSLEN); \
|
||||
foobar.rank = r; \
|
||||
TEST_VERBOSE(("%s:%d want to get from %s:%d key %s", my_nspace, my_rank, ns, r, key)); \
|
||||
TEST_VERBOSE(("%s:%d want to get from %s:%d key %s", my_nspace, my_rank, ns, r, key)); \
|
||||
if (blocking) { \
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Get(&foobar, key, NULL, 0, &val))) { \
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Get(&foobar, key, NULL, 0, &val))) { \
|
||||
if( !( rc == PMIX_ERR_NOT_FOUND && ok_notfnd ) ){ \
|
||||
TEST_ERROR(("%s:%d: PMIx_Get failed: %d from %s:%d", my_nspace, my_rank, rc, ns, r)); \
|
||||
TEST_ERROR(("%s:%d: PMIx_Get failed: %d from %s:%d, key %s", my_nspace, my_rank, rc, ns, r, key)); \
|
||||
} \
|
||||
rc = PMIX_ERROR; \
|
||||
} \
|
||||
@ -99,8 +99,8 @@ static void add_noise(char *noise_param, char *my_nspace, int my_rank)
|
||||
cbdata.in_progress = 1; \
|
||||
PMIX_VALUE_CREATE(val, 1); \
|
||||
cbdata.kv = val; \
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&foobar, key, NULL, 0, get_cb, (void*)&cbdata))) { \
|
||||
TEST_VERBOSE(("%s:%d: PMIx_Get_nb failed: %d from %s:%d", my_nspace, my_rank, rc, ns, r)); \
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&foobar, key, NULL, 0, get_cb, (void*)&cbdata))) { \
|
||||
TEST_VERBOSE(("%s:%d: PMIx_Get_nb failed: %d from %s:%d, key=%s", my_nspace, my_rank, rc, ns, r, key)); \
|
||||
rc = PMIX_ERROR; \
|
||||
} else { \
|
||||
count = 0; \
|
||||
@ -116,7 +116,8 @@ static void add_noise(char *noise_param, char *my_nspace, int my_rank)
|
||||
if (PMIX_SUCCESS == rc) { \
|
||||
if( PMIX_SUCCESS != cbdata.status ){ \
|
||||
if( !( rc == PMIX_ERR_NOT_FOUND && ok_notfnd ) ){ \
|
||||
TEST_VERBOSE(("%s:%d: PMIx_Get_nb failed: %d from %s:%d", my_nspace, my_rank, rc, my_nspace, r));\
|
||||
TEST_VERBOSE(("%s:%d: PMIx_Get_nb failed: %d from %s:%d, key=%s", \
|
||||
my_nspace, my_rank, rc, my_nspace, r)); \
|
||||
} \
|
||||
rc = PMIX_ERROR; \
|
||||
} else if (NULL == val) { \
|
||||
|
@ -59,10 +59,10 @@ static int test_publish(char *my_nspace, int my_rank, int blocking)
|
||||
info.value.type = PMIX_STRING;
|
||||
info.value.data.string = strdup(data);
|
||||
if (blocking) {
|
||||
rc = PMIx_Publish(PMIX_NAMESPACE, PMIX_PERSIST_INDEF, &info, 1);
|
||||
rc = PMIx_Publish(&info, 1);
|
||||
} else {
|
||||
int in_progress = 1;
|
||||
rc = PMIx_Publish_nb(PMIX_NAMESPACE, PMIX_PERSIST_INDEF, &info, 1, release_cb, &in_progress);
|
||||
rc = PMIx_Publish_nb(&info, 1, release_cb, &in_progress);
|
||||
if (PMIX_SUCCESS == rc) {
|
||||
PMIX_WAIT_FOR_COMPLETION(in_progress);
|
||||
}
|
||||
@ -83,7 +83,7 @@ static int test_lookup(char *my_nspace, int my_rank, int blocking)
|
||||
(void)snprintf(data, 512, "data from proc %s:%d", my_nspace, my_rank);
|
||||
|
||||
if (blocking) {
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Lookup(PMIX_NAMESPACE, NULL, 0, &pdata, 1))) {
|
||||
if (PMIX_SUCCESS != (rc = PMIx_Lookup(&pdata, 1, NULL, 0))) {
|
||||
PMIX_PDATA_DESTRUCT(&pdata);
|
||||
return rc;
|
||||
}
|
||||
@ -98,7 +98,7 @@ static int test_lookup(char *my_nspace, int my_rank, int blocking)
|
||||
cbdata.pdata = &pdata;
|
||||
/* copy the key across */
|
||||
(void)strncpy(pdata.key, keys[0], PMIX_MAX_KEYLEN);
|
||||
rc = PMIx_Lookup_nb(PMIX_NAMESPACE, keys, NULL, 0, lookup_cb, (void*)&cbdata);
|
||||
rc = PMIx_Lookup_nb(keys, NULL, 0, lookup_cb, (void*)&cbdata);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
PMIX_PDATA_DESTRUCT(&pdata);
|
||||
return rc;
|
||||
@ -130,10 +130,10 @@ static int test_unpublish(char *my_nspace, int my_rank, int blocking)
|
||||
keys[1] = NULL;
|
||||
|
||||
if (blocking) {
|
||||
rc = PMIx_Unpublish(PMIX_NAMESPACE, keys);
|
||||
rc = PMIx_Unpublish(keys, NULL, 0);
|
||||
} else {
|
||||
int in_progress = 1;
|
||||
rc = PMIx_Unpublish_nb(PMIX_NAMESPACE, keys, release_cb, &in_progress);
|
||||
rc = PMIx_Unpublish_nb(keys, NULL, 0, release_cb, &in_progress);
|
||||
if (PMIX_SUCCESS == rc) {
|
||||
PMIX_WAIT_FOR_COMPLETION(in_progress);
|
||||
}
|
||||
|
@ -89,20 +89,15 @@ OPAL_MODULE_DECLSPEC int pmix1_get(const opal_process_name_t *proc,
|
||||
OPAL_MODULE_DECLSPEC int pmix1_getnb(const opal_process_name_t *proc,
|
||||
const char *key,
|
||||
opal_pmix_value_cbfunc_t cbfunc, void *cbdata);
|
||||
OPAL_MODULE_DECLSPEC int pmix1_publish(opal_pmix_data_range_t scope,
|
||||
opal_pmix_persistence_t persist,
|
||||
opal_list_t *info);
|
||||
OPAL_MODULE_DECLSPEC int pmix1_publishnb(opal_pmix_data_range_t scope,
|
||||
opal_pmix_persistence_t persist,
|
||||
opal_list_t *info,
|
||||
OPAL_MODULE_DECLSPEC int pmix1_publish(opal_list_t *info);
|
||||
OPAL_MODULE_DECLSPEC int pmix1_publishnb(opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
OPAL_MODULE_DECLSPEC int pmix1_lookup(opal_list_t *data, opal_list_t *info);
|
||||
OPAL_MODULE_DECLSPEC int pmix1_lookupnb(char **keys, opal_list_t *info,
|
||||
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata);
|
||||
OPAL_MODULE_DECLSPEC int pmix1_unpublish(char **keys, opal_list_t *info);
|
||||
OPAL_MODULE_DECLSPEC int pmix1_unpublishnb(char **keys, opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
OPAL_MODULE_DECLSPEC int pmix1_lookup(opal_pmix_data_range_t scope,
|
||||
opal_list_t *data);
|
||||
OPAL_MODULE_DECLSPEC int pmix1_lookupnb(opal_pmix_data_range_t scope, int wait, char **keys,
|
||||
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata);
|
||||
OPAL_MODULE_DECLSPEC int pmix1_unpublish(opal_pmix_data_range_t scope, char **keys);
|
||||
OPAL_MODULE_DECLSPEC int pmix1_unpublishnb(opal_pmix_data_range_t scope, char **keys,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
OPAL_MODULE_DECLSPEC int pmix1_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid);
|
||||
OPAL_MODULE_DECLSPEC int pmix1_spawnnb(opal_list_t *job_info, opal_list_t *apps,
|
||||
opal_pmix_spawn_cbfunc_t cbfunc, void *cbdata);
|
||||
@ -119,8 +114,6 @@ OPAL_MODULE_DECLSPEC int pmix1_resolve_peers(const char *nodename, opal_jobid_t
|
||||
OPAL_MODULE_DECLSPEC int pmix1_resolve_nodes(opal_jobid_t jobid, char **nodelist);
|
||||
|
||||
/**** COMMON FUNCTIONS ****/
|
||||
OPAL_MODULE_DECLSPEC void pmix1_register_errhandler(opal_pmix_errhandler_fn_t errhandler);
|
||||
OPAL_MODULE_DECLSPEC void pmix1_deregister_errhandler(void);
|
||||
OPAL_MODULE_DECLSPEC int pmix1_store_local(const opal_process_name_t *proc,
|
||||
opal_value_t *val);
|
||||
|
||||
|
@ -33,14 +33,42 @@
|
||||
static pmix_proc_t myproc;
|
||||
static char *dbgvalue=NULL;
|
||||
|
||||
static int convert_scope(pmix_scope_t *scope,
|
||||
opal_pmix_scope_t sc);
|
||||
static int convert_persistence(pmix_persistence_t *p,
|
||||
opal_pmix_persistence_t persist);
|
||||
static int convert_data_range(pmix_data_range_t *sc,
|
||||
opal_pmix_data_range_t scope);
|
||||
static void myerr(pmix_status_t status,
|
||||
pmix_proc_t procs[], size_t nprocs,
|
||||
pmix_info_t info[], size_t ninfo)
|
||||
{
|
||||
int rc;
|
||||
opal_list_t plist, ilist;
|
||||
opal_namelist_t *nm;
|
||||
opal_value_t *iptr;
|
||||
size_t n;
|
||||
|
||||
/* convert the incoming status */
|
||||
rc = pmix1_convert_rc(status);
|
||||
|
||||
/* convert the array of procs */
|
||||
OBJ_CONSTRUCT(&plist, opal_list_t);
|
||||
for (n=0; n < nprocs; n++) {
|
||||
nm = OBJ_NEW(opal_namelist_t);
|
||||
nm->name.jobid = strtoul(procs[n].nspace, NULL, 10);
|
||||
nm->name.vpid = procs[n].rank;
|
||||
opal_list_append(&plist, &nm->super);
|
||||
}
|
||||
|
||||
/* convert the array of info */
|
||||
OBJ_CONSTRUCT(&ilist, opal_list_t);
|
||||
for (n=0; n < ninfo; n++) {
|
||||
iptr = OBJ_NEW(opal_value_t);
|
||||
iptr->key = strdup(info[n].key);
|
||||
pmix1_value_unload(iptr, &info[n].value);
|
||||
opal_list_append(&plist, &nm->super);
|
||||
}
|
||||
|
||||
/* call the base errhandler */
|
||||
opal_pmix_base_errhandler(rc, &plist, &ilist);
|
||||
OPAL_LIST_DESTRUCT(&plist);
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
}
|
||||
|
||||
int pmix1_client_init(void)
|
||||
{
|
||||
@ -56,19 +84,28 @@ int pmix1_client_init(void)
|
||||
putenv(dbgvalue);
|
||||
}
|
||||
rc = PMIx_Init(&myproc);
|
||||
if (PMIX_SUCCESS == rc) {
|
||||
/* store our jobid and rank */
|
||||
opal_convert_string_to_jobid(&pname.jobid, myproc.nspace);
|
||||
pname.vpid = myproc.rank;
|
||||
opal_proc_set_name(&pname);
|
||||
if (PMIX_SUCCESS != rc) {
|
||||
return pmix1_convert_rc(rc);
|
||||
}
|
||||
return pmix1_convert_rc(rc);
|
||||
|
||||
/* store our jobid and rank */
|
||||
opal_convert_string_to_jobid(&pname.jobid, myproc.nspace);
|
||||
pname.vpid = myproc.rank;
|
||||
opal_proc_set_name(&pname);
|
||||
|
||||
/* register the errhandler */
|
||||
PMIx_Register_errhandler(NULL, 0, myerr);
|
||||
return OPAL_SUCCESS;
|
||||
|
||||
}
|
||||
|
||||
/**
 * Shut down the PMIx client side of this component.
 *
 * The error handler is deregistered first, then the PMIx library is
 * finalized.
 *
 * @return the PMIx_Finalize() status translated by pmix1_convert_rc()
 */
int pmix1_client_finalize(void)
{
    /* drop our errhandler before finalizing the library */
    PMIx_Deregister_errhandler();

    /* finalize and hand back the translated result */
    return pmix1_convert_rc(PMIx_Finalize());
}
|
||||
@ -222,28 +259,21 @@ int pmix1_fencenb(opal_list_t *procs, int collect_data,
|
||||
}
|
||||
|
||||
int pmix1_put(opal_pmix_scope_t scope,
|
||||
opal_value_t *val)
|
||||
opal_value_t *val)
|
||||
{
|
||||
pmix_scope_t pscope;
|
||||
pmix_value_t kv;
|
||||
pmix_status_t rc;
|
||||
int irc;
|
||||
|
||||
/* convert the scope */
|
||||
if (OPAL_SUCCESS != (irc = convert_scope(&pscope, scope))) {
|
||||
return irc;
|
||||
}
|
||||
|
||||
PMIX_VALUE_CONSTRUCT(&kv);
|
||||
pmix1_value_load(&kv, val);
|
||||
|
||||
rc = PMIx_Put(pscope, val->key, &kv);
|
||||
rc = PMIx_Put(scope, val->key, &kv);
|
||||
PMIX_VALUE_DESTRUCT(&kv);
|
||||
return pmix1_convert_rc(rc);
|
||||
}
|
||||
|
||||
int pmix1_get(const opal_process_name_t *proc,
|
||||
const char *key, opal_value_t **val)
|
||||
const char *key, opal_value_t **val)
|
||||
{
|
||||
int ret;
|
||||
pmix_value_t *kv;
|
||||
@ -308,9 +338,8 @@ static void val_cbfunc(pmix_status_t status,
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
|
||||
int pmix1_getnb(const opal_process_name_t *proc,
|
||||
const char *key,
|
||||
opal_pmix_value_cbfunc_t cbfunc, void *cbdata)
|
||||
int pmix1_getnb(const opal_process_name_t *proc, const char *key,
|
||||
opal_pmix_value_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
pmix1_opcaddy_t *op;
|
||||
pmix_status_t rc;
|
||||
@ -342,29 +371,13 @@ int pmix1_getnb(const opal_process_name_t *proc,
|
||||
return pmix1_convert_rc(rc);
|
||||
}
|
||||
|
||||
int pmix1_publish(opal_pmix_data_range_t scope,
|
||||
opal_pmix_persistence_t persist,
|
||||
opal_list_t *info)
|
||||
int pmix1_publish(opal_list_t *info)
|
||||
{
|
||||
pmix_data_range_t rng;
|
||||
pmix_persistence_t pst;
|
||||
int rc;
|
||||
pmix_info_t *pinfo;
|
||||
pmix_status_t ret;
|
||||
opal_value_t *iptr;
|
||||
size_t sz, n;
|
||||
|
||||
rc = convert_data_range(&rng, scope);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
OPAL_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
rc = convert_persistence(&pst, scope);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
OPAL_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
sz = opal_list_get_size(info);
|
||||
if (0 < sz) {
|
||||
PMIX_INFO_CREATE(pinfo, sz);
|
||||
@ -376,33 +389,19 @@ int pmix1_publish(opal_pmix_data_range_t scope,
|
||||
}
|
||||
}
|
||||
|
||||
ret = PMIx_Publish(rng, pst, pinfo, sz);
|
||||
ret = PMIx_Publish(pinfo, sz);
|
||||
|
||||
return pmix1_convert_rc(ret);
|
||||
}
|
||||
|
||||
int pmix1_publishnb(opal_pmix_data_range_t scope,
|
||||
opal_pmix_persistence_t persist,
|
||||
opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
int pmix1_publishnb(opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
pmix_data_range_t rng;
|
||||
pmix_persistence_t pst;
|
||||
int rc;
|
||||
pmix_status_t ret;
|
||||
opal_value_t *iptr;
|
||||
size_t n;
|
||||
pmix1_opcaddy_t *op;
|
||||
|
||||
rc = convert_data_range(&rng, scope);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
rc = convert_persistence(&pst, persist);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* create the caddy */
|
||||
op = OBJ_NEW(pmix1_opcaddy_t);
|
||||
op->opcbfunc = cbfunc;
|
||||
@ -419,34 +418,39 @@ int pmix1_publishnb(opal_pmix_data_range_t scope,
|
||||
}
|
||||
}
|
||||
|
||||
ret = PMIx_Publish_nb(rng, pst, op->info, op->sz, opcbfunc, op);
|
||||
ret = PMIx_Publish_nb(op->info, op->sz, opcbfunc, op);
|
||||
|
||||
return pmix1_convert_rc(ret);
|
||||
}
|
||||
|
||||
int pmix1_lookup(opal_pmix_data_range_t scope,
|
||||
opal_list_t *data)
|
||||
int pmix1_lookup(opal_list_t *data, opal_list_t *info)
|
||||
{
|
||||
pmix_data_range_t rng;
|
||||
pmix_pdata_t *pdata;
|
||||
size_t sz, n;
|
||||
pmix_info_t *pinfo;
|
||||
size_t sz, ninfo, n;
|
||||
int rc;
|
||||
pmix_status_t ret;
|
||||
opal_pmix_pdata_t *d;
|
||||
opal_value_t *iptr;
|
||||
|
||||
rc = convert_data_range(&rng, scope);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
sz = opal_list_get_size(data);
|
||||
|
||||
PMIX_PDATA_CREATE(pdata, sz);
|
||||
n=0;
|
||||
OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) {
|
||||
(void)strncpy(pdata[n++].key, d->value.key, PMIX_MAX_KEYLEN);
|
||||
}
|
||||
|
||||
ret = PMIx_Lookup(rng, NULL, 0, pdata, sz);
|
||||
ninfo = opal_list_get_size(info);
|
||||
PMIX_INFO_CREATE(pinfo, ninfo);
|
||||
n=0;
|
||||
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
|
||||
(void)strncpy(pinfo[n++].key, iptr->key, PMIX_MAX_KEYLEN);
|
||||
pmix1_value_load(&pinfo[n].value, iptr);
|
||||
++n;
|
||||
}
|
||||
|
||||
ret = PMIx_Lookup(pdata, sz, pinfo, ninfo);
|
||||
PMIX_INFO_FREE(pinfo, ninfo);
|
||||
|
||||
if (PMIX_SUCCESS == ret) {
|
||||
/* transfer the data back */
|
||||
@ -523,64 +527,82 @@ static void lk_cbfunc(pmix_status_t status,
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
|
||||
int pmix1_lookupnb(opal_pmix_data_range_t scope, int wait, char **keys,
|
||||
int pmix1_lookupnb(char **keys, opal_list_t *info,
|
||||
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
pmix_data_range_t rng;
|
||||
int rc;
|
||||
pmix_status_t ret;
|
||||
pmix1_opcaddy_t *op;
|
||||
|
||||
rc = convert_data_range(&rng, scope);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
opal_value_t *iptr;
|
||||
size_t n;
|
||||
|
||||
/* create the caddy */
|
||||
op = OBJ_NEW(pmix1_opcaddy_t);
|
||||
op->lkcbfunc = cbfunc;
|
||||
op->cbdata = cbdata;
|
||||
|
||||
ret = PMIx_Lookup_nb(rng, keys, NULL, 0, lk_cbfunc, op);
|
||||
|
||||
return pmix1_convert_rc(ret);
|
||||
}
|
||||
|
||||
int pmix1_unpublish(opal_pmix_data_range_t scope, char **keys)
|
||||
{
|
||||
int rc;
|
||||
pmix_status_t ret;
|
||||
pmix_data_range_t rng;
|
||||
|
||||
rc = convert_data_range(&rng, scope);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
return rc;
|
||||
op->sz = opal_list_get_size(info);
|
||||
if (0 < op->sz) {
|
||||
PMIX_INFO_CREATE(op->info, op->sz);
|
||||
n=0;
|
||||
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
|
||||
(void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN);
|
||||
pmix1_value_load(&op->info[n].value, iptr);
|
||||
++n;
|
||||
}
|
||||
}
|
||||
|
||||
ret = PMIx_Unpublish(rng, keys);
|
||||
ret = PMIx_Lookup_nb(keys, op->info, op->sz, lk_cbfunc, op);
|
||||
|
||||
return pmix1_convert_rc(ret);
|
||||
}
|
||||
|
||||
int pmix1_unpublishnb(opal_pmix_data_range_t scope, char **keys,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
/**
 * Blocking unpublish of previously published keys.
 *
 * Converts the optional OPAL info list into a pmix_info_t array, calls
 * PMIx_Unpublish(), frees the temporary array and returns the result
 * translated by pmix1_convert_rc().
 *
 * @param keys  NULL-terminated argv-style array of keys to unpublish
 * @param info  list of opal_value_t directives; may be NULL or empty, in
 *              which case no info array is passed down
 * @return      translated status of PMIx_Unpublish()
 */
int pmix1_unpublish(char **keys, opal_list_t *info)
{
    pmix_status_t ret;
    size_t ninfo = 0, n;
    pmix_info_t *pinfo = NULL;
    opal_value_t *iptr;

    /* only build an array when there is something to convert */
    if (NULL != info && 0 < (ninfo = opal_list_get_size(info))) {
        PMIX_INFO_CREATE(pinfo, ninfo);
        n=0;
        OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
            /* key and value must land in the SAME element, and the index
             * advances exactly once per list item - bumping it in both the
             * key copy and at the end of the loop ran off the array */
            (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN);
            pmix1_value_load(&pinfo[n].value, iptr);
            ++n;
        }
    }

    ret = PMIx_Unpublish(keys, pinfo, ninfo);
    if (NULL != pinfo) {
        PMIX_INFO_FREE(pinfo, ninfo);
    }

    return pmix1_convert_rc(ret);
}
|
||||
|
||||
int pmix1_unpublishnb(char **keys, opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
int rc;
|
||||
pmix_status_t ret;
|
||||
pmix_data_range_t rng;
|
||||
pmix1_opcaddy_t *op;
|
||||
|
||||
rc = convert_data_range(&rng, scope);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
opal_value_t *iptr;
|
||||
size_t n;
|
||||
|
||||
/* create the caddy */
|
||||
op = OBJ_NEW(pmix1_opcaddy_t);
|
||||
op->opcbfunc = cbfunc;
|
||||
op->cbdata = cbdata;
|
||||
|
||||
ret = PMIx_Unpublish_nb(rng, keys, opcbfunc, op);
|
||||
op->sz = opal_list_get_size(info);
|
||||
if (0 < op->sz) {
|
||||
PMIX_INFO_CREATE(op->info, op->sz);
|
||||
n=0;
|
||||
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
|
||||
(void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN);
|
||||
pmix1_value_load(&op->info[n].value, iptr);
|
||||
++n;
|
||||
}
|
||||
}
|
||||
|
||||
ret = PMIx_Unpublish_nb(keys, op->info, op->sz, opcbfunc, op);
|
||||
|
||||
return pmix1_convert_rc(ret);
|
||||
}
|
||||
@ -894,77 +916,3 @@ int pmix1_resolve_nodes(opal_jobid_t jobid, char **nodelist)
|
||||
|
||||
return pmix1_convert_rc(ret);;
|
||||
}
|
||||
|
||||
/*** UTILITY FUNCTIONS ***/
|
||||
static int convert_scope(pmix_scope_t *sc,
|
||||
opal_pmix_scope_t scope)
|
||||
{
|
||||
int rc = PMIX_SUCCESS;
|
||||
|
||||
switch (scope) {
|
||||
case OPAL_PMIX_SCOPE_UNDEF:
|
||||
*sc = PMIX_SCOPE_UNDEF;
|
||||
break;
|
||||
case OPAL_PMIX_LOCAL:
|
||||
*sc = PMIX_LOCAL;
|
||||
break;
|
||||
case OPAL_PMIX_REMOTE:
|
||||
*sc = PMIX_REMOTE;
|
||||
break;
|
||||
case OPAL_PMIX_GLOBAL:
|
||||
*sc = PMIX_GLOBAL;
|
||||
break;
|
||||
default:
|
||||
*sc = PMIX_SCOPE_UNDEF;
|
||||
rc = OPAL_ERR_BAD_PARAM;
|
||||
break;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int convert_persistence(pmix_persistence_t *p,
|
||||
opal_pmix_persistence_t persist)
|
||||
{
|
||||
int rc = OPAL_SUCCESS;
|
||||
|
||||
switch (persist) {
|
||||
case OPAL_PMIX_PERSIST_INDEF:
|
||||
*p = PMIX_PERSIST_INDEF;
|
||||
break;
|
||||
case OPAL_PMIX_PERSIST_PROC:
|
||||
*p = PMIX_PERSIST_PROC;
|
||||
break;
|
||||
case OPAL_PMIX_PERSIST_APP:
|
||||
*p = PMIX_PERSIST_APP;
|
||||
break;
|
||||
case OPAL_PMIX_PERSIST_SESSION:
|
||||
*p = PMIX_PERSIST_SESSION;
|
||||
break;
|
||||
default:
|
||||
*p = PMIX_PERSIST_PROC;
|
||||
rc = OPAL_ERR_BAD_PARAM;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int convert_data_range(pmix_data_range_t *sc,
|
||||
opal_pmix_data_range_t scope)
|
||||
{
|
||||
int rc = OPAL_SUCCESS;
|
||||
|
||||
switch (scope) {
|
||||
case OPAL_PMIX_DATA_RANGE_UNDEF:
|
||||
*sc = PMIX_DATA_RANGE_UNDEF;
|
||||
break;
|
||||
case OPAL_PMIX_NAMESPACE:
|
||||
*sc = PMIX_NAMESPACE;
|
||||
break;
|
||||
case OPAL_PMIX_SESSION:
|
||||
*sc = PMIX_SESSION;
|
||||
break;
|
||||
default:
|
||||
*sc = PMIX_DATA_RANGE_UNDEF;
|
||||
rc = OPAL_ERR_BAD_PARAM;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
@ -59,15 +59,13 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *proc,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_modex_cbfunc_t cbfunc, void *cbdata);
|
||||
static pmix_status_t server_publish_fn(const pmix_proc_t *proc,
|
||||
pmix_data_range_t scope, pmix_persistence_t persist,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
static pmix_status_t server_lookup_fn(const pmix_proc_t *proc, pmix_data_range_t scope,
|
||||
const pmix_info_t info[], size_t ninfo, char **keys,
|
||||
static pmix_status_t server_lookup_fn(const pmix_proc_t *proc, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_lookup_cbfunc_t cbfunc, void *cbdata);
|
||||
static pmix_status_t server_unpublish_fn(const pmix_proc_t *proc,
|
||||
pmix_data_range_t scope,
|
||||
const pmix_info_t info[], size_t ninfo, char **keys,
|
||||
static pmix_status_t server_unpublish_fn(const pmix_proc_t *proc, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
static pmix_status_t server_spawn_fn(const pmix_proc_t *proc,
|
||||
const pmix_info_t job_info[], size_t ninfo,
|
||||
@ -101,10 +99,6 @@ pmix_server_module_t mymodule = {
|
||||
};
|
||||
|
||||
opal_pmix_server_module_t *host_module = NULL;
|
||||
static int convert_data_range(opal_pmix_data_range_t *sc,
|
||||
pmix_data_range_t scope);
|
||||
static int convert_persistence(opal_pmix_persistence_t *p,
|
||||
pmix_persistence_t persist);
|
||||
|
||||
|
||||
static void opal_opcbfunc(int status, void *cbdata)
|
||||
@ -351,7 +345,6 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p,
|
||||
}
|
||||
|
||||
static pmix_status_t server_publish_fn(const pmix_proc_t *p,
|
||||
pmix_data_range_t scope, pmix_persistence_t persist,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
@ -359,8 +352,6 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p,
|
||||
size_t n;
|
||||
pmix1_opalcaddy_t *opalcaddy;
|
||||
opal_process_name_t proc;
|
||||
opal_pmix_data_range_t oscp;
|
||||
opal_pmix_persistence_t opers;
|
||||
opal_value_t *oinfo;
|
||||
|
||||
if (NULL == host_module || NULL == host_module->publish) {
|
||||
@ -377,16 +368,6 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p,
|
||||
proc.vpid = p->rank;
|
||||
}
|
||||
|
||||
/* convert the data range */
|
||||
if (OPAL_SUCCESS != (rc = convert_data_range(&oscp, scope))) {
|
||||
return pmix1_convert_opalrc(rc);
|
||||
}
|
||||
|
||||
/* convert the persistence */
|
||||
if (OPAL_SUCCESS != (rc = convert_persistence(&opers, persist))) {
|
||||
return pmix1_convert_opalrc(rc);
|
||||
}
|
||||
|
||||
/* setup the caddy */
|
||||
opalcaddy = OBJ_NEW(pmix1_opalcaddy_t);
|
||||
opalcaddy->opcbfunc = cbfunc;
|
||||
@ -404,7 +385,7 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p,
|
||||
}
|
||||
|
||||
/* pass it up */
|
||||
rc = host_module->publish(&proc, oscp, opers, &opalcaddy->info, opal_opcbfunc, opalcaddy);
|
||||
rc = host_module->publish(&proc, &opalcaddy->info, opal_opcbfunc, opalcaddy);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
OBJ_RELEASE(opalcaddy);
|
||||
}
|
||||
@ -442,13 +423,12 @@ static void opal_lkupcbfunc(int status,
|
||||
OBJ_RELEASE(opalcaddy);
|
||||
}
|
||||
|
||||
static pmix_status_t server_lookup_fn(const pmix_proc_t *p, pmix_data_range_t scope,
|
||||
const pmix_info_t info[], size_t ninfo, char **keys,
|
||||
static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_lookup_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
int rc;
|
||||
pmix1_opalcaddy_t *opalcaddy;
|
||||
opal_pmix_data_range_t oscp;
|
||||
opal_process_name_t proc;
|
||||
opal_value_t *iptr;
|
||||
size_t n;
|
||||
@ -467,11 +447,6 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, pmix_data_range_t sc
|
||||
proc.vpid = p->rank;
|
||||
}
|
||||
|
||||
/* convert the scope */
|
||||
if (OPAL_SUCCESS != (rc = convert_data_range(&oscp, scope))) {
|
||||
return pmix1_convert_opalrc(rc);
|
||||
}
|
||||
|
||||
/* setup the caddy */
|
||||
opalcaddy = OBJ_NEW(pmix1_opalcaddy_t);
|
||||
opalcaddy->lkupcbfunc = cbfunc;
|
||||
@ -489,7 +464,7 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, pmix_data_range_t sc
|
||||
}
|
||||
|
||||
/* pass it up */
|
||||
rc = host_module->lookup(&proc, oscp, &opalcaddy->info, keys, opal_lkupcbfunc, opalcaddy);
|
||||
rc = host_module->lookup(&proc, keys, &opalcaddy->info, opal_lkupcbfunc, opalcaddy);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
OBJ_RELEASE(opalcaddy);
|
||||
}
|
||||
@ -498,15 +473,13 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, pmix_data_range_t sc
|
||||
}
|
||||
|
||||
|
||||
static pmix_status_t server_unpublish_fn(const pmix_proc_t *p,
|
||||
pmix_data_range_t scope,
|
||||
const pmix_info_t info[], size_t ninfo, char **keys,
|
||||
static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys,
|
||||
const pmix_info_t info[], size_t ninfo,
|
||||
pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
int rc;
|
||||
pmix1_opalcaddy_t *opalcaddy;
|
||||
opal_process_name_t proc;
|
||||
opal_pmix_data_range_t oscp;
|
||||
opal_value_t *iptr;
|
||||
size_t n;
|
||||
|
||||
@ -524,11 +497,6 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p,
|
||||
proc.vpid = p->rank;
|
||||
}
|
||||
|
||||
/* convert the data range */
|
||||
if (OPAL_SUCCESS != (rc = convert_data_range(&oscp, scope))) {
|
||||
return pmix1_convert_opalrc(rc);
|
||||
}
|
||||
|
||||
/* setup the caddy */
|
||||
opalcaddy = OBJ_NEW(pmix1_opalcaddy_t);
|
||||
opalcaddy->opcbfunc = cbfunc;
|
||||
@ -546,7 +514,7 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p,
|
||||
}
|
||||
|
||||
/* pass it up */
|
||||
rc = host_module->unpublish(&proc, oscp, &opalcaddy->info, keys, opal_opcbfunc, opalcaddy);
|
||||
rc = host_module->unpublish(&proc, keys, &opalcaddy->info, opal_opcbfunc, opalcaddy);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
OBJ_RELEASE(opalcaddy);
|
||||
}
|
||||
@ -802,54 +770,3 @@ static pmix_status_t server_listener_fn(int listening_sd,
|
||||
rc = host_module->listener(listening_sd, cbfunc);
|
||||
return pmix1_convert_opalrc(rc);
|
||||
}
|
||||
|
||||
/**** UTILITY FUNCTIONS ****/
|
||||
static int convert_data_range(opal_pmix_data_range_t *sc,
|
||||
pmix_data_range_t scope)
|
||||
{
|
||||
int rc = OPAL_SUCCESS;
|
||||
|
||||
switch(scope) {
|
||||
case PMIX_DATA_RANGE_UNDEF:
|
||||
*sc = OPAL_PMIX_DATA_RANGE_UNDEF;
|
||||
break;
|
||||
case PMIX_NAMESPACE:
|
||||
*sc = OPAL_PMIX_NAMESPACE;
|
||||
break;
|
||||
case PMIX_SESSION:
|
||||
*sc = OPAL_PMIX_SESSION;
|
||||
break;
|
||||
default:
|
||||
*sc = OPAL_PMIX_DATA_RANGE_UNDEF;
|
||||
rc = OPAL_ERR_BAD_PARAM;
|
||||
break;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int convert_persistence(opal_pmix_persistence_t *p,
|
||||
pmix_persistence_t persist)
|
||||
{
|
||||
int rc = OPAL_SUCCESS;
|
||||
|
||||
switch (persist) {
|
||||
case PMIX_PERSIST_INDEF:
|
||||
*p = OPAL_PMIX_PERSIST_INDEF;
|
||||
break;
|
||||
case PMIX_PERSIST_PROC:
|
||||
*p = OPAL_PMIX_PERSIST_PROC;
|
||||
break;
|
||||
case PMIX_PERSIST_APP:
|
||||
*p = OPAL_PMIX_PERSIST_APP;
|
||||
break;
|
||||
case PMIX_PERSIST_SESSION:
|
||||
*p = OPAL_PMIX_PERSIST_SESSION;
|
||||
break;
|
||||
default:
|
||||
*p = OPAL_PMIX_PERSIST_PROC;
|
||||
rc = OPAL_ERR_BAD_PARAM;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
|
@ -48,6 +48,42 @@ extern pmix_server_module_t mymodule;
|
||||
extern opal_pmix_server_module_t *host_module;
|
||||
static char *dbgvalue=NULL;
|
||||
|
||||
static void myerr(pmix_status_t status,
|
||||
pmix_proc_t procs[], size_t nprocs,
|
||||
pmix_info_t info[], size_t ninfo)
|
||||
{
|
||||
int rc;
|
||||
opal_list_t plist, ilist;
|
||||
opal_namelist_t *nm;
|
||||
opal_value_t *iptr;
|
||||
size_t n;
|
||||
|
||||
/* convert the incoming status */
|
||||
rc = pmix1_convert_rc(status);
|
||||
|
||||
/* convert the array of procs */
|
||||
OBJ_CONSTRUCT(&plist, opal_list_t);
|
||||
for (n=0; n < nprocs; n++) {
|
||||
nm = OBJ_NEW(opal_namelist_t);
|
||||
nm->name.jobid = strtoul(procs[n].nspace, NULL, 10);
|
||||
nm->name.vpid = procs[n].rank;
|
||||
opal_list_append(&plist, &nm->super);
|
||||
}
|
||||
|
||||
/* convert the array of info */
|
||||
OBJ_CONSTRUCT(&ilist, opal_list_t);
|
||||
for (n=0; n < ninfo; n++) {
|
||||
iptr = OBJ_NEW(opal_value_t);
|
||||
iptr->key = strdup(info[n].key);
|
||||
pmix1_value_unload(iptr, &info[n].value);
|
||||
opal_list_append(&plist, &nm->super);
|
||||
}
|
||||
|
||||
/* call the base errhandler */
|
||||
opal_pmix_base_errhandler(rc, &plist, &ilist);
|
||||
OPAL_LIST_DESTRUCT(&plist);
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
}
|
||||
|
||||
int pmix1_server_init(opal_pmix_server_module_t *module)
|
||||
{
|
||||
@ -65,6 +101,8 @@ int pmix1_server_init(opal_pmix_server_module_t *module)
|
||||
/* record the host module */
|
||||
host_module = module;
|
||||
|
||||
/* register the errhandler */
|
||||
PMIx_Register_errhandler(NULL, 0, myerr);
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -72,6 +110,9 @@ int pmix1_server_finalize(void)
|
||||
{
|
||||
pmix_status_t rc;
|
||||
|
||||
/* deregister the errhandler */
|
||||
PMIx_Deregister_errhandler();
|
||||
|
||||
rc = PMIx_server_finalize();
|
||||
return pmix1_convert_rc(rc);
|
||||
}
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "pmix1.h"
|
||||
#include "opal/mca/pmix/base/base.h"
|
||||
|
||||
#include "opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h"
|
||||
|
||||
@ -82,37 +83,11 @@ const opal_pmix_base_module_t opal_pmix_pmix1xx_module = {
|
||||
pmix1_server_notify_error,
|
||||
/* utility APIs */
|
||||
PMIx_Get_version,
|
||||
pmix1_register_errhandler,
|
||||
pmix1_deregister_errhandler,
|
||||
opal_pmix_base_register_handler,
|
||||
opal_pmix_base_deregister_handler,
|
||||
pmix1_store_local
|
||||
};
|
||||
|
||||
static pmix_notification_fn_t errhandler = NULL;
|
||||
|
||||
/* Placeholder: the body contains no statements, only comments that
 * outline the intended conversion steps. Calling it has no effect. */
static void notification_fn(int status,
|
||||
opal_list_t *procs,
|
||||
opal_list_t *info)
|
||||
{
|
||||
/* convert the status */
|
||||
|
||||
/* convert the list of procs to an array of pmix_proc_t */
|
||||
|
||||
/* convert the list of info to an array of pmix_info_t */
|
||||
|
||||
/* pass this down to the notification function
|
||||
* we were given */
|
||||
}
|
||||
|
||||
/* No-op stub: returns immediately without recording the supplied
 * handler. */
void pmix1_register_errhandler(opal_pmix_errhandler_fn_t errhandler)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
/* No-op stub: returns immediately; there is nothing to undo. */
void pmix1_deregister_errhandler(void)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
int pmix1_store_local(const opal_process_name_t *proc,
|
||||
opal_value_t *val)
|
||||
{
|
||||
|
@ -92,8 +92,6 @@ typedef int (*opal_pmix_server_dmodex_req_fn_t)(opal_process_name_t *proc, opal_
|
||||
* process is also provided and is expected to be returned on any subsequent
|
||||
* lookup request */
|
||||
typedef int (*opal_pmix_server_publish_fn_t)(opal_process_name_t *proc,
|
||||
opal_pmix_data_range_t range,
|
||||
opal_pmix_persistence_t persist,
|
||||
opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
@ -110,18 +108,16 @@ typedef int (*opal_pmix_server_publish_fn_t)(opal_process_name_t *proc,
|
||||
* how the operation is to be executed (e.g., timeout limits, whether the
|
||||
* lookup should wait until data appears).
|
||||
*/
|
||||
typedef int (*opal_pmix_server_lookup_fn_t)(opal_process_name_t *proc,
|
||||
opal_pmix_data_range_t range,
|
||||
opal_list_t *info, char **keys,
|
||||
typedef int (*opal_pmix_server_lookup_fn_t)(opal_process_name_t *proc, char **keys,
|
||||
opal_list_t *info,
|
||||
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
/* Delete data from the data store. The host server will be passed a NULL-terminated array
|
||||
* of string keys along with a list of info directives (for example, the range within
|
||||
* which the data is expected to have been published). The callback is to be executed
|
||||
* upon completion of the delete procedure */
|
||||
typedef int (*opal_pmix_server_unpublish_fn_t)(opal_process_name_t *proc,
|
||||
opal_pmix_data_range_t range,
|
||||
opal_list_t *info, char **keys,
|
||||
typedef int (*opal_pmix_server_unpublish_fn_t)(opal_process_name_t *proc, char **keys,
|
||||
opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
|
||||
/* Spawn a set of applications/processes as per the PMIx API. Note that
|
||||
|
@ -32,14 +32,17 @@ BEGIN_C_DECLS
|
||||
#define OPAL_PMIX_USERID "pmix.euid" // (uint32_t) effective user id
|
||||
#define OPAL_PMIX_GRPID "pmix.egid" // (uint32_t) effective group id
|
||||
|
||||
/* general proc-level attributes */
|
||||
#define OPAL_PMIX_CPUSET "pmix.cpuset" // (char*) hwloc bitmap applied to proc upon launch
|
||||
#define OPAL_PMIX_CREDENTIAL "pmix.cred" // (char*) security credential assigned to proc
|
||||
#define OPAL_PMIX_SPAWNED "pmix.spawned" // (bool) true if this proc resulted from a call to PMIx_Spawn
|
||||
#define OPAL_PMIX_ARCH "pmix.arch" // (uint32_t) datatype architecture flag
|
||||
|
||||
/* scratch directory locations for use by applications */
|
||||
#define OPAL_PMIX_TMPDIR "pmix.tmpdir" // (char*) top-level tmp dir assigned to session
|
||||
#define OPAL_PMIX_NSDIR "pmix.nsdir" // (char*) sub-tmpdir assigned to namespace
|
||||
#define OPAL_PMIX_PROCDIR "pmix.pdir" // (char*) sub-nsdir assigned to proc
|
||||
|
||||
/* information about relative ranks as assigned by the RM */
|
||||
#define OPAL_PMIX_JOBID "pmix.jobid" // (char*) jobid assigned by scheduler
|
||||
#define OPAL_PMIX_APPNUM "pmix.appnum" // (uint32_t) app number within the job
|
||||
@ -71,17 +74,20 @@ BEGIN_C_DECLS
|
||||
#define OPAL_PMIX_LOCAL_PEERS "pmix.lpeers" // (char*) comma-delimited string of ranks on this node within the specified nspace
|
||||
#define OPAL_PMIX_LOCAL_CPUSETS "pmix.lcpus" // (char*) colon-delimited cpusets of local peers within the specified nspace
|
||||
#define OPAL_PMIX_PROC_URI "pmix.puri" // (char*) URI containing contact info for proc
|
||||
|
||||
/* size info */
|
||||
#define OPAL_PMIX_UNIV_SIZE "pmix.univ.size" // (uint32_t) #procs in this nspace
|
||||
#define OPAL_PMIX_JOB_SIZE "pmix.job.size" // (uint32_t) #procs in this job
|
||||
#define OPAL_PMIX_LOCAL_SIZE "pmix.local.size" // (uint32_t) #procs in this job on this node
|
||||
#define OPAL_PMIX_NODE_SIZE "pmix.node.size" // (uint32_t) #procs across all jobs on this node
|
||||
#define OPAL_PMIX_MAX_PROCS "pmix.max.size" // (uint32_t) max #procs for this job
|
||||
|
||||
/* topology info */
|
||||
#define OPAL_PMIX_NET_TOPO "pmix.ntopo" // (char*) xml-representation of network topology
|
||||
#define OPAL_PMIX_LOCAL_TOPO "pmix.ltopo" // (char*) xml-representation of local node topology
|
||||
#define OPAL_PMIX_NODE_LIST "pmix.nlist" // (char*) comma-delimited list of nodes running procs for this job
|
||||
#define OPAL_PMIX_TOPOLOGY "pmix.topo" // (hwloc_topology_t) pointer to the PMIx client's internal topology object
|
||||
|
||||
/* fault tolerance-related info */
|
||||
#define OPAL_PMIX_TERMINATE_SESSION "pmix.term.sess" // (bool) RM intends to terminate session
|
||||
#define OPAL_PMIX_TERMINATE_JOB "pmix.term.job" // (bool) RM intends to terminate this job
|
||||
@ -95,6 +101,9 @@ BEGIN_C_DECLS
|
||||
#define OPAL_PMIX_WAIT "pmix.wait" // (int) caller requests that the server wait until the specified #values are found
|
||||
#define OPAL_PMIX_COLLECTIVE_ALGO "pmix.calgo" // (char*) comma-delimited list of algorithms to use for collective
|
||||
#define OPAL_PMIX_COLLECTIVE_ALGO_REQD "pmix.calreqd" // (bool) if true, indicates that the requested choice of algo is mandatory
|
||||
#define OPAL_PMIX_NOTIFY_COMPLETION "pmix.notecomp" // (bool) notify parent process upon termination of child job
|
||||
#define OPAL_PMIX_RANGE "pmix.range" // (int) opal_pmix_data_range_t value for calls to publish/lookup/unpublish
|
||||
#define OPAL_PMIX_PERSISTENCE "pmix.persist" // (int) opal_pmix_persistence_t value for calls to publish
|
||||
|
||||
/* attribute used by host server to pass data to the server convenience library - the
|
||||
* data will then be parsed and provided to the local clients */
|
||||
@ -126,7 +135,8 @@ BEGIN_C_DECLS
|
||||
#define OPAL_PMIX_STDIN_TGT "pmix.stdin" // (uint32_t) spawned proc rank that is to receive stdin
|
||||
|
||||
|
||||
/* define a scope for data "put" by PMI per the following:
|
||||
/* define a scope for data "put" by PMI per the following - maintain
|
||||
* consistent order with the PMIx distro :
|
||||
*
|
||||
* OPAL_PMI_LOCAL - the data is intended only for other application
|
||||
* processes on the same node. Data marked in this way
|
||||
@ -137,7 +147,7 @@ BEGIN_C_DECLS
|
||||
* OPAL_PMI_GLOBAL - the data is to be shared with all other requesting processes,
|
||||
* regardless of location
|
||||
*/
|
||||
#define OPAL_PMIX_SCOPE PMIX_UINT32
|
||||
#define OPAL_PMIX_SCOPE PMIX_UINT
|
||||
typedef enum {
|
||||
OPAL_PMIX_SCOPE_UNDEF = 0,
|
||||
OPAL_PMIX_LOCAL, // share to procs also on this node
|
||||
@ -145,15 +155,17 @@ typedef enum {
|
||||
OPAL_PMIX_GLOBAL
|
||||
} opal_pmix_scope_t;
|
||||
|
||||
/* define a range for data "published" by PMI */
|
||||
#define OPAL_PMIX_DATA_RANGE OPAL_UINT8
|
||||
/* define a range for data "published" by PMI - maintain
|
||||
* consistent order with the PMIx distro */
|
||||
#define OPAL_PMIX_DATA_RANGE OPAL_UINT
|
||||
typedef enum {
|
||||
OPAL_PMIX_DATA_RANGE_UNDEF = 0,
|
||||
OPAL_PMIX_NAMESPACE, // data is available to procs in the same nspace only
|
||||
OPAL_PMIX_SESSION // data available to all jobs in this session
|
||||
} opal_pmix_data_range_t;
|
||||
|
||||
/* define a "persistence" policy for data published by clients */
|
||||
/* define a "persistence" policy for data published by clients - maintain
|
||||
* consistent order with the PMIx distro */
|
||||
typedef enum {
|
||||
OPAL_PMIX_PERSIST_INDEF = 0, // retain until specifically deleted
|
||||
OPAL_PMIX_PERSIST_PROC, // retain until publishing process terminates
|
||||
|
@ -351,6 +351,14 @@ int orte_ess_base_orted_setup(char **hosts)
|
||||
error = "orte_routed_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* setup the routed info - the selected routed component
|
||||
* will know what to do.
|
||||
*/
|
||||
if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_routed.init_routes";
|
||||
goto error;
|
||||
}
|
||||
/*
|
||||
* Group communications
|
||||
*/
|
||||
@ -645,7 +653,7 @@ int orte_ess_base_orted_finalize(void)
|
||||
/* shutdown the pmix server */
|
||||
pmix_server_finalize();
|
||||
(void) mca_base_framework_close(&opal_pmix_base_framework);
|
||||
|
||||
|
||||
/* close frameworks */
|
||||
(void) mca_base_framework_close(&orte_schizo_base_framework);
|
||||
(void) mca_base_framework_close(&orte_filem_base_framework);
|
||||
|
@ -647,12 +647,6 @@ static int rte_init(void)
|
||||
error = "opal_pmix_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* setup the PMIx server */
|
||||
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "pmix server init";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* setup the routed info - the selected routed component
|
||||
* will know what to do.
|
||||
@ -662,6 +656,14 @@ static int rte_init(void)
|
||||
error = "orte_routed.init_routes";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* setup the PMIx server */
|
||||
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "pmix server init";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* setup I/O forwarding system - must come after we init routes */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
|
@ -65,7 +65,6 @@ ORTE_DECLSPEC int orte_schizo_base_parse_cli(char *personality,
|
||||
ORTE_DECLSPEC int orte_schizo_base_parse_env(char *personality,
|
||||
char *path,
|
||||
opal_cmd_line_t *cmd_line,
|
||||
char *server,
|
||||
char **srcenv,
|
||||
char ***dstenv);
|
||||
ORTE_DECLSPEC int orte_schizo_base_setup_fork(orte_job_t *jdata,
|
||||
|
@ -40,7 +40,6 @@ int orte_schizo_base_parse_cli(char *personality,
|
||||
int orte_schizo_base_parse_env(char *personality,
|
||||
char *path,
|
||||
opal_cmd_line_t *cmd_line,
|
||||
char *server,
|
||||
char **srcenv,
|
||||
char ***dstenv)
|
||||
{
|
||||
@ -50,7 +49,7 @@ int orte_schizo_base_parse_env(char *personality,
|
||||
OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) {
|
||||
if (0 == strcmp(personality, mod->component->mca_component_name)) {
|
||||
if (NULL != mod->module->parse_env) {
|
||||
rc = mod->module->parse_env(personality, path, cmd_line, server, srcenv, dstenv);
|
||||
rc = mod->module->parse_env(personality, path, cmd_line, srcenv, dstenv);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
@ -54,7 +54,6 @@ static int parse_cli(char *personality,
|
||||
static int parse_env(char *personality,
|
||||
char *path,
|
||||
opal_cmd_line_t *cmd_line,
|
||||
char *server,
|
||||
char **srcenv,
|
||||
char ***dstenv);
|
||||
static int setup_fork(orte_job_t *jdata,
|
||||
@ -154,7 +153,6 @@ static int parse_cli(char *personality,
|
||||
static int parse_env(char *personality,
|
||||
char *path,
|
||||
opal_cmd_line_t *cmd_line,
|
||||
char *ompi_server,
|
||||
char **srcenv,
|
||||
char ***dstenv)
|
||||
{
|
||||
@ -181,11 +179,6 @@ static int parse_env(char *personality,
|
||||
}
|
||||
}
|
||||
|
||||
/* add the ompi-server, if provided */
|
||||
if (NULL != ompi_server) {
|
||||
opal_setenv("OMPI_MCA_pubsub_orte_server", ompi_server, true, dstenv);
|
||||
}
|
||||
|
||||
/* set necessary env variables for external usage from tune conf file*/
|
||||
int set_from_file = 0;
|
||||
vars = NULL;
|
||||
|
@ -46,7 +46,6 @@ typedef int (*orte_schizo_base_module_parse_cli_fn_t)(char *personality,
|
||||
typedef int (*orte_schizo_base_module_parse_env_fn_t)(char *personality,
|
||||
char *path,
|
||||
opal_cmd_line_t *cmd_line,
|
||||
char *server,
|
||||
char **srcenv,
|
||||
char ***dstenv);
|
||||
|
||||
|
@ -62,6 +62,7 @@
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/grpcomm/grpcomm.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/rml/base/rml_contact.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/session_dir.h"
|
||||
#include "orte/util/show_help.h"
|
||||
@ -125,21 +126,40 @@ void pmix_server_register_params(void)
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
|
||||
&orte_pmix_server_globals.timeout);
|
||||
orte_pmix_server_globals.timeout = orte_pmix_server_globals.timeout * 1000000;
|
||||
|
||||
/* register the URI of the UNIVERSAL data server */
|
||||
orte_pmix_server_globals.server_uri = NULL;
|
||||
(void) mca_base_var_register ("orte", "pmix", NULL, "server_uri",
|
||||
"URI of a session-level keyval server for publish/lookup operations",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
|
||||
&orte_pmix_server_globals.server_uri);
|
||||
|
||||
/* if the universal server wasn't specified, then we use
|
||||
* our own HNP for that purpose */
|
||||
orte_pmix_server_globals.server = *ORTE_PROC_MY_HNP;
|
||||
|
||||
/* whether or not to wait for the universal server */
|
||||
orte_pmix_server_globals.wait_for_server = false;
|
||||
(void) mca_base_var_register ("orte", "pmix", NULL, "wait_for_server",
|
||||
"Whether or not to wait for the session-level server to start",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
|
||||
&orte_pmix_server_globals.wait_for_server);
|
||||
}
|
||||
|
||||
static void eviction_cbfunc(struct opal_hotel_t *hotel,
|
||||
int room_num, void *occupant)
|
||||
{
|
||||
pmix_server_req_t *req = (pmix_server_req_t*)occupant;
|
||||
int rc;
|
||||
|
||||
/* decrement the request timeout */
|
||||
req->timeout -= orte_pmix_server_globals.timeout;
|
||||
if (0 < req->timeout) {
|
||||
/* not done yet - check us back in */
|
||||
if (OPAL_SUCCESS == (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) {
|
||||
return;
|
||||
}
|
||||
ORTE_ERROR_LOG(rc);
|
||||
/* fall thru and return an error so the caller doesn't hang */
|
||||
}
|
||||
/* don't let the caller hang */
|
||||
if (NULL != req->opcbfunc) {
|
||||
req->opcbfunc(OPAL_ERR_TIMEOUT, req->cbdata);
|
||||
@ -169,7 +189,7 @@ int pmix_server_init(void)
|
||||
OBJ_CONSTRUCT(&orte_pmix_server_globals.reqs, opal_hotel_t);
|
||||
if (OPAL_SUCCESS != (rc = opal_hotel_init(&orte_pmix_server_globals.reqs,
|
||||
orte_pmix_server_globals.num_rooms,
|
||||
orte_event_base, orte_pmix_server_globals.timeout,
|
||||
orte_event_base, orte_pmix_server_globals.timeout*1000000,
|
||||
ORTE_ERROR_PRI, eviction_cbfunc))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
@ -191,12 +211,103 @@ int pmix_server_init(void)
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DATA_CLIENT,
|
||||
ORTE_RML_PERSISTENT, pmix_server_keyval_client, NULL);
|
||||
|
||||
/* ensure the PMIx server uses the proper rendezvous directory */
|
||||
opal_setenv("PMIX_SERVER_TMPDIR", orte_process_info.proc_session_dir, true, &environ);
|
||||
|
||||
/* setup the local server */
|
||||
if (ORTE_SUCCESS != (rc = opal_pmix.server_init(&pmix_server))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
/* memory cleanup will occur when finalize is called */
|
||||
}
|
||||
|
||||
/* if the universal server wasn't specified, then we use
|
||||
* our own HNP for that purpose */
|
||||
if (NULL == orte_pmix_server_globals.server_uri) {
|
||||
orte_pmix_server_globals.server = *ORTE_PROC_MY_HNP;
|
||||
} else {
|
||||
char *server;
|
||||
opal_buffer_t buf;
|
||||
if (0 == strncmp(orte_pmix_server_globals.server_uri, "file", strlen("file")) ||
|
||||
0 == strncmp(orte_pmix_server_globals.server_uri, "FILE", strlen("FILE"))) {
|
||||
char input[1024], *filename;
|
||||
FILE *fp;
|
||||
|
||||
/* it is a file - get the filename */
|
||||
filename = strchr(orte_pmix_server_globals.server_uri, ':');
|
||||
if (NULL == filename) {
|
||||
/* filename is not correctly formatted */
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-bad", true,
|
||||
orte_basename, orte_pmix_server_globals.server_uri);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
++filename; /* space past the : */
|
||||
|
||||
if (0 >= strlen(filename)) {
|
||||
/* they forgot to give us the name! */
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-missing", true,
|
||||
orte_basename, orte_pmix_server_globals.server_uri);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* open the file and extract the uri */
|
||||
fp = fopen(filename, "r");
|
||||
if (NULL == fp) { /* can't find or read file! */
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-access", true,
|
||||
orte_basename, orte_pmix_server_globals.server_uri);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
if (NULL == fgets(input, 1024, fp)) {
|
||||
/* something malformed about file */
|
||||
fclose(fp);
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-file-bad", true,
|
||||
orte_basename, orte_pmix_server_globals.server_uri,
|
||||
orte_basename);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
fclose(fp);
|
||||
input[strlen(input)-1] = '\0'; /* remove newline */
|
||||
server = strdup(input);
|
||||
} else {
|
||||
server = strdup(orte_pmix_server_globals.server_uri);
|
||||
}
|
||||
/* setup our route to the server */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
opal_dss.pack(&buf, &server, 1, OPAL_STRING);
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(&buf))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
return rc;
|
||||
}
|
||||
OBJ_DESTRUCT(&buf);
|
||||
/* parse the URI to get the server's name */
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(server, &orte_pmix_server_globals.server, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* check if we are to wait for the server to start - resolves
|
||||
* a race condition that can occur when the server is run
|
||||
* as a background job - e.g., in scripts
|
||||
*/
|
||||
if (orte_pmix_server_globals.wait_for_server) {
|
||||
/* ping the server */
|
||||
struct timeval timeout;
|
||||
timeout.tv_sec = orte_pmix_server_globals.timeout;
|
||||
timeout.tv_usec = 0;
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.ping(server, &timeout))) {
|
||||
/* try it one more time */
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.ping(server, &timeout))) {
|
||||
/* okay give up */
|
||||
orte_show_help("help-orterun.txt", "orterun:server-not-found", true,
|
||||
orte_basename, server,
|
||||
(long)orte_pmix_server_globals.timeout,
|
||||
ORTE_ERROR_NAME(rc));
|
||||
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -461,6 +572,7 @@ static void pmix_server_dmdx_resp(int status, orte_process_name_t* sender,
|
||||
|
||||
static void rqcon(pmix_server_req_t *p)
|
||||
{
|
||||
p->timeout = orte_pmix_server_globals.timeout;
|
||||
p->jdata = NULL;
|
||||
OBJ_CONSTRUCT(&p->msg, opal_buffer_t);
|
||||
p->opcbfunc = NULL;
|
||||
|
@ -53,6 +53,7 @@
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
opal_event_t ev;
|
||||
int timeout;
|
||||
int room_num;
|
||||
int remote_room_num;
|
||||
orte_process_name_t proxy;
|
||||
@ -146,17 +147,13 @@ extern int pmix_server_fencenb_fn(opal_list_t *procs, opal_list_t *info,
|
||||
extern int pmix_server_dmodex_req_fn(opal_process_name_t *proc, opal_list_t *info,
|
||||
opal_pmix_modex_cbfunc_t cbfunc, void *cbdata);
|
||||
extern int pmix_server_publish_fn(opal_process_name_t *proc,
|
||||
opal_pmix_data_range_t range,
|
||||
opal_pmix_persistence_t persist,
|
||||
opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
extern int pmix_server_lookup_fn(opal_process_name_t *proc,
|
||||
opal_pmix_data_range_t range,
|
||||
opal_list_t *info, char **keys,
|
||||
extern int pmix_server_lookup_fn(opal_process_name_t *proc, char **keys,
|
||||
opal_list_t *info,
|
||||
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata);
|
||||
extern int pmix_server_unpublish_fn(opal_process_name_t *proc,
|
||||
opal_pmix_data_range_t range,
|
||||
opal_list_t *info, char **keys,
|
||||
extern int pmix_server_unpublish_fn(opal_process_name_t *proc, char **keys,
|
||||
opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
|
||||
extern int pmix_server_spawn_fn(opal_process_name_t *requestor,
|
||||
opal_list_t *job_info, opal_list_t *apps,
|
||||
@ -186,6 +183,8 @@ typedef struct {
|
||||
opal_hotel_t reqs;
|
||||
int num_rooms;
|
||||
int timeout;
|
||||
char *server_uri;
|
||||
bool wait_for_server;
|
||||
orte_process_name_t server;
|
||||
} pmix_server_globals_t;
|
||||
|
||||
|
@ -85,16 +85,16 @@ static void execute(int sd, short args, void *cbdata)
|
||||
}
|
||||
|
||||
int pmix_server_publish_fn(opal_process_name_t *proc,
|
||||
opal_pmix_data_range_t range,
|
||||
opal_pmix_persistence_t persist,
|
||||
opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
pmix_server_req_t *req;
|
||||
int rc;
|
||||
uint8_t cmd = ORTE_PMIX_PUBLISH_CMD;
|
||||
int32_t ninfo;
|
||||
opal_value_t *iptr;
|
||||
opal_pmix_data_range_t range = OPAL_PMIX_SESSION;
|
||||
opal_pmix_persistence_t persist = OPAL_PMIX_PERSIST_APP;
|
||||
bool rset, pset;
|
||||
|
||||
/* create the caddy */
|
||||
req = OBJ_NEW(pmix_server_req_t);
|
||||
@ -115,6 +115,25 @@ int pmix_server_publish_fn(opal_process_name_t *proc,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* no help for it - need to search for range/persistence */
|
||||
rset = false;
|
||||
pset = false;
|
||||
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
|
||||
if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE)) {
|
||||
range = iptr->data.integer;
|
||||
if (pset) {
|
||||
break;
|
||||
}
|
||||
rset = true;
|
||||
} else if (0 == strcmp(iptr->key, OPAL_PMIX_PERSISTENCE)) {
|
||||
persist = iptr->data.integer;
|
||||
if (rset) {
|
||||
break;
|
||||
}
|
||||
pset = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* pack the range */
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &range, 1, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -136,16 +155,13 @@ int pmix_server_publish_fn(opal_process_name_t *proc,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the number of info items */
|
||||
ninfo = opal_list_get_size(info);
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &ninfo, 1, OPAL_UINT32))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(req);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* if we have items, pack those too */
|
||||
/* if we have items, pack those too - ignore persistence
|
||||
* and range values */
|
||||
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
|
||||
if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE) ||
|
||||
0 == strcmp(iptr->key, OPAL_PMIX_PERSISTENCE)) {
|
||||
continue;
|
||||
}
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &iptr, 1, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(req);
|
||||
@ -163,17 +179,16 @@ int pmix_server_publish_fn(opal_process_name_t *proc,
|
||||
|
||||
}
|
||||
|
||||
int pmix_server_lookup_fn(opal_process_name_t *proc,
|
||||
opal_pmix_data_range_t range,
|
||||
opal_list_t *info, char **keys,
|
||||
int pmix_server_lookup_fn(opal_process_name_t *proc, char **keys,
|
||||
opal_list_t *info,
|
||||
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
pmix_server_req_t *req;
|
||||
int rc;
|
||||
uint8_t cmd = ORTE_PMIX_LOOKUP_CMD;
|
||||
int32_t nkeys, i;
|
||||
int32_t ninfo;
|
||||
opal_value_t *iptr;
|
||||
opal_pmix_data_range_t range = OPAL_PMIX_SESSION;
|
||||
|
||||
/* the list of info objects are directives for us - they include
|
||||
* things like timeout constraints, so there is no reason to
|
||||
@ -191,6 +206,14 @@ int pmix_server_lookup_fn(opal_process_name_t *proc,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* no help for it - need to search for range */
|
||||
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
|
||||
if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE)) {
|
||||
range = iptr->data.integer;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* pack the range */
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &range, 1, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -205,23 +228,6 @@ int pmix_server_lookup_fn(opal_process_name_t *proc,
|
||||
req->target = *ORTE_PROC_MY_HNP;
|
||||
}
|
||||
|
||||
/* pack the number of info items */
|
||||
ninfo = opal_list_get_size(info);
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &ninfo, 1, OPAL_UINT32))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(req);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* if we have items, pack those too */
|
||||
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &iptr, 1, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(req);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* pack the number of keys */
|
||||
nkeys = opal_argv_count(keys);
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &nkeys, 1, OPAL_UINT32))) {
|
||||
@ -239,6 +245,18 @@ int pmix_server_lookup_fn(opal_process_name_t *proc,
|
||||
}
|
||||
}
|
||||
|
||||
/* if we have items, pack those too - ignore range value */
|
||||
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
|
||||
if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE)) {
|
||||
continue;
|
||||
}
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &iptr, 1, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(req);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* thread-shift so we can store the tracker */
|
||||
opal_event_set(orte_event_base, &(req->ev),
|
||||
-1, OPAL_EV_WRITE, execute, req);
|
||||
@ -248,16 +266,16 @@ int pmix_server_lookup_fn(opal_process_name_t *proc,
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
int pmix_server_unpublish_fn(opal_process_name_t *proc,
|
||||
opal_pmix_data_range_t range,
|
||||
opal_list_t *info, char **keys,
|
||||
int pmix_server_unpublish_fn(opal_process_name_t *proc, char **keys,
|
||||
opal_list_t *info,
|
||||
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
|
||||
{
|
||||
pmix_server_req_t *req;
|
||||
int rc;
|
||||
uint8_t cmd = ORTE_PMIX_UNPUBLISH_CMD;
|
||||
uint32_t nkeys, ninfo;
|
||||
uint32_t nkeys, n;
|
||||
opal_value_t *iptr;
|
||||
opal_pmix_data_range_t range = OPAL_PMIX_SESSION;
|
||||
|
||||
/* create the caddy */
|
||||
req = OBJ_NEW(pmix_server_req_t);
|
||||
@ -278,6 +296,14 @@ int pmix_server_unpublish_fn(opal_process_name_t *proc,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* no help for it - need to search for range */
|
||||
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
|
||||
if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE)) {
|
||||
range = iptr->data.integer;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* pack the range */
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &range, 1, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -292,22 +318,6 @@ int pmix_server_unpublish_fn(opal_process_name_t *proc,
|
||||
req->target = *ORTE_PROC_MY_HNP;
|
||||
}
|
||||
|
||||
/* pack the number of info items */
|
||||
ninfo = opal_list_get_size(info);
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &ninfo, 1, OPAL_UINT32))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(req);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* if we have items, pack those too */
|
||||
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &iptr, 1, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(req);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
/* pack the number of keys */
|
||||
nkeys = opal_argv_count(keys);
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &nkeys, 1, OPAL_UINT32))) {
|
||||
@ -317,10 +327,24 @@ int pmix_server_unpublish_fn(opal_process_name_t *proc,
|
||||
}
|
||||
|
||||
/* pack the keys too */
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, keys, nkeys, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(req);
|
||||
return rc;
|
||||
for (n=0; n < nkeys; n++) {
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &keys[n], 1, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(req);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* if we have items, pack those too - ignore range value */
|
||||
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
|
||||
if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE)) {
|
||||
continue;
|
||||
}
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &iptr, 1, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(req);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* thread-shift so we can store the tracker */
|
||||
|
@ -80,8 +80,29 @@ OBJ_CLASS_INSTANCE(orte_data_object_t,
|
||||
opal_object_t,
|
||||
construct, destruct);
|
||||
|
||||
/* define a request object for delayed answers */
|
||||
/* A lookup request whose answer is delayed: it records who asked and what
 * they asked for so it can be matched against data that is published later. */
typedef struct {
|
||||
/* list-item base so the request can be kept on an opal_list_t */
opal_list_item_t super;
|
||||
/* process that issued the lookup; the delayed reply is sent to it */
orte_process_name_t requestor;
|
||||
/* user id of the requestor; compared against the uid of published data */
uint32_t uid;
|
||||
/* data range of the request; compared against the range of published data */
opal_pmix_data_range_t range;
|
||||
/* NULL-terminated array of requested keys; released with opal_argv_free
 * when the request object is destructed */
char **keys;
|
||||
} orte_data_req_t;
|
||||
/* Constructor for orte_data_req_t: a new request starts out with no
 * key array attached. */
static void rqcon(orte_data_req_t *req)
{
    req->keys = NULL;
}
|
||||
/* Destructor for orte_data_req_t: hand the key array owned by the
 * request back to opal_argv_free. */
static void rqdes(orte_data_req_t *req)
{
    opal_argv_free(req->keys);
}
|
||||
/* Instantiate the class descriptor for orte_data_req_t with
 * opal_list_item_t as its parent; rqcon and rqdes are passed in the
 * positions used for the constructor and destructor
 * (NOTE(review): argument order assumed from the sibling
 * "construct, destruct" instance above - confirm against the macro). */
OBJ_CLASS_INSTANCE(orte_data_req_t,
|
||||
opal_list_item_t,
|
||||
rqcon, rqdes);
|
||||
|
||||
/* local globals */
|
||||
/* published data objects; each object's index is the slot returned by
 * opal_pointer_array_add on this array */
static opal_pointer_array_t orte_data_server_store;
|
||||
/* list of orte_data_req_t lookup requests that are scanned for a match
 * each time new data is published */
static opal_list_t pending;
|
||||
|
||||
int orte_data_server_init(void)
|
||||
{
|
||||
@ -96,6 +117,8 @@ int orte_data_server_init(void)
|
||||
return rc;
|
||||
}
|
||||
|
||||
OBJ_CONSTRUCT(&pending, opal_list_t);
|
||||
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
|
||||
ORTE_RML_TAG_DATA_SERVER,
|
||||
ORTE_RML_PERSISTENT,
|
||||
@ -118,6 +141,7 @@ void orte_data_server_finalize(void)
|
||||
}
|
||||
}
|
||||
OBJ_DESTRUCT(&orte_data_server_store);
|
||||
OPAL_LIST_DESTRUCT(&pending);
|
||||
}
|
||||
|
||||
void orte_data_server(int status, orte_process_name_t* sender,
|
||||
@ -128,15 +152,16 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
orte_std_cntr_t count;
|
||||
opal_process_name_t requestor;
|
||||
orte_data_object_t *data;
|
||||
opal_buffer_t *answer;
|
||||
opal_buffer_t *answer, *reply;
|
||||
int rc, ret, k;
|
||||
opal_value_t *iptr, *inext;
|
||||
uint32_t ninfo, i;
|
||||
char **keys = NULL, *str;
|
||||
bool ret_packed = false;
|
||||
bool ret_packed = false, wait = false;
|
||||
int room_number;
|
||||
uint32_t uid;
|
||||
opal_pmix_data_range_t range;
|
||||
orte_data_req_t *req, *rqnext;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
"%s data server got message from %s",
|
||||
@ -196,34 +221,66 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
goto SEND_ERROR;
|
||||
}
|
||||
|
||||
/* unpack the number of info elements */
|
||||
count = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ninfo, &count, OPAL_UINT32))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(data);
|
||||
goto SEND_ERROR;
|
||||
}
|
||||
|
||||
if (0 < ninfo) {
|
||||
for (i=0; i < ninfo; i++) {
|
||||
count = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &iptr, &count, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(data);
|
||||
goto SEND_ERROR;
|
||||
}
|
||||
/* if this is the userid, separate it out */
|
||||
if (0 == strcmp(iptr->key, OPAL_PMIX_USERID)) {
|
||||
data->uid = iptr->data.uint32;
|
||||
OBJ_RELEASE(iptr);
|
||||
} else {
|
||||
opal_list_append(&data->values, &iptr->super);
|
||||
}
|
||||
while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &iptr, &count, OPAL_VALUE))) {
|
||||
/* if this is the userid, separate it out */
|
||||
if (0 == strcmp(iptr->key, OPAL_PMIX_USERID)) {
|
||||
data->uid = iptr->data.uint32;
|
||||
OBJ_RELEASE(iptr);
|
||||
} else {
|
||||
opal_list_append(&data->values, &iptr->super);
|
||||
}
|
||||
}
|
||||
|
||||
data->index = opal_pointer_array_add(&orte_data_server_store, data);
|
||||
|
||||
/* check for pending requests that match this data */
|
||||
reply = NULL;
|
||||
OPAL_LIST_FOREACH_SAFE(req, rqnext, &pending, orte_data_req_t) {
|
||||
if (req->uid != data->uid) {
|
||||
continue;
|
||||
}
|
||||
if (req->range != data->range) {
|
||||
continue;
|
||||
}
|
||||
for (i=0; NULL != req->keys[i]; i++) {
|
||||
/* cycle thru the data keys for matches */
|
||||
OPAL_LIST_FOREACH(iptr, &data->values, opal_value_t) {
|
||||
if (0 == strcmp(iptr->key, req->keys[i])) {
|
||||
/* found it - package it for return */
|
||||
if (NULL == reply) {
|
||||
reply = OBJ_NEW(opal_buffer_t);
|
||||
ret = ORTE_SUCCESS;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(reply, &ret, 1, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(reply, &data->owner, 1, OPAL_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
break;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(reply, &iptr, 1, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (NULL != reply) {
|
||||
/* send it back to the requestor */
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(&req->requestor, reply, ORTE_RML_TAG_DATA_CLIENT,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(reply);
|
||||
}
|
||||
/* remove this request */
|
||||
opal_list_remove_item(&pending, &req->super);
|
||||
OBJ_RELEASE(req);
|
||||
reply = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* tell the user it was wonderful... */
|
||||
ret = ORTE_SUCCESS;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(answer, &ret, 1, OPAL_INT))) {
|
||||
@ -247,28 +304,6 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
goto SEND_ERROR;
|
||||
}
|
||||
|
||||
/* unpack the number of info elements */
|
||||
count = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ninfo, &count, OPAL_UINT32))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto SEND_ERROR;
|
||||
}
|
||||
if (0 < ninfo) {
|
||||
for (i=0; i < ninfo; i++) {
|
||||
count = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &iptr, &count, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto SEND_ERROR;
|
||||
}
|
||||
/* if this is the userid, separate it out */
|
||||
if (0 == strcmp(iptr->key, OPAL_PMIX_USERID)) {
|
||||
uid = iptr->data.uint32;
|
||||
}
|
||||
/* ignore anything else for now */
|
||||
OBJ_RELEASE(iptr);
|
||||
}
|
||||
}
|
||||
|
||||
/* unpack the number of keys */
|
||||
count = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ninfo, &count, OPAL_UINT32))) {
|
||||
@ -277,6 +312,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
}
|
||||
if (0 == ninfo) {
|
||||
/* they forgot to send us the keys?? */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
|
||||
rc = ORTE_ERR_BAD_PARAM;
|
||||
goto SEND_ERROR;
|
||||
}
|
||||
@ -293,7 +329,27 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
free(str);
|
||||
}
|
||||
|
||||
/* unpack any info elements */
|
||||
count = 1;
|
||||
while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &iptr, &count, OPAL_VALUE))) {
|
||||
/* if this is the userid, separate it out */
|
||||
if (0 == strcmp(iptr->key, OPAL_PMIX_USERID)) {
|
||||
uid = iptr->data.uint32;
|
||||
} else if (0 == strcmp(iptr->key, OPAL_PMIX_WAIT)) {
|
||||
/* flag that we wait until the data is present */
|
||||
wait = true;
|
||||
}
|
||||
/* ignore anything else for now */
|
||||
OBJ_RELEASE(iptr);
|
||||
}
|
||||
if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
opal_argv_free(keys);
|
||||
goto SEND_ERROR;
|
||||
}
|
||||
|
||||
/* cycle across the provided keys */
|
||||
ret_packed = false;
|
||||
for (i=0; NULL != keys[i]; i++) {
|
||||
/* cycle across the stored data, looking for a match */
|
||||
for (k=0; k < orte_data_server_store.size; k++) {
|
||||
@ -336,12 +392,23 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
}
|
||||
}
|
||||
}
|
||||
opal_argv_free(keys);
|
||||
if (!ret_packed) {
|
||||
/* if we were told to wait for the data, then queue this up
|
||||
* for later processing */
|
||||
if (wait) {
|
||||
req = OBJ_NEW(orte_data_req_t);
|
||||
req->requestor = *sender;
|
||||
req->uid = uid;
|
||||
req->range = range;
|
||||
req->keys = keys;
|
||||
return;
|
||||
}
|
||||
/* nothing was found - indicate that situation */
|
||||
rc = ORTE_ERR_NOT_FOUND;
|
||||
opal_argv_free(keys);
|
||||
goto SEND_ERROR;
|
||||
}
|
||||
opal_argv_free(keys);
|
||||
goto SEND_ANSWER;
|
||||
break;
|
||||
|
||||
@ -365,28 +432,6 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
goto SEND_ERROR;
|
||||
}
|
||||
|
||||
/* unpack the number of info elements */
|
||||
count = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ninfo, &count, OPAL_UINT32))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto SEND_ERROR;
|
||||
}
|
||||
if (0 < ninfo) {
|
||||
for (i=0; i < ninfo; i++) {
|
||||
count = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &iptr, &count, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto SEND_ERROR;
|
||||
}
|
||||
/* if this is the userid, separate it out */
|
||||
if (0 == strcmp(iptr->key, OPAL_PMIX_USERID)) {
|
||||
uid = iptr->data.uint32;
|
||||
}
|
||||
/* ignore anything else for now */
|
||||
OBJ_RELEASE(iptr);
|
||||
}
|
||||
}
|
||||
|
||||
/* unpack the number of keys */
|
||||
count = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ninfo, &count, OPAL_UINT32))) {
|
||||
@ -411,6 +456,22 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
free(str);
|
||||
}
|
||||
|
||||
/* unpack any info elements */
|
||||
count = 1;
|
||||
while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &iptr, &count, OPAL_VALUE))) {
|
||||
/* if this is the userid, separate it out */
|
||||
if (0 == strcmp(iptr->key, OPAL_PMIX_USERID)) {
|
||||
uid = iptr->data.uint32;
|
||||
}
|
||||
/* ignore anything else for now */
|
||||
OBJ_RELEASE(iptr);
|
||||
}
|
||||
if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
opal_argv_free(keys);
|
||||
goto SEND_ERROR;
|
||||
}
|
||||
|
||||
/* cycle across the provided keys */
|
||||
for (i=0; NULL != keys[i]; i++) {
|
||||
/* cycle across the stored data, looking for a match */
|
||||
@ -463,6 +524,10 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
}
|
||||
|
||||
SEND_ERROR:
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
"%s data server: sending error %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_ERROR_NAME(rc)));
|
||||
/* pack the error code */
|
||||
if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &rc, 1, OPAL_INT))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
|
@ -1046,7 +1046,7 @@ static int create_app(int argc, char* argv[],
|
||||
app->env = opal_argv_copy(*app_env);
|
||||
if (ORTE_SUCCESS != (rc = orte_schizo.parse_env(myglobals.personality,
|
||||
myglobals.path,
|
||||
&cmd_line, NULL,
|
||||
&cmd_line,
|
||||
environ, &app->env))) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
@ -158,7 +158,6 @@ void* MPIR_Breakpoint(void)
|
||||
static char **global_mca_env = NULL;
|
||||
static orte_std_cntr_t total_num_apps = 0;
|
||||
static bool want_prefix_by_default = (bool) ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT;
|
||||
static char *ompi_server=NULL;
|
||||
|
||||
/*
|
||||
* Globals
|
||||
@ -284,16 +283,10 @@ static opal_cmd_line_init_t cmd_line_init[] = {
|
||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Do not attempt to resolve interfaces" },
|
||||
|
||||
/* uri of Open MPI server, or at least where to get it */
|
||||
{ NULL, '\0', "ompi-server", "ompi-server", 1,
|
||||
&orterun_globals.ompi_server, OPAL_CMD_LINE_TYPE_STRING,
|
||||
"Specify the URI of the Open MPI server, or the name of the file (specified as file:filename) that contains that info" },
|
||||
{ NULL, '\0', "wait-for-server", "wait-for-server", 0,
|
||||
&orterun_globals.wait_for_server, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"If ompi-server is not already running, wait until it is detected (default: false)" },
|
||||
{ NULL, '\0', "server-wait-time", "server-wait-time", 1,
|
||||
&orterun_globals.server_wait_timeout, OPAL_CMD_LINE_TYPE_INT,
|
||||
"Time in seconds to wait for ompi-server (default: 10 sec)" },
|
||||
/* uri of PMIx publish/lookup server, or at least where to get it */
|
||||
{ "pmix_server_uri", '\0', "ompi-server", "ompi-server", 1,
|
||||
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
||||
"Specify the URI of the publish/lookup server, or the name of the file (specified as file:filename) that contains that info" },
|
||||
|
||||
{ "carto_file_path", '\0', "cf", "cartofile", 1,
|
||||
NULL, OPAL_CMD_LINE_TYPE_STRING,
|
||||
@ -1041,42 +1034,6 @@ int orterun(int argc, char *argv[])
|
||||
goto DONE;
|
||||
}
|
||||
|
||||
/* if an uri for the ompi-server was provided, set the route */
|
||||
if (NULL != ompi_server) {
|
||||
opal_buffer_t buf;
|
||||
/* setup our route to the server */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
opal_dss.pack(&buf, &ompi_server, 1, OPAL_STRING);
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(&buf))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
goto DONE;
|
||||
}
|
||||
OBJ_DESTRUCT(&buf);
|
||||
/* check if we are to wait for the server to start - resolves
|
||||
* a race condition that can occur when the server is run
|
||||
* as a background job - e.g., in scripts
|
||||
*/
|
||||
if (orterun_globals.wait_for_server) {
|
||||
/* ping the server */
|
||||
struct timeval timeout;
|
||||
timeout.tv_sec = orterun_globals.server_wait_timeout;
|
||||
timeout.tv_usec = 0;
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.ping(ompi_server, &timeout))) {
|
||||
/* try it one more time */
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.ping(ompi_server, &timeout))) {
|
||||
/* okay give up */
|
||||
orte_show_help("help-orterun.txt", "orterun:server-not-found", true,
|
||||
orte_basename, ompi_server,
|
||||
(long)orterun_globals.server_wait_timeout,
|
||||
ORTE_ERROR_NAME(rc));
|
||||
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
goto DONE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* setup for debugging */
|
||||
orte_debugger_init_before_spawn(jdata);
|
||||
orte_state.add_job_state(ORTE_JOB_STATE_READY_FOR_DEBUGGERS,
|
||||
@ -1175,9 +1132,6 @@ static int init_globals(void)
|
||||
orterun_globals.appfile = NULL;
|
||||
orterun_globals.wdir = NULL;
|
||||
orterun_globals.path = NULL;
|
||||
orterun_globals.ompi_server = NULL;
|
||||
orterun_globals.wait_for_server = false;
|
||||
orterun_globals.server_wait_timeout = 10;
|
||||
orterun_globals.stdin_target = "0";
|
||||
orterun_globals.report_pid = NULL;
|
||||
orterun_globals.report_uri = NULL;
|
||||
@ -1270,132 +1224,7 @@ static int parse_locals(orte_job_t *jdata, int argc, char* argv[])
|
||||
bool made_app;
|
||||
orte_std_cntr_t j, size1;
|
||||
|
||||
/* if the ompi-server was given, then set it up here */
|
||||
if (NULL != orterun_globals.ompi_server) {
|
||||
/* someone could have passed us a file instead of a uri, so
|
||||
* we need to first check to see what we have - if it starts
|
||||
* with "file", then we know it is a file. Otherwise, we assume
|
||||
* it is a uri as provided by the ompi-server's output
|
||||
* of an ORTE-standard string. Note that this is NOT a standard
|
||||
* uri as it starts with the process name!
|
||||
*/
|
||||
if (0 == strncmp(orterun_globals.ompi_server, "file", strlen("file")) ||
|
||||
0 == strncmp(orterun_globals.ompi_server, "FILE", strlen("FILE"))) {
|
||||
char input[1024], *filename;
|
||||
FILE *fp;
|
||||
|
||||
/* it is a file - get the filename */
|
||||
filename = strchr(orterun_globals.ompi_server, ':');
|
||||
if (NULL == filename) {
|
||||
/* filename is not correctly formatted */
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-bad", true,
|
||||
orte_basename, orterun_globals.ompi_server);
|
||||
exit(1);
|
||||
}
|
||||
++filename; /* space past the : */
|
||||
|
||||
if (0 >= strlen(filename)) {
|
||||
/* they forgot to give us the name! */
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-missing", true,
|
||||
orte_basename, orterun_globals.ompi_server);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* open the file and extract the uri */
|
||||
fp = fopen(filename, "r");
|
||||
if (NULL == fp) { /* can't find or read file! */
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-access", true,
|
||||
orte_basename, orterun_globals.ompi_server);
|
||||
exit(1);
|
||||
}
|
||||
if (NULL == fgets(input, 1024, fp)) {
|
||||
/* something malformed about file */
|
||||
fclose(fp);
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-file-bad", true,
|
||||
orte_basename, orterun_globals.ompi_server,
|
||||
orte_basename);
|
||||
exit(1);
|
||||
}
|
||||
fclose(fp);
|
||||
input[strlen(input)-1] = '\0'; /* remove newline */
|
||||
ompi_server = strdup(input);
|
||||
} else if (0 == strncmp(orterun_globals.ompi_server, "pid", strlen("pid")) ||
|
||||
0 == strncmp(orterun_globals.ompi_server, "PID", strlen("PID"))) {
|
||||
opal_list_t hnp_list;
|
||||
opal_list_item_t *item;
|
||||
orte_hnp_contact_t *hnp;
|
||||
char *ptr;
|
||||
pid_t pid;
|
||||
|
||||
ptr = strchr(orterun_globals.ompi_server, ':');
|
||||
if (NULL == ptr) {
|
||||
/* pid is not correctly formatted */
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-pid-bad", true,
|
||||
orte_basename, orte_basename,
|
||||
orterun_globals.ompi_server, orte_basename);
|
||||
exit(1);
|
||||
}
|
||||
++ptr; /* space past the : */
|
||||
|
||||
if (0 >= strlen(ptr)) {
|
||||
/* they forgot to give us the pid! */
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-pid-bad", true,
|
||||
orte_basename, orte_basename,
|
||||
orterun_globals.ompi_server, orte_basename);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
pid = strtoul(ptr, NULL, 10);
|
||||
|
||||
/* to search the local mpirun's, we have to partially initialize the
|
||||
* orte_process_info structure. This won't fully be setup until orte_init,
|
||||
* but we finagle a little bit of it here
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_session_dir_get_name(NULL, &orte_process_info.tmpdir_base,
|
||||
&orte_process_info.top_session_dir,
|
||||
NULL, NULL, NULL))) {
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-could-not-get-hnp-list", true,
|
||||
orte_basename, orte_basename);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
OBJ_CONSTRUCT(&hnp_list, opal_list_t);
|
||||
|
||||
/* get the list of HNPs, but do -not- setup contact info to them in the RML */
|
||||
if (ORTE_SUCCESS != (rc = orte_list_local_hnps(&hnp_list, false))) {
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-could-not-get-hnp-list", true,
|
||||
orte_basename, orte_basename);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* search the list for the desired pid */
|
||||
while (NULL != (item = opal_list_remove_first(&hnp_list))) {
|
||||
hnp = (orte_hnp_contact_t*)item;
|
||||
if (pid == hnp->pid) {
|
||||
ompi_server = strdup(hnp->rml_uri);
|
||||
goto hnp_found;
|
||||
}
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
/* if we got here, it wasn't found */
|
||||
orte_show_help("help-orterun.txt", "orterun:ompi-server-pid-not-found", true,
|
||||
orte_basename, orte_basename, pid, orterun_globals.ompi_server,
|
||||
orte_basename);
|
||||
OBJ_DESTRUCT(&hnp_list);
|
||||
exit(1);
|
||||
hnp_found:
|
||||
/* cleanup rest of list */
|
||||
while (NULL != (item = opal_list_remove_first(&hnp_list))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(&hnp_list);
|
||||
} else {
|
||||
ompi_server = strdup(orterun_globals.ompi_server);
|
||||
}
|
||||
}
|
||||
|
||||
/* Make the apps */
|
||||
|
||||
temp_argc = 0;
|
||||
temp_argv = NULL;
|
||||
opal_argv_append(&temp_argc, &temp_argv, argv[0]);
|
||||
@ -1640,7 +1469,7 @@ static int create_app(int argc, char* argv[],
|
||||
app->env = opal_argv_copy(*app_env);
|
||||
if (ORTE_SUCCESS != (rc = orte_schizo.parse_env(orterun_globals.personality,
|
||||
orterun_globals.path,
|
||||
&cmd_line, ompi_server,
|
||||
&cmd_line,
|
||||
environ, &app->env))) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
@ -51,9 +51,6 @@ struct orterun_globals_t {
|
||||
char *path;
|
||||
char *preload_files;
|
||||
bool sleep;
|
||||
char *ompi_server;
|
||||
bool wait_for_server;
|
||||
int server_wait_timeout;
|
||||
char *stdin_target;
|
||||
char *prefix;
|
||||
char *path_to_mpirun;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user