1
1

Sync with PMIx master 43e45c3. Get multi-node publish/lookup/unpublish working

Этот коммит содержится в:
Ralph Castain 2015-09-04 08:29:09 -07:00
родитель c9710660af
Коммит f6948c2bb4
45 изменённых файлов: 1041 добавлений и 1080 удалений

Просмотреть файл

@ -73,17 +73,21 @@ static OBJ_CLASS_INSTANCE(ompi_dpm_proct_caddy_t,
NULL, NULL);
struct lookup_caddy_t {
bool active;
volatile bool active;
int status;
opal_pmix_pdata_t *pdat;
};
static void lookup_cbfunc(int status, opal_list_t *data, void *cbdata)
{
struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata;
opal_pmix_pdata_t *p = (opal_pmix_pdata_t*)opal_list_get_first(data);
if (NULL != p && OPAL_STRING == p->value.type &&
NULL != p->value.data.string) {
cd->pdat->value.data.string = strdup(p->value.data.string);
cd->status = status;
if (OPAL_SUCCESS == status && NULL != data) {
opal_pmix_pdata_t *p = (opal_pmix_pdata_t*)opal_list_get_first(data);
if (NULL != p && OPAL_STRING == p->value.type &&
NULL != p->value.data.string) {
cd->pdat->value.data.string = strdup(p->value.data.string);
}
}
cd->active = false;
}
@ -197,17 +201,13 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
if (send_first) {
(void)asprintf(&info->key, "%s:connect", port_string);
info->type = OPAL_STRING;
info->data.string = opal_argv_join(members, ':');
} else {
(void)asprintf(&info->key, "%s:accept", port_string);
info->type = OPAL_STRING;
info->data.string = opal_argv_join(members, ':');
}
info->type = OPAL_STRING;
info->data.string = opal_argv_join(members, ':');
/* publish it with "session" scope */
rc = opal_pmix.publish(OPAL_PMIX_SESSION,
OPAL_PMIX_PERSIST_APP,
&ilist);
rc = opal_pmix.publish(&ilist);
OPAL_LIST_DESTRUCT(&ilist);
if (OPAL_SUCCESS != rc) {
opal_argv_free(members);
@ -228,7 +228,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
}
opal_list_append(&ilist, &pdat->super);
if (NULL == opal_pmix.lookup_nb) {
rc = opal_pmix.lookup(OPAL_PMIX_SESSION, &ilist);
rc = opal_pmix.lookup(&ilist, NULL);
if (OPAL_SUCCESS != rc) {
OPAL_LIST_DESTRUCT(&ilist);
opal_argv_free(members);
@ -242,8 +242,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
opal_argv_append_nosize(&keys, pdat->value.key);
caddy.active = true;
caddy.pdat = pdat;
rc = opal_pmix.lookup_nb(OPAL_PMIX_SESSION, true, keys,
lookup_cbfunc, &caddy);
rc = opal_pmix.lookup_nb(keys, NULL, lookup_cbfunc, &caddy);
if (OPAL_SUCCESS != rc) {
OPAL_LIST_DESTRUCT(&ilist);
opal_argv_free(keys);
@ -252,6 +251,11 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
}
OMPI_WAIT_FOR_COMPLETION(caddy.active);
opal_argv_free(keys);
if (OPAL_SUCCESS != caddy.status) {
OPAL_LIST_DESTRUCT(&ilist);
opal_argv_free(members);
return OMPI_ERROR;
}
}
/* initiate a list of participants for the connect,
* starting with our own members, remembering to

Просмотреть файл

@ -30,7 +30,7 @@ int vprotocol_pessimist_event_logger_connect(int el_rank, ompi_communicator_t **
asprintf(&pdat->value.key, VPROTOCOL_EVENT_LOGGER_NAME_FMT, el_rank);
opal_list_append(&results, &pdat->super);
rc = opal_pmix.lookup(OPAL_PMIX_NAMESPACE, &results);
rc = opal_pmix.lookup(&results, NULL);
if (OPAL_SUCCESS != rc ||
OPAL_STRING != pdat->value.type ||
NULL == pdat->value.data.string) {

Просмотреть файл

@ -46,9 +46,8 @@ int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name)
{
char range[OPAL_MAX_INFO_VAL];
int flag=0, ret;
opal_pmix_data_range_t rng;
bool range_given = false;
opal_list_t results;
opal_value_t *rng;
opal_list_t results, pinfo;
opal_pmix_pdata_t *pdat;
if ( MPI_PARAM_CHECK ) {
@ -70,27 +69,33 @@ int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name)
OPAL_CR_ENTER_LIBRARY();
OBJ_CONSTRUCT(&pinfo, opal_list_t);
/* OMPI supports info keys to pass the range to
* be searched for the given key */
if (MPI_INFO_NULL != info) {
ompi_info_get (info, "range", sizeof(range) - 1, range, &flag);
if (flag) {
range_given = true;
if (0 == strcmp(range, "nspace")) {
rng = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace
rng = OBJ_NEW(opal_value_t);
rng->key = strdup(OPAL_PMIX_RANGE);
rng->type = OPAL_INT;
rng->data.integer = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace
opal_list_append(&pinfo, &rng->super);
} else if (0 == strcmp(range, "session")) {
rng = OPAL_PMIX_SESSION; // share only with procs in same session
rng = OBJ_NEW(opal_value_t);
rng->key = strdup(OPAL_PMIX_RANGE);
rng->type = OPAL_INT;
rng->data.integer = OPAL_PMIX_SESSION; // share only with procs in same session
opal_list_append(&pinfo, &rng->super);
} else {
/* unrecognized scope */
OPAL_LIST_DESTRUCT(&pinfo);
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
FUNC_NAME);
}
}
}
if (!range_given) {
/* default to nspace */
rng = OPAL_PMIX_NAMESPACE;
}
/* collect the findings */
OBJ_CONSTRUCT(&results, opal_list_t);
@ -98,7 +103,8 @@ int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name)
pdat->value.key = strdup(service_name);
opal_list_append(&results, &pdat->super);
ret = opal_pmix.lookup(rng, &results);
ret = opal_pmix.lookup(&results, &pinfo);
OPAL_LIST_DESTRUCT(&pinfo);
if (OPAL_SUCCESS != ret ||
OPAL_STRING != pdat->value.type ||
NULL == pdat->value.data.string) {

Просмотреть файл

@ -48,12 +48,8 @@ int MPI_Publish_name(const char *service_name, MPI_Info info,
int rc;
char range[OPAL_MAX_INFO_VAL];
int flag=0;
opal_pmix_data_range_t rng;
bool range_given = false;
opal_pmix_persistence_t persist;
bool persistence_given = false;
opal_value_t *rng;
opal_list_t values;
opal_value_t *pinfo;
if ( MPI_PARAM_CHECK ) {
OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
@ -73,58 +69,75 @@ int MPI_Publish_name(const char *service_name, MPI_Info info,
}
OPAL_CR_ENTER_LIBRARY();
OBJ_CONSTRUCT(&values, opal_list_t);
/* OMPI supports info keys to pass the range and persistence to
* be used for the given key */
if (MPI_INFO_NULL != info) {
ompi_info_get (info, "range", sizeof(range) - 1, range, &flag);
if (flag) {
range_given = true;
if (0 == strcmp(range, "nspace")) {
rng = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace
rng = OBJ_NEW(opal_value_t);
rng->key = strdup(OPAL_PMIX_RANGE);
rng->type = OPAL_INT;
rng->data.integer = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace
opal_list_append(&values, &rng->super);
} else if (0 == strcmp(range, "session")) {
rng = OPAL_PMIX_SESSION; // share only with procs in same session
rng = OBJ_NEW(opal_value_t);
rng->key = strdup(OPAL_PMIX_RANGE);
rng->type = OPAL_INT;
rng->data.integer = OPAL_PMIX_SESSION; // share only with procs in same session
opal_list_append(&values, &rng->super);
} else {
/* unrecognized range */
/* unrecognized scope */
OPAL_LIST_DESTRUCT(&values);
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
FUNC_NAME);
}
}
ompi_info_get (info, "persistence", sizeof(range) - 1, range, &flag);
if (flag) {
persistence_given = true;
if (0 == strcmp(range, "indef")) {
persist = OPAL_PMIX_PERSIST_INDEF; // retain until specifically deleted
rng = OBJ_NEW(opal_value_t);
rng->key = strdup(OPAL_PMIX_PERSISTENCE);
rng->type = OPAL_INT;
rng->data.integer = OPAL_PMIX_PERSIST_INDEF; // retain until specifically deleted
opal_list_append(&values, &rng->super);
} else if (0 == strcmp(range, "proc")) {
persist = OPAL_PMIX_PERSIST_PROC; // retain until publishing process terminates
rng = OBJ_NEW(opal_value_t);
rng->key = strdup(OPAL_PMIX_PERSISTENCE);
rng->type = OPAL_INT;
rng->data.integer = OPAL_PMIX_PERSIST_PROC; // retain until publishing process terminates
opal_list_append(&values, &rng->super);
} else if (0 == strcmp(range, "app")) {
persist = OPAL_PMIX_PERSIST_APP; // retain until application terminates
rng = OBJ_NEW(opal_value_t);
rng->key = strdup(OPAL_PMIX_PERSISTENCE);
rng->type = OPAL_INT;
rng->data.integer = OPAL_PMIX_PERSIST_APP; // retain until application terminates
opal_list_append(&values, &rng->super);
} else if (0 == strcmp(range, "session")) {
persist = OPAL_PMIX_PERSIST_SESSION; // retain until session/allocation terminates
rng = OBJ_NEW(opal_value_t);
rng->key = strdup(OPAL_PMIX_PERSISTENCE);
rng->type = OPAL_INT;
rng->data.integer = OPAL_PMIX_PERSIST_SESSION; // retain until session/allocation terminates
opal_list_append(&values, &rng->super);
} else {
/* unrecognized persistence */
OPAL_LIST_DESTRUCT(&values);
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
FUNC_NAME);
}
}
}
if (!range_given) {
/* default to nspace */
rng = OPAL_PMIX_NAMESPACE;
}
if (!persistence_given) {
persist = OPAL_PMIX_PERSIST_APP;
}
/* publish the values */
OBJ_CONSTRUCT(&values, opal_list_t);
pinfo = OBJ_NEW(opal_value_t);
pinfo->key = strdup(service_name);
pinfo->type = OPAL_STRING;
pinfo->data.string = strdup(port_name);
opal_list_append(&values, &pinfo->super);
/* publish the service name */
rng = OBJ_NEW(opal_value_t);
rng->key = strdup(service_name);
rng->type = OPAL_STRING;
rng->data.string = strdup(port_name);
opal_list_append(&values, &rng->super);
rc = opal_pmix.publish(rng, persist, &values);
rc = opal_pmix.publish(&values);
OPAL_LIST_DESTRUCT(&values);
OPAL_CR_EXIT_LIBRARY();

Просмотреть файл

@ -49,8 +49,8 @@ int MPI_Unpublish_name(const char *service_name, MPI_Info info,
int rc;
char range[OPAL_MAX_INFO_VAL];
int flag=0;
opal_pmix_data_range_t rng;
bool range_given = false;
opal_list_t pinfo;
opal_value_t *rng;
char **keys = NULL;
if ( MPI_PARAM_CHECK ) {
@ -71,34 +71,40 @@ int MPI_Unpublish_name(const char *service_name, MPI_Info info,
}
OPAL_CR_ENTER_LIBRARY();
OBJ_CONSTRUCT(&pinfo, opal_list_t);
/* OMPI supports info keys to pass the range to
* be searched for the given key */
if (MPI_INFO_NULL != info) {
ompi_info_get (info, "range", sizeof(range) - 1, range, &flag);
if (flag) {
range_given = true;
if (0 == strcmp(range, "nspace")) {
rng = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace
rng = OBJ_NEW(opal_value_t);
rng->key = strdup(OPAL_PMIX_RANGE);
rng->type = OPAL_INT;
rng->data.integer = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace
opal_list_append(&pinfo, &rng->super);
} else if (0 == strcmp(range, "session")) {
rng = OPAL_PMIX_SESSION; // share only with procs in same session
rng = OBJ_NEW(opal_value_t);
rng->key = strdup(OPAL_PMIX_RANGE);
rng->type = OPAL_INT;
rng->data.integer = OPAL_PMIX_SESSION; // share only with procs in same session
opal_list_append(&pinfo, &rng->super);
} else {
/* unrecognized range */
/* unrecognized scope */
OPAL_LIST_DESTRUCT(&pinfo);
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
FUNC_NAME);
}
}
}
if (!range_given) {
/* default to nspace */
rng = OPAL_PMIX_NAMESPACE;
}
/* unpublish the service_name */
opal_argv_append_nosize(&keys, service_name);
rc = opal_pmix.unpublish(rng, keys);
rc = opal_pmix.unpublish(keys, &pinfo);
opal_argv_free(keys);
OPAL_LIST_DESTRUCT(&pinfo);
if ( OPAL_SUCCESS != rc ) {
if (OPAL_ERR_NOT_FOUND == rc) {

Просмотреть файл

@ -325,12 +325,8 @@ typedef int (*opal_pmix_base_module_get_nb_fn_t)(const opal_process_name_t *proc
* data has been posted and is available. The non-blocking form will
* return immediately, executing the callback when the server confirms
* availability of the data */
typedef int (*opal_pmix_base_module_publish_fn_t)(opal_pmix_data_range_t scope,
opal_pmix_persistence_t persist,
opal_list_t *info);
typedef int (*opal_pmix_base_module_publish_nb_fn_t)(opal_pmix_data_range_t scope,
opal_pmix_persistence_t persist,
opal_list_t *info,
typedef int (*opal_pmix_base_module_publish_fn_t)(opal_list_t *info);
typedef int (*opal_pmix_base_module_publish_nb_fn_t)(opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Lookup information published by another process within the
@ -352,8 +348,8 @@ typedef int (*opal_pmix_base_module_publish_nb_fn_t)(opal_pmix_data_range_t scop
* and return any found items. Thus, the caller is responsible for
* ensuring that data is published prior to executing a lookup, or
* for retrying until the requested data is found */
typedef int (*opal_pmix_base_module_lookup_fn_t)(opal_pmix_data_range_t scope,
opal_list_t *data);
typedef int (*opal_pmix_base_module_lookup_fn_t)(opal_list_t *data,
opal_list_t *info);
/* Non-blocking form of the _PMIx_Lookup_ function. Data for
* the provided NULL-terminated keys array will be returned
@ -362,7 +358,7 @@ typedef int (*opal_pmix_base_module_lookup_fn_t)(opal_pmix_data_range_t scope,
* wait for _all_ requested data before executing the callback
* (_true_), or to callback once the server returns whatever
* data is immediately available (_false_) */
typedef int (*opal_pmix_base_module_lookup_nb_fn_t)(opal_pmix_data_range_t scope, int wait, char **keys,
typedef int (*opal_pmix_base_module_lookup_nb_fn_t)(char **keys, opal_list_t *info,
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata);
/* Unpublish data posted by this process using the given keys
@ -370,14 +366,14 @@ typedef int (*opal_pmix_base_module_lookup_nb_fn_t)(opal_pmix_data_range_t scope
* the data has been removed by the server. A value of _NULL_
* for the keys parameter instructs the server to remove
* _all_ data published by this process within the given scope */
typedef int (*opal_pmix_base_module_unpublish_fn_t)(opal_pmix_data_range_t scope, char **keys);
typedef int (*opal_pmix_base_module_unpublish_fn_t)(char **keys, opal_list_t *info);
/* Non-blocking form of the _PMIx_Unpublish_ function. The
* callback function will be executed once the server confirms
* removal of the specified data. A value of _NULL_
* for the keys parameter instructs the server to remove
* _all_ data published by this process within the given scope */
typedef int (*opal_pmix_base_module_unpublish_nb_fn_t)(opal_pmix_data_range_t scope, char **keys,
typedef int (*opal_pmix_base_module_unpublish_nb_fn_t)(char **keys, opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Spawn a new job. The spawned applications are automatically

Просмотреть файл

@ -30,7 +30,7 @@ greek=a1
# command, or with the date (if "git describe" fails) in the form of
# "date<date>".
repo_rev=git51479b0
repo_rev=git6afbc98
# If tarball_version is not empty, it is used as the version string in
# the tarball filename, regardless of all other versions listed in
@ -44,7 +44,7 @@ tarball_version=
# The date when this release was created
date="Sep 01, 2015"
date="Sep 04, 2015"
# The shared library version of each of PMIx's public libraries.
# These versions are maintained in accordance with the "Library

Просмотреть файл

@ -77,7 +77,7 @@ int main(int argc, char **argv)
(void)strncpy(info[1].key, "PANDA", PMIX_MAX_KEYLEN);
info[1].value.type = PMIX_SIZE;
info[1].value.data.size = 123456;
if (PMIX_SUCCESS != (rc = PMIx_Publish(PMIX_NAMESPACE, PMIX_PERSIST_APP, info, 2))) {
if (PMIX_SUCCESS != (rc = PMIx_Publish(info, 2))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Publish failed: %d\n", myproc.nspace, myproc.rank, rc);
goto done;
}
@ -95,7 +95,7 @@ int main(int argc, char **argv)
if (0 != myproc.rank) {
PMIX_PDATA_CREATE(pdata, 1);
(void)strncpy(pdata[0].key, "FOOBAR", PMIX_MAX_KEYLEN);
if (PMIX_SUCCESS != (rc = PMIx_Lookup(PMIX_NAMESPACE, NULL, 0, pdata, 1))) {
if (PMIX_SUCCESS != (rc = PMIx_Lookup(pdata, 1, NULL, 0))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Lookup failed: %d\n", myproc.nspace, myproc.rank, rc);
goto done;
}
@ -137,7 +137,7 @@ int main(int argc, char **argv)
keys[1] = "PANDA";
keys[2] = NULL;
if (PMIX_SUCCESS != (rc = PMIx_Unpublish(PMIX_NAMESPACE, keys))) {
if (PMIX_SUCCESS != (rc = PMIx_Unpublish(keys, NULL, 0))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Unpublish failed: %d\n", myproc.nspace, myproc.rank, rc);
free(keys);
goto done;

Просмотреть файл

@ -52,15 +52,13 @@ static int dmodex_fn(const pmix_proc_t *proc,
const pmix_info_t info[], size_t ninfo,
pmix_modex_cbfunc_t cbfunc, void *cbdata);
static int publish_fn(const pmix_proc_t *proc,
pmix_data_range_t scope, pmix_persistence_t persist,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
static int lookup_fn(const pmix_proc_t *proc,
pmix_data_range_t scope,
const pmix_info_t info[], size_t ninfo, char **keys,
static int lookup_fn(const pmix_proc_t *proc, char **keys,
const pmix_info_t info[], size_t ninfo,
pmix_lookup_cbfunc_t cbfunc, void *cbdata);
static int unpublish_fn(const pmix_proc_t *proc,
pmix_data_range_t scope, char **keys,
static int unpublish_fn(const pmix_proc_t *proc, char **keys,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
static int spawn_fn(const pmix_proc_t *proc,
const pmix_info_t job_info[], size_t ninfo,
@ -443,7 +441,6 @@ static int dmodex_fn(const pmix_proc_t *proc,
static int publish_fn(const pmix_proc_t *proc,
pmix_data_range_t scope, pmix_persistence_t persist,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata)
{
@ -467,9 +464,8 @@ static int publish_fn(const pmix_proc_t *proc,
}
static int lookup_fn(const pmix_proc_t *proc,
pmix_data_range_t scope,
const pmix_info_t info[], size_t ninfo, char **keys,
static int lookup_fn(const pmix_proc_t *proc, char **keys,
const pmix_info_t info[], size_t ninfo,
pmix_lookup_cbfunc_t cbfunc, void *cbdata)
{
pmix_locdat_t *p, *p2;
@ -517,8 +513,8 @@ static int lookup_fn(const pmix_proc_t *proc,
}
static int unpublish_fn(const pmix_proc_t *proc,
pmix_data_range_t scope, char **keys,
static int unpublish_fn(const pmix_proc_t *proc, char **keys,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata)
{
pmix_locdat_t *p, *p2;

Просмотреть файл

@ -65,27 +65,24 @@ BEGIN_C_DECLS
/**** PMIX API ****/
/* NOTE: calls to these APIs must be thread-protected as there
* currently is NO internal thread safety. */
/* Initialize the PMIx client, returning the namespace assigned
* to this client's application in the provided character array
* (must be of size PMIX_MAX_NSLEN or greater). Passing a parameter
* of _NULL_ for either or both parameters is allowed if the user
/* Initialize the PMIx client, returning the process identifier assigned
* to this client's application in the provided pmix_proc_t struct.
* Passing a parameter of _NULL_ for this parameter is allowed if the user
* wishes solely to initialize the PMIx system and does not require
* return of the NULL parameter(s) at that time.
* return of the identifier at that time.
*
* When called the PMIx client will check for the required connection
* information of the local PMIx server and will establish the connection.
* If the information is not found, or the server connection fails, then
* an appropriate error constant will be returned.
*
* If successful, the function will return PMIX_SUCCESS, will fill the
* provided namespace array with the server-assigned namespace, and return
* the rank of the process within the application. Note that the PMIx
* client library is reference counted, and so multiple calls to PMIx_Init
* are allowed. Thus, one way to obtain the namespace and rank of the
* process is to simply call PMIx_Init with non-NULL parameters. */
* If successful, the function will return PMIX_SUCCESS and will fill the
* provided structure with the server-assigned namespace and rank of the
* process within the application.
*
* Note that the PMIx client library is reference counted, and so multiple
* calls to PMIx_Init are allowed. Thus, one way to obtain the namespace and
* rank of the process is to simply call PMIx_Init with a non-NULL parameter. */
pmix_status_t PMIx_Init(pmix_proc_t *proc);
/* Finalize the PMIx client, closing the connection to the local server.
@ -116,12 +113,18 @@ int PMIx_Initialized(void);
* Passing a _NULL_ msg parameter is allowed. Note that race conditions
* caused by multiple processes calling PMIx_Abort are left to the
* server implementation to resolve with regard to which status is
* returned and what messages (if any) are printed.
*/
* returned and what messages (if any) are printed. */
pmix_status_t PMIx_Abort(int status, const char msg[],
pmix_proc_t procs[], size_t nprocs);
/* Push a value into the client's namespace. The client library will cache
* the information locally until _PMIx_Commit_ is called. The provided scope
* value is passed to the local PMIx server, which will distribute the data
* as directed. */
pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_value_t *val);
/* Push all previously _PMIx_Put_ values to the local PMIx server.
* This is an asynchronous operation - the library will immediately
* return to the caller while the data is transmitted to the local
@ -132,7 +135,7 @@ pmix_status_t PMIx_Commit(void);
/* Execute a blocking barrier across the processes identified in the
* specified array. Passing a _NULL_ pointer as the _procs_ parameter
* indicates that the barrier is to span all processes in the client's
* namespace. Each provided proc struct can pass PMIX_RANK_WILDCARD to
* namespace. Each provided pmix_proc_t struct can pass PMIX_RANK_WILDCARD to
* indicate that all processes in the given namespace are
* participating.
*
@ -144,19 +147,22 @@ pmix_status_t PMIx_Commit(void);
* A value of _false_ indicates that the callback is just used as a release
* and no data is to be returned at that time. A value of _true_ indicates
* that all _put_ data is to be collected by the barrier. Returned data is
* locally cached so that subsequent calls to _PMIx_Get_ can be serviced
* without communicating to/from the server, but at the cost of increased
* memory footprint
* cached at the server to reduce memory footprint, and can be retrieved
* as needed by calls to PMIx_Get(nb).
*
* Note that for scalability reasons, the default behavior for PMIx_Fence
* is to _not_ collect the data.
*
* (b) PMIX_COLLECTIVE_ALGO - a comma-delimited string indicating the algos
* to be used for executing the barrier, in priority order. The _mandatory_
* flag can instruct the host RM that it should return an error if none
* of the provided algos are available. Otherwise, the RM is to use one
* of the algos if possible, but is otherwise free to use any of its
* available methods to execute the operation.
* to be used for executing the barrier, in priority order.
*
* (c) PMIX_TIMEOUT - maximum time for the fence to execute before declaring
* an error. The RM shall terminate the operation and notify participants
* (c) PMIX_COLLECTIVE_ALGO_REQD - instructs the host RM that it should return
* an error if none of the specified algos are available. Otherwise, the RM
* is to use one of the algos if possible, but is otherwise free to use any
* of its available methods to execute the operation.
*
* (d) PMIX_TIMEOUT - maximum time for the fence to execute before declaring
* an error. By default, the RM shall terminate the operation and notify participants
* if one or more of the indicated procs fails during the fence. However,
* the timeout parameter can help avoid "hangs" due to programming errors
* that prevent one or more procs from reaching the "fence".
@ -164,7 +170,6 @@ pmix_status_t PMIx_Commit(void);
pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs,
const pmix_info_t info[], size_t ninfo);
/* Fence_nb */
/* Non-blocking version of PMIx_Fence. Note that the function will return
* an error if a _NULL_ callback function is given. */
pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs,
@ -172,16 +177,9 @@ pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs,
pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Push a value into the client's namespace. The client library will cache
* the information locally until _PMIx_Commit_ is called. The provided scope
* value is passed to the local PMIx server, which will distribute the data
* as directed. */
pmix_status_t PMIx_Put(pmix_scope_t scope, const char key[], pmix_value_t *val);
/* Retrieve information for the specified _key_ as published by the given _rank_
* within the provided _namespace_, returning a pointer to the value in the
* given address. A _NULL_ value for the namespace indicates that the rank
* is within the caller's namespace.
/* Retrieve information for the specified _key_ as published by the process
* identified in the given pmix_proc_t, returning a pointer to the value in the
* given address.
*
* This is a blocking operation - the caller will block until
* the specified data has been _PMIx_Put_ by the specified rank. The caller is
@ -199,55 +197,48 @@ pmix_status_t PMIx_Get(const pmix_proc_t *proc, const char key[],
const pmix_info_t info[], size_t ninfo,
pmix_value_t **val);
/* Retrieve information for the specified _key_ as _PMIx_Put_ by the given _rank_
* within the provided _namespace_. This is a non-blocking operation - the
* callback function will be executed once the specified data has been _PMIx_Put_
* by the specified rank and retrieved by the local server. The info
/* A non-blocking version of PMIx_Get - the callback function will
* be executed once the specified data has been _PMIx_Put_
* by the identified process and retrieved by the local server. The info
* array is used as described above for the blocking form of this call. */
pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char key[],
const pmix_info_t info[], size_t ninfo,
pmix_value_cbfunc_t cbfunc, void *cbdata);
/* Publish the data in the info array for lookup subject to the provided
* data range. Note that the keys must be unique within the specified
/* Publish the data in the info array for lookup. By default,
* the data will be published into the PMIX_SESSION range and
* with PMIX_PERSIST_APP persistence. Changes to those values,
* and any additional directives, can be included in the pmix_info_t
* array.
*
* Note that the keys must be unique within the specified
* data range or else an error will be returned (first published
* wins). Attempts to access the data by procs outside of
* the provided data range will be rejected.
*
* Note: Some host environments may support user/group level
* access controls on the information in addition to the data range.
* These can be specified in the info array using the appropriately
* defined keys.
*
* The persistence parameter instructs the server as to how long
* the data is to be retained, within the context of the range.
* For example, data published within _PMIX_NAMESPACE_ will be
* deleted along with the namespace regardless of the persistence.
* However, data published within PMIX_USER would be retained if
* the persistence was set to _PMIX_PERSIST_SESSION_ until the
* allocation terminates.
* the data is to be retained.
*
* The blocking form will block until the server confirms that the
* data has been posted and is available. The non-blocking form will
* return immediately, executing the callback when the server confirms
* availability of the data.
*/
pmix_status_t PMIx_Publish(pmix_data_range_t range,
pmix_persistence_t persist,
const pmix_info_t info[],
size_t ninfo);
pmix_status_t PMIx_Publish_nb(pmix_data_range_t range,
pmix_persistence_t persist,
const pmix_info_t info[],
size_t ninfo,
pmix_status_t PMIx_Publish(const pmix_info_t info[], size_t ninfo);
pmix_status_t PMIx_Publish_nb(const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Lookup information published by another process within the
* specified range. A range of _PMIX_DATA_RANGE_UNDEF_ requests that
* the search be conducted across _all_ namespaces accessible by this
* user.
/* Lookup information published by this or another process. By default,
* the search will be conducted across the PMIX_SESSION range. Changes
* to the range, and any additional directives, can be provided
* in the pmix_info_t array. Note that the search is also constrained
* to only data published by the current user ID - i.e., the search
* will not return data published by an application being executed
* by another user. There currently is no option to override this
* behavior - such an option may become available later via an
* appropriate pmix_info_t directive.
*
* The "data" parameter consists of an array of pmix_pdata_t struct with the
* keys specifying the requested information. Data will be returned
@ -270,18 +261,13 @@ pmix_status_t PMIx_Publish_nb(pmix_data_range_t range,
* by including:
*
* (a) PMIX_WAIT - wait for the requested data to be published. The
* _mandatory_ flag indicates that the server is to wait until
* all data has become available. Otherwise, the function will
* return as soon as the specified number of values have been
* collected. A value of -1 indicates that all values must be
* obtained.
* server is to wait until all data has become available.
*
* (b) PMIX_TIMEOUT - max time to wait for data to become available.
*
*/
pmix_status_t PMIx_Lookup(pmix_data_range_t range,
const pmix_info_t info[], size_t ninfo,
pmix_pdata_t data[], size_t ndata);
pmix_status_t PMIx_Lookup(pmix_pdata_t data[], size_t ndata,
const pmix_info_t info[], size_t ninfo);
/* Non-blocking form of the _PMIx_Lookup_ function. Data for
* the provided NULL-terminated keys array will be returned
@ -289,44 +275,58 @@ pmix_status_t PMIx_Lookup(pmix_data_range_t range,
* behavior is to _not_ wait for data to be published. The
* info keys can be used to modify the behavior as previously
* described */
pmix_status_t PMIx_Lookup_nb(pmix_data_range_t range, char **keys,
const pmix_info_t info[], size_t ninfo,
pmix_status_t PMIx_Lookup_nb(char **keys, const pmix_info_t info[], size_t ninfo,
pmix_lookup_cbfunc_t cbfunc, void *cbdata);
/* Unpublish data posted by this process using the given keys
* within the specified data range. The function will block until
* the data has been removed by the server. A value of _NULL_
* for the keys parameter instructs the server to remove
* _all_ data published by this process within the given range */
pmix_status_t PMIx_Unpublish(pmix_data_range_t range, char **keys);
/* Unpublish data posted by this process using the given keys.
* The function will block until the data has been removed by
* the server. A value of _NULL_ for the keys parameter instructs
* the server to remove _all_ data published by this process.
*
* By default, the range is assumed to be PMIX_SESSION. Changes
* to the range, and any additional directives, can be provided
* in the pmix_info_t array */
pmix_status_t PMIx_Unpublish(char **keys,
const pmix_info_t info[], size_t ninfo);
/* Non-blocking form of the _PMIx_Unpublish_ function. The
* callback function will be executed once the server confirms
* removal of the specified data. A value of _NULL_
* for the keys parameter instructs the server to remove
* _all_ data published by this process within the given range */
pmix_status_t PMIx_Unpublish_nb(pmix_data_range_t range, char **keys,
* removal of the specified data. */
pmix_status_t PMIx_Unpublish_nb(char **keys,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Spawn a new job. The assigned namespace of the spawned applications
* is returned in the nspace parameter - a _NULL_ value in that
* location indicates that the caller doesn't wish to have the
* namespace returned. Behavior of individual resource managers
* namespace returned. The nspace array must be at least of size
* PMIX_MAX_NSLEN+1. Behavior of individual resource managers
* may differ, but it is expected that failure of any application
* process to start will result in termination/cleanup of _all_
* processes in the newly spawned job and return of an error
* code to the caller.
*
* By default, the spawned processes will be PMIx "connected" to
* the parent process upon successful launch (see PMIx_Connect
* description for details). Note that this only means that the
* parent process (a) will be given a copy of the new job's
* information so it can query job-level info without
* incurring any communication penalties, and (b) will receive
* notification of errors from processes in the child job.
*
* Job-level directives can be specified in the job_info array. This
* can include:
*
* (a) PMIX_NON_MPI - the spawned job is not an MPI job and the procs will
* (a) PMIX_NON_PMI - processes in the spawned job will
* not be calling PMIx_Init
*
* (b) PMIX_TIMEOUT - declare the spawn as having failed if the launched
* procs do not call PMIx_Init within the specified time
*
* (c) PMIX_NOTIFY_COMPLETION - notify the parent process when the
* child job terminates, either normally or with error
*/
pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo,
const pmix_app_t apps[], size_t napps,
@ -353,8 +353,8 @@ pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t ninfo,
* the job-level info from those nspaces other than their own.
*
* Note: a process can only engage in _one_ connect operation involving the identical
* set of ranges at a time. However, a process _can_ be simultaneously engaged
* in multiple connect operations, each involving a different set of ranges
* set of processes at a time. However, a process _can_ be simultaneously engaged
* in multiple connect operations, each involving a different set of processes
*
* As in the case of the fence operation, the info array can be used to pass
* user-level directives regarding the algorithm to be used for the collective

Просмотреть файл

@ -163,6 +163,9 @@ BEGIN_C_DECLS
#define PMIX_WAIT "pmix.wait" // (int) caller requests that the server wait until the specified #values are found
#define PMIX_COLLECTIVE_ALGO "pmix.calgo" // (char*) comma-delimited list of algorithms to use for collective
#define PMIX_COLLECTIVE_ALGO_REQD "pmix.calreqd" // (bool) if true, indicates that the requested choice of algo is mandatory
#define PMIX_NOTIFY_COMPLETION "pmix.notecomp" // (bool) notify parent process upon termination of child job
#define PMIX_RANGE "pmix.range" // (int) pmix_data_range_t value for calls to publish/lookup/unpublish
#define PMIX_PERSISTENCE "pmix.persist" // (int) pmix_persistence_t value for calls to publish
/* attributes used by host server to pass data to the server convenience library - the
* data will then be parsed and provided to the local clients */

Просмотреть файл

@ -103,7 +103,7 @@ BEGIN_C_DECLS
typedef int (*pmix_server_client_connected_fn_t)(const pmix_proc_t *proc,
void* server_object);
/* Notify the host server that a client called PMIx_Finalize- note
/* Notify the host server that a client called PMIx_Finalize - note
* that the client will be in a blocked state until the host server
* executes the callback function, thus allowing the PMIx server support
* library to release the client */
@ -158,49 +158,46 @@ typedef pmix_status_t (*pmix_server_dmodex_req_fn_t)(const pmix_proc_t *proc,
/* Publish data per the PMIx API specification. The callback is to be executed
* upon completion of the operation. The host server is not required to guarantee
* support for the requested range - i.e., the server does not need to return an
* error if the data store doesn't support range-based isolation. However, the
* server must return an error (a) if the key is duplicative within the storage
* range, and (b) if the server does not allow overwriting of published info by
* the original publisher - it is left to the discretion of the host server to
* allow info-key-based flags to modify this behavior. The persist flag indicates
* how long the server should retain the data. The nspace/rank of the publishing
* process is also provided and is expected to be returned on any subsequent
* lookup request */
* upon completion of the operation. The default data range is expected to be
* PMIX_SESSION, and the default persistence PMIX_PERSIST_SESSION. These values
* can be modified by including the respective pmix_info_t struct in the
* provided array.
*
* Note that the host server is not required to guarantee support for any specific
* range - i.e., the server does not need to return an error if the data store
* doesn't support range-based isolation. However, the server must return an error
* (a) if the key is duplicative within the storage range, and (b) if the server
* does not allow overwriting of published info by the original publisher - it is
* left to the discretion of the host server to allow info-key-based flags to modify
* this behavior.
*
* The persistence indicates how long the server should retain the data.
*
* The identifier of the publishing process is also provided and is expected to
* be returned on any subsequent lookup request */
typedef pmix_status_t (*pmix_server_publish_fn_t)(const pmix_proc_t *proc,
pmix_data_range_t range, pmix_persistence_t persist,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Lookup published data. The host server will be passed a NULL-terminated array
* of string keys along with the range within which the data is expected to have
* been published. The host server is not required to guarantee support for all
* PMIx-defined ranges, but should only search data stores within the specified
* range within the context of the corresponding "publish" API.
* of string keys.
*
* The array of info structs is used to pass user-requested options to the server.
* This can include a wait flag to indicate that the server should wait for all
* data to become available before executing the callback function, or should
* immediately call back with whatever data is available. In addition, a timeout
* can be specified on the wait to preclude an indefinite wait for data that
* may never be published. The directives are optional _unless_ the _mandatory_ flag
* has been set - in such cases, the host RM is required to return an error
* if the directive cannot be met. */
typedef pmix_status_t (*pmix_server_lookup_fn_t)(const pmix_proc_t *proc,
pmix_data_range_t range,
* may never be published. */
typedef pmix_status_t (*pmix_server_lookup_fn_t)(const pmix_proc_t *proc, char **keys,
const pmix_info_t info[], size_t ninfo,
char **keys,
pmix_lookup_cbfunc_t cbfunc, void *cbdata);
/* Delete data from the data store. The host server will be passed a NULL-terminated array
* of string keys along with the range within which the data is expected to have
* been published. The callback is to be executed upon completion of the delete
* of string keys, plus potential directives such as the data range within which the
* keys should be deleted. The callback is to be executed upon completion of the delete
* procedure */
typedef pmix_status_t (*pmix_server_unpublish_fn_t)(const pmix_proc_t *proc,
pmix_data_range_t range,
typedef pmix_status_t (*pmix_server_unpublish_fn_t)(const pmix_proc_t *proc, char **keys,
const pmix_info_t info[], size_t ninfo,
char **keys,
pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Spawn a set of applications/processes as per the PMIx API. Note that

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -190,7 +190,7 @@ int PMI_Publish_name(const char service_name[], const char port[])
/* publish the info - PMI-1 doesn't support
* any scope other than inside our own nspace */
rc = PMIx_Publish(PMIX_NAMESPACE, PMIX_PERSIST_APP, &info, 1);
rc = PMIx_Publish(&info, 1);
return convert_err(rc);
}
@ -204,7 +204,7 @@ int PMI_Unpublish_name(const char service_name[])
keys[0] = (char*)service_name;
keys[1] = NULL;
rc = PMIx_Unpublish(PMIX_NAMESPACE, keys);
rc = PMIx_Unpublish(keys, NULL, 0);
return convert_err(rc);
}
@ -219,7 +219,7 @@ int PMI_Lookup_name(const char service_name[], char port[])
(void)strncpy(pdata.key, service_name, PMIX_MAX_KEYLEN);
/* PMI-1 doesn't want the nspace back */
if (PMIX_SUCCESS != (rc = PMIx_Lookup(PMIX_NAMESPACE, NULL, 0, &pdata, 1))) {
if (PMIX_SUCCESS != (rc = PMIx_Lookup(&pdata, 1, NULL, 0))) {
return convert_err(rc);
}

Просмотреть файл

@ -240,7 +240,7 @@ int PMI2_Nameserv_publish(const char service_name[], const PMI_keyval_t *info_pt
}
/* publish the info - PMI-2 doesn't support
* any scope other than inside our own nspace */
rc = PMIx_Publish(PMIX_NAMESPACE, PMIX_PERSIST_APP, info, nvals);
rc = PMIx_Publish(info, nvals);
return convert_err(rc);
}
@ -261,7 +261,7 @@ int PMI2_Nameserv_unpublish(const char service_name[],
keys[1] = info_ptr->key;
}
rc = PMIx_Unpublish(PMIX_NAMESPACE, keys);
rc = PMIx_Unpublish(keys, NULL, 0);
return convert_err(rc);
}
@ -288,7 +288,7 @@ int PMI2_Nameserv_lookup(const char service_name[], const PMI_keyval_t *info_ptr
}
/* lookup the info */
if (PMIX_SUCCESS != (rc = PMIx_Lookup(PMIX_NAMESPACE, NULL, 0, pdata, nvals))) {
if (PMIX_SUCCESS != (rc = PMIx_Lookup(pdata, nvals, NULL, 0))) {
PMIX_PDATA_DESTRUCT(&pdata[0]);
PMIX_PDATA_DESTRUCT(&pdata[1]);
return convert_err(rc);

Просмотреть файл

@ -149,6 +149,7 @@ static void wait_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
cb->active = false;
}
/* callback to receive job info */
static void job_data(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
pmix_buffer_t *buf, void *cbdata)
{
@ -165,6 +166,7 @@ static void job_data(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
}
/* decode it */
pmix_client_process_nspace_blob(pmix_globals.myid.nspace, buf);
cb->status = PMIX_SUCCESS;
cb->active = false;
}
@ -247,6 +249,8 @@ int PMIx_Init(pmix_proc_t *proc)
/* get our effective IDs */
pmix_globals.uid = geteuid();
pmix_globals.gid = getegid();
/* default to our internal errhandler */
pmix_globals.errhandler = myerrhandler;
/* initialize the output system */
if (!pmix_output_init()) {
@ -323,17 +327,19 @@ int PMIx_Init(pmix_proc_t *proc)
return -1;
}
/* connect to the server - returns job info if successful */
/* setup an object to track server connection */
PMIX_CONSTRUCT(&cb, pmix_cb_t);
cb.active = true;
/* connect to the server - returns job info if successful */
if (PMIX_SUCCESS != (rc = connect_to_server(&address, &cb))){
PMIX_DESTRUCT(&cb);
return rc;
}
PMIX_WAIT_FOR_COMPLETION(cb.active);
rc = cb.status;
PMIX_DESTRUCT(&cb);
return PMIX_SUCCESS;
return rc;
}
int PMIx_Initialized(void)
@ -769,10 +775,21 @@ static int recv_connect_ack(int sd)
{
int reply;
int rc;
struct timeval tv, save;
pmix_socklen_t sz;
pmix_output_verbose(2, pmix_globals.debug_output,
"pmix: RECV CONNECT ACK FROM SERVER");
/* get the current timeout value so we can reset to it */
sz = sizeof(save);
getsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, (void*)&save, &sz);
/* set a timeout on the blocking recv so we don't hang */
tv.tv_sec = 2;
tv.tv_usec = 0;
setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
/* receive the status reply */
rc = pmix_usock_recv_blocking(sd, (char*)&reply, sizeof(int));
if (PMIX_SUCCESS != rc) {
@ -802,6 +819,9 @@ static int recv_connect_ack(int sd)
return rc;
}
/* return the socket to normal */
setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &save, sz);
return PMIX_SUCCESS;
}

Просмотреть файл

@ -61,9 +61,7 @@ static void wait_lookup_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr,
static void lookup_cbfunc(int status, pmix_pdata_t pdata[], size_t ndata,
void *cbdata);
int PMIx_Publish(pmix_data_range_t scope,
pmix_persistence_t persist,
const pmix_info_t info[],
int PMIx_Publish(const pmix_info_t info[],
size_t ninfo)
{
int rc;
@ -85,7 +83,7 @@ int PMIx_Publish(pmix_data_range_t scope,
cb = PMIX_NEW(pmix_cb_t);
cb->active = true;
if (PMIX_SUCCESS != (rc = PMIx_Publish_nb(scope, persist, info, ninfo, op_cbfunc, cb))) {
if (PMIX_SUCCESS != (rc = PMIx_Publish_nb(info, ninfo, op_cbfunc, cb))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(cb);
return rc;
@ -99,10 +97,7 @@ int PMIx_Publish(pmix_data_range_t scope,
return rc;
}
int PMIx_Publish_nb(pmix_data_range_t scope,
pmix_persistence_t persist,
const pmix_info_t info[],
size_t ninfo,
int PMIx_Publish_nb(const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata)
{
pmix_buffer_t *msg;
@ -143,24 +138,14 @@ int PMIx_Publish_nb(pmix_data_range_t scope,
PMIX_RELEASE(msg);
return rc;
}
/* pack the data range */
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &scope, 1, PMIX_DATA_RANGE))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(msg);
return rc;
}
/* pack the persistence */
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &persist, 1, PMIX_PERSIST))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(msg);
return rc;
}
/* pack the info keys that were given */
/* pass the number of info structs - needed on remote end so
* space can be malloc'd for the values */
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(msg);
return rc;
}
/* pack the info structs */
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(msg);
@ -181,9 +166,8 @@ int PMIx_Publish_nb(pmix_data_range_t scope,
return PMIX_SUCCESS;
}
int PMIx_Lookup(pmix_data_range_t scope,
const pmix_info_t info[], size_t ninfo,
pmix_pdata_t pdata[], size_t ndata)
int PMIx_Lookup(pmix_pdata_t pdata[], size_t ndata,
const pmix_info_t info[], size_t ninfo)
{
int rc;
pmix_cb_t *cb;
@ -213,8 +197,7 @@ int PMIx_Lookup(pmix_data_range_t scope,
cb->nvals = ndata;
cb->active = true;
if (PMIX_SUCCESS != (rc = PMIx_Lookup_nb(scope, keys,
info, ninfo,
if (PMIX_SUCCESS != (rc = PMIx_Lookup_nb(keys, info, ninfo,
lookup_cbfunc, cb))) {
PMIX_RELEASE(cb);
pmix_argv_free(keys);
@ -231,8 +214,7 @@ int PMIx_Lookup(pmix_data_range_t scope,
return rc;
}
int PMIx_Lookup_nb(pmix_data_range_t range, char **keys,
const pmix_info_t info[], size_t ninfo,
int PMIx_Lookup_nb(char **keys, const pmix_info_t info[], size_t ninfo,
pmix_lookup_cbfunc_t cbfunc, void *cbdata)
{
pmix_buffer_t *msg;
@ -267,25 +249,6 @@ int PMIx_Lookup_nb(pmix_data_range_t range, char **keys,
PMIX_RELEASE(msg);
return rc;
}
/* pack the range */
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &range, 1, PMIX_DATA_RANGE))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(msg);
return rc;
}
/* pack the info structs */
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(msg);
return rc;
}
if (0 < ninfo) {
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(msg);
return rc;
}
}
/* pack the keys */
nkeys = pmix_argv_count(keys);
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &nkeys, 1, PMIX_SIZE))) {
@ -302,6 +265,19 @@ int PMIx_Lookup_nb(pmix_data_range_t range, char **keys,
}
}
}
/* pass the number of info structs - needed on remote end so
* space can be malloc'd for the values */
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(msg);
return rc;
}
/* pack the info structs */
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(msg);
return rc;
}
/* create a callback object as we need to pass it to the
* recv routine so we know which callback to use when
@ -316,7 +292,7 @@ int PMIx_Lookup_nb(pmix_data_range_t range, char **keys,
return PMIX_SUCCESS;
}
int PMIx_Unpublish(pmix_data_range_t scope, char **keys)
int PMIx_Unpublish(char **keys, const pmix_info_t info[], size_t ninfo)
{
int rc;
pmix_cb_t *cb;
@ -331,7 +307,7 @@ int PMIx_Unpublish(pmix_data_range_t scope, char **keys)
cb->active = true;
/* push the message into our event base to send to the server */
if (PMIX_SUCCESS != (rc = PMIx_Unpublish_nb(scope, keys, op_cbfunc, cb))) {
if (PMIX_SUCCESS != (rc = PMIx_Unpublish_nb(keys, info, ninfo, op_cbfunc, cb))) {
PMIX_RELEASE(cb);
return rc;
}
@ -344,7 +320,7 @@ int PMIx_Unpublish(pmix_data_range_t scope, char **keys)
return rc;
}
int PMIx_Unpublish_nb(pmix_data_range_t range, char **keys,
int PMIx_Unpublish_nb(char **keys, const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata)
{
pmix_buffer_t *msg;
@ -374,12 +350,6 @@ int PMIx_Unpublish_nb(pmix_data_range_t range, char **keys,
PMIX_RELEASE(msg);
return rc;
}
/* pack the range */
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &range, 1, PMIX_DATA_RANGE))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(msg);
return rc;
}
/* pack the number of keys */
i = pmix_argv_count(keys);
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &i, 1, PMIX_SIZE))) {
@ -396,6 +366,19 @@ int PMIx_Unpublish_nb(pmix_data_range_t range, char **keys,
}
}
}
/* pass the number of info structs - needed on remote end so
* space can be malloc'd for the values */
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, &ninfo, 1, PMIX_SIZE))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(msg);
return rc;
}
/* pack the info structs */
if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(msg, info, ninfo, PMIX_INFO))) {
PMIX_ERROR_LOG(rc);
PMIX_RELEASE(msg);
return rc;
}
/* create a callback object */
cb = PMIX_NEW(pmix_cb_t);

Просмотреть файл

@ -246,13 +246,16 @@ static pmix_status_t initialize_server_base(pmix_server_module_t *module)
security_mode = strdup(pmix_sec.name);
/* find the temp dir */
if (NULL == (tdir = getenv("TMPDIR"))) {
if (NULL == (tdir = getenv("TEMP"))) {
if (NULL == (tdir = getenv("TMP"))) {
tdir = "/tmp";
if (NULL == (tdir = getenv("PMIX_SERVER_TMPDIR"))) {
if (NULL == (tdir = getenv("TMPDIR"))) {
if (NULL == (tdir = getenv("TEMP"))) {
if (NULL == (tdir = getenv("TMP"))) {
tdir = "/tmp";
}
}
}
}
/* now set the address - we use the pid here to reduce collisions */
memset(&myaddress, 0, sizeof(struct sockaddr_un));
myaddress.sun_family = AF_UNIX;
@ -1879,7 +1882,7 @@ static void cnct_cbfunc(int status, void *cbdata)
scd = PMIX_NEW(pmix_shift_caddy_t);
scd->status = status;
scd->tracker = tracker;
PMIX_THREADSHIFT(scd, _mdxcbfunc);
PMIX_THREADSHIFT(scd, _cnct);
}

Просмотреть файл

@ -979,9 +979,7 @@ pmix_status_t pmix_server_publish(pmix_peer_t *peer,
{
pmix_status_t rc;
int32_t cnt;
pmix_data_range_t range;
pmix_persistence_t persist;
size_t i, ninfo, einfo;
size_t ninfo, einfo;
pmix_info_t *info = NULL;
pmix_proc_t proc;
uint32_t uid;
@ -999,18 +997,6 @@ pmix_status_t pmix_server_publish(pmix_peer_t *peer,
PMIX_ERROR_LOG(rc);
return rc;
}
/* unpack the scope */
cnt=1;
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &range, &cnt, PMIX_DATA_RANGE))) {
PMIX_ERROR_LOG(rc);
return rc;
}
/* unpack the persistence */
cnt=1;
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &persist, &cnt, PMIX_PERSIST))) {
PMIX_ERROR_LOG(rc);
return rc;
}
/* unpack the number of info objects */
cnt=1;
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) {
@ -1035,7 +1021,8 @@ pmix_status_t pmix_server_publish(pmix_peer_t *peer,
/* call the local server */
(void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN);
proc.rank = peer->info->rank;
rc = pmix_host_server.publish(&proc, range, persist, info, einfo, cbfunc, cbdata);
pmix_output(0, "server passing %d values up", (int)einfo);
rc = pmix_host_server.publish(&proc, info, einfo, cbfunc, cbdata);
cleanup:
PMIX_INFO_FREE(info, einfo);
@ -1048,8 +1035,6 @@ pmix_status_t pmix_server_lookup(pmix_peer_t *peer,
{
int32_t cnt;
pmix_status_t rc;
int wait;
pmix_data_range_t range;
size_t nkeys, i;
char **keys=NULL, *sptr;
pmix_info_t *info = NULL;
@ -1070,12 +1055,22 @@ pmix_status_t pmix_server_lookup(pmix_peer_t *peer,
PMIX_ERROR_LOG(rc);
return rc;
}
/* unpack the range */
/* unpack the number of keys */
cnt=1;
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &range, &cnt, PMIX_DATA_RANGE))) {
PMIX_ERROR_LOG(rc);
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nkeys, &cnt, PMIX_SIZE))) {
PMIX_ERROR_LOG(rc);
return rc;
}
/* unpack the array of keys */
for (i=0; i < nkeys; i++) {
cnt=1;
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &sptr, &cnt, PMIX_STRING))) {
PMIX_ERROR_LOG(rc);
goto cleanup;
}
pmix_argv_append_nosize(&keys, sptr);
free(sptr);
}
/* unpack the number of info objects */
cnt=1;
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) {
@ -1098,27 +1093,10 @@ pmix_status_t pmix_server_lookup(pmix_peer_t *peer,
info[einfo-1].value.type = PMIX_UINT32;
info[einfo-1].value.data.uint32 = uid;
/* unpack the number of keys */
cnt=1;
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nkeys, &cnt, PMIX_SIZE))) {
PMIX_ERROR_LOG(rc);
return rc;
}
/* unpack the array of keys */
for (i=0; i < nkeys; i++) {
cnt=1;
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &sptr, &cnt, PMIX_STRING))) {
PMIX_ERROR_LOG(rc);
goto cleanup;
}
pmix_argv_append_nosize(&keys, sptr);
free(sptr);
}
/* call the local server */
(void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN);
proc.rank = peer->info->rank;
rc = pmix_host_server.lookup(&proc, range, info, einfo, keys, cbfunc, cbdata);
rc = pmix_host_server.lookup(&proc, keys, info, einfo, cbfunc, cbdata);
cleanup:
PMIX_INFO_FREE(info, einfo);
@ -1132,12 +1110,11 @@ pmix_status_t pmix_server_unpublish(pmix_peer_t *peer,
{
int32_t cnt;
pmix_status_t rc;
pmix_data_range_t range;
size_t i, nkeys;
size_t i, nkeys, ninfo, einfo;
char **keys=NULL, *sptr;
pmix_proc_t proc;
uint32_t uid;
pmix_info_t info;
pmix_info_t *info;
pmix_output_verbose(2, pmix_globals.debug_output,
"recvd UNPUBLISH");
@ -1152,12 +1129,6 @@ pmix_status_t pmix_server_unpublish(pmix_peer_t *peer,
PMIX_ERROR_LOG(rc);
return rc;
}
/* unpack the range */
cnt=1;
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &range, &cnt, PMIX_DATA_RANGE))) {
PMIX_ERROR_LOG(rc);
return rc;
}
/* unpack the number of keys */
cnt=1;
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &nkeys, &cnt, PMIX_SIZE))) {
@ -1174,16 +1145,32 @@ pmix_status_t pmix_server_unpublish(pmix_peer_t *peer,
pmix_argv_append_nosize(&keys, sptr);
free(sptr);
}
/* setup the info key */
PMIX_INFO_CONSTRUCT(&info);
(void)strncpy(info.key, PMIX_USERID, PMIX_MAX_KEYLEN);
info.value.type = PMIX_UINT32;
info.value.data.uint32 = uid;
/* unpack the number of info objects */
cnt=1;
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, &ninfo, &cnt, PMIX_SIZE))) {
PMIX_ERROR_LOG(rc);
return rc;
}
/* we will be adding one for the user id */
einfo = ninfo + 1;
PMIX_INFO_CREATE(info, einfo);
/* unpack the array of info objects */
if (0 < ninfo) {
PMIX_INFO_CREATE(info, ninfo);
cnt=ninfo;
if (PMIX_SUCCESS != (rc = pmix_bfrop.unpack(buf, info, &cnt, PMIX_INFO))) {
PMIX_ERROR_LOG(rc);
goto cleanup;
}
}
(void)strncpy(info[einfo-1].key, PMIX_USERID, PMIX_MAX_KEYLEN);
info[einfo-1].value.type = PMIX_UINT32;
info[einfo-1].value.data.uint32 = uid;
/* call the local server */
(void)strncpy(proc.nspace, peer->info->nptr->nspace, PMIX_MAX_NSLEN);
proc.rank = peer->info->rank;
rc = pmix_host_server.unpublish(&proc, range, &info, 1, keys, cbfunc, cbdata);
rc = pmix_host_server.unpublish(&proc, keys, info, einfo, cbfunc, cbdata);
cleanup:
pmix_argv_free(keys);

Просмотреть файл

@ -151,7 +151,6 @@ int dmodex_fn(const pmix_proc_t *proc,
}
int publish_fn(const pmix_proc_t *proc,
pmix_data_range_t scope, pmix_persistence_t persist,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata)
{
@ -184,8 +183,8 @@ int publish_fn(const pmix_proc_t *proc,
return PMIX_SUCCESS;
}
int lookup_fn(const pmix_proc_t *proc, pmix_data_range_t scope,
const pmix_info_t info[], size_t ninfo, char **keys,
int lookup_fn(const pmix_proc_t *proc, char **keys,
const pmix_info_t info[], size_t ninfo,
pmix_lookup_cbfunc_t cbfunc, void *cbdata)
{
size_t i, ndata, ret;
@ -216,26 +215,26 @@ int lookup_fn(const pmix_proc_t *proc, pmix_data_range_t scope,
return PMIX_SUCCESS;
}
int unpublish_fn(const pmix_proc_t *proc,
pmix_data_range_t scope, char **keys,
int unpublish_fn(const pmix_proc_t *proc, char **keys,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata)
{
size_t i, ninfo;
pmix_test_info_t *info, *next;
size_t i;
pmix_test_info_t *iptr, *next;
if (NULL == pmix_test_published_list) {
return PMIX_ERR_NOT_FOUND;
}
PMIX_LIST_FOREACH_SAFE(info, next, pmix_test_published_list, pmix_test_info_t) {
if (1) {// if data posted by this process
PMIX_LIST_FOREACH_SAFE(iptr, next, pmix_test_published_list, pmix_test_info_t) {
if (1) { // if data posted by this process
if (NULL == keys) {
pmix_list_remove_item(pmix_test_published_list, &info->super);
PMIX_RELEASE(info);
pmix_list_remove_item(pmix_test_published_list, &iptr->super);
PMIX_RELEASE(iptr);
} else {
ninfo = pmix_argv_count(keys);
for (i = 0; i < ninfo; i++) {
if (!strcmp(info->data.key, keys[i])) {
pmix_list_remove_item(pmix_test_published_list, &info->super);
PMIX_RELEASE(info);
if (!strcmp(iptr->data.key, keys[i])) {
pmix_list_remove_item(pmix_test_published_list, &iptr->super);
PMIX_RELEASE(iptr);
break;
}
}

Просмотреть файл

@ -29,14 +29,13 @@ pmix_status_t dmodex_fn(const pmix_proc_t *proc,
const pmix_info_t info[], size_t ninfo,
pmix_modex_cbfunc_t cbfunc, void *cbdata);
pmix_status_t publish_fn(const pmix_proc_t *proc,
pmix_data_range_t scope, pmix_persistence_t persist,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
pmix_status_t lookup_fn(const pmix_proc_t *proc, pmix_data_range_t scope,
const pmix_info_t info[], size_t ninfo, char **keys,
pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys,
const pmix_info_t info[], size_t ninfo,
pmix_lookup_cbfunc_t cbfunc, void *cbdata);
pmix_status_t unpublish_fn(const pmix_proc_t *proc,
pmix_data_range_t scope, char **keys,
pmix_status_t unpublish_fn(const pmix_proc_t *proc, char **keys,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
pmix_status_t spawn_fn(const pmix_proc_t *proc,
const pmix_info_t job_info[], size_t ninfo,

Просмотреть файл

@ -44,11 +44,11 @@ int main(int argc, char **argv)
char *tmp;
pmix_proc_t proc, myproc;
uint32_t nprocs, n;
/* init us */
if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc))) {
pmix_output(0, "Client ns %s rank %d: PMIx_Init failed: %d", myproc.nspace, myproc.rank, rc);
exit(0);
exit(rc);
}
pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank);
@ -60,7 +60,7 @@ int main(int argc, char **argv)
nprocs = val->data.uint32;
PMIX_VALUE_RELEASE(val);
pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs);
/* put a few values */
(void)asprintf(&tmp, "%s-%d-internal", myproc.nspace, myproc.rank);
value.type = PMIX_UINT32;
@ -99,7 +99,7 @@ int main(int argc, char **argv)
pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc);
goto done;
}
/* check the returned data */
(void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
for (n=0; n < nprocs; n++) {
@ -156,5 +156,5 @@ int main(int argc, char **argv)
fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank);
}
fflush(stderr);
return(0);
return(rc);
}

Просмотреть файл

@ -63,7 +63,7 @@ int main(int argc, char **argv)
nprocs = val->data.uint32;
PMIX_VALUE_RELEASE(val);
pmix_output(0, "Client %s:%d universe size %d", myproc.nspace, myproc.rank, nprocs);
/* call fence to ensure the data is received */
PMIX_PROC_CONSTRUCT(&proc);
(void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
@ -72,7 +72,7 @@ int main(int argc, char **argv)
pmix_output(0, "Client ns %s rank %d: PMIx_Fence failed: %d", myproc.nspace, myproc.rank, rc);
goto done;
}
/* publish something */
if (0 == myproc.rank) {
PMIX_INFO_CREATE(info, 2);
@ -82,7 +82,7 @@ int main(int argc, char **argv)
(void)strncpy(info[1].key, "PANDA", PMIX_MAX_KEYLEN);
info[1].value.type = PMIX_SIZE;
info[1].value.data.size = 123456;
if (PMIX_SUCCESS != (rc = PMIx_Publish(PMIX_GLOBAL, PMIX_PERSIST_APP, info, 2))) {
if (PMIX_SUCCESS != (rc = PMIx_Publish(info, 2))) {
pmix_output(0, "Client ns %s rank %d: PMIx_Publish failed: %d", myproc.nspace, myproc.rank, rc);
goto done;
}
@ -100,7 +100,7 @@ int main(int argc, char **argv)
if (0 != myproc.rank) {
PMIX_PDATA_CREATE(pdata, 1);
(void)strncpy(pdata[0].key, "FOOBAR", PMIX_MAX_KEYLEN);
if (PMIX_SUCCESS != (rc = PMIx_Lookup(PMIX_GLOBAL, NULL, 0, pdata, 1))) {
if (PMIX_SUCCESS != (rc = PMIx_Lookup(pdata, 1, NULL, 0))) {
pmix_output(0, "Client ns %s rank %d: PMIx_Lookup failed: %d", myproc.nspace, myproc.rank, rc);
goto done;
}
@ -140,7 +140,7 @@ int main(int argc, char **argv)
pmix_argv_append_nosize(&keys, "FOOBAR");
pmix_argv_append_nosize(&keys, "PANDA");
if (PMIX_SUCCESS != (rc = PMIx_Unpublish(PMIX_GLOBAL, keys))) {
if (PMIX_SUCCESS != (rc = PMIx_Unpublish(keys, NULL, 0))) {
pmix_output(0, "Client ns %s rank %d: PMIx_Unpublish failed: %d", myproc.nspace, myproc.rank, rc);
goto done;
}

Просмотреть файл

@ -24,17 +24,24 @@
#include <private/autogen/config.h>
#include <pmix_server.h>
#include <private/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <errno.h>
#include <signal.h>
#include PMIX_EVENT_HEADER
#include "src/util/pmix_environ.h"
#include "src/util/output.h"
#include "src/util/printf.h"
#include "src/util/argv.h"
#include "src/buffer_ops/buffer_ops.h"
#include "src/usock/usock.h"
static pmix_status_t connected(const pmix_proc_t *proc, void *server_object);
static pmix_status_t finalized(const pmix_proc_t *proc, void *server_object,
@ -51,14 +58,13 @@ static pmix_status_t dmodex_fn(const pmix_proc_t *proc,
const pmix_info_t info[], size_t ninfo,
pmix_modex_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t publish_fn(const pmix_proc_t *proc,
pmix_data_range_t scope, pmix_persistence_t persist,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t lookup_fn(const pmix_proc_t *proc, pmix_data_range_t scope,
const pmix_info_t info[], size_t ninfo, char **keys,
static pmix_status_t lookup_fn(const pmix_proc_t *proc, char **keys,
const pmix_info_t info[], size_t ninfo,
pmix_lookup_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t unpublish_fn(const pmix_proc_t *proc,
pmix_data_range_t scope, char **keys,
static pmix_status_t unpublish_fn(const pmix_proc_t *proc, char **keys,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t spawn_fn(const pmix_proc_t *proc,
const pmix_info_t job_info[], size_t ninfo,
@ -72,8 +78,6 @@ static pmix_status_t disconnect_fn(const pmix_proc_t procs[], size_t nprocs,
pmix_op_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t register_event_fn(const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t listener_fn(int listening_sd,
pmix_connection_cbfunc_t cbfunc);
static pmix_server_module_t mymodule = {
connected,
@ -88,7 +92,7 @@ static pmix_server_module_t mymodule = {
connect_fn,
disconnect_fn,
register_event_fn,
listener_fn
NULL
};
typedef struct {
@ -101,7 +105,7 @@ PMIX_CLASS_INSTANCE(pmix_locdat_t,
typedef struct {
pmix_object_t super;
volatile bool completed;
volatile bool active;
pmix_proc_t caller;
pmix_info_t *info;
size_t ninfo;
@ -113,7 +117,7 @@ static void xfcon(myxfer_t *p)
{
p->info = NULL;
p->ninfo = 0;
p->completed = false;
p->active = true;
p->cbfunc = NULL;
p->spcbfunc = NULL;
p->cbdata = NULL;
@ -128,26 +132,35 @@ PMIX_CLASS_INSTANCE(myxfer_t,
pmix_object_t,
xfcon, xfdes);
typedef struct {
pmix_list_item_t super;
pid_t pid;
} wait_tracker_t;
PMIX_CLASS_INSTANCE(wait_tracker_t,
pmix_list_item_t,
NULL, NULL);
static volatile int wakeup;
static pmix_list_t pubdata;
static pmix_event_t handler;
static pmix_list_t children;
static void set_namespace(int nprocs, char *ranks, char *nspace,
pmix_op_cbfunc_t cbfunc, myxfer_t *x);
static void errhandler(pmix_status_t status,
pmix_proc_t procs[], size_t nprocs,
pmix_info_t info[], size_t ninfo);
static void wait_signal_callback(int fd, short event, void *arg);
static void opcbfunc(pmix_status_t status, void *cbdata)
{
myxfer_t *x = (myxfer_t*)cbdata;
x->completed = true;
/* release the caller, if necessary - note that
* this may result in release of x, so this must
* be the last thing we do with it here */
/* release the caller, if necessary */
if (NULL != x->cbfunc) {
x->cbfunc(PMIX_SUCCESS, x->cbdata);
}
x->active = false;
}
int main(int argc, char **argv)
@ -161,6 +174,7 @@ int main(int argc, char **argv)
pid_t pid;
myxfer_t *x;
pmix_proc_t proc;
wait_tracker_t *child;
/* smoke test */
if (PMIX_SUCCESS != 0) {
@ -181,6 +195,12 @@ int main(int argc, char **argv)
/* setup the pub data, in case it is used */
PMIX_CONSTRUCT(&pubdata, pmix_list_t);
/* setup to see sigchld on the forked tests */
PMIX_CONSTRUCT(&children, pmix_list_t);
event_assign(&handler, pmix_globals.evbase, SIGCHLD,
EV_SIGNAL|EV_PERSIST,wait_signal_callback, &handler);
event_add(&handler, NULL);
/* see if we were passed the number of procs to run or
* the executable to use */
for (n=1; n < (argc-1); n++) {
@ -208,7 +228,6 @@ int main(int argc, char **argv)
tmp = pmix_argv_join(atmp, ',');
x = PMIX_NEW(myxfer_t);
set_namespace(nprocs, tmp, "foobar", opcbfunc, x);
free(tmp);
/* set common argv and env */
client_env = pmix_argv_copy(environ);
@ -220,12 +239,8 @@ int main(int argc, char **argv)
/* if the nspace registration hasn't completed yet,
* wait for it here */
while (!x->completed) {
struct timespec ts;
ts.tv_sec = 0;
ts.tv_nsec = 100000;
nanosleep(&ts, NULL);
}
PMIX_WAIT_FOR_COMPLETION(x->active);
free(tmp);
PMIX_RELEASE(x);
/* fork/exec the test */
@ -246,12 +261,7 @@ int main(int argc, char **argv)
}
/* don't fork/exec the client until we know it is registered
* so we avoid a potential race condition in the server */
while (!x->completed) {
struct timespec ts;
ts.tv_sec = 0;
ts.tv_nsec = 100000;
nanosleep(&ts, NULL);
}
PMIX_WAIT_FOR_COMPLETION(x->active);
PMIX_RELEASE(x);
pid = fork();
if (pid < 0) {
@ -259,6 +269,9 @@ int main(int argc, char **argv)
PMIx_server_finalize();
return -1;
}
child = PMIX_NEW(wait_tracker_t);
child->pid = pid;
pmix_list_append(&children, &child->super);
if (pid == 0) {
execve(executable, client_argv, client_env);
@ -438,7 +451,6 @@ static int dmodex_fn(const pmix_proc_t *proc,
static int publish_fn(const pmix_proc_t *proc,
pmix_data_range_t scope, pmix_persistence_t persist,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata)
{
@ -462,9 +474,8 @@ static int publish_fn(const pmix_proc_t *proc,
}
static int lookup_fn(const pmix_proc_t *proc,
pmix_data_range_t scope,
const pmix_info_t info[], size_t ninfo, char **keys,
static int lookup_fn(const pmix_proc_t *proc, char **keys,
const pmix_info_t info[], size_t ninfo,
pmix_lookup_cbfunc_t cbfunc, void *cbdata)
{
pmix_locdat_t *p, *p2;
@ -512,8 +523,8 @@ static int lookup_fn(const pmix_proc_t *proc,
}
static int unpublish_fn(const pmix_proc_t *proc,
pmix_data_range_t scope, char **keys,
static int unpublish_fn(const pmix_proc_t *proc, char **keys,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata)
{
pmix_locdat_t *p, *p2;
@ -610,10 +621,39 @@ static pmix_status_t register_event_fn(const pmix_info_t info[], size_t ninfo,
return PMIX_SUCCESS;
}
static int listener_fn(int listening_sd,
pmix_connection_cbfunc_t cbfunc)
static void wait_signal_callback(int fd, short event, void *arg)
{
return PMIX_SUCCESS;
pmix_event_t *sig = (pmix_event_t*) arg;
int status;
pid_t pid;
wait_tracker_t *t2;
if (SIGCHLD != event_get_signal(sig)) {
return;
}
/* we can have multiple children leave but only get one
* sigchild callback, so reap all the waitpids until we
* don't get anything valid back */
while (1) {
pid = waitpid(-1, &status, WNOHANG);
if (-1 == pid && EINTR == errno) {
/* try it again */
continue;
}
/* if we got garbage, then nothing we can do */
if (pid <= 0) {
return;
}
/* we are already in an event, so it is safe to access the list */
PMIX_LIST_FOREACH(t2, &children, wait_tracker_t) {
if (pid == t2->pid) {
/* found it! */
--wakeup;
break;
}
}
}
}

Просмотреть файл

@ -86,11 +86,11 @@ static void add_noise(char *noise_param, char *my_nspace, int my_rank)
SET_KEY(key, fence_num, ind, use_same_keys); \
(void)strncpy(foobar.nspace, ns, PMIX_MAX_NSLEN); \
foobar.rank = r; \
TEST_VERBOSE(("%s:%d want to get from %s:%d key %s", my_nspace, my_rank, ns, r, key)); \
TEST_VERBOSE(("%s:%d want to get from %s:%d key %s", my_nspace, my_rank, ns, r, key)); \
if (blocking) { \
if (PMIX_SUCCESS != (rc = PMIx_Get(&foobar, key, NULL, 0, &val))) { \
if (PMIX_SUCCESS != (rc = PMIx_Get(&foobar, key, NULL, 0, &val))) { \
if( !( rc == PMIX_ERR_NOT_FOUND && ok_notfnd ) ){ \
TEST_ERROR(("%s:%d: PMIx_Get failed: %d from %s:%d", my_nspace, my_rank, rc, ns, r)); \
TEST_ERROR(("%s:%d: PMIx_Get failed: %d from %s:%d, key %s", my_nspace, my_rank, rc, ns, r, key)); \
} \
rc = PMIX_ERROR; \
} \
@ -99,8 +99,8 @@ static void add_noise(char *noise_param, char *my_nspace, int my_rank)
cbdata.in_progress = 1; \
PMIX_VALUE_CREATE(val, 1); \
cbdata.kv = val; \
if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&foobar, key, NULL, 0, get_cb, (void*)&cbdata))) { \
TEST_VERBOSE(("%s:%d: PMIx_Get_nb failed: %d from %s:%d", my_nspace, my_rank, rc, ns, r)); \
if (PMIX_SUCCESS != (rc = PMIx_Get_nb(&foobar, key, NULL, 0, get_cb, (void*)&cbdata))) { \
TEST_VERBOSE(("%s:%d: PMIx_Get_nb failed: %d from %s:%d, key=%s", my_nspace, my_rank, rc, ns, r, key)); \
rc = PMIX_ERROR; \
} else { \
count = 0; \
@ -116,7 +116,8 @@ static void add_noise(char *noise_param, char *my_nspace, int my_rank)
if (PMIX_SUCCESS == rc) { \
if( PMIX_SUCCESS != cbdata.status ){ \
if( !( rc == PMIX_ERR_NOT_FOUND && ok_notfnd ) ){ \
TEST_VERBOSE(("%s:%d: PMIx_Get_nb failed: %d from %s:%d", my_nspace, my_rank, rc, my_nspace, r));\
TEST_VERBOSE(("%s:%d: PMIx_Get_nb failed: %d from %s:%d, key=%s", \
my_nspace, my_rank, rc, my_nspace, r)); \
} \
rc = PMIX_ERROR; \
} else if (NULL == val) { \

Просмотреть файл

@ -59,10 +59,10 @@ static int test_publish(char *my_nspace, int my_rank, int blocking)
info.value.type = PMIX_STRING;
info.value.data.string = strdup(data);
if (blocking) {
rc = PMIx_Publish(PMIX_NAMESPACE, PMIX_PERSIST_INDEF, &info, 1);
rc = PMIx_Publish(&info, 1);
} else {
int in_progress = 1;
rc = PMIx_Publish_nb(PMIX_NAMESPACE, PMIX_PERSIST_INDEF, &info, 1, release_cb, &in_progress);
rc = PMIx_Publish_nb(&info, 1, release_cb, &in_progress);
if (PMIX_SUCCESS == rc) {
PMIX_WAIT_FOR_COMPLETION(in_progress);
}
@ -83,7 +83,7 @@ static int test_lookup(char *my_nspace, int my_rank, int blocking)
(void)snprintf(data, 512, "data from proc %s:%d", my_nspace, my_rank);
if (blocking) {
if (PMIX_SUCCESS != (rc = PMIx_Lookup(PMIX_NAMESPACE, NULL, 0, &pdata, 1))) {
if (PMIX_SUCCESS != (rc = PMIx_Lookup(&pdata, 1, NULL, 0))) {
PMIX_PDATA_DESTRUCT(&pdata);
return rc;
}
@ -98,7 +98,7 @@ static int test_lookup(char *my_nspace, int my_rank, int blocking)
cbdata.pdata = &pdata;
/* copy the key across */
(void)strncpy(pdata.key, keys[0], PMIX_MAX_KEYLEN);
rc = PMIx_Lookup_nb(PMIX_NAMESPACE, keys, NULL, 0, lookup_cb, (void*)&cbdata);
rc = PMIx_Lookup_nb(keys, NULL, 0, lookup_cb, (void*)&cbdata);
if (PMIX_SUCCESS != rc) {
PMIX_PDATA_DESTRUCT(&pdata);
return rc;
@ -130,10 +130,10 @@ static int test_unpublish(char *my_nspace, int my_rank, int blocking)
keys[1] = NULL;
if (blocking) {
rc = PMIx_Unpublish(PMIX_NAMESPACE, keys);
rc = PMIx_Unpublish(keys, NULL, 0);
} else {
int in_progress = 1;
rc = PMIx_Unpublish_nb(PMIX_NAMESPACE, keys, release_cb, &in_progress);
rc = PMIx_Unpublish_nb(keys, NULL, 0, release_cb, &in_progress);
if (PMIX_SUCCESS == rc) {
PMIX_WAIT_FOR_COMPLETION(in_progress);
}

Просмотреть файл

@ -89,20 +89,15 @@ OPAL_MODULE_DECLSPEC int pmix1_get(const opal_process_name_t *proc,
OPAL_MODULE_DECLSPEC int pmix1_getnb(const opal_process_name_t *proc,
const char *key,
opal_pmix_value_cbfunc_t cbfunc, void *cbdata);
OPAL_MODULE_DECLSPEC int pmix1_publish(opal_pmix_data_range_t scope,
opal_pmix_persistence_t persist,
opal_list_t *info);
OPAL_MODULE_DECLSPEC int pmix1_publishnb(opal_pmix_data_range_t scope,
opal_pmix_persistence_t persist,
opal_list_t *info,
OPAL_MODULE_DECLSPEC int pmix1_publish(opal_list_t *info);
OPAL_MODULE_DECLSPEC int pmix1_publishnb(opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
OPAL_MODULE_DECLSPEC int pmix1_lookup(opal_list_t *data, opal_list_t *info);
OPAL_MODULE_DECLSPEC int pmix1_lookupnb(char **keys, opal_list_t *info,
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata);
OPAL_MODULE_DECLSPEC int pmix1_unpublish(char **keys, opal_list_t *info);
OPAL_MODULE_DECLSPEC int pmix1_unpublishnb(char **keys, opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
OPAL_MODULE_DECLSPEC int pmix1_lookup(opal_pmix_data_range_t scope,
opal_list_t *data);
OPAL_MODULE_DECLSPEC int pmix1_lookupnb(opal_pmix_data_range_t scope, int wait, char **keys,
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata);
OPAL_MODULE_DECLSPEC int pmix1_unpublish(opal_pmix_data_range_t scope, char **keys);
OPAL_MODULE_DECLSPEC int pmix1_unpublishnb(opal_pmix_data_range_t scope, char **keys,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
OPAL_MODULE_DECLSPEC int pmix1_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid);
OPAL_MODULE_DECLSPEC int pmix1_spawnnb(opal_list_t *job_info, opal_list_t *apps,
opal_pmix_spawn_cbfunc_t cbfunc, void *cbdata);
@ -119,8 +114,6 @@ OPAL_MODULE_DECLSPEC int pmix1_resolve_peers(const char *nodename, opal_jobid_t
OPAL_MODULE_DECLSPEC int pmix1_resolve_nodes(opal_jobid_t jobid, char **nodelist);
/**** COMMON FUNCTIONS ****/
OPAL_MODULE_DECLSPEC void pmix1_register_errhandler(opal_pmix_errhandler_fn_t errhandler);
OPAL_MODULE_DECLSPEC void pmix1_deregister_errhandler(void);
OPAL_MODULE_DECLSPEC int pmix1_store_local(const opal_process_name_t *proc,
opal_value_t *val);

Просмотреть файл

@ -33,14 +33,42 @@
static pmix_proc_t myproc;
static char *dbgvalue=NULL;
static int convert_scope(pmix_scope_t *scope,
opal_pmix_scope_t sc);
static int convert_persistence(pmix_persistence_t *p,
opal_pmix_persistence_t persist);
static int convert_data_range(pmix_data_range_t *sc,
opal_pmix_data_range_t scope);
/* Error handler handed to PMIx_Register_errhandler by the client.
 *
 * Translates a PMIx error notification into OPAL terms and forwards it
 * to opal_pmix_base_errhandler.
 *
 * @param status  PMIx status code being reported
 * @param procs   array of affected procs (nspace string + rank)
 * @param nprocs  number of entries in procs
 * @param info    array of key/value info accompanying the notification
 * @param ninfo   number of entries in info
 */
static void myerr(pmix_status_t status,
                  pmix_proc_t procs[], size_t nprocs,
                  pmix_info_t info[], size_t ninfo)
{
    int rc;
    opal_list_t plist, ilist;
    opal_namelist_t *nm;
    opal_value_t *iptr;
    size_t n;

    /* convert the incoming status */
    rc = pmix1_convert_rc(status);

    /* convert the array of procs */
    OBJ_CONSTRUCT(&plist, opal_list_t);
    for (n=0; n < nprocs; n++) {
        nm = OBJ_NEW(opal_namelist_t);
        nm->name.jobid = strtoul(procs[n].nspace, NULL, 10);
        nm->name.vpid = procs[n].rank;
        opal_list_append(&plist, &nm->super);
    }

    /* convert the array of info */
    OBJ_CONSTRUCT(&ilist, opal_list_t);
    for (n=0; n < ninfo; n++) {
        iptr = OBJ_NEW(opal_value_t);
        iptr->key = strdup(info[n].key);
        pmix1_value_unload(iptr, &info[n].value);
        /* each converted value belongs on the info list; appending the
         * last proc item to plist here would corrupt plist, leak iptr,
         * and dereference an unset nm when nprocs is zero */
        opal_list_append(&ilist, &iptr->super);
    }

    /* call the base errhandler */
    opal_pmix_base_errhandler(rc, &plist, &ilist);

    /* release everything we put on the two lists */
    OPAL_LIST_DESTRUCT(&plist);
    OPAL_LIST_DESTRUCT(&ilist);
}
int pmix1_client_init(void)
{
@ -56,19 +84,28 @@ int pmix1_client_init(void)
putenv(dbgvalue);
}
rc = PMIx_Init(&myproc);
if (PMIX_SUCCESS == rc) {
/* store our jobid and rank */
opal_convert_string_to_jobid(&pname.jobid, myproc.nspace);
pname.vpid = myproc.rank;
opal_proc_set_name(&pname);
if (PMIX_SUCCESS != rc) {
return pmix1_convert_rc(rc);
}
return pmix1_convert_rc(rc);
/* store our jobid and rank */
opal_convert_string_to_jobid(&pname.jobid, myproc.nspace);
pname.vpid = myproc.rank;
opal_proc_set_name(&pname);
/* register the errhandler */
PMIx_Register_errhandler(NULL, 0, myerr);
return OPAL_SUCCESS;
}
/* Shut down the PMIx client: remove the error handler, finalize the
 * PMIx library, and report the result as an OPAL status code. */
int pmix1_client_finalize(void)
{
    /* deregister the errhandler before finalizing */
    PMIx_Deregister_errhandler();

    /* finalize and translate the PMIx status in one step */
    return pmix1_convert_rc(PMIx_Finalize());
}
@ -222,28 +259,21 @@ int pmix1_fencenb(opal_list_t *procs, int collect_data,
}
int pmix1_put(opal_pmix_scope_t scope,
opal_value_t *val)
opal_value_t *val)
{
pmix_scope_t pscope;
pmix_value_t kv;
pmix_status_t rc;
int irc;
/* convert the scope */
if (OPAL_SUCCESS != (irc = convert_scope(&pscope, scope))) {
return irc;
}
PMIX_VALUE_CONSTRUCT(&kv);
pmix1_value_load(&kv, val);
rc = PMIx_Put(pscope, val->key, &kv);
rc = PMIx_Put(scope, val->key, &kv);
PMIX_VALUE_DESTRUCT(&kv);
return pmix1_convert_rc(rc);
}
int pmix1_get(const opal_process_name_t *proc,
const char *key, opal_value_t **val)
const char *key, opal_value_t **val)
{
int ret;
pmix_value_t *kv;
@ -308,9 +338,8 @@ static void val_cbfunc(pmix_status_t status,
OBJ_RELEASE(op);
}
int pmix1_getnb(const opal_process_name_t *proc,
const char *key,
opal_pmix_value_cbfunc_t cbfunc, void *cbdata)
int pmix1_getnb(const opal_process_name_t *proc, const char *key,
opal_pmix_value_cbfunc_t cbfunc, void *cbdata)
{
pmix1_opcaddy_t *op;
pmix_status_t rc;
@ -342,29 +371,13 @@ int pmix1_getnb(const opal_process_name_t *proc,
return pmix1_convert_rc(rc);
}
int pmix1_publish(opal_pmix_data_range_t scope,
opal_pmix_persistence_t persist,
opal_list_t *info)
int pmix1_publish(opal_list_t *info)
{
pmix_data_range_t rng;
pmix_persistence_t pst;
int rc;
pmix_info_t *pinfo;
pmix_status_t ret;
opal_value_t *iptr;
size_t sz, n;
rc = convert_data_range(&rng, scope);
if (OPAL_SUCCESS != rc) {
OPAL_ERROR_LOG(rc);
return rc;
}
rc = convert_persistence(&pst, scope);
if (OPAL_SUCCESS != rc) {
OPAL_ERROR_LOG(rc);
return rc;
}
sz = opal_list_get_size(info);
if (0 < sz) {
PMIX_INFO_CREATE(pinfo, sz);
@ -376,33 +389,19 @@ int pmix1_publish(opal_pmix_data_range_t scope,
}
}
ret = PMIx_Publish(rng, pst, pinfo, sz);
ret = PMIx_Publish(pinfo, sz);
return pmix1_convert_rc(ret);
}
int pmix1_publishnb(opal_pmix_data_range_t scope,
opal_pmix_persistence_t persist,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
int pmix1_publishnb(opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
{
pmix_data_range_t rng;
pmix_persistence_t pst;
int rc;
pmix_status_t ret;
opal_value_t *iptr;
size_t n;
pmix1_opcaddy_t *op;
rc = convert_data_range(&rng, scope);
if (OPAL_SUCCESS != rc) {
return rc;
}
rc = convert_persistence(&pst, persist);
if (OPAL_SUCCESS != rc) {
return rc;
}
/* create the caddy */
op = OBJ_NEW(pmix1_opcaddy_t);
op->opcbfunc = cbfunc;
@ -419,34 +418,39 @@ int pmix1_publishnb(opal_pmix_data_range_t scope,
}
}
ret = PMIx_Publish_nb(rng, pst, op->info, op->sz, opcbfunc, op);
ret = PMIx_Publish_nb(op->info, op->sz, opcbfunc, op);
return pmix1_convert_rc(ret);
}
int pmix1_lookup(opal_pmix_data_range_t scope,
opal_list_t *data)
int pmix1_lookup(opal_list_t *data, opal_list_t *info)
{
pmix_data_range_t rng;
pmix_pdata_t *pdata;
size_t sz, n;
pmix_info_t *pinfo;
size_t sz, ninfo, n;
int rc;
pmix_status_t ret;
opal_pmix_pdata_t *d;
opal_value_t *iptr;
rc = convert_data_range(&rng, scope);
if (OPAL_SUCCESS != rc) {
return rc;
}
sz = opal_list_get_size(data);
PMIX_PDATA_CREATE(pdata, sz);
n=0;
OPAL_LIST_FOREACH(d, data, opal_pmix_pdata_t) {
(void)strncpy(pdata[n++].key, d->value.key, PMIX_MAX_KEYLEN);
}
ret = PMIx_Lookup(rng, NULL, 0, pdata, sz);
ninfo = opal_list_get_size(info);
PMIX_INFO_CREATE(pinfo, ninfo);
n=0;
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
(void)strncpy(pinfo[n++].key, iptr->key, PMIX_MAX_KEYLEN);
pmix1_value_load(&pinfo[n].value, iptr);
++n;
}
ret = PMIx_Lookup(pdata, sz, pinfo, ninfo);
PMIX_INFO_FREE(pinfo, ninfo);
if (PMIX_SUCCESS == ret) {
/* transfer the data back */
@ -523,64 +527,82 @@ static void lk_cbfunc(pmix_status_t status,
OBJ_RELEASE(op);
}
int pmix1_lookupnb(opal_pmix_data_range_t scope, int wait, char **keys,
int pmix1_lookupnb(char **keys, opal_list_t *info,
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata)
{
pmix_data_range_t rng;
int rc;
pmix_status_t ret;
pmix1_opcaddy_t *op;
rc = convert_data_range(&rng, scope);
if (OPAL_SUCCESS != rc) {
return rc;
}
opal_value_t *iptr;
size_t n;
/* create the caddy */
op = OBJ_NEW(pmix1_opcaddy_t);
op->lkcbfunc = cbfunc;
op->cbdata = cbdata;
ret = PMIx_Lookup_nb(rng, keys, NULL, 0, lk_cbfunc, op);
return pmix1_convert_rc(ret);
}
int pmix1_unpublish(opal_pmix_data_range_t scope, char **keys)
{
int rc;
pmix_status_t ret;
pmix_data_range_t rng;
rc = convert_data_range(&rng, scope);
if (OPAL_SUCCESS != rc) {
return rc;
op->sz = opal_list_get_size(info);
if (0 < op->sz) {
PMIX_INFO_CREATE(op->info, op->sz);
n=0;
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
(void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN);
pmix1_value_load(&op->info[n].value, iptr);
++n;
}
}
ret = PMIx_Unpublish(rng, keys);
ret = PMIx_Lookup_nb(keys, op->info, op->sz, lk_cbfunc, op);
return pmix1_convert_rc(ret);
}
int pmix1_unpublishnb(opal_pmix_data_range_t scope, char **keys,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
/* Blocking unpublish of previously published data.
 *
 * @param keys  NULL-terminated array of key strings, passed through
 *              unchanged to PMIx_Unpublish
 * @param info  optional list of opal_value_t directives; may be NULL or
 *              empty, in which case no info array is passed down
 * @return      the PMIx_Unpublish status converted by pmix1_convert_rc
 */
int pmix1_unpublish(char **keys, opal_list_t *info)
{
    pmix_status_t ret;
    size_t ninfo = 0, n;
    pmix_info_t *pinfo = NULL;
    opal_value_t *iptr;

    /* only build an info array when there is something to convert */
    if (NULL != info && 0 < (ninfo = opal_list_get_size(info))) {
        PMIX_INFO_CREATE(pinfo, ninfo);
        n=0;
        OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
            /* key and value must land in the SAME entry, and the index
             * must advance exactly once per list item - otherwise the
             * value is stored one slot past its key and the loop runs
             * off the end of the array */
            (void)strncpy(pinfo[n].key, iptr->key, PMIX_MAX_KEYLEN);
            pmix1_value_load(&pinfo[n].value, iptr);
            ++n;
        }
    }

    ret = PMIx_Unpublish(keys, pinfo, ninfo);

    if (NULL != pinfo) {
        PMIX_INFO_FREE(pinfo, ninfo);
    }
    return pmix1_convert_rc(ret);
}
int pmix1_unpublishnb(char **keys, opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
{
int rc;
pmix_status_t ret;
pmix_data_range_t rng;
pmix1_opcaddy_t *op;
rc = convert_data_range(&rng, scope);
if (OPAL_SUCCESS != rc) {
return rc;
}
opal_value_t *iptr;
size_t n;
/* create the caddy */
op = OBJ_NEW(pmix1_opcaddy_t);
op->opcbfunc = cbfunc;
op->cbdata = cbdata;
ret = PMIx_Unpublish_nb(rng, keys, opcbfunc, op);
op->sz = opal_list_get_size(info);
if (0 < op->sz) {
PMIX_INFO_CREATE(op->info, op->sz);
n=0;
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
(void)strncpy(op->info[n].key, iptr->key, PMIX_MAX_KEYLEN);
pmix1_value_load(&op->info[n].value, iptr);
++n;
}
}
ret = PMIx_Unpublish_nb(keys, op->info, op->sz, opcbfunc, op);
return pmix1_convert_rc(ret);
}
@ -894,77 +916,3 @@ int pmix1_resolve_nodes(opal_jobid_t jobid, char **nodelist)
return pmix1_convert_rc(ret);;
}
/*** UTILITY FUNCTIONS ***/
static int convert_scope(pmix_scope_t *sc,
opal_pmix_scope_t scope)
{
int rc = PMIX_SUCCESS;
switch (scope) {
case OPAL_PMIX_SCOPE_UNDEF:
*sc = PMIX_SCOPE_UNDEF;
break;
case OPAL_PMIX_LOCAL:
*sc = PMIX_LOCAL;
break;
case OPAL_PMIX_REMOTE:
*sc = PMIX_REMOTE;
break;
case OPAL_PMIX_GLOBAL:
*sc = PMIX_GLOBAL;
break;
default:
*sc = PMIX_SCOPE_UNDEF;
rc = OPAL_ERR_BAD_PARAM;
break;
}
return rc;
}
static int convert_persistence(pmix_persistence_t *p,
opal_pmix_persistence_t persist)
{
int rc = OPAL_SUCCESS;
switch (persist) {
case OPAL_PMIX_PERSIST_INDEF:
*p = PMIX_PERSIST_INDEF;
break;
case OPAL_PMIX_PERSIST_PROC:
*p = PMIX_PERSIST_PROC;
break;
case OPAL_PMIX_PERSIST_APP:
*p = PMIX_PERSIST_APP;
break;
case OPAL_PMIX_PERSIST_SESSION:
*p = PMIX_PERSIST_SESSION;
break;
default:
*p = PMIX_PERSIST_PROC;
rc = OPAL_ERR_BAD_PARAM;
}
return rc;
}
static int convert_data_range(pmix_data_range_t *sc,
opal_pmix_data_range_t scope)
{
int rc = OPAL_SUCCESS;
switch (scope) {
case OPAL_PMIX_DATA_RANGE_UNDEF:
*sc = PMIX_DATA_RANGE_UNDEF;
break;
case OPAL_PMIX_NAMESPACE:
*sc = PMIX_NAMESPACE;
break;
case OPAL_PMIX_SESSION:
*sc = PMIX_SESSION;
break;
default:
*sc = PMIX_DATA_RANGE_UNDEF;
rc = OPAL_ERR_BAD_PARAM;
}
return rc;
}

Просмотреть файл

@ -59,15 +59,13 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *proc,
const pmix_info_t info[], size_t ninfo,
pmix_modex_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t server_publish_fn(const pmix_proc_t *proc,
pmix_data_range_t scope, pmix_persistence_t persist,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t server_lookup_fn(const pmix_proc_t *proc, pmix_data_range_t scope,
const pmix_info_t info[], size_t ninfo, char **keys,
static pmix_status_t server_lookup_fn(const pmix_proc_t *proc, char **keys,
const pmix_info_t info[], size_t ninfo,
pmix_lookup_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t server_unpublish_fn(const pmix_proc_t *proc,
pmix_data_range_t scope,
const pmix_info_t info[], size_t ninfo, char **keys,
static pmix_status_t server_unpublish_fn(const pmix_proc_t *proc, char **keys,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata);
static pmix_status_t server_spawn_fn(const pmix_proc_t *proc,
const pmix_info_t job_info[], size_t ninfo,
@ -101,10 +99,6 @@ pmix_server_module_t mymodule = {
};
opal_pmix_server_module_t *host_module = NULL;
static int convert_data_range(opal_pmix_data_range_t *sc,
pmix_data_range_t scope);
static int convert_persistence(opal_pmix_persistence_t *p,
pmix_persistence_t persist);
static void opal_opcbfunc(int status, void *cbdata)
@ -351,7 +345,6 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p,
}
static pmix_status_t server_publish_fn(const pmix_proc_t *p,
pmix_data_range_t scope, pmix_persistence_t persist,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata)
{
@ -359,8 +352,6 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p,
size_t n;
pmix1_opalcaddy_t *opalcaddy;
opal_process_name_t proc;
opal_pmix_data_range_t oscp;
opal_pmix_persistence_t opers;
opal_value_t *oinfo;
if (NULL == host_module || NULL == host_module->publish) {
@ -377,16 +368,6 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p,
proc.vpid = p->rank;
}
/* convert the data range */
if (OPAL_SUCCESS != (rc = convert_data_range(&oscp, scope))) {
return pmix1_convert_opalrc(rc);
}
/* convert the persistence */
if (OPAL_SUCCESS != (rc = convert_persistence(&opers, persist))) {
return pmix1_convert_opalrc(rc);
}
/* setup the caddy */
opalcaddy = OBJ_NEW(pmix1_opalcaddy_t);
opalcaddy->opcbfunc = cbfunc;
@ -404,7 +385,7 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p,
}
/* pass it up */
rc = host_module->publish(&proc, oscp, opers, &opalcaddy->info, opal_opcbfunc, opalcaddy);
rc = host_module->publish(&proc, &opalcaddy->info, opal_opcbfunc, opalcaddy);
if (OPAL_SUCCESS != rc) {
OBJ_RELEASE(opalcaddy);
}
@ -442,13 +423,12 @@ static void opal_lkupcbfunc(int status,
OBJ_RELEASE(opalcaddy);
}
static pmix_status_t server_lookup_fn(const pmix_proc_t *p, pmix_data_range_t scope,
const pmix_info_t info[], size_t ninfo, char **keys,
static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys,
const pmix_info_t info[], size_t ninfo,
pmix_lookup_cbfunc_t cbfunc, void *cbdata)
{
int rc;
pmix1_opalcaddy_t *opalcaddy;
opal_pmix_data_range_t oscp;
opal_process_name_t proc;
opal_value_t *iptr;
size_t n;
@ -467,11 +447,6 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, pmix_data_range_t sc
proc.vpid = p->rank;
}
/* convert the scope */
if (OPAL_SUCCESS != (rc = convert_data_range(&oscp, scope))) {
return pmix1_convert_opalrc(rc);
}
/* setup the caddy */
opalcaddy = OBJ_NEW(pmix1_opalcaddy_t);
opalcaddy->lkupcbfunc = cbfunc;
@ -489,7 +464,7 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, pmix_data_range_t sc
}
/* pass it up */
rc = host_module->lookup(&proc, oscp, &opalcaddy->info, keys, opal_lkupcbfunc, opalcaddy);
rc = host_module->lookup(&proc, keys, &opalcaddy->info, opal_lkupcbfunc, opalcaddy);
if (OPAL_SUCCESS != rc) {
OBJ_RELEASE(opalcaddy);
}
@ -498,15 +473,13 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, pmix_data_range_t sc
}
static pmix_status_t server_unpublish_fn(const pmix_proc_t *p,
pmix_data_range_t scope,
const pmix_info_t info[], size_t ninfo, char **keys,
static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys,
const pmix_info_t info[], size_t ninfo,
pmix_op_cbfunc_t cbfunc, void *cbdata)
{
int rc;
pmix1_opalcaddy_t *opalcaddy;
opal_process_name_t proc;
opal_pmix_data_range_t oscp;
opal_value_t *iptr;
size_t n;
@ -524,11 +497,6 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p,
proc.vpid = p->rank;
}
/* convert the data range */
if (OPAL_SUCCESS != (rc = convert_data_range(&oscp, scope))) {
return pmix1_convert_opalrc(rc);
}
/* setup the caddy */
opalcaddy = OBJ_NEW(pmix1_opalcaddy_t);
opalcaddy->opcbfunc = cbfunc;
@ -546,7 +514,7 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p,
}
/* pass it up */
rc = host_module->unpublish(&proc, oscp, &opalcaddy->info, keys, opal_opcbfunc, opalcaddy);
rc = host_module->unpublish(&proc, keys, &opalcaddy->info, opal_opcbfunc, opalcaddy);
if (OPAL_SUCCESS != rc) {
OBJ_RELEASE(opalcaddy);
}
@ -802,54 +770,3 @@ static pmix_status_t server_listener_fn(int listening_sd,
rc = host_module->listener(listening_sd, cbfunc);
return pmix1_convert_opalrc(rc);
}
/**** UTILITY FUNCTIONS ****/
static int convert_data_range(opal_pmix_data_range_t *sc,
pmix_data_range_t scope)
{
int rc = OPAL_SUCCESS;
switch(scope) {
case PMIX_DATA_RANGE_UNDEF:
*sc = OPAL_PMIX_DATA_RANGE_UNDEF;
break;
case PMIX_NAMESPACE:
*sc = OPAL_PMIX_NAMESPACE;
break;
case PMIX_SESSION:
*sc = OPAL_PMIX_SESSION;
break;
default:
*sc = OPAL_PMIX_DATA_RANGE_UNDEF;
rc = OPAL_ERR_BAD_PARAM;
break;
}
return rc;
}
static int convert_persistence(opal_pmix_persistence_t *p,
pmix_persistence_t persist)
{
int rc = OPAL_SUCCESS;
switch (persist) {
case PMIX_PERSIST_INDEF:
*p = OPAL_PMIX_PERSIST_INDEF;
break;
case PMIX_PERSIST_PROC:
*p = OPAL_PMIX_PERSIST_PROC;
break;
case PMIX_PERSIST_APP:
*p = OPAL_PMIX_PERSIST_APP;
break;
case PMIX_PERSIST_SESSION:
*p = OPAL_PMIX_PERSIST_SESSION;
break;
default:
*p = OPAL_PMIX_PERSIST_PROC;
rc = OPAL_ERR_BAD_PARAM;
}
return rc;
}

Просмотреть файл

@ -48,6 +48,42 @@ extern pmix_server_module_t mymodule;
extern opal_pmix_server_module_t *host_module;
static char *dbgvalue=NULL;
/* Error handler handed to PMIx_Register_errhandler by the server.
 *
 * Converts a PMIx error notification (status, proc array, info array)
 * into the OPAL equivalents and passes them to opal_pmix_base_errhandler.
 *
 * @param status  PMIx status code being reported
 * @param procs   array of affected procs (nspace string + rank)
 * @param nprocs  number of entries in procs
 * @param info    array of key/value info accompanying the notification
 * @param ninfo   number of entries in info
 */
static void myerr(pmix_status_t status,
                  pmix_proc_t procs[], size_t nprocs,
                  pmix_info_t info[], size_t ninfo)
{
    int rc;
    opal_list_t plist, ilist;
    opal_namelist_t *nm;
    opal_value_t *iptr;
    size_t n;

    /* convert the incoming status */
    rc = pmix1_convert_rc(status);

    /* convert the array of procs */
    OBJ_CONSTRUCT(&plist, opal_list_t);
    for (n=0; n < nprocs; n++) {
        nm = OBJ_NEW(opal_namelist_t);
        nm->name.jobid = strtoul(procs[n].nspace, NULL, 10);
        nm->name.vpid = procs[n].rank;
        opal_list_append(&plist, &nm->super);
    }

    /* convert the array of info */
    OBJ_CONSTRUCT(&ilist, opal_list_t);
    for (n=0; n < ninfo; n++) {
        iptr = OBJ_NEW(opal_value_t);
        iptr->key = strdup(info[n].key);
        pmix1_value_unload(iptr, &info[n].value);
        /* put the converted value on the info list - re-appending the
         * last proc item to plist would corrupt that list, lose iptr,
         * and read an unset nm when no procs were supplied */
        opal_list_append(&ilist, &iptr->super);
    }

    /* call the base errhandler */
    opal_pmix_base_errhandler(rc, &plist, &ilist);

    /* release everything we put on the two lists */
    OPAL_LIST_DESTRUCT(&plist);
    OPAL_LIST_DESTRUCT(&ilist);
}
int pmix1_server_init(opal_pmix_server_module_t *module)
{
@ -65,6 +101,8 @@ int pmix1_server_init(opal_pmix_server_module_t *module)
/* record the host module */
host_module = module;
/* register the errhandler */
PMIx_Register_errhandler(NULL, 0, myerr);
return OPAL_SUCCESS;
}
@ -72,6 +110,9 @@ int pmix1_server_finalize(void)
{
pmix_status_t rc;
/* deregister the errhandler */
PMIx_Deregister_errhandler();
rc = PMIx_server_finalize();
return pmix1_convert_rc(rc);
}

Просмотреть файл

@ -35,6 +35,7 @@
#include "opal/util/show_help.h"
#include "pmix1.h"
#include "opal/mca/pmix/base/base.h"
#include "opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h"
@ -82,37 +83,11 @@ const opal_pmix_base_module_t opal_pmix_pmix1xx_module = {
pmix1_server_notify_error,
/* utility APIs */
PMIx_Get_version,
pmix1_register_errhandler,
pmix1_deregister_errhandler,
opal_pmix_base_register_handler,
opal_pmix_base_deregister_handler,
pmix1_store_local
};
static pmix_notification_fn_t errhandler = NULL;
static void notification_fn(int status,
opal_list_t *procs,
opal_list_t *info)
{
/* convert the status */
/* convert the list of procs to an array of pmix_proc_t */
/* convert the list of info to an array of pmix_info_t */
/* pass this down to the notification function
* we were given */
}
void pmix1_register_errhandler(opal_pmix_errhandler_fn_t errhandler)
{
return;
}
void pmix1_deregister_errhandler(void)
{
return;
}
int pmix1_store_local(const opal_process_name_t *proc,
opal_value_t *val)
{

Просмотреть файл

@ -92,8 +92,6 @@ typedef int (*opal_pmix_server_dmodex_req_fn_t)(opal_process_name_t *proc, opal_
* process is also provided and is expected to be returned on any subsequent
* lookup request */
typedef int (*opal_pmix_server_publish_fn_t)(opal_process_name_t *proc,
opal_pmix_data_range_t range,
opal_pmix_persistence_t persist,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
@ -110,18 +108,16 @@ typedef int (*opal_pmix_server_publish_fn_t)(opal_process_name_t *proc,
* how the operation is to be executed (e.g., timeout limits, whether the
* lookup should wait until data appears).
*/
typedef int (*opal_pmix_server_lookup_fn_t)(opal_process_name_t *proc,
opal_pmix_data_range_t range,
opal_list_t *info, char **keys,
typedef int (*opal_pmix_server_lookup_fn_t)(opal_process_name_t *proc, char **keys,
opal_list_t *info,
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata);
/* Delete data from the data store. The host server will be passed a NULL-terminated array
* of string keys along with a list of info directives (e.g., the range within which the
* data is expected to have been published). The callback is to be executed upon
* completion of the delete procedure */
typedef int (*opal_pmix_server_unpublish_fn_t)(opal_process_name_t *proc,
opal_pmix_data_range_t range,
opal_list_t *info, char **keys,
typedef int (*opal_pmix_server_unpublish_fn_t)(opal_process_name_t *proc, char **keys,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
/* Spawn a set of applications/processes as per the PMIx API. Note that

Просмотреть файл

@ -32,14 +32,17 @@ BEGIN_C_DECLS
#define OPAL_PMIX_USERID "pmix.euid" // (uint32_t) effective user id
#define OPAL_PMIX_GRPID "pmix.egid" // (uint32_t) effective group id
/* general proc-level attributes */
#define OPAL_PMIX_CPUSET "pmix.cpuset" // (char*) hwloc bitmap applied to proc upon launch
#define OPAL_PMIX_CREDENTIAL "pmix.cred" // (char*) security credential assigned to proc
#define OPAL_PMIX_SPAWNED "pmix.spawned" // (bool) true if this proc resulted from a call to PMIx_Spawn
#define OPAL_PMIX_ARCH "pmix.arch" // (uint32_t) datatype architecture flag
/* scratch directory locations for use by applications */
#define OPAL_PMIX_TMPDIR "pmix.tmpdir" // (char*) top-level tmp dir assigned to session
#define OPAL_PMIX_NSDIR "pmix.nsdir" // (char*) sub-tmpdir assigned to namespace
#define OPAL_PMIX_PROCDIR "pmix.pdir" // (char*) sub-nsdir assigned to proc
/* information about relative ranks as assigned by the RM */
#define OPAL_PMIX_JOBID "pmix.jobid" // (char*) jobid assigned by scheduler
#define OPAL_PMIX_APPNUM "pmix.appnum" // (uint32_t) app number within the job
@ -71,17 +74,20 @@ BEGIN_C_DECLS
#define OPAL_PMIX_LOCAL_PEERS "pmix.lpeers" // (char*) comma-delimited string of ranks on this node within the specified nspace
#define OPAL_PMIX_LOCAL_CPUSETS "pmix.lcpus" // (char*) colon-delimited cpusets of local peers within the specified nspace
#define OPAL_PMIX_PROC_URI "pmix.puri" // (char*) URI containing contact info for proc
/* size info */
#define OPAL_PMIX_UNIV_SIZE "pmix.univ.size" // (uint32_t) #procs in this nspace
#define OPAL_PMIX_JOB_SIZE "pmix.job.size" // (uint32_t) #procs in this job
#define OPAL_PMIX_LOCAL_SIZE "pmix.local.size" // (uint32_t) #procs in this job on this node
#define OPAL_PMIX_NODE_SIZE "pmix.node.size" // (uint32_t) #procs across all jobs on this node
#define OPAL_PMIX_MAX_PROCS "pmix.max.size" // (uint32_t) max #procs for this job
/* topology info */
#define OPAL_PMIX_NET_TOPO "pmix.ntopo" // (char*) xml-representation of network topology
#define OPAL_PMIX_LOCAL_TOPO "pmix.ltopo" // (char*) xml-representation of local node topology
#define OPAL_PMIX_NODE_LIST "pmix.nlist" // (char*) comma-delimited list of nodes running procs for this job
#define OPAL_PMIX_TOPOLOGY "pmix.topo" // (hwloc_topology_t) pointer to the PMIx client's internal topology object
/* fault tolerance-related info */
#define OPAL_PMIX_TERMINATE_SESSION "pmix.term.sess" // (bool) RM intends to terminate session
#define OPAL_PMIX_TERMINATE_JOB "pmix.term.job" // (bool) RM intends to terminate this job
@ -95,6 +101,9 @@ BEGIN_C_DECLS
#define OPAL_PMIX_WAIT "pmix.wait" // (int) caller requests that the server wait until the specified #values are found
#define OPAL_PMIX_COLLECTIVE_ALGO "pmix.calgo" // (char*) comma-delimited list of algorithms to use for collective
#define OPAL_PMIX_COLLECTIVE_ALGO_REQD "pmix.calreqd" // (bool) if true, indicates that the requested choice of algo is mandatory
#define OPAL_PMIX_NOTIFY_COMPLETION "pmix.notecomp" // (bool) notify parent process upon termination of child job
#define OPAL_PMIX_RANGE "pmix.range" // (int) opal_pmix_data_range_t value for calls to publish/lookup/unpublish
#define OPAL_PMIX_PERSISTENCE "pmix.persist" // (int) opal_pmix_persistence_t value for calls to publish
/* attribute used by host server to pass data to the server convenience library - the
* data will then be parsed and provided to the local clients */
@ -126,7 +135,8 @@ BEGIN_C_DECLS
#define OPAL_PMIX_STDIN_TGT "pmix.stdin" // (uint32_t) spawned proc rank that is to receive stdin
/* define a scope for data "put" by PMI per the following:
/* define a scope for data "put" by PMI per the following - maintain
* consistent order with the PMIx distro :
*
* OPAL_PMIX_LOCAL - the data is intended only for other application
* processes on the same node. Data marked in this way
@ -137,7 +147,7 @@ BEGIN_C_DECLS
* OPAL_PMIX_GLOBAL - the data is to be shared with all other requesting processes,
* regardless of location
*/
#define OPAL_PMIX_SCOPE PMIX_UINT32
#define OPAL_PMIX_SCOPE PMIX_UINT
typedef enum {
OPAL_PMIX_SCOPE_UNDEF = 0,
OPAL_PMIX_LOCAL, // share to procs also on this node
@ -145,15 +155,17 @@ typedef enum {
OPAL_PMIX_GLOBAL
} opal_pmix_scope_t;
/* define a range for data "published" by PMI */
#define OPAL_PMIX_DATA_RANGE OPAL_UINT8
/* define a range for data "published" by PMI - maintain
* consistent order with the PMIx distro */
#define OPAL_PMIX_DATA_RANGE OPAL_UINT
/* NOTE: a value of this enum is what the OPAL_PMIX_RANGE attribute carries
 * (as an int), so the numeric value of each enumerator matters - do not
 * reorder or insert entries without keeping the PMIx side in step */
typedef enum {
OPAL_PMIX_DATA_RANGE_UNDEF = 0, // zero value - presumably means "no range specified"; TODO confirm
OPAL_PMIX_NAMESPACE, // data is available to procs in the same nspace only
OPAL_PMIX_SESSION // data available to all jobs in this session
} opal_pmix_data_range_t;
/* define a "persistence" policy for data published by clients */
/* define a "persistence" policy for data published by clients - maintain
* consistent order with the PMIx distro */
typedef enum {
OPAL_PMIX_PERSIST_INDEF = 0, // retain until specifically deleted
OPAL_PMIX_PERSIST_PROC, // retain until publishing process terminates

Просмотреть файл

@ -351,6 +351,14 @@ int orte_ess_base_orted_setup(char **hosts)
error = "orte_routed_base_select";
goto error;
}
/* setup the routed info - the selected routed component
* will know what to do.
*/
if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) {
ORTE_ERROR_LOG(ret);
error = "orte_routed.init_routes";
goto error;
}
/*
* Group communications
*/
@ -645,7 +653,7 @@ int orte_ess_base_orted_finalize(void)
/* shutdown the pmix server */
pmix_server_finalize();
(void) mca_base_framework_close(&opal_pmix_base_framework);
/* close frameworks */
(void) mca_base_framework_close(&orte_schizo_base_framework);
(void) mca_base_framework_close(&orte_filem_base_framework);

Просмотреть файл

@ -647,12 +647,6 @@ static int rte_init(void)
error = "opal_pmix_base_select";
goto error;
}
/* setup the PMIx server */
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
ORTE_ERROR_LOG(ret);
error = "pmix server init";
goto error;
}
/* setup the routed info - the selected routed component
* will know what to do.
@ -662,6 +656,14 @@ static int rte_init(void)
error = "orte_routed.init_routes";
goto error;
}
/* setup the PMIx server */
if (ORTE_SUCCESS != (ret = pmix_server_init())) {
ORTE_ERROR_LOG(ret);
error = "pmix server init";
goto error;
}
/* setup I/O forwarding system - must come after we init routes */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) {
ORTE_ERROR_LOG(ret);

Просмотреть файл

@ -65,7 +65,6 @@ ORTE_DECLSPEC int orte_schizo_base_parse_cli(char *personality,
ORTE_DECLSPEC int orte_schizo_base_parse_env(char *personality,
char *path,
opal_cmd_line_t *cmd_line,
char *server,
char **srcenv,
char ***dstenv);
ORTE_DECLSPEC int orte_schizo_base_setup_fork(orte_job_t *jdata,

Просмотреть файл

@ -40,7 +40,6 @@ int orte_schizo_base_parse_cli(char *personality,
int orte_schizo_base_parse_env(char *personality,
char *path,
opal_cmd_line_t *cmd_line,
char *server,
char **srcenv,
char ***dstenv)
{
@ -50,7 +49,7 @@ int orte_schizo_base_parse_env(char *personality,
OPAL_LIST_FOREACH(mod, &orte_schizo_base.active_modules, orte_schizo_base_active_module_t) {
if (0 == strcmp(personality, mod->component->mca_component_name)) {
if (NULL != mod->module->parse_env) {
rc = mod->module->parse_env(personality, path, cmd_line, server, srcenv, dstenv);
rc = mod->module->parse_env(personality, path, cmd_line, srcenv, dstenv);
return rc;
}
}

Просмотреть файл

@ -54,7 +54,6 @@ static int parse_cli(char *personality,
static int parse_env(char *personality,
char *path,
opal_cmd_line_t *cmd_line,
char *server,
char **srcenv,
char ***dstenv);
static int setup_fork(orte_job_t *jdata,
@ -154,7 +153,6 @@ static int parse_cli(char *personality,
static int parse_env(char *personality,
char *path,
opal_cmd_line_t *cmd_line,
char *ompi_server,
char **srcenv,
char ***dstenv)
{
@ -181,11 +179,6 @@ static int parse_env(char *personality,
}
}
/* add the ompi-server, if provided */
if (NULL != ompi_server) {
opal_setenv("OMPI_MCA_pubsub_orte_server", ompi_server, true, dstenv);
}
/* set necessary env variables for external usage from tune conf file*/
int set_from_file = 0;
vars = NULL;

Просмотреть файл

@ -46,7 +46,6 @@ typedef int (*orte_schizo_base_module_parse_cli_fn_t)(char *personality,
typedef int (*orte_schizo_base_module_parse_env_fn_t)(char *personality,
char *path,
opal_cmd_line_t *cmd_line,
char *server,
char **srcenv,
char ***dstenv);

Просмотреть файл

@ -62,6 +62,7 @@
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/util/name_fns.h"
#include "orte/util/session_dir.h"
#include "orte/util/show_help.h"
@ -125,21 +126,40 @@ void pmix_server_register_params(void)
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
&orte_pmix_server_globals.timeout);
orte_pmix_server_globals.timeout = orte_pmix_server_globals.timeout * 1000000;
/* register the URI of the UNIVERSAL data server */
orte_pmix_server_globals.server_uri = NULL;
(void) mca_base_var_register ("orte", "pmix", NULL, "server_uri",
"URI of a session-level keyval server for publish/lookup operations",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
&orte_pmix_server_globals.server_uri);
/* if the universal server wasn't specified, then we use
* our own HNP for that purpose */
orte_pmix_server_globals.server = *ORTE_PROC_MY_HNP;
/* whether or not to wait for the universal server */
orte_pmix_server_globals.wait_for_server = false;
(void) mca_base_var_register ("orte", "pmix", NULL, "wait_for_server",
"Whether or not to wait for the session-level server to start",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
&orte_pmix_server_globals.wait_for_server);
}
static void eviction_cbfunc(struct opal_hotel_t *hotel,
int room_num, void *occupant)
{
pmix_server_req_t *req = (pmix_server_req_t*)occupant;
int rc;
/* decrement the request timeout */
req->timeout -= orte_pmix_server_globals.timeout;
if (0 < req->timeout) {
/* not done yet - check us back in */
if (OPAL_SUCCESS == (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) {
return;
}
ORTE_ERROR_LOG(rc);
/* fall thru and return an error so the caller doesn't hang */
}
/* don't let the caller hang */
if (NULL != req->opcbfunc) {
req->opcbfunc(OPAL_ERR_TIMEOUT, req->cbdata);
@ -169,7 +189,7 @@ int pmix_server_init(void)
OBJ_CONSTRUCT(&orte_pmix_server_globals.reqs, opal_hotel_t);
if (OPAL_SUCCESS != (rc = opal_hotel_init(&orte_pmix_server_globals.reqs,
orte_pmix_server_globals.num_rooms,
orte_event_base, orte_pmix_server_globals.timeout,
orte_event_base, orte_pmix_server_globals.timeout*1000000,
ORTE_ERROR_PRI, eviction_cbfunc))) {
ORTE_ERROR_LOG(rc);
return rc;
@ -191,12 +211,103 @@ int pmix_server_init(void)
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DATA_CLIENT,
ORTE_RML_PERSISTENT, pmix_server_keyval_client, NULL);
/* ensure the PMIx server uses the proper rendezvous directory */
opal_setenv("PMIX_SERVER_TMPDIR", orte_process_info.proc_session_dir, true, &environ);
/* setup the local server */
if (ORTE_SUCCESS != (rc = opal_pmix.server_init(&pmix_server))) {
ORTE_ERROR_LOG(rc);
/* memory cleanup will occur when finalize is called */
}
/* if the universal server wasn't specified, then we use
* our own HNP for that purpose */
if (NULL == orte_pmix_server_globals.server_uri) {
orte_pmix_server_globals.server = *ORTE_PROC_MY_HNP;
} else {
char *server;
opal_buffer_t buf;
if (0 == strncmp(orte_pmix_server_globals.server_uri, "file", strlen("file")) ||
0 == strncmp(orte_pmix_server_globals.server_uri, "FILE", strlen("FILE"))) {
char input[1024], *filename;
FILE *fp;
/* it is a file - get the filename */
filename = strchr(orte_pmix_server_globals.server_uri, ':');
if (NULL == filename) {
/* filename is not correctly formatted */
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-bad", true,
orte_basename, orte_pmix_server_globals.server_uri);
return ORTE_ERR_BAD_PARAM;
}
++filename; /* space past the : */
if (0 >= strlen(filename)) {
/* they forgot to give us the name! */
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-missing", true,
orte_basename, orte_pmix_server_globals.server_uri);
return ORTE_ERR_BAD_PARAM;
}
/* open the file and extract the uri */
fp = fopen(filename, "r");
if (NULL == fp) { /* can't find or read file! */
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-access", true,
orte_basename, orte_pmix_server_globals.server_uri);
return ORTE_ERR_BAD_PARAM;
}
if (NULL == fgets(input, 1024, fp)) {
/* something malformed about file */
fclose(fp);
orte_show_help("help-orterun.txt", "orterun:ompi-server-file-bad", true,
orte_basename, orte_pmix_server_globals.server_uri,
orte_basename);
return ORTE_ERR_BAD_PARAM;
}
fclose(fp);
input[strlen(input)-1] = '\0'; /* remove newline */
server = strdup(input);
} else {
server = strdup(orte_pmix_server_globals.server_uri);
}
/* setup our route to the server */
OBJ_CONSTRUCT(&buf, opal_buffer_t);
opal_dss.pack(&buf, &server, 1, OPAL_STRING);
if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(&buf))) {
ORTE_ERROR_LOG(rc);
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
return rc;
}
OBJ_DESTRUCT(&buf);
/* parse the URI to get the server's name */
if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(server, &orte_pmix_server_globals.server, NULL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* check if we are to wait for the server to start - resolves
* a race condition that can occur when the server is run
* as a background job - e.g., in scripts
*/
if (orte_pmix_server_globals.wait_for_server) {
/* ping the server */
struct timeval timeout;
timeout.tv_sec = orte_pmix_server_globals.timeout;
timeout.tv_usec = 0;
if (ORTE_SUCCESS != (rc = orte_rml.ping(server, &timeout))) {
/* try it one more time */
if (ORTE_SUCCESS != (rc = orte_rml.ping(server, &timeout))) {
/* okay give up */
orte_show_help("help-orterun.txt", "orterun:server-not-found", true,
orte_basename, server,
(long)orte_pmix_server_globals.timeout,
ORTE_ERROR_NAME(rc));
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
return rc;
}
}
}
}
return rc;
}
@ -461,6 +572,7 @@ static void pmix_server_dmdx_resp(int status, orte_process_name_t* sender,
static void rqcon(pmix_server_req_t *p)
{
p->timeout = orte_pmix_server_globals.timeout;
p->jdata = NULL;
OBJ_CONSTRUCT(&p->msg, opal_buffer_t);
p->opcbfunc = NULL;

Просмотреть файл

@ -53,6 +53,7 @@
typedef struct {
opal_object_t super;
opal_event_t ev;
int timeout;
int room_num;
int remote_room_num;
orte_process_name_t proxy;
@ -146,17 +147,13 @@ extern int pmix_server_fencenb_fn(opal_list_t *procs, opal_list_t *info,
extern int pmix_server_dmodex_req_fn(opal_process_name_t *proc, opal_list_t *info,
opal_pmix_modex_cbfunc_t cbfunc, void *cbdata);
extern int pmix_server_publish_fn(opal_process_name_t *proc,
opal_pmix_data_range_t range,
opal_pmix_persistence_t persist,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
extern int pmix_server_lookup_fn(opal_process_name_t *proc,
opal_pmix_data_range_t range,
opal_list_t *info, char **keys,
extern int pmix_server_lookup_fn(opal_process_name_t *proc, char **keys,
opal_list_t *info,
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata);
extern int pmix_server_unpublish_fn(opal_process_name_t *proc,
opal_pmix_data_range_t range,
opal_list_t *info, char **keys,
extern int pmix_server_unpublish_fn(opal_process_name_t *proc, char **keys,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
extern int pmix_server_spawn_fn(opal_process_name_t *requestor,
opal_list_t *job_info, opal_list_t *apps,
@ -186,6 +183,8 @@ typedef struct {
opal_hotel_t reqs;
int num_rooms;
int timeout;
char *server_uri;
bool wait_for_server;
orte_process_name_t server;
} pmix_server_globals_t;

Просмотреть файл

@ -85,16 +85,16 @@ static void execute(int sd, short args, void *cbdata)
}
int pmix_server_publish_fn(opal_process_name_t *proc,
opal_pmix_data_range_t range,
opal_pmix_persistence_t persist,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
{
pmix_server_req_t *req;
int rc;
uint8_t cmd = ORTE_PMIX_PUBLISH_CMD;
int32_t ninfo;
opal_value_t *iptr;
opal_pmix_data_range_t range = OPAL_PMIX_SESSION;
opal_pmix_persistence_t persist = OPAL_PMIX_PERSIST_APP;
bool rset, pset;
/* create the caddy */
req = OBJ_NEW(pmix_server_req_t);
@ -115,6 +115,25 @@ int pmix_server_publish_fn(opal_process_name_t *proc,
return rc;
}
/* no help for it - need to search for range/persistence */
rset = false;
pset = false;
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE)) {
range = iptr->data.integer;
if (pset) {
break;
}
rset = true;
} else if (0 == strcmp(iptr->key, OPAL_PMIX_PERSISTENCE)) {
persist = iptr->data.integer;
if (rset) {
break;
}
pset = true;
}
}
/* pack the range */
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &range, 1, OPAL_INT))) {
ORTE_ERROR_LOG(rc);
@ -136,16 +155,13 @@ int pmix_server_publish_fn(opal_process_name_t *proc,
return rc;
}
/* pack the number of info items */
ninfo = opal_list_get_size(info);
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &ninfo, 1, OPAL_UINT32))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(req);
return rc;
}
/* if we have items, pack those too */
/* if we have items, pack those too - ignore persistence
* and range values */
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE) ||
0 == strcmp(iptr->key, OPAL_PMIX_PERSISTENCE)) {
continue;
}
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &iptr, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(req);
@ -163,17 +179,16 @@ int pmix_server_publish_fn(opal_process_name_t *proc,
}
int pmix_server_lookup_fn(opal_process_name_t *proc,
opal_pmix_data_range_t range,
opal_list_t *info, char **keys,
int pmix_server_lookup_fn(opal_process_name_t *proc, char **keys,
opal_list_t *info,
opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata)
{
pmix_server_req_t *req;
int rc;
uint8_t cmd = ORTE_PMIX_LOOKUP_CMD;
int32_t nkeys, i;
int32_t ninfo;
opal_value_t *iptr;
opal_pmix_data_range_t range = OPAL_PMIX_SESSION;
/* the list of info objects are directives for us - they include
* things like timeout constraints, so there is no reason to
@ -191,6 +206,14 @@ int pmix_server_lookup_fn(opal_process_name_t *proc,
return rc;
}
/* no help for it - need to search for range */
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE)) {
range = iptr->data.integer;
break;
}
}
/* pack the range */
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &range, 1, OPAL_INT))) {
ORTE_ERROR_LOG(rc);
@ -205,23 +228,6 @@ int pmix_server_lookup_fn(opal_process_name_t *proc,
req->target = *ORTE_PROC_MY_HNP;
}
/* pack the number of info items */
ninfo = opal_list_get_size(info);
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &ninfo, 1, OPAL_UINT32))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(req);
return rc;
}
/* if we have items, pack those too */
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &iptr, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(req);
return rc;
}
}
/* pack the number of keys */
nkeys = opal_argv_count(keys);
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &nkeys, 1, OPAL_UINT32))) {
@ -239,6 +245,18 @@ int pmix_server_lookup_fn(opal_process_name_t *proc,
}
}
/* if we have items, pack those too - ignore range value */
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE)) {
continue;
}
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &iptr, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(req);
return rc;
}
}
/* thread-shift so we can store the tracker */
opal_event_set(orte_event_base, &(req->ev),
-1, OPAL_EV_WRITE, execute, req);
@ -248,16 +266,16 @@ int pmix_server_lookup_fn(opal_process_name_t *proc,
return OPAL_SUCCESS;
}
int pmix_server_unpublish_fn(opal_process_name_t *proc,
opal_pmix_data_range_t range,
opal_list_t *info, char **keys,
int pmix_server_unpublish_fn(opal_process_name_t *proc, char **keys,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata)
{
pmix_server_req_t *req;
int rc;
uint8_t cmd = ORTE_PMIX_UNPUBLISH_CMD;
uint32_t nkeys, ninfo;
uint32_t nkeys, n;
opal_value_t *iptr;
opal_pmix_data_range_t range = OPAL_PMIX_SESSION;
/* create the caddy */
req = OBJ_NEW(pmix_server_req_t);
@ -278,6 +296,14 @@ int pmix_server_unpublish_fn(opal_process_name_t *proc,
return rc;
}
/* no help for it - need to search for range */
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE)) {
range = iptr->data.integer;
break;
}
}
/* pack the range */
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &range, 1, OPAL_INT))) {
ORTE_ERROR_LOG(rc);
@ -292,22 +318,6 @@ int pmix_server_unpublish_fn(opal_process_name_t *proc,
req->target = *ORTE_PROC_MY_HNP;
}
/* pack the number of info items */
ninfo = opal_list_get_size(info);
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &ninfo, 1, OPAL_UINT32))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(req);
return rc;
}
/* if we have items, pack those too */
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &iptr, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(req);
return rc;
}
}
/* pack the number of keys */
nkeys = opal_argv_count(keys);
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &nkeys, 1, OPAL_UINT32))) {
@ -317,10 +327,24 @@ int pmix_server_unpublish_fn(opal_process_name_t *proc,
}
/* pack the keys too */
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, keys, nkeys, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(req);
return rc;
for (n=0; n < nkeys; n++) {
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &keys[n], 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(req);
return rc;
}
}
/* if we have items, pack those too - ignore range value */
OPAL_LIST_FOREACH(iptr, info, opal_value_t) {
if (0 == strcmp(iptr->key, OPAL_PMIX_RANGE)) {
continue;
}
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &iptr, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(req);
return rc;
}
}
/* thread-shift so we can store the tracker */

Просмотреть файл

@ -80,8 +80,29 @@ OBJ_CLASS_INSTANCE(orte_data_object_t,
opal_object_t,
construct, destruct);
/* define a request object for delayed answers: when a lookup asks to wait
 * for data that has not been published yet, the data server records the
 * request in one of these so a later publish of matching data can be
 * returned to the original requestor.
 *
 * NOTE(review): the publish path scans the "pending" list for these
 * objects, but in the lookup path visible here the new request is created
 * and filled in without a visible opal_list_append onto "pending" -
 * confirm that the request actually gets queued. */
typedef struct {
opal_list_item_t super; /* parent class - lets the request live on an opal_list_t */
orte_process_name_t requestor; /* process the delayed reply is sent to */
uint32_t uid; /* user id of the requestor - must equal the publisher's uid to match */
opal_pmix_data_range_t range; /* requested range - must equal the published data's range to match */
char **keys; /* NULL-terminated array of the keys being looked up; released by the destructor */
} orte_data_req_t;
/* constructor: start with no key array - presumably so that the destructor
 * can run on a request whose keys were never assigned (confirm that
 * opal_argv_free accepts NULL). The remaining fields are left for the
 * creator to fill in. */
static void rqcon(orte_data_req_t *p)
{
p->keys = NULL;
}
/* destructor: release the key array held by the request */
static void rqdes(orte_data_req_t *p)
{
opal_argv_free(p->keys);
}
/* register the class: parent is opal_list_item_t, with rqcon/rqdes as
 * the constructor/destructor */
OBJ_CLASS_INSTANCE(orte_data_req_t,
opal_list_item_t,
rqcon, rqdes);
/* local globals */
static opal_pointer_array_t orte_data_server_store;
static opal_list_t pending;
int orte_data_server_init(void)
{
@ -96,6 +117,8 @@ int orte_data_server_init(void)
return rc;
}
OBJ_CONSTRUCT(&pending, opal_list_t);
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_DATA_SERVER,
ORTE_RML_PERSISTENT,
@ -118,6 +141,7 @@ void orte_data_server_finalize(void)
}
}
OBJ_DESTRUCT(&orte_data_server_store);
OPAL_LIST_DESTRUCT(&pending);
}
void orte_data_server(int status, orte_process_name_t* sender,
@ -128,15 +152,16 @@ void orte_data_server(int status, orte_process_name_t* sender,
orte_std_cntr_t count;
opal_process_name_t requestor;
orte_data_object_t *data;
opal_buffer_t *answer;
opal_buffer_t *answer, *reply;
int rc, ret, k;
opal_value_t *iptr, *inext;
uint32_t ninfo, i;
char **keys = NULL, *str;
bool ret_packed = false;
bool ret_packed = false, wait = false;
int room_number;
uint32_t uid;
opal_pmix_data_range_t range;
orte_data_req_t *req, *rqnext;
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
"%s data server got message from %s",
@ -196,34 +221,66 @@ void orte_data_server(int status, orte_process_name_t* sender,
goto SEND_ERROR;
}
/* unpack the number of info elements */
count = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ninfo, &count, OPAL_UINT32))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(data);
goto SEND_ERROR;
}
if (0 < ninfo) {
for (i=0; i < ninfo; i++) {
count = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &iptr, &count, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(data);
goto SEND_ERROR;
}
/* if this is the userid, separate it out */
if (0 == strcmp(iptr->key, OPAL_PMIX_USERID)) {
data->uid = iptr->data.uint32;
OBJ_RELEASE(iptr);
} else {
opal_list_append(&data->values, &iptr->super);
}
while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &iptr, &count, OPAL_VALUE))) {
/* if this is the userid, separate it out */
if (0 == strcmp(iptr->key, OPAL_PMIX_USERID)) {
data->uid = iptr->data.uint32;
OBJ_RELEASE(iptr);
} else {
opal_list_append(&data->values, &iptr->super);
}
}
data->index = opal_pointer_array_add(&orte_data_server_store, data);
/* check for pending requests that match this data */
reply = NULL;
OPAL_LIST_FOREACH_SAFE(req, rqnext, &pending, orte_data_req_t) {
if (req->uid != data->uid) {
continue;
}
if (req->range != data->range) {
continue;
}
for (i=0; NULL != req->keys[i]; i++) {
/* cycle thru the data keys for matches */
OPAL_LIST_FOREACH(iptr, &data->values, opal_value_t) {
if (0 == strcmp(iptr->key, req->keys[i])) {
/* found it - package it for return */
if (NULL == reply) {
reply = OBJ_NEW(opal_buffer_t);
ret = ORTE_SUCCESS;
if (ORTE_SUCCESS != (rc = opal_dss.pack(reply, &ret, 1, OPAL_INT))) {
ORTE_ERROR_LOG(rc);
break;
}
}
if (ORTE_SUCCESS != (rc = opal_dss.pack(reply, &data->owner, 1, OPAL_NAME))) {
ORTE_ERROR_LOG(rc);
break;
}
if (ORTE_SUCCESS != (rc = opal_dss.pack(reply, &iptr, 1, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
break;
}
}
}
}
if (NULL != reply) {
/* send it back to the requestor */
if (0 > (rc = orte_rml.send_buffer_nb(&req->requestor, reply, ORTE_RML_TAG_DATA_CLIENT,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(reply);
}
/* remove this request */
opal_list_remove_item(&pending, &req->super);
OBJ_RELEASE(req);
reply = NULL;
}
}
/* tell the user it was wonderful... */
ret = ORTE_SUCCESS;
if (ORTE_SUCCESS != (rc = opal_dss.pack(answer, &ret, 1, OPAL_INT))) {
@ -247,28 +304,6 @@ void orte_data_server(int status, orte_process_name_t* sender,
goto SEND_ERROR;
}
/* unpack the number of info elements */
count = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ninfo, &count, OPAL_UINT32))) {
ORTE_ERROR_LOG(rc);
goto SEND_ERROR;
}
if (0 < ninfo) {
for (i=0; i < ninfo; i++) {
count = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &iptr, &count, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
goto SEND_ERROR;
}
/* if this is the userid, separate it out */
if (0 == strcmp(iptr->key, OPAL_PMIX_USERID)) {
uid = iptr->data.uint32;
}
/* ignore anything else for now */
OBJ_RELEASE(iptr);
}
}
/* unpack the number of keys */
count = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ninfo, &count, OPAL_UINT32))) {
@ -277,6 +312,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
}
if (0 == ninfo) {
/* they forgot to send us the keys?? */
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
rc = ORTE_ERR_BAD_PARAM;
goto SEND_ERROR;
}
@ -293,7 +329,27 @@ void orte_data_server(int status, orte_process_name_t* sender,
free(str);
}
/* unpack any info elements */
count = 1;
while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &iptr, &count, OPAL_VALUE))) {
/* if this is the userid, separate it out */
if (0 == strcmp(iptr->key, OPAL_PMIX_USERID)) {
uid = iptr->data.uint32;
} else if (0 == strcmp(iptr->key, OPAL_PMIX_WAIT)) {
/* flag that we wait until the data is present */
wait = true;
}
/* ignore anything else for now */
OBJ_RELEASE(iptr);
}
if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
ORTE_ERROR_LOG(rc);
opal_argv_free(keys);
goto SEND_ERROR;
}
/* cycle across the provided keys */
ret_packed = false;
for (i=0; NULL != keys[i]; i++) {
/* cycle across the stored data, looking for a match */
for (k=0; k < orte_data_server_store.size; k++) {
@ -336,12 +392,23 @@ void orte_data_server(int status, orte_process_name_t* sender,
}
}
}
opal_argv_free(keys);
if (!ret_packed) {
/* if we were told to wait for the data, then queue this up
* for later processing */
if (wait) {
req = OBJ_NEW(orte_data_req_t);
req->requestor = *sender;
req->uid = uid;
req->range = range;
req->keys = keys;
return;
}
/* nothing was found - indicate that situation */
rc = ORTE_ERR_NOT_FOUND;
opal_argv_free(keys);
goto SEND_ERROR;
}
opal_argv_free(keys);
goto SEND_ANSWER;
break;
@ -365,28 +432,6 @@ void orte_data_server(int status, orte_process_name_t* sender,
goto SEND_ERROR;
}
/* unpack the number of info elements */
count = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ninfo, &count, OPAL_UINT32))) {
ORTE_ERROR_LOG(rc);
goto SEND_ERROR;
}
if (0 < ninfo) {
for (i=0; i < ninfo; i++) {
count = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &iptr, &count, OPAL_VALUE))) {
ORTE_ERROR_LOG(rc);
goto SEND_ERROR;
}
/* if this is the userid, separate it out */
if (0 == strcmp(iptr->key, OPAL_PMIX_USERID)) {
uid = iptr->data.uint32;
}
/* ignore anything else for now */
OBJ_RELEASE(iptr);
}
}
/* unpack the number of keys */
count = 1;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ninfo, &count, OPAL_UINT32))) {
@ -411,6 +456,22 @@ void orte_data_server(int status, orte_process_name_t* sender,
free(str);
}
/* unpack any info elements */
count = 1;
while (ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &iptr, &count, OPAL_VALUE))) {
/* if this is the userid, separate it out */
if (0 == strcmp(iptr->key, OPAL_PMIX_USERID)) {
uid = iptr->data.uint32;
}
/* ignore anything else for now */
OBJ_RELEASE(iptr);
}
if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
ORTE_ERROR_LOG(rc);
opal_argv_free(keys);
goto SEND_ERROR;
}
/* cycle across the provided keys */
for (i=0; NULL != keys[i]; i++) {
/* cycle across the stored data, looking for a match */
@ -463,6 +524,10 @@ void orte_data_server(int status, orte_process_name_t* sender,
}
SEND_ERROR:
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
"%s data server: sending error %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_ERROR_NAME(rc)));
/* pack the error code */
if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &rc, 1, OPAL_INT))) {
ORTE_ERROR_LOG(ret);

Просмотреть файл

@ -1046,7 +1046,7 @@ static int create_app(int argc, char* argv[],
app->env = opal_argv_copy(*app_env);
if (ORTE_SUCCESS != (rc = orte_schizo.parse_env(myglobals.personality,
myglobals.path,
&cmd_line, NULL,
&cmd_line,
environ, &app->env))) {
goto cleanup;
}

Просмотреть файл

@ -158,7 +158,6 @@ void* MPIR_Breakpoint(void)
static char **global_mca_env = NULL;
static orte_std_cntr_t total_num_apps = 0;
static bool want_prefix_by_default = (bool) ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT;
static char *ompi_server=NULL;
/*
* Globals
@ -284,16 +283,10 @@ static opal_cmd_line_init_t cmd_line_init[] = {
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Do not attempt to resolve interfaces" },
/* uri of Open MPI server, or at least where to get it */
{ NULL, '\0', "ompi-server", "ompi-server", 1,
&orterun_globals.ompi_server, OPAL_CMD_LINE_TYPE_STRING,
"Specify the URI of the Open MPI server, or the name of the file (specified as file:filename) that contains that info" },
{ NULL, '\0', "wait-for-server", "wait-for-server", 0,
&orterun_globals.wait_for_server, OPAL_CMD_LINE_TYPE_BOOL,
"If ompi-server is not already running, wait until it is detected (default: false)" },
{ NULL, '\0', "server-wait-time", "server-wait-time", 1,
&orterun_globals.server_wait_timeout, OPAL_CMD_LINE_TYPE_INT,
"Time in seconds to wait for ompi-server (default: 10 sec)" },
/* uri of PMIx publish/lookup server, or at least where to get it */
{ "pmix_server_uri", '\0', "ompi-server", "ompi-server", 1,
NULL, OPAL_CMD_LINE_TYPE_STRING,
"Specify the URI of the publish/lookup server, or the name of the file (specified as file:filename) that contains that info" },
{ "carto_file_path", '\0', "cf", "cartofile", 1,
NULL, OPAL_CMD_LINE_TYPE_STRING,
@ -1041,42 +1034,6 @@ int orterun(int argc, char *argv[])
goto DONE;
}
/* if an uri for the ompi-server was provided, set the route */
if (NULL != ompi_server) {
opal_buffer_t buf;
/* setup our route to the server */
OBJ_CONSTRUCT(&buf, opal_buffer_t);
opal_dss.pack(&buf, &ompi_server, 1, OPAL_STRING);
if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(&buf))) {
ORTE_ERROR_LOG(rc);
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
goto DONE;
}
OBJ_DESTRUCT(&buf);
/* check if we are to wait for the server to start - resolves
* a race condition that can occur when the server is run
* as a background job - e.g., in scripts
*/
if (orterun_globals.wait_for_server) {
/* ping the server */
struct timeval timeout;
timeout.tv_sec = orterun_globals.server_wait_timeout;
timeout.tv_usec = 0;
if (ORTE_SUCCESS != (rc = orte_rml.ping(ompi_server, &timeout))) {
/* try it one more time */
if (ORTE_SUCCESS != (rc = orte_rml.ping(ompi_server, &timeout))) {
/* okay give up */
orte_show_help("help-orterun.txt", "orterun:server-not-found", true,
orte_basename, ompi_server,
(long)orterun_globals.server_wait_timeout,
ORTE_ERROR_NAME(rc));
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
goto DONE;
}
}
}
}
/* setup for debugging */
orte_debugger_init_before_spawn(jdata);
orte_state.add_job_state(ORTE_JOB_STATE_READY_FOR_DEBUGGERS,
@ -1175,9 +1132,6 @@ static int init_globals(void)
orterun_globals.appfile = NULL;
orterun_globals.wdir = NULL;
orterun_globals.path = NULL;
orterun_globals.ompi_server = NULL;
orterun_globals.wait_for_server = false;
orterun_globals.server_wait_timeout = 10;
orterun_globals.stdin_target = "0";
orterun_globals.report_pid = NULL;
orterun_globals.report_uri = NULL;
@ -1270,132 +1224,7 @@ static int parse_locals(orte_job_t *jdata, int argc, char* argv[])
bool made_app;
orte_std_cntr_t j, size1;
/* if the ompi-server was given, then set it up here */
if (NULL != orterun_globals.ompi_server) {
/* someone could have passed us a file instead of a uri, so
* we need to first check to see what we have - if it starts
* with "file", then we know it is a file. Otherwise, we assume
* it is a uri as provided by the ompi-server's output
* of an ORTE-standard string. Note that this is NOT a standard
* uri as it starts with the process name!
*/
if (0 == strncmp(orterun_globals.ompi_server, "file", strlen("file")) ||
0 == strncmp(orterun_globals.ompi_server, "FILE", strlen("FILE"))) {
char input[1024], *filename;
FILE *fp;
/* it is a file - get the filename */
filename = strchr(orterun_globals.ompi_server, ':');
if (NULL == filename) {
/* filename is not correctly formatted */
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-bad", true,
orte_basename, orterun_globals.ompi_server);
exit(1);
}
++filename; /* space past the : */
if (0 >= strlen(filename)) {
/* they forgot to give us the name! */
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-missing", true,
orte_basename, orterun_globals.ompi_server);
exit(1);
}
/* open the file and extract the uri */
fp = fopen(filename, "r");
if (NULL == fp) { /* can't find or read file! */
orte_show_help("help-orterun.txt", "orterun:ompi-server-filename-access", true,
orte_basename, orterun_globals.ompi_server);
exit(1);
}
if (NULL == fgets(input, 1024, fp)) {
/* something malformed about file */
fclose(fp);
orte_show_help("help-orterun.txt", "orterun:ompi-server-file-bad", true,
orte_basename, orterun_globals.ompi_server,
orte_basename);
exit(1);
}
fclose(fp);
input[strlen(input)-1] = '\0'; /* remove newline */
ompi_server = strdup(input);
} else if (0 == strncmp(orterun_globals.ompi_server, "pid", strlen("pid")) ||
0 == strncmp(orterun_globals.ompi_server, "PID", strlen("PID"))) {
opal_list_t hnp_list;
opal_list_item_t *item;
orte_hnp_contact_t *hnp;
char *ptr;
pid_t pid;
ptr = strchr(orterun_globals.ompi_server, ':');
if (NULL == ptr) {
/* pid is not correctly formatted */
orte_show_help("help-orterun.txt", "orterun:ompi-server-pid-bad", true,
orte_basename, orte_basename,
orterun_globals.ompi_server, orte_basename);
exit(1);
}
++ptr; /* space past the : */
if (0 >= strlen(ptr)) {
/* they forgot to give us the pid! */
orte_show_help("help-orterun.txt", "orterun:ompi-server-pid-bad", true,
orte_basename, orte_basename,
orterun_globals.ompi_server, orte_basename);
exit(1);
}
pid = strtoul(ptr, NULL, 10);
/* to search the local mpirun's, we have to partially initialize the
* orte_process_info structure. This won't fully be setup until orte_init,
* but we finagle a little bit of it here
*/
if (ORTE_SUCCESS != (rc = orte_session_dir_get_name(NULL, &orte_process_info.tmpdir_base,
&orte_process_info.top_session_dir,
NULL, NULL, NULL))) {
orte_show_help("help-orterun.txt", "orterun:ompi-server-could-not-get-hnp-list", true,
orte_basename, orte_basename);
exit(1);
}
OBJ_CONSTRUCT(&hnp_list, opal_list_t);
/* get the list of HNPs, but do -not- setup contact info to them in the RML */
if (ORTE_SUCCESS != (rc = orte_list_local_hnps(&hnp_list, false))) {
orte_show_help("help-orterun.txt", "orterun:ompi-server-could-not-get-hnp-list", true,
orte_basename, orte_basename);
exit(1);
}
/* search the list for the desired pid */
while (NULL != (item = opal_list_remove_first(&hnp_list))) {
hnp = (orte_hnp_contact_t*)item;
if (pid == hnp->pid) {
ompi_server = strdup(hnp->rml_uri);
goto hnp_found;
}
OBJ_RELEASE(item);
}
/* if we got here, it wasn't found */
orte_show_help("help-orterun.txt", "orterun:ompi-server-pid-not-found", true,
orte_basename, orte_basename, pid, orterun_globals.ompi_server,
orte_basename);
OBJ_DESTRUCT(&hnp_list);
exit(1);
hnp_found:
/* cleanup rest of list */
while (NULL != (item = opal_list_remove_first(&hnp_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&hnp_list);
} else {
ompi_server = strdup(orterun_globals.ompi_server);
}
}
/* Make the apps */
temp_argc = 0;
temp_argv = NULL;
opal_argv_append(&temp_argc, &temp_argv, argv[0]);
@ -1640,7 +1469,7 @@ static int create_app(int argc, char* argv[],
app->env = opal_argv_copy(*app_env);
if (ORTE_SUCCESS != (rc = orte_schizo.parse_env(orterun_globals.personality,
orterun_globals.path,
&cmd_line, ompi_server,
&cmd_line,
environ, &app->env))) {
goto cleanup;
}

Просмотреть файл

@ -51,9 +51,6 @@ struct orterun_globals_t {
char *path;
char *preload_files;
bool sleep;
char *ompi_server;
bool wait_for_server;
int server_wait_timeout;
char *stdin_target;
char *prefix;
char *path_to_mpirun;