From 01a653d50a996f044d8daff54261ac8de3a51de7 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 19 Jul 2016 20:10:58 -0700 Subject: [PATCH] Remove a debug print in comm_cid.c. Update PMIx2 to include the revised PMIx_Get logic for higher performance by reducing the number of hash table lookups. Fix a bug where requests for data from a proc in another nspace could hang, or result in "not found". Remove stale file reference Restore autogen pass thru pmix Remove generated file --- ompi/communicator/comm_cid.c | 1 - opal/mca/pmix/pmix2x/pmix/VERSION | 4 +- .../pmix2x/pmix/src/client/pmix_client_get.c | 132 +++++++++--------- .../pmix/pmix2x/pmix/src/server/pmix_server.c | 5 +- .../pmix2x/pmix/src/server/pmix_server_get.c | 68 ++++++--- opal/mca/pmix/pmix2x/pmix/test/pmix_client.c | 6 +- .../pmix/pmix2x/pmix/test/simple/simpclient.c | 4 +- .../pmix/pmix2x/pmix/test/simple/simpdmodex.c | 4 +- .../pmix/pmix2x/pmix/test/simple/simpdyn.c | 4 +- .../mca/pmix/pmix2x/pmix/test/simple/simpft.c | 4 +- .../pmix/pmix2x/pmix/test/simple/simppub.c | 4 +- opal/mca/pmix/pmix2x/pmix2x.c | 30 +++- opal/mca/pmix/pmix2x/pmix2x.h | 2 + opal/mca/pmix/pmix2x/pmix2x_client.c | 58 +++----- opal/mca/pmix/pmix2x/pmix2x_server_north.c | 66 ++------- opal/mca/pmix/pmix2x/pmix2x_server_south.c | 12 +- opal/mca/pmix/pmix_types.h | 3 +- orte/mca/ess/pmi/ess_pmi_module.c | 10 +- 18 files changed, 215 insertions(+), 202 deletions(-) mode change 100755 => 100644 ompi/communicator/comm_cid.c diff --git a/ompi/communicator/comm_cid.c b/ompi/communicator/comm_cid.c old mode 100755 new mode 100644 index b4478b120e..0f2415ab94 --- a/ompi/communicator/comm_cid.c +++ b/ompi/communicator/comm_cid.c @@ -877,7 +877,6 @@ static int ompi_comm_allreduce_pmix_reduce_complete (ompi_comm_request_t *reques * needs to be reworked to take advantage of it. 
*/ OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 60); OBJ_DESTRUCT(&info); - fprintf (stderr, "OPAL_PMIX_EXCHANGE returned %d\n", rc); if (OPAL_SUCCESS != rc) { OBJ_DESTRUCT(&pdat); return rc; diff --git a/opal/mca/pmix/pmix2x/pmix/VERSION b/opal/mca/pmix/pmix2x/pmix/VERSION index ae4703b2a4..37ceb8af33 100644 --- a/opal/mca/pmix/pmix2x/pmix/VERSION +++ b/opal/mca/pmix/pmix2x/pmix/VERSION @@ -30,7 +30,7 @@ greek= # command, or with the date (if "git describe" fails) in the form of # "date". -repo_rev=git92df386 +repo_rev=git4e10e9d # If tarball_version is not empty, it is used as the version string in # the tarball filename, regardless of all other versions listed in @@ -44,7 +44,7 @@ tarball_version= # The date when this release was created -date="Jul 05, 2016" +date="Jul 19, 2016" # The shared library version of each of PMIx's public libraries. # These versions are maintained in accordance with the "Library diff --git a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c index 85521bdcd1..0fc7dcd1f5 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix2x/pmix/src/client/pmix_client_get.c @@ -296,39 +296,46 @@ static void _getnb_cbfunc(struct pmix_peer_t *pr, pmix_usock_hdr_t *hdr, cnt = 1; if (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(buf, &bptr, &cnt, PMIX_BUFFER))) { - cnt = 1; - cur_kval = PMIX_NEW(pmix_kval_t); - while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(bptr, cur_kval, &cnt, PMIX_KVAL))) { - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: unpacked key %s", cur_kval->key); - if (PMIX_SUCCESS != (rc = pmix_hash_store(&nptr->modex, cur_rank, cur_kval))) { - PMIX_ERROR_LOG(rc); - } - if (NULL != cb->key && 0 == strcmp(cb->key, cur_kval->key)) { - pmix_output_verbose(2, pmix_globals.debug_output, - "pmix: found requested value"); - if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)&val, cur_kval->value, PMIX_VALUE))) { - PMIX_ERROR_LOG(rc); - 
PMIX_RELEASE(cur_kval); - val = NULL; - goto done; - } - } - PMIX_RELEASE(cur_kval); // maintain acctg - hash_store does a retain + /* if the rank is WILDCARD, then this is an nspace blob */ + if (PMIX_RANK_WILDCARD == cur_rank) { + pmix_client_process_nspace_blob(cb->nspace, bptr); + } else { cnt = 1; cur_kval = PMIX_NEW(pmix_kval_t); + while (PMIX_SUCCESS == (rc = pmix_bfrop.unpack(bptr, cur_kval, &cnt, PMIX_KVAL))) { + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix: unpacked key %s", cur_kval->key); + if (PMIX_SUCCESS != (rc = pmix_hash_store(&nptr->modex, cur_rank, cur_kval))) { + PMIX_ERROR_LOG(rc); + } + if (NULL != cb->key && 0 == strcmp(cb->key, cur_kval->key)) { + pmix_output_verbose(2, pmix_globals.debug_output, + "pmix: found requested value"); + if (PMIX_SUCCESS != (rc = pmix_bfrop.copy((void**)&val, cur_kval->value, PMIX_VALUE))) { + PMIX_ERROR_LOG(rc); + PMIX_RELEASE(cur_kval); + val = NULL; + goto done; + } + } + PMIX_RELEASE(cur_kval); // maintain acctg - hash_store does a retain + cnt = 1; + cur_kval = PMIX_NEW(pmix_kval_t); + } + cnt = 1; + PMIX_RELEASE(cur_kval); } - cnt = 1; - PMIX_RELEASE(cur_kval); } PMIX_RELEASE(bptr); // free's the data region - if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc && + PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); rc = PMIX_ERR_SILENT; // avoid error-logging twice break; } } - if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) { + if (PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc && + PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } else { rc = PMIX_SUCCESS; @@ -409,10 +416,11 @@ static void _getnbfn(int fd, short flags, void *cbdata) goto request; } - /* if the key is NULL, then we have to check both the job-data - * and the modex tables. If we don't yet have the modex data, - * then we are going to have to go get it. So let's check that - * case first */ + /* The NULL==key scenario only pertains to cases where legacy + * PMI methods are being employed. 
In this case, we have to check + * both the job-data and the modex tables. If we don't yet have + * the modex data, then we are going to have to go get it. So let's + * check that case first */ if (NULL == cb->key) { PMIX_CONSTRUCT(&results, pmix_pointer_array_t); pmix_pointer_array_init(&results, 2, INT_MAX, 1); @@ -451,9 +459,7 @@ static void _getnbfn(int fd, short flags, void *cbdata) } } else { /* if we didn't find a modex for this rank, then we need - * to go get it. Recall that the NULL==key scenario only - * pertains to cases where legacy PMI methods are being - * employed. Thus, the caller wants -all- information for + * to go get it. Thus, the caller wants -all- information for * the specified rank, not just the job-level info. */ goto request; } @@ -505,43 +511,26 @@ static void _getnbfn(int fd, short flags, void *cbdata) return; } - /* the requested data could be in the job-data table, so let's - * just check there first. */ - if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, PMIX_RANK_WILDCARD, cb->key, &val))) { - /* found it - we are in an event, so we can - * just execute the callback */ - cb->value_cbfunc(rc, val, cb->cbdata); - /* cleanup */ - if (NULL != val) { - PMIX_VALUE_RELEASE(val); + /* if the key is in the PMIx namespace, then they are looking for data + * that was provided at startup */ + if (0 == strncmp(cb->key, "pmix", 4)) { + /* should be in the internal hash table. 
*/ + if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, cb->rank, cb->key, &val))) { + /* found it - we are in an event, so we can + * just execute the callback */ + cb->value_cbfunc(rc, val, cb->cbdata); + /* cleanup */ + if (NULL != val) { + PMIX_VALUE_RELEASE(val); + } + PMIX_RELEASE(cb); + return; } - PMIX_RELEASE(cb); - return; - } - if (PMIX_RANK_WILDCARD == cb->rank) { - /* can't be anywhere else */ - cb->value_cbfunc(PMIX_ERR_NOT_FOUND, NULL, cb->cbdata); - PMIX_RELEASE(cb); - return; + /* if we don't have it, go request it */ + goto request; } - /* it could still be in the job-data table, only stored under its own - * rank and not WILDCARD - e.g., this is true of data returned about - * ourselves during startup */ - if (PMIX_SUCCESS == (rc = pmix_hash_fetch(&nptr->internal, cb->rank, cb->key, &val))) { - /* found it - we are in an event, so we can - * just execute the callback */ - cb->value_cbfunc(rc, val, cb->cbdata); - /* cleanup */ - if (NULL != val) { - PMIX_VALUE_RELEASE(val); - } - PMIX_RELEASE(cb); - return; - } - - /* not finding it is not an error - it could be in the - * modex hash table, so check it */ + /* otherwise, the data must be something they "put" */ #if defined(PMIX_ENABLE_DSTORE) && (PMIX_ENABLE_DSTORE == 1) if (PMIX_SUCCESS == (rc = pmix_dstore_fetch(nptr->nspace, cb->rank, cb->key, &val))) { #else @@ -606,6 +595,16 @@ static void _getnbfn(int fd, short flags, void *cbdata) } } + /* if we are seeking "pmix" data for our own nspace, then we must fail + * as it was provided at startup - any updates would have come via + * event notifications */ + if (0 == strncmp(cb->key, "pmix", 4) && + 0 == strncmp(cb->nspace, pmix_globals.myid.nspace, PMIX_MAX_NSLEN)) { + cb->value_cbfunc(PMIX_ERR_NOT_FOUND, NULL, cb->cbdata); + PMIX_RELEASE(cb); + return; + } + /* see if we already have a request in place with the server for data from * this nspace:rank. 
If we do, then no need to ask again as the * request will return _all_ data from that proc */ @@ -628,6 +627,11 @@ static void _getnbfn(int fd, short flags, void *cbdata) return; } + pmix_output_verbose(2, pmix_globals.debug_output, + "%s:%d REQUESTING DATA FROM SERVER FOR %s:%d KEY %s", + pmix_globals.myid.nspace, pmix_globals.myid.rank, + cb->nspace, cb->rank, cb->key); + /* create a callback object as we need to pass it to the * recv routine so we know which callback to use when * the return message is recvd */ diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c index ff3d22628c..345ea0b442 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server.c @@ -983,10 +983,9 @@ PMIX_EXPORT pmix_status_t PMIx_server_setup_fork(const pmix_proc_t *proc, char * pmix_setenv("PMIX_RANK", rankstr, true, env); /* pass our rendezvous info */ PMIX_LIST_FOREACH(lt, &pmix_server_globals.listeners, pmix_listener_t) { - if (NULL == lt->uri) { - continue; + if (NULL != lt->uri && NULL != lt->varname) { + pmix_setenv(lt->varname, lt->uri, true, env); } - pmix_setenv(lt->varname, lt->uri, true, env); } /* pass our active security mode */ pmix_setenv("PMIX_SECURITY_MODE", security_mode, true, env); diff --git a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c index 971b2ce757..66189c6c27 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c +++ b/opal/mca/pmix/pmix2x/pmix/src/server/pmix_server_get.c @@ -88,6 +88,7 @@ static void dmdx_cbfunc(pmix_status_t status, const char *data, size_t ndata, void *cbdata, pmix_release_cbfunc_t relfn, void *relcbdata); static pmix_status_t _satisfy_request(pmix_nspace_t *ns, int rank, + pmix_server_caddy_t *cd, pmix_modex_cbfunc_t cbfunc, void *cbdata, bool *scope); static pmix_status_t create_local_tracker(char nspace[], int rank, pmix_info_t info[], 
size_t ninfo, @@ -110,6 +111,7 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, pmix_modex_cbfunc_t cbfunc, void *cbdata) { + pmix_server_caddy_t *cd = (pmix_server_caddy_t*)cbdata; int32_t cnt; pmix_status_t rc; int rank; @@ -120,6 +122,9 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, size_t ninfo=0; pmix_dmdx_local_t *lcd; bool local; + pmix_buffer_t pbkt; + char *data; + size_t sz; pmix_output_verbose(2, pmix_globals.debug_output, "recvd GET"); @@ -166,9 +171,11 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, } pmix_output_verbose(2, pmix_globals.debug_output, - "%s:%d EXECUTE GET FOR %s:%d", + "%s:%d EXECUTE GET FOR %s:%d ON BEHALF OF %s:%d", pmix_globals.myid.nspace, - pmix_globals.myid.rank, nspace, rank); + pmix_globals.myid.rank, nspace, rank, + cd->peer->info->nptr->nspace, + cd->peer->info->rank); if (NULL == nptr || NULL == nptr->server) { /* this is for an nspace we don't know about yet, so @@ -179,6 +186,20 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, return rc; } + /* if the rank is wildcard, then they are asking for the job-level + * info for this nspace - provide it */ + if (PMIX_RANK_WILDCARD == rank) { + PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); + pmix_bfrop.pack(&pbkt, &rank, 1, PMIX_INT); + /* the client is expecting this to arrive as a byte object + * containing a buffer, so package it accordingly */ + pmix_bfrop.pack(&pbkt, &nptr->server->job_info, 1, PMIX_BUFFER); + PMIX_UNLOAD_BUFFER(&pbkt, data, sz); + PMIX_DESTRUCT(&pbkt); + cbfunc(PMIX_SUCCESS, data, sz, cbdata, relfn, data); + return PMIX_SUCCESS; + } + /* We have to wait for all local clients to be registered before * we can know whether this request is for data from a local or a * remote client because one client might ask for data about another @@ -194,27 +215,13 @@ pmix_status_t pmix_server_get(pmix_buffer_t *buf, } /* see if we already have this data */ - rc = _satisfy_request(nptr, rank, cbfunc, cbdata, &local); + rc = _satisfy_request(nptr, rank, cd, cbfunc, 
cbdata, &local); if( PMIX_SUCCESS == rc ){ /* request was successfully satisfied */ PMIX_INFO_FREE(info, ninfo); return rc; } - /* do not force dmodex logic for non-specific ranks - * let return not found status instead of doing fence with - * data exchange. User can make a decision to do such call getting - * not found status - */ - if (PMIX_RANK_UNDEF == rank || PMIX_RANK_WILDCARD == rank) { - pmix_output_verbose(2, pmix_globals.debug_output, - "%s:%d not found data for namespace = %s, rank = %d " - "(do not request resource manager server for non-specified rank)", - pmix_globals.myid.nspace, - pmix_globals.myid.rank, nspace, rank); - return PMIX_ERR_NOT_FOUND; - } - /* If we get here, then we don't have the data at this time. Check * to see if we already have a pending request for the data - if * we do, then we can just wait for it to arrive */ @@ -355,7 +362,7 @@ void pmix_pending_nspace_requests(pmix_nspace_t *nptr) } } -static pmix_status_t _satisfy_request(pmix_nspace_t *nptr, int rank, +static pmix_status_t _satisfy_request(pmix_nspace_t *nptr, int rank, pmix_server_caddy_t *cd, pmix_modex_cbfunc_t cbfunc, void *cbdata, bool *scope) { pmix_status_t rc; @@ -364,7 +371,7 @@ static pmix_status_t _satisfy_request(pmix_nspace_t *nptr, int rank, size_t sz; int cur_rank; int found = 0; - pmix_buffer_t pbkt; + pmix_buffer_t pbkt, *pbptr; void *last; pmix_hash_table_t *hts[3]; pmix_hash_table_t **htptr; @@ -404,6 +411,27 @@ static pmix_status_t _satisfy_request(pmix_nspace_t *nptr, int rank, * having been committed */ htptr = hts; PMIX_CONSTRUCT(&pbkt, pmix_buffer_t); + + /* if they are asking about a rank from an nspace different + * from their own, then include a copy of the job-level info */ + if (NULL != cd && + 0 != strncmp(nptr->nspace, cd->peer->info->nptr->nspace, PMIX_MAX_NSLEN)) { + cur_rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&pbkt, &cur_rank, 1, PMIX_INT))) { + PMIX_ERROR_LOG(rc); + cbfunc(rc, NULL, 0, cbdata, relfn, data); 
+ return rc; + } + /* the client is expecting this to arrive as a byte object + * containing a buffer, so package it accordingly */ + pbptr = &nptr->server->job_info; + if (PMIX_SUCCESS != (rc = pmix_bfrop.pack(&pbkt, &pbptr, 1, PMIX_BUFFER))) { + PMIX_ERROR_LOG(rc); + cbfunc(rc, NULL, 0, cbdata, relfn, data); + return rc; + } + } + while (NULL != *htptr) { cur_rank = rank; if (PMIX_RANK_UNDEF == rank) { @@ -494,7 +522,7 @@ pmix_status_t pmix_pending_resolve(pmix_nspace_t *nptr, int rank, /* run through all the requests to this rank */ PMIX_LIST_FOREACH(req, &lcd->loc_reqs, pmix_dmdx_request_t) { pmix_status_t rc; - rc = _satisfy_request(nptr, rank, req->cbfunc, req->cbdata, NULL); + rc = _satisfy_request(nptr, rank, NULL, req->cbfunc, req->cbdata, NULL); if( PMIX_SUCCESS != rc ){ /* if we can't satisfy this particular request (missing key?) */ req->cbfunc(rc, NULL, 0, req->cbdata, NULL, NULL); diff --git a/opal/mca/pmix/pmix2x/pmix/test/pmix_client.c b/opal/mca/pmix/pmix2x/pmix/test/pmix_client.c index 25192c36d0..8235b0914b 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/pmix_client.c +++ b/opal/mca/pmix/pmix2x/pmix/test/pmix_client.c @@ -73,7 +73,7 @@ int main(int argc, char **argv) pmix_value_t *val = &value; test_params params; INIT_TEST_PARAMS(params); - pmix_proc_t myproc; + pmix_proc_t myproc, proc; parse_cmd(argc, argv, &params); @@ -102,7 +102,9 @@ int main(int argc, char **argv) } TEST_VERBOSE((" Client ns %s rank %d: PMIx_Init success", myproc.nspace, myproc.rank)); - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc,PMIX_UNIV_SIZE,NULL, 0,&val))) { + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { TEST_ERROR(("rank %d: PMIx_Get universe size failed: %d", myproc.rank, rc)); FREE_TEST_PARAMS(params); exit(0); diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c index 
c71e3469c7..0e57621406 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpclient.c @@ -101,7 +101,9 @@ int main(int argc, char **argv) } /* get our universe size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_UNIV_SIZE, NULL, 0, &val))) { + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); goto done; } diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdmodex.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdmodex.c index 05dd8acebf..94039b926d 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdmodex.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdmodex.c @@ -104,7 +104,9 @@ int main(int argc, char **argv) pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); /* get our universe size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_UNIV_SIZE, NULL, 0, &val))) { + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); goto done; } diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdyn.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdyn.c index 99987e606a..2ec7028c6a 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpdyn.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpdyn.c @@ -64,7 +64,9 @@ int main(int argc, char **argv) pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); /* get our universe size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_UNIV_SIZE, NULL, 0, &val))) { + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + if 
(PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); goto done; } diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simpft.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simpft.c index afe2a97523..61d006da4e 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simpft.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simpft.c @@ -85,7 +85,9 @@ int main(int argc, char **argv) pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); /* get our universe size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_UNIV_SIZE, NULL, 0, &val))) { + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); goto done; } diff --git a/opal/mca/pmix/pmix2x/pmix/test/simple/simppub.c b/opal/mca/pmix/pmix2x/pmix/test/simple/simppub.c index 42b07df810..4d73146567 100644 --- a/opal/mca/pmix/pmix2x/pmix/test/simple/simppub.c +++ b/opal/mca/pmix/pmix2x/pmix/test/simple/simppub.c @@ -56,7 +56,9 @@ int main(int argc, char **argv) pmix_output(0, "Client ns %s rank %d: Running", myproc.nspace, myproc.rank); /* get our universe size */ - if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_UNIV_SIZE, NULL, 0, &val))) { + (void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN); + proc.rank = PMIX_RANK_WILDCARD; + if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) { pmix_output(0, "Client ns %s rank %d: PMIx_Get universe size failed: %d", myproc.nspace, myproc.rank, rc); goto done; } diff --git a/opal/mca/pmix/pmix2x/pmix2x.c b/opal/mca/pmix/pmix2x/pmix2x.c index a5faef7184..dc1b6a5ad5 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.c +++ b/opal/mca/pmix/pmix2x/pmix2x.c @@ -385,7 +385,7 @@ void pmix2x_event_hdlr(size_t 
evhdlr_registration_id, OBJ_RELEASE(cd); return; } - cd->pname.vpid = source->rank; + cd->pname.vpid = pmix2x_convert_rank(source->rank); } /* convert the array of info */ @@ -417,6 +417,30 @@ void pmix2x_event_hdlr(size_t evhdlr_registration_id, } } +opal_vpid_t pmix2x_convert_rank(int rank) +{ + switch(rank) { + case PMIX_RANK_UNDEF: + return OPAL_VPID_INVALID; + case PMIX_RANK_WILDCARD: + return OPAL_VPID_WILDCARD; + default: + return (opal_vpid_t)rank; + } +} + +int pmix2x_convert_opalrank(opal_vpid_t vpid) +{ + switch(vpid) { + case OPAL_VPID_WILDCARD: + return PMIX_RANK_WILDCARD; + case OPAL_VPID_INVALID: + return PMIX_RANK_UNDEF; + default: + return (int)vpid; + } +} + pmix_status_t pmix2x_convert_opalrc(int rc) { switch (rc) { @@ -717,7 +741,7 @@ void pmix2x_value_load(pmix_value_t *v, case OPAL_NAME: v->type = PMIX_PROC; (void)opal_snprintf_jobid(v->data.proc.nspace, PMIX_MAX_NSLEN, kv->data.name.jobid); - v->data.proc.rank = kv->data.name.vpid; + v->data.proc.rank = pmix2x_convert_opalrank(kv->data.name.vpid); break; default: /* silence warnings */ @@ -825,7 +849,7 @@ int pmix2x_value_unload(opal_value_t *kv, if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&kv->data.name.jobid, v->data.proc.nspace))) { return pmix2x_convert_opalrc(rc); } - kv->data.name.vpid = v->data.proc.rank; + kv->data.name.vpid = pmix2x_convert_rank(v->data.proc.rank); break; default: /* silence warnings */ diff --git a/opal/mca/pmix/pmix2x/pmix2x.h b/opal/mca/pmix/pmix2x/pmix2x.h index 801bb5444a..3c824534dd 100644 --- a/opal/mca/pmix/pmix2x/pmix2x.h +++ b/opal/mca/pmix/pmix2x/pmix2x.h @@ -300,6 +300,8 @@ OPAL_MODULE_DECLSPEC void pmix2x_event_hdlr(size_t evhdlr_registration_id, void *cbdata); OPAL_MODULE_DECLSPEC pmix_status_t pmix2x_convert_opalrc(int rc); OPAL_MODULE_DECLSPEC int pmix2x_convert_rc(pmix_status_t rc); +OPAL_MODULE_DECLSPEC opal_vpid_t pmix2x_convert_rank(int rank); +OPAL_MODULE_DECLSPEC int pmix2x_convert_opalrank(opal_vpid_t vpid); OPAL_MODULE_DECLSPEC 
pmix_scope_t pmix2x_convert_opalscope(opal_pmix_scope_t scope); OPAL_MODULE_DECLSPEC pmix_data_range_t pmix2x_convert_opalrange(opal_pmix_data_range_t range); OPAL_MODULE_DECLSPEC opal_pmix_data_range_t pmix2x_convert_range(pmix_data_range_t range); diff --git a/opal/mca/pmix/pmix2x/pmix2x_client.c b/opal/mca/pmix/pmix2x/pmix2x_client.c index dff3f1dc7b..1c4dd9aa8f 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_client.c +++ b/opal/mca/pmix/pmix2x/pmix2x_client.c @@ -94,7 +94,7 @@ int pmix2x_client_init(void) job->jobid = pname.jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); - pname.vpid = my_proc.rank; + pname.vpid = pmix2x_convert_rank(my_proc.rank); opal_proc_set_name(&pname); /* register the default event handler */ @@ -157,7 +157,7 @@ int pmix2x_abort(int flag, const char *msg, return OPAL_ERR_NOT_FOUND; } (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - parray[n].rank = ptr->name.vpid; + parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } } @@ -196,11 +196,11 @@ int pmix2x_store_local(const opal_process_name_t *proc, opal_value_t *val) return OPAL_ERR_NOT_FOUND; } (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); - p.rank = proc->vpid; + p.rank = pmix2x_convert_opalrank(proc->vpid); } else { /* use our name */ (void)strncpy(p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); - p.rank = OPAL_PROC_MY_NAME.vpid; + p.rank = pmix2x_convert_opalrank(OPAL_PROC_MY_NAME.vpid); } PMIX_VALUE_CONSTRUCT(&kv); @@ -261,7 +261,7 @@ int pmix2x_fence(opal_list_t *procs, int collect_data) return OPAL_ERR_NOT_FOUND; } (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - parray[n].rank = ptr->name.vpid; + parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } } @@ -327,7 +327,7 @@ int pmix2x_fencenb(opal_list_t *procs, int collect_data, return OPAL_ERR_NOT_FOUND; } (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - parray[n].rank = ptr->name.vpid; + parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); 
++n; } } @@ -412,7 +412,7 @@ int pmix2x_get(const opal_process_name_t *proc, const char *key, return OPAL_ERR_NOT_FOUND; } (void)strncpy(p.nspace, job->nspace, PMIX_MAX_NSLEN); - p.rank = proc->vpid; + p.rank = pmix2x_convert_opalrank(proc->vpid); pptr = &p; } else { /* if they are asking for our jobid, then return it */ @@ -424,7 +424,7 @@ int pmix2x_get(const opal_process_name_t *proc, const char *key, } else if (0 == strcmp(key, OPAL_PMIX_RANK)) { (*val) = OBJ_NEW(opal_value_t); (*val)->type = OPAL_INT; - (*val)->data.integer = my_proc.rank; + (*val)->data.integer = pmix2x_convert_rank(my_proc.rank); return OPAL_SUCCESS; } pptr = NULL; @@ -520,10 +520,10 @@ int pmix2x_getnb(const opal_process_name_t *proc, const char *key, return OPAL_ERR_NOT_FOUND; } (void)strncpy(op->p.nspace, job->nspace, PMIX_MAX_NSLEN); - op->p.rank = proc->vpid; + op->p.rank = pmix2x_convert_opalrank(proc->vpid); } else { (void)strncpy(op->p.nspace, my_proc.nspace, PMIX_MAX_NSLEN); - op->p.rank = PMIX_RANK_WILDCARD; + op->p.rank = pmix2x_convert_rank(PMIX_RANK_WILDCARD); } if (NULL != info) { @@ -686,11 +686,7 @@ int pmix2x_lookup(opal_list_t *data, opal_list_t *info) job->jobid = d->proc.jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); } - if (PMIX_RANK_WILDCARD == pdata[n].proc.rank) { - d->proc.vpid = OPAL_VPID_WILDCARD; - } else { - d->proc.vpid = pdata[n].proc.rank; - } + d->proc.vpid = pmix2x_convert_rank(pdata[n].proc.rank); rc = pmix2x_value_unload(&d->value, &pdata[n].value); if (OPAL_SUCCESS != rc) { OPAL_ERROR_LOG(rc); @@ -753,11 +749,7 @@ static void lk_cbfunc(pmix_status_t status, job->jobid = d->proc.jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); } - if (PMIX_RANK_WILDCARD == data[n].proc.rank) { - d->proc.vpid = OPAL_VPID_WILDCARD; - } else { - d->proc.vpid = data[n].proc.rank; - } + d->proc.vpid = pmix2x_convert_rank(data[n].proc.rank); d->value.key = strdup(data[n].key); rc = pmix2x_value_unload(&d->value, &data[n].value); 
if (OPAL_SUCCESS != rc) { @@ -1052,11 +1044,7 @@ int pmix2x_connect(opal_list_t *procs) return OPAL_ERR_NOT_FOUND; } (void)strncpy(parray[n].nspace, job->nspace, PMIX_MAX_NSLEN); - if (OPAL_VPID_WILDCARD == ptr->name.vpid) { - parray[n].rank = PMIX_RANK_WILDCARD; - } else { - parray[n].rank = ptr->name.vpid; - } + parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } @@ -1103,11 +1091,7 @@ int pmix2x_connectnb(opal_list_t *procs, break; } } - if (OPAL_VPID_WILDCARD == ptr->name.vpid) { - op->procs[n].rank = PMIX_RANK_WILDCARD; - } else { - op->procs[n].rank = ptr->name.vpid; - } + op->procs[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } @@ -1142,11 +1126,7 @@ int pmix2x_disconnect(opal_list_t *procs) break; } } - if (OPAL_VPID_WILDCARD == ptr->name.vpid) { - parray[n].rank = PMIX_RANK_WILDCARD; - } else { - parray[n].rank = ptr->name.vpid; - } + parray[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } @@ -1193,11 +1173,7 @@ int pmix2x_disconnectnb(opal_list_t *procs, break; } } - if (OPAL_VPID_WILDCARD == ptr->name.vpid) { - op->procs[n].rank = PMIX_RANK_WILDCARD; - } else { - op->procs[n].rank = ptr->name.vpid; - } + op->procs[n].rank = pmix2x_convert_opalrank(ptr->name.vpid); ++n; } @@ -1267,7 +1243,7 @@ int pmix2x_resolve_peers(const char *nodename, opal_jobid_t jobid, job->jobid = jobid; opal_list_append(&mca_pmix_pmix2x_component.jobids, &job->super); } - nm->name.vpid = array[n].rank; + nm->name.vpid = pmix2x_convert_rank(array[n].rank); } } PMIX_PROC_FREE(array, nprocs); diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_north.c b/opal/mca/pmix/pmix2x/pmix2x_server_north.c index 7a9116f490..2719ac290f 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_north.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_north.c @@ -149,7 +149,7 @@ static pmix_status_t server_client_connected_fn(const pmix_proc_t *p, void *serv if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { return pmix2x_convert_opalrc(rc); } - proc.vpid 
= p->rank; + proc.vpid = pmix2x_convert_rank(p->rank); /* pass it up */ rc = host_module->client_connected(&proc, server_object, @@ -172,7 +172,7 @@ static pmix_status_t server_client_finalized_fn(const pmix_proc_t *p, void* serv if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { return pmix2x_convert_opalrc(rc); } - proc.vpid = p->rank; + proc.vpid = pmix2x_convert_rank(p->rank); /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); @@ -206,7 +206,7 @@ static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { return pmix2x_convert_opalrc(rc); } - proc.vpid = p->rank; + proc.vpid = pmix2x_convert_rank(p->rank); /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); @@ -221,11 +221,7 @@ static pmix_status_t server_abort_fn(const pmix_proc_t *p, void *server_object, OBJ_RELEASE(opalcaddy); return pmix2x_convert_opalrc(rc); } - if (PMIX_RANK_WILDCARD == procs[n].rank) { - nm->name.vpid = OPAL_VPID_WILDCARD; - } else { - nm->name.vpid = procs[n].rank; - } + nm->name.vpid = pmix2x_convert_rank(procs[n].rank); } /* pass it up */ @@ -292,11 +288,7 @@ static pmix_status_t server_fencenb_fn(const pmix_proc_t procs[], size_t nprocs, OBJ_RELEASE(opalcaddy); return pmix2x_convert_opalrc(rc); } - if (PMIX_RANK_WILDCARD == procs[n].rank) { - nm->name.vpid = OPAL_VPID_WILDCARD; - } else { - nm->name.vpid = procs[n].rank; - } + nm->name.vpid = pmix2x_convert_rank(procs[n].rank); } /* convert the array of pmix_info_t to the list of info */ @@ -337,11 +329,7 @@ static pmix_status_t server_dmodex_req_fn(const pmix_proc_t *p, if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { return pmix2x_convert_opalrc(rc); } - if (PMIX_RANK_WILDCARD == p->rank) { - proc.vpid = OPAL_VPID_WILDCARD; - } else { - proc.vpid = p->rank; - } + proc.vpid = pmix2x_convert_rank(p->rank); /* setup the caddy */ opalcaddy = 
OBJ_NEW(pmix2x_opalcaddy_t); @@ -388,11 +376,7 @@ static pmix_status_t server_publish_fn(const pmix_proc_t *p, if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { return pmix2x_convert_opalrc(rc); } - if (PMIX_RANK_WILDCARD == p->rank) { - proc.vpid = OPAL_VPID_WILDCARD; - } else { - proc.vpid = p->rank; - } + proc.vpid = pmix2x_convert_rank(p->rank); /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); @@ -439,7 +423,7 @@ static void opal_lkupcbfunc(int status, OPAL_LIST_FOREACH(p, data, opal_pmix_pdata_t) { /* convert the jobid */ (void)opal_snprintf_jobid(d[n].proc.nspace, PMIX_MAX_NSLEN, p->proc.jobid); - d[n].proc.rank = p->proc.vpid; + d[n].proc.rank = pmix2x_convert_opalrank(p->proc.vpid); (void)strncpy(d[n].key, p->value.key, PMIX_MAX_KEYLEN); pmix2x_value_load(&d[n].value, &p->value); } @@ -467,11 +451,7 @@ static pmix_status_t server_lookup_fn(const pmix_proc_t *p, char **keys, if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { return pmix2x_convert_opalrc(rc); } - if (PMIX_RANK_WILDCARD == p->rank) { - proc.vpid = OPAL_VPID_WILDCARD; - } else { - proc.vpid = p->rank; - } + proc.vpid = pmix2x_convert_rank(p->rank); /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); @@ -517,11 +497,7 @@ static pmix_status_t server_unpublish_fn(const pmix_proc_t *p, char **keys, if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { return pmix2x_convert_opalrc(rc); } - if (PMIX_RANK_WILDCARD == p->rank) { - proc.vpid = OPAL_VPID_WILDCARD; - } else { - proc.vpid = p->rank; - } + proc.vpid = pmix2x_convert_rank(p->rank); /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); @@ -583,11 +559,7 @@ static pmix_status_t server_spawn_fn(const pmix_proc_t *p, if (OPAL_SUCCESS != (rc = opal_convert_string_to_jobid(&proc.jobid, p->nspace))) { return pmix2x_convert_opalrc(rc); } - if (PMIX_RANK_WILDCARD == p->rank) { - proc.vpid = OPAL_VPID_WILDCARD; - } else { - 
proc.vpid = p->rank; - } + proc.vpid = pmix2x_convert_rank(p->rank); /* setup the caddy */ opalcaddy = OBJ_NEW(pmix2x_opalcaddy_t); @@ -669,11 +641,7 @@ static pmix_status_t server_connect_fn(const pmix_proc_t procs[], size_t nprocs, OBJ_RELEASE(opalcaddy); return pmix2x_convert_opalrc(rc); } - if (PMIX_RANK_WILDCARD == procs[n].rank) { - nm->name.vpid = OPAL_VPID_WILDCARD; - } else { - nm->name.vpid = procs[n].rank; - } + nm->name.vpid = pmix2x_convert_rank(procs[n].rank); } /* convert the info */ @@ -724,11 +692,7 @@ static pmix_status_t server_disconnect_fn(const pmix_proc_t procs[], size_t npro OBJ_RELEASE(opalcaddy); return pmix2x_convert_opalrc(rc); } - if (PMIX_RANK_WILDCARD == procs[n].rank) { - nm->name.vpid = OPAL_VPID_WILDCARD; - } else { - nm->name.vpid = procs[n].rank; - } + nm->name.vpid = pmix2x_convert_rank(procs[n].rank); } /* convert the info */ @@ -874,7 +838,7 @@ static pmix_status_t server_query(pmix_proc_t *proct, OBJ_RELEASE(opalcaddy); return pmix2x_convert_opalrc(rc); } - requestor.vpid = proct->rank; + requestor.vpid = pmix2x_convert_rank(proct->rank); /* convert the info */ for (n=0; n < ninfo; n++) { @@ -912,7 +876,7 @@ static void toolcbfunc(int status, /* convert the process name */ (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc.jobid); - p.rank = proc.vpid; + p.rank = pmix2x_convert_opalrank(proc.vpid); /* pass it down */ if (NULL != opalcaddy->toolcbfunc) { diff --git a/opal/mca/pmix/pmix2x/pmix2x_server_south.c b/opal/mca/pmix/pmix2x/pmix2x_server_south.c index e615c05202..c73e7fc124 100644 --- a/opal/mca/pmix/pmix2x/pmix2x_server_south.c +++ b/opal/mca/pmix/pmix2x/pmix2x_server_south.c @@ -345,7 +345,7 @@ int pmix2x_server_register_client(const opal_process_name_t *proc, /* convert the jobid */ (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc->jobid); - p.rank = proc->vpid; + p.rank = pmix2x_convert_opalrank(proc->vpid); OBJ_CONSTRUCT(&op, pmix2x_opcaddy_t); op.active = true; @@ -370,7 +370,7 @@ static void 
_dereg_client(int sd, short args, void *cbdata) if (jptr->jobid == cd->source->jobid) { /* found it - tell the server to deregister */ (void)strncpy(p.nspace, jptr->nspace, PMIX_MAX_NSLEN); - p.rank = cd->source->vpid; + p.rank = pmix2x_convert_opalrank(cd->source->vpid); cd->active = true; PMIx_server_deregister_client(&p, tdcbfunc, (void*)cd); PMIX_WAIT_FOR_COMPLETION(cd->active); @@ -411,7 +411,7 @@ int pmix2x_server_setup_fork(const opal_process_name_t *proc, char ***env) /* convert the jobid */ (void)opal_snprintf_jobid(p.nspace, PMIX_MAX_NSLEN, proc->jobid); - p.rank = proc->vpid; + p.rank = pmix2x_convert_opalrank(proc->vpid); rc = PMIx_server_setup_fork(&p, env); return pmix2x_convert_rc(rc); @@ -446,7 +446,7 @@ int pmix2x_server_dmodex(const opal_process_name_t *proc, /* convert the jobid */ (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, proc->jobid); - op->p.rank = proc->vpid; + op->p.rank = pmix2x_convert_opalrank(proc->vpid); /* find the internally-cached data for this proc */ rc = PMIx_server_dmodex_request(&op->p, dmdx_response, op); @@ -490,10 +490,10 @@ int pmix2x_server_notify_event(int status, /* convert the jobid */ if (NULL == source) { (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, OPAL_JOBID_INVALID); - op->p.rank = OPAL_VPID_INVALID; + op->p.rank = pmix2x_convert_opalrank(OPAL_VPID_INVALID); } else { (void)opal_snprintf_jobid(op->p.nspace, PMIX_MAX_NSLEN, source->jobid); - op->p.rank = source->vpid; + op->p.rank = pmix2x_convert_opalrank(source->vpid); } diff --git a/opal/mca/pmix/pmix_types.h b/opal/mca/pmix/pmix_types.h index 77ce60b324..840366a324 100644 --- a/opal/mca/pmix/pmix_types.h +++ b/opal/mca/pmix/pmix_types.h @@ -95,7 +95,8 @@ BEGIN_C_DECLS #define OPAL_PMIX_NODEID "pmix.nodeid" // (uint32_t) node identifier #define OPAL_PMIX_LOCAL_PEERS "pmix.lpeers" // (char*) comma-delimited string of ranks on this node within the specified nspace #define OPAL_PMIX_LOCAL_CPUSETS "pmix.lcpus" // (char*) colon-delimited cpusets 
of local peers within the specified nspace -#define OPAL_PMIX_PROC_URI "pmix.puri" // (char*) URI containing contact info for proc +#define OPAL_PMIX_PROC_URI "opal.puri" // (char*) URI containing contact info for proc - NOTE: this is published by procs and + // thus cannot be prefixed with "pmix" /* size info */ #define OPAL_PMIX_UNIV_SIZE "pmix.univ.size" // (uint32_t) #procs in this nspace diff --git a/orte/mca/ess/pmi/ess_pmi_module.c b/orte/mca/ess/pmi/ess_pmi_module.c index e520eebbc5..3159ae9d85 100644 --- a/orte/mca/ess/pmi/ess_pmi_module.c +++ b/orte/mca/ess/pmi/ess_pmi_module.c @@ -136,6 +136,10 @@ static int rte_init(void) ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid; ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid; + /* setup a name for retrieving data associated with the job */ + name.jobid = ORTE_PROC_MY_NAME->jobid; + name.vpid = ORTE_NAME_WILDCARD->vpid; + /* get our local rank from PMI */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_RANK, ORTE_PROC_MY_NAME, &u16ptr, OPAL_UINT16); @@ -154,9 +158,9 @@ static int rte_init(void) } orte_process_info.my_node_rank = u16; - /* get max procs */ + /* get max procs for this application */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_MAX_PROCS, - ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); + &name, &u32ptr, OPAL_UINT32); if (OPAL_SUCCESS != ret) { error = "getting max procs"; goto error; @@ -165,7 +169,7 @@ static int rte_init(void) /* get job size */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_JOB_SIZE, - ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); + &name, &u32ptr, OPAL_UINT32); if (OPAL_SUCCESS != ret) { error = "getting job size"; goto error;