diff --git a/ompi/proc/proc.c b/ompi/proc/proc.c index 79ceb3ec68..d34558ee71 100644 --- a/ompi/proc/proc.c +++ b/ompi/proc/proc.c @@ -140,7 +140,7 @@ static int ompi_proc_complete_init_single (ompi_proc_t *proc) /* get the locality information - all RTEs are required * to provide this information at startup */ - OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc->super.proc_name, &u16ptr, OPAL_UINT16); + OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY, &proc->super.proc_name, &u16ptr, OPAL_UINT16); if (OPAL_SUCCESS != ret) { proc->super.proc_flags = OPAL_PROC_NON_LOCAL; } else { @@ -149,10 +149,10 @@ static int ompi_proc_complete_init_single (ompi_proc_t *proc) /* we can retrieve the hostname at no cost because it * was provided at startup */ - OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_HOSTNAME, &proc->super.proc_name, - (char**)&(proc->super.proc_hostname), OPAL_STRING); + OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_HOSTNAME, &proc->super.proc_name, + (char**)&(proc->super.proc_hostname), OPAL_STRING); if (OPAL_SUCCESS != ret) { - return ret; + return ret; } #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT /* get the remote architecture - this might force a modex except @@ -345,7 +345,7 @@ int ompi_proc_complete_init(void) /* the runtime is required to fill in locality for all local processes by this * point. only local processes will have locality set */ - OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc_name, &u16ptr, OPAL_UINT16); + OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY, &proc_name, &u16ptr, OPAL_UINT16); if (OPAL_SUCCESS == ret) { locality = u16; } diff --git a/opal/mca/pmix/cray/pmix_cray.c b/opal/mca/pmix/cray/pmix_cray.c index 0b6d9d7813..9d36d6e6ee 100644 --- a/opal/mca/pmix/cray/pmix_cray.c +++ b/opal/mca/pmix/cray/pmix_cray.c @@ -61,9 +61,10 @@ static int cray_fence_nb(opal_list_t *procs, int collect_data, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); static int cray_commit(void); static int cray_get(const opal_process_name_t *id, - const char *key, + const char *key, opal_list_t *info, opal_value_t **kv); static int cray_get_nb(const opal_process_name_t *id, const char *key, + opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata); static int cray_publish(opal_list_t *info); static int cray_publish_nb(opal_list_t *info, @@ -735,7 +736,7 @@ static int cray_fence_nb(opal_list_t *procs, int collect_data, return OPAL_ERR_NOT_IMPLEMENTED; } -static int cray_get(const opal_process_name_t *id, const char *key, opal_value_t **kv) +static int cray_get(const opal_process_name_t *id, const char *key, opal_list_t *info, opal_value_t **kv) { int rc; opal_list_t vals; @@ -762,7 +763,7 @@ static int cray_get(const opal_process_name_t *id, const char *key, opal_value_t } static int cray_get_nb(const opal_process_name_t *id, const char *key, - opal_pmix_value_cbfunc_t cbfunc, void *cbdata) + opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata) { return OPAL_ERR_NOT_IMPLEMENTED; } diff --git a/opal/mca/pmix/pmix.h b/opal/mca/pmix/pmix.h index b8b3127311..3b1847e445 100644 --- a/opal/mca/pmix/pmix.h +++ b/opal/mca/pmix/pmix.h @@ -108,6 +108,43 @@ extern int opal_pmix_verbose_output; free(_key); \ } while(0); +/** + * Provide a simplified macro for retrieving modex data + * from another process when we don't want the PMIx module + * to request it from the server if not found: + * + * r - the integer return status from the modex op (int) + * s - string key (char*) + * p - pointer to the opal_process_name_t of the proc that posted + * the data (opal_process_name_t*) + * d - pointer to a location wherein the data object + * is to be returned + * t - the expected data type + */ +#define OPAL_MODEX_RECV_VALUE_OPTIONAL(r, s, p, d, t) \ + do { \ + opal_value_t *_kv, *_info; \ + opal_list_t _ilist; \ + OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \ + "%s[%s:%d] MODEX RECV VALUE OPTIONAL FOR PROC %s KEY %s", \ + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ + __FILE__, __LINE__, \ + OPAL_NAME_PRINT(*(p)), (s))); \ + OBJ_CONSTRUCT(&(_ilist), opal_list_t); \ + _info = OBJ_NEW(opal_value_t); \ + _info->key = strdup(OPAL_PMIX_OPTIONAL); \ + _info->type = OPAL_BOOL; \ + _info->data.flag = true; \ + opal_list_append(&(_ilist), &(_info)->super); \ + if (OPAL_SUCCESS != ((r) = opal_pmix.get((p), (s), &(_ilist), &(_kv)))) { \ + *(d) = NULL; \ + } else { \ + (r) = opal_value_unload(_kv, (void**)(d), (t)); \ + OBJ_RELEASE(_kv); \ + } \ + OPAL_LIST_DESTRUCT(&(_ilist)); \ + } while(0); + /** * Provide a simplified macro for retrieving modex data * from another process: @@ -128,7 +165,7 @@ extern int opal_pmix_verbose_output; OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ __FILE__, __LINE__, \ OPAL_NAME_PRINT(*(p)), (s))); \ - if (OPAL_SUCCESS != ((r) = opal_pmix.get((p), (s), &(_kv)))) { \ + if (OPAL_SUCCESS != ((r) = opal_pmix.get((p), (s), NULL, &(_kv)))) { \ *(d) = NULL; \ } else { \ (r) = opal_value_unload(_kv, (void**)(d), (t)); \ @@ -157,7 +194,7 @@ extern int opal_pmix_verbose_output; OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ __FILE__, __LINE__, \ OPAL_NAME_PRINT(*(p)), (s))); \ - if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), &(_kv))) && \ + if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), NULL, &(_kv))) && \ NULL != _kv) { \ *(d) = _kv->data.bo.bytes; \ *(sz) = _kv->data.bo.size; \ @@ -301,23 +338,24 @@ typedef int (*opal_pmix_base_module_put_fn_t)(opal_pmix_scope_t scope, opal_value_t *val); /* Retrieve information for the specified _key_ as published by the rank - * and jobid i the provided opal_process_name, returning a pointer to the value in the - * given address. + * and jobid i the provided opal_process_name, and subject to any provided + * constraints, returning a pointer to the value in the given address. * * This is a blocking operation - the caller will block until * the specified data has been _PMIx_Put_ by the specified rank. The caller is * responsible for freeing all memory associated with the returned value when * no longer required. */ typedef int (*opal_pmix_base_module_get_fn_t)(const opal_process_name_t *proc, - const char *key, + const char *key, opal_list_t *info, opal_value_t **val); /* Retrieve information for the specified _key_ as published by the given rank - * and jobid in the opal_process_name_t. This is a non-blocking operation - the + * and jobid in the opal_process_name_t, and subject to any provided + * constraints. This is a non-blocking operation - the * callback function will be executed once the specified data has been _PMIx_Put_ * by the specified proc and retrieved by the local server. */ typedef int (*opal_pmix_base_module_get_nb_fn_t)(const opal_process_name_t *proc, - const char *key, + const char *key, opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata); /* Publish the given data to the "universal" nspace diff --git a/opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in b/opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in index 7af614c21c..fe191fc488 100644 --- a/opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in +++ b/opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in @@ -160,12 +160,15 @@ BEGIN_C_DECLS /* request-related info */ #define PMIX_COLLECT_DATA "pmix.collect" // (bool) collect data and return it at the end of the operation #define PMIX_TIMEOUT "pmix.timeout" // (int) time in sec before specified operation should time out -#define PMIX_WAIT "pmix.wait" // (int) caller requests that the server wait until the specified #values are found +#define PMIX_WAIT "pmix.wait" // (int) caller requests that the server wait until at least the specified + // #values are found (0 => all and is the default) #define PMIX_COLLECTIVE_ALGO "pmix.calgo" // (char*) comma-delimited list of algorithms to use for collective #define PMIX_COLLECTIVE_ALGO_REQD "pmix.calreqd" // (bool) if true, indicates that the requested choice of algo is mandatory #define PMIX_NOTIFY_COMPLETION "pmix.notecomp" // (bool) notify parent process upon termination of child job #define PMIX_RANGE "pmix.range" // (int) pmix_data_range_t value for calls to publish/lookup/unpublish #define PMIX_PERSISTENCE "pmix.persist" // (int) pmix_persistence_t value for calls to publish +#define PMIX_OPTIONAL "pmix.optional" // (bool) look only in the immediate data store for the requested value - do + // not request data from the server if not found /* attributes used by host server to pass data to the server convenience library - the * data will then be parsed and provided to the local clients */ diff --git a/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_get.c b/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_get.c index 622b6ba44c..b3cc2fdff6 100644 --- a/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_get.c +++ b/opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_get.c @@ -117,6 +117,7 @@ pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, pmix_status_t rc; char *nm; pmix_nspace_t *ns, *nptr; + size_t n; if (NULL == proc) { return PMIX_ERR_BAD_PARAM; @@ -252,7 +253,7 @@ pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, * key to eventually be found, so all we can do is return * the error */ pmix_output_verbose(2, pmix_globals.debug_output, - "Error requesting key=%s for rank = %d, namespace = %s\n", + "Error requesting key=%s for rank = %d, namespace = %s", key, proc->rank, nm); return rc; } @@ -265,6 +266,18 @@ pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key, return PMIX_ERR_NOT_FOUND; } + /* we also have to check the user's directives to see if they do not want + * us to attempt to retrieve it from the server */ + for (n=0; n < ninfo; n++) { + if (0 == strcmp(info[n].key, PMIX_OPTIONAL) && + info[n].value.data.flag) { + /* they don't want us to try and retrieve it */ + pmix_output_verbose(2, pmix_globals.debug_output, + "PMIx_Get key=%s for rank = %d, namespace = %s was not found - request was optional", + key, proc->rank, nm); + return PMIX_ERR_NOT_FOUND; + } + } /* see if we already have a request in place with the server for data from * this nspace:rank. If we do, then no need to ask again as the * request will return _all_ data from that proc */ diff --git a/opal/mca/pmix/pmix1xx/pmix1.h b/opal/mca/pmix/pmix1xx/pmix1.h index 8141c26458..f944dbd144 100644 --- a/opal/mca/pmix/pmix1xx/pmix1.h +++ b/opal/mca/pmix/pmix1xx/pmix1.h @@ -77,18 +77,18 @@ OPAL_MODULE_DECLSPEC int pmix1_client_init(void); OPAL_MODULE_DECLSPEC int pmix1_client_finalize(void); OPAL_MODULE_DECLSPEC int pmix1_initialized(void); OPAL_MODULE_DECLSPEC int pmix1_abort(int flag, const char *msg, - opal_list_t *procs); + opal_list_t *procs); OPAL_MODULE_DECLSPEC int pmix1_commit(void); OPAL_MODULE_DECLSPEC int pmix1_fence(opal_list_t *procs, int collect_data); OPAL_MODULE_DECLSPEC int pmix1_fencenb(opal_list_t *procs, int collect_data, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); OPAL_MODULE_DECLSPEC int pmix1_put(opal_pmix_scope_t scope, opal_value_t *val); -OPAL_MODULE_DECLSPEC int pmix1_get(const opal_process_name_t *proc, - const char *key, opal_value_t **val); -OPAL_MODULE_DECLSPEC int pmix1_getnb(const opal_process_name_t *proc, - const char *key, - opal_pmix_value_cbfunc_t cbfunc, void *cbdata); +OPAL_MODULE_DECLSPEC int pmix1_get(const opal_process_name_t *proc, const char *key, + opal_list_t *info, opal_value_t **val); +OPAL_MODULE_DECLSPEC int pmix1_getnb(const opal_process_name_t *proc, const char *key, + opal_list_t *info, + opal_pmix_value_cbfunc_t cbfunc, void *cbdata); OPAL_MODULE_DECLSPEC int pmix1_publish(opal_list_t *info); OPAL_MODULE_DECLSPEC int pmix1_publishnb(opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); @@ -100,17 +100,17 @@ OPAL_MODULE_DECLSPEC int pmix1_unpublishnb(char **keys, opal_list_t *info, opal_pmix_op_cbfunc_t cbfunc, void *cbdata); OPAL_MODULE_DECLSPEC int pmix1_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid); OPAL_MODULE_DECLSPEC int pmix1_spawnnb(opal_list_t *job_info, opal_list_t *apps, - opal_pmix_spawn_cbfunc_t cbfunc, void *cbdata); + opal_pmix_spawn_cbfunc_t cbfunc, void *cbdata); OPAL_MODULE_DECLSPEC int pmix1_connect(opal_list_t *procs); OPAL_MODULE_DECLSPEC int pmix1_connectnb(opal_list_t *procs, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata); + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); OPAL_MODULE_DECLSPEC int pmix1_disconnect(opal_list_t *procs); OPAL_MODULE_DECLSPEC int pmix1_disconnectnb(opal_list_t *procs, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata); + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); OPAL_MODULE_DECLSPEC int pmix1_resolve_peers(const char *nodename, opal_jobid_t jobid, - opal_list_t *procs); + opal_list_t *procs); OPAL_MODULE_DECLSPEC int pmix1_resolve_nodes(opal_jobid_t jobid, char **nodelist); /**** COMMON FUNCTIONS ****/ @@ -123,32 +123,32 @@ OPAL_MODULE_DECLSPEC int pmix1_server_finalize(void); OPAL_MODULE_DECLSPEC int pmix1_server_gen_regex(const char *input, char **regex); OPAL_MODULE_DECLSPEC int pmix1_server_gen_ppn(const char *input, char **ppn); OPAL_MODULE_DECLSPEC int pmix1_server_register_nspace(opal_jobid_t jobid, - int nlocalprocs, - opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata); + int nlocalprocs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); OPAL_MODULE_DECLSPEC int pmix1_server_register_client(const opal_process_name_t *proc, - uid_t uid, gid_t gid, - void *server_object, - opal_pmix_op_cbfunc_t cbfunc, - void *cbdata); + uid_t uid, gid_t gid, + void *server_object, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); OPAL_MODULE_DECLSPEC int pmix1_server_setup_fork(const opal_process_name_t *proc, char ***env); OPAL_MODULE_DECLSPEC int pmix1_server_dmodex(const opal_process_name_t *proc, - opal_pmix_modex_cbfunc_t cbfunc, void *cbdata); + opal_pmix_modex_cbfunc_t cbfunc, void *cbdata); OPAL_MODULE_DECLSPEC int pmix1_server_notify_error(int status, - opal_list_t *procs, - opal_list_t *error_procs, - opal_list_t *info, - opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + opal_list_t *procs, + opal_list_t *error_procs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); /**** COMPONENT UTILITY FUNCTIONS ****/ OPAL_MODULE_DECLSPEC pmix_status_t pmix1_convert_opalrc(int rc); OPAL_MODULE_DECLSPEC int pmix1_convert_rc(pmix_status_t rc); OPAL_MODULE_DECLSPEC void pmix1_value_load(pmix_value_t *v, - opal_value_t *kv); + opal_value_t *kv); OPAL_MODULE_DECLSPEC int pmix1_value_unload(opal_value_t *kv, - const pmix_value_t *v); + const pmix_value_t *v); END_C_DECLS diff --git a/opal/mca/pmix/pmix1xx/pmix1_client.c b/opal/mca/pmix/pmix1xx/pmix1_client.c index 7c9ae9f927..1f25b35504 100644 --- a/opal/mca/pmix/pmix1xx/pmix1_client.c +++ b/opal/mca/pmix/pmix1xx/pmix1_client.c @@ -290,13 +290,16 @@ int pmix1_put(opal_pmix_scope_t scope, return pmix1_convert_rc(rc); } -int pmix1_get(const opal_process_name_t *proc, - const char *key, opal_value_t **val) +int pmix1_get(const opal_process_name_t *proc, const char *key, + opal_list_t *info, opal_value_t **val) { int ret; pmix_value_t *kv; pmix_status_t rc; pmix_proc_t p, *pptr; + size_t ninfo, n; + pmix_info_t *pinfo; + opal_value_t *ival; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "%s PMIx_client get on proc %s key %s", @@ -326,8 +329,25 @@ int pmix1_get(const opal_process_name_t *proc, pptr = NULL; } + if (NULL != info) { + ninfo = opal_list_get_size(info); + if (0 < ninfo) { + PMIX_INFO_CREATE(pinfo, ninfo); + n=0; + OPAL_LIST_FOREACH(ival, info, opal_value_t) { + (void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&pinfo[n].value, ival); + } + } else { + pinfo = NULL; + } + } else { + pinfo = NULL; + ninfo = 0; + } + /* pass the request down */ - rc = PMIx_Get(pptr, key, NULL, 0, &kv); + rc = PMIx_Get(pptr, key, pinfo, ninfo, &kv); if (PMIX_SUCCESS == rc) { if (NULL == kv) { ret = OPAL_SUCCESS; @@ -339,6 +359,7 @@ int pmix1_get(const opal_process_name_t *proc, } else { ret = pmix1_convert_rc(rc); } + PMIX_INFO_FREE(pinfo, ninfo); return ret; } @@ -362,11 +383,14 @@ static void val_cbfunc(pmix_status_t status, } int pmix1_getnb(const opal_process_name_t *proc, const char *key, + opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata) { pmix1_opcaddy_t *op; pmix_status_t rc; char *tmp; + size_t n; + opal_value_t *ival; opal_output_verbose(1, opal_pmix_base_framework.framework_output, "%s PMIx_client get_nb on proc %s key %s", @@ -389,9 +413,20 @@ int pmix1_getnb(const opal_process_name_t *proc, const char *key, op->p.rank = PMIX_RANK_WILDCARD; } + if (NULL != info) { + op->sz = opal_list_get_size(info); + if (0 < op->sz) { + PMIX_INFO_CREATE(op->info, op->sz); + n=0; + OPAL_LIST_FOREACH(ival, info, opal_value_t) { + (void)strncpy(op->info[n].key, ival->key, PMIX_MAX_KEYLEN); + pmix1_value_load(&op->info[n].value, ival); + } + } + } /* call the library function */ - rc = PMIx_Get_nb(&op->p, key, NULL, 0, val_cbfunc, op); + rc = PMIx_Get_nb(&op->p, key, op->info, op->sz, val_cbfunc, op); if (PMIX_SUCCESS != rc) { OBJ_RELEASE(op); } diff --git a/opal/mca/pmix/pmix_types.h b/opal/mca/pmix/pmix_types.h index e872b48054..ca185c1e6e 100644 --- a/opal/mca/pmix/pmix_types.h +++ b/opal/mca/pmix/pmix_types.h @@ -98,12 +98,15 @@ BEGIN_C_DECLS /* request-related info */ #define OPAL_PMIX_COLLECT_DATA "pmix.collect" // (bool) collect data and return it at the end of the operation #define OPAL_PMIX_TIMEOUT "pmix.timeout" // (int) time in sec before specified operation should time out -#define OPAL_PMIX_WAIT "pmix.wait" // (int) caller requests that the server wait until the specified #values are found +#define OPAL_PMIX_WAIT "pmix.wait" // (int) caller requests that the server wait until at least the specified + // #values are found (0 => all and is the default) #define OPAL_PMIX_COLLECTIVE_ALGO "pmix.calgo" // (char*) comma-delimited list of algorithms to use for collective #define OPAL_PMIX_COLLECTIVE_ALGO_REQD "pmix.calreqd" // (bool) if true, indicates that the requested choice of algo is mandatory #define OPAL_PMIX_NOTIFY_COMPLETION "pmix.notecomp" // (bool) notify parent process upon termination of child job #define OPAL_PMIX_RANGE "pmix.range" // (int) opal_pmix_data_range_t value for calls to publish/lookup/unpublish #define OPAL_PMIX_PERSISTENCE "pmix.persist" // (int) opal_pmix_persistence_t value for calls to publish +#define OPAL_PMIX_OPTIONAL "pmix.optional" // (bool) look only in the immediate data store for the requested value - do + // not request data from the server if not found /* attribute used by host server to pass data to the server convenience library - the * data will then be parsed and provided to the local clients */ diff --git a/opal/mca/pmix/s1/pmix_s1.c b/opal/mca/pmix/s1/pmix_s1.c index 0b0a73f193..3699a19e52 100644 --- a/opal/mca/pmix/s1/pmix_s1.c +++ b/opal/mca/pmix/s1/pmix_s1.c @@ -39,7 +39,7 @@ static int s1_fence(opal_list_t *procs, int collect_data); static int s1_put(opal_pmix_scope_t scope, opal_value_t *kv); static int s1_get(const opal_process_name_t *id, - const char *key, + const char *key, opal_list_t *info, opal_value_t **kv); static int s1_publish(opal_list_t *info); static int s1_lookup(opal_list_t *data, opal_list_t *info); @@ -588,7 +588,7 @@ static int s1_fence(opal_list_t *procs, int collect_data) } static int s1_get(const opal_process_name_t *id, - const char *key, + const char *key, opal_list_t *info, opal_value_t **kv) { int rc; diff --git a/opal/mca/pmix/s2/pmix_s2.c b/opal/mca/pmix/s2/pmix_s2.c index 0c0e25eab3..3d09a25674 100644 --- a/opal/mca/pmix/s2/pmix_s2.c +++ b/opal/mca/pmix/s2/pmix_s2.c @@ -46,7 +46,7 @@ static int s2_fence(opal_list_t *procs, int collect_data); static int s2_put(opal_pmix_scope_t scope, opal_value_t *kv); static int s2_get(const opal_process_name_t *id, - const char *key, + const char *key, opal_list_t *info, opal_value_t **kv); static int s2_publish(opal_list_t *info); static int s2_lookup(opal_list_t *data, opal_list_t *info); @@ -607,7 +607,7 @@ static int s2_fence(opal_list_t *procs, int collect_data) } static int s2_get(const opal_process_name_t *id, - const char *key, + const char *key, opal_list_t *info, opal_value_t **kv) { int rc; diff --git a/orte/mca/ess/pmi/ess_pmi_module.c b/orte/mca/ess/pmi/ess_pmi_module.c index d1664bdbd8..c85881b7cc 100644 --- a/orte/mca/ess/pmi/ess_pmi_module.c +++ b/orte/mca/ess/pmi/ess_pmi_module.c @@ -88,10 +88,9 @@ static int rte_init(void) char *rmluri; opal_value_t *kv; char *val; - size_t sz; int u32, *u32ptr; uint16_t u16, *u16ptr; - char **peers=NULL, *mycpuset; + char **peers=NULL, *mycpuset, **cpusets=NULL; opal_process_name_t name; size_t i; @@ -153,8 +152,8 @@ static int rte_init(void) /* get our app number from PMI - ok if not found */ - OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_APPNUM, - ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); + OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_APPNUM, + ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32); if (OPAL_SUCCESS == ret) { orte_process_info.app_num = u32; } else { @@ -190,8 +189,8 @@ static int rte_init(void) } /* retrieve our topology */ - OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_TOPO, - ORTE_PROC_MY_NAME, &val, OPAL_STRING); + OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_TOPO, + ORTE_PROC_MY_NAME, &val, OPAL_STRING); if (OPAL_SUCCESS == ret && NULL != val) { /* load the topology */ if (0 != hwloc_topology_init(&opal_hwloc_topology)) { @@ -259,84 +258,77 @@ static int rte_init(void) /* get our local peers */ if (0 < orte_process_info.num_local_peers) { + /* if my local rank if too high, then that's an error */ + if (orte_process_info.num_local_peers < orte_process_info.my_local_rank) { + ret = ORTE_ERR_BAD_PARAM; + error = "num local peers"; + goto error; + } /* retrieve the local peers */ OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_PEERS, ORTE_PROC_MY_NAME, &val, OPAL_STRING); if (OPAL_SUCCESS == ret && NULL != val) { peers = opal_argv_split(val, ','); free(val); + /* and their cpusets, if available */ + OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_CPUSETS, ORTE_PROC_MY_NAME, &val, OPAL_STRING); + if (OPAL_SUCCESS == ret && NULL != val) { + cpusets = opal_argv_split(val, ':'); + free(val); + } else { + cpusets = NULL; + } } else { peers = NULL; + cpusets = NULL; } } else { peers = NULL; - } - - /* get our cpuset */ - OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_CPUSET, ORTE_PROC_MY_NAME, &val, OPAL_STRING); - if (OPAL_SUCCESS != ret || NULL == val) { - /* if we don't have a cpuset, or it is NULL, then we declare our local - * peers to be on the same node and everyone else to be non-local */ - mycpuset = NULL; - } else { - mycpuset = val; + cpusets = NULL; } /* set the locality */ - name.jobid = ORTE_PROC_MY_NAME->jobid; - for (sz=0; sz < orte_process_info.num_procs; sz++) { - kv = OBJ_NEW(opal_value_t); - kv->key = strdup(OPAL_PMIX_LOCALITY); - kv->type = OPAL_UINT16; - name.vpid = sz; - if (sz == ORTE_PROC_MY_NAME->vpid) { - /* we are fully local to ourselves */ - u16 = OPAL_PROC_ALL_LOCAL; - } else if (NULL == peers) { - /* nobody is local to us */ - u16 = OPAL_PROC_NON_LOCAL; + if (NULL != peers) { + /* indentify our cpuset */ + if (NULL != cpusets) { + mycpuset = cpusets[orte_process_info.my_local_rank]; } else { - for (i=0; NULL != peers[i]; i++) { - if (sz == strtoul(peers[i], NULL, 10)) { - break; - } - } - if (NULL == peers[i]) { - /* not a local peer */ - u16 = OPAL_PROC_NON_LOCAL; - } else if (NULL == mycpuset) { + mycpuset = NULL; + } + name.jobid = ORTE_PROC_MY_NAME->jobid; + for (i=0; NULL != peers[i]; i++) { + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_LOCALITY); + kv->type = OPAL_UINT16; + name.vpid = strtoul(peers[i], NULL, 10); + if (name.vpid == ORTE_PROC_MY_NAME->vpid) { + /* we are fully local to ourselves */ + u16 = OPAL_PROC_ALL_LOCAL; + } else if (NULL == mycpuset || NULL == cpusets[i] || + 0 == strcmp(cpusets[i], "UNBOUND")) { /* all we can say is that it shares our node */ u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; } else { - /* attempt to get their cpuset */ - OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_CPUSET, &name, &val, OPAL_STRING); - if (OPAL_SUCCESS == ret && NULL != val) { - /* we have it, so compute the locality */ - u16 = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, - mycpuset, val); - free(val); - } else { - /* all we can say is that it shares our node */ - u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; - } + /* we have it, so compute the locality */ + u16 = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, mycpuset, cpusets[i]); } - } - kv->data.uint16 = u16; - ret = opal_pmix.store_local(&name, kv); - if (OPAL_SUCCESS != ret) { - error = "local store of locality"; - if (NULL != mycpuset) { - free(mycpuset); + OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, + "%s ess:pmi:locality: proc %s locality %x", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&name), u16)); + kv->data.uint16 = u16; + ret = opal_pmix.store_local(&name, kv); + if (OPAL_SUCCESS != ret) { + error = "local store of locality"; + opal_argv_free(peers); + opal_argv_free(cpusets); + goto error; } - opal_argv_free(peers); - goto error; + OBJ_RELEASE(kv); } - OBJ_RELEASE(kv); + opal_argv_free(peers); + opal_argv_free(cpusets); } - if (NULL != mycpuset){ - free(mycpuset); - } - opal_argv_free(peers); /* now that we have all required info, complete the setup */ if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(false))) { diff --git a/orte/mca/oob/base/oob_base_stubs.c b/orte/mca/oob/base/oob_base_stubs.c index c558ee3b0f..94fa3d642a 100644 --- a/orte/mca/oob/base/oob_base_stubs.c +++ b/orte/mca/oob/base/oob_base_stubs.c @@ -40,7 +40,7 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata) bool msg_sent; mca_oob_base_component_t *component; bool reachable; - opal_value_t *kv; + char *uri; /* done with this. release it now */ OBJ_RELEASE(cd); @@ -61,11 +61,14 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata) ORTE_NAME_PRINT(&msg->dst)); /* for direct launched procs, the URI might be in the database, * so check there next - if it is, the peer object will be added - * to our hash table + * to our hash table. However, we don't want to chase up to the + * server after it, so indicate it is optional */ - if (OPAL_SUCCESS == opal_pmix.get(&msg->dst, OPAL_PMIX_PROC_URI, &kv)) { - if (NULL != kv) { - process_uri(kv->data.string); + OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_PROC_URI, &msg->dst, + (char**)&uri, OPAL_STRING); + if (OPAL_SUCCESS == rc ) { + if (NULL != uri) { + process_uri(uri); if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers, ui64, (void**)&pr) || NULL == pr) { diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index 9cb387a0c2..f35f4251d2 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -481,6 +481,7 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender, /* ask our local pmix server for the data */ if (OPAL_SUCCESS != (rc = opal_pmix.server_dmodex_request(&idreq, modex_resp, req))) { + ORTE_ERROR_LOG(rc); opal_hotel_checkout(&orte_pmix_server_globals.reqs, req->room_num); OBJ_RELEASE(req); send_error(rc, &idreq, sender); diff --git a/orte/orted/pmix/pmix_server_register_fns.c b/orte/orted/pmix/pmix_server_register_fns.c index 67e67e0ec8..7fa7392fd6 100644 --- a/orte/orted/pmix/pmix_server_register_fns.c +++ b/orte/orted/pmix/pmix_server_register_fns.c @@ -236,6 +236,7 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata) /* construct the list of local peers, while adding * each proc's locality info */ list = NULL; + procs = NULL; vpid = ORTE_VPID_MAX; for (i=0; i < node->procs->size; i++) { if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) { @@ -249,13 +250,15 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata) /* note that we have to pass the cpuset for each local * peer so locality can be computed */ tmp = NULL; - kv = OBJ_NEW(opal_value_t); - kv->key = strdup(OPAL_PMIX_CPUSET); - kv->type = OPAL_STRING; if (orte_get_attribute(&pptr->attributes, ORTE_PROC_CPU_BITMAP, (void**)&tmp, OPAL_STRING)) { - kv->data.string = tmp; + if (NULL != tmp) { + opal_argv_append_nosize(&procs, tmp); + } else { + opal_argv_append_nosize(&procs, "UNBOUND"); + } + } else { + opal_argv_append_nosize(&procs, "UNBOUND"); } - opal_list_append(info, &kv->super); /* go ahead and register this client */ if (OPAL_SUCCESS != (rc = opal_pmix.server_register_client(&pptr->name, uid, gid, (void*)pptr, NULL, NULL))) { @@ -276,6 +279,18 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata) kv->data.string = tmp; opal_list_append(info, &kv->super); } + /* construct the list of cpusets for transmission */ + if (NULL != procs) { + tmp = opal_argv_join(procs, ':'); + opal_argv_free(procs); + procs = NULL; + /* pass the list of cpusets */ + kv = OBJ_NEW(opal_value_t); + kv->key = strdup(OPAL_PMIX_LOCAL_CPUSETS); + kv->type = OPAL_STRING; + kv->data.string = tmp; + opal_list_append(info, &kv->super); + } /* pass the local ldr */ kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_LOCALLDR);