
Revert "Revert "Fix the handling of cpusets so we get the correct cpuset for each local peer. Add the ability to indicate that a modex request is "optional" so we don't call the server if we don't find the value. Take advantage of that to allow the MPI layer to decide that the lack of locality info indicates non-local""

Fix the locality computation by correctly computing the vpid of the local peer

This reverts commit open-mpi/ompi@6a8fad49e5.
This commit is contained in:
Ralph Castain 2015-09-11 06:38:47 -07:00
parent 6a8fad49e5
commit dc5796b8a1
14 changed files with 230 additions and 126 deletions
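
Editorial note: the net effect of the change is sketched below, assuming the usual OMPI/OPAL headers (ompi/proc/proc.h, opal/mca/pmix/pmix.h) are available; the helper name is hypothetical. The new OPAL_MODEX_RECV_VALUE_OPTIONAL macro consults only the local data store, and the MPI layer treats a missing locality value as meaning the peer is not on this node, mirroring the pattern the commit applies in ompi_proc_complete_init_single() below.

/* Sketch only - not part of the commit. */
static void set_locality_sketch(ompi_proc_t *proc)
{
    uint16_t u16, *u16ptr = &u16;
    int ret;

    /* ask only the local data store; do NOT fall back to the server */
    OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY,
                                   &proc->super.proc_name, &u16ptr, OPAL_UINT16);
    if (OPAL_SUCCESS != ret) {
        /* lack of locality info is now taken to mean "not on my node" */
        proc->super.proc_flags = OPAL_PROC_NON_LOCAL;
    } else {
        proc->super.proc_flags = u16;
    }
}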

View file

@@ -140,7 +140,7 @@ static int ompi_proc_complete_init_single (ompi_proc_t *proc)
/* get the locality information - all RTEs are required
* to provide this information at startup */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc->super.proc_name, &u16ptr, OPAL_UINT16);
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY, &proc->super.proc_name, &u16ptr, OPAL_UINT16);
if (OPAL_SUCCESS != ret) {
proc->super.proc_flags = OPAL_PROC_NON_LOCAL;
} else {
@@ -149,10 +149,10 @@ static int ompi_proc_complete_init_single (ompi_proc_t *proc)
/* we can retrieve the hostname at no cost because it
* was provided at startup */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_HOSTNAME, &proc->super.proc_name,
(char**)&(proc->super.proc_hostname), OPAL_STRING);
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_HOSTNAME, &proc->super.proc_name,
(char**)&(proc->super.proc_hostname), OPAL_STRING);
if (OPAL_SUCCESS != ret) {
return ret;
return ret;
}
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
/* get the remote architecture - this might force a modex except
@@ -345,7 +345,7 @@ int ompi_proc_complete_init(void)
/* the runtime is required to fill in locality for all local processes by this
* point. only local processes will have locality set */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc_name, &u16ptr, OPAL_UINT16);
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY, &proc_name, &u16ptr, OPAL_UINT16);
if (OPAL_SUCCESS == ret) {
locality = u16;
}

View file

@@ -61,9 +61,10 @@ static int cray_fence_nb(opal_list_t *procs, int collect_data,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
static int cray_commit(void);
static int cray_get(const opal_process_name_t *id,
const char *key,
const char *key, opal_list_t *info,
opal_value_t **kv);
static int cray_get_nb(const opal_process_name_t *id, const char *key,
opal_list_t *info,
opal_pmix_value_cbfunc_t cbfunc, void *cbdata);
static int cray_publish(opal_list_t *info);
static int cray_publish_nb(opal_list_t *info,
@@ -735,7 +736,7 @@ static int cray_fence_nb(opal_list_t *procs, int collect_data,
return OPAL_ERR_NOT_IMPLEMENTED;
}
static int cray_get(const opal_process_name_t *id, const char *key, opal_value_t **kv)
static int cray_get(const opal_process_name_t *id, const char *key, opal_list_t *info, opal_value_t **kv)
{
int rc;
opal_list_t vals;
@@ -762,7 +763,7 @@ static int cray_get(const opal_process_name_t *id, const char *key, opal_value_t
}
static int cray_get_nb(const opal_process_name_t *id, const char *key,
opal_pmix_value_cbfunc_t cbfunc, void *cbdata)
opal_list_t *info, opal_pmix_value_cbfunc_t cbfunc, void *cbdata)
{
return OPAL_ERR_NOT_IMPLEMENTED;
}

View file

@@ -108,6 +108,43 @@ extern int opal_pmix_verbose_output;
free(_key); \
} while(0);
/**
* Provide a simplified macro for retrieving modex data
* from another process when we don't want the PMIx module
* to request it from the server if not found:
*
* r - the integer return status from the modex op (int)
* s - string key (char*)
* p - pointer to the opal_process_name_t of the proc that posted
* the data (opal_process_name_t*)
* d - pointer to a location wherein the data object
* is to be returned
* t - the expected data type
*/
#define OPAL_MODEX_RECV_VALUE_OPTIONAL(r, s, p, d, t) \
do { \
opal_value_t *_kv, *_info; \
opal_list_t _ilist; \
OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \
"%s[%s:%d] MODEX RECV VALUE OPTIONAL FOR PROC %s KEY %s", \
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
__FILE__, __LINE__, \
OPAL_NAME_PRINT(*(p)), (s))); \
OBJ_CONSTRUCT(&(_ilist), opal_list_t); \
_info = OBJ_NEW(opal_value_t); \
_info->key = strdup(OPAL_PMIX_OPTIONAL); \
_info->type = OPAL_BOOL; \
_info->data.flag = true; \
opal_list_append(&(_ilist), &(_info)->super); \
if (OPAL_SUCCESS != ((r) = opal_pmix.get((p), (s), &(_ilist), &(_kv)))) { \
*(d) = NULL; \
} else { \
(r) = opal_value_unload(_kv, (void**)(d), (t)); \
OBJ_RELEASE(_kv); \
} \
OPAL_LIST_DESTRUCT(&(_ilist)); \
} while(0);
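
Editorial note: for readers unfamiliar with the OPAL list idiom, the following hand-expanded sketch shows what one invocation of the new macro amounts to. It is equivalent to OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_HOSTNAME, peer, &hostname, OPAL_STRING), where "peer" is an assumed opal_process_name_t pointer; everything else is taken from the macro body above.

/* Hand-expanded sketch - not part of the commit. */
opal_list_t ilist;
opal_value_t *kv = NULL, *optinfo;
char *hostname = NULL;
int rc;

OBJ_CONSTRUCT(&ilist, opal_list_t);
optinfo = OBJ_NEW(opal_value_t);
optinfo->key = strdup(OPAL_PMIX_OPTIONAL);   /* "pmix.optional" */
optinfo->type = OPAL_BOOL;
optinfo->data.flag = true;                   /* local data store only */
opal_list_append(&ilist, &optinfo->super);

if (OPAL_SUCCESS != (rc = opal_pmix.get(peer, OPAL_PMIX_HOSTNAME, &ilist, &kv))) {
    hostname = NULL;                         /* not found locally; no server call made */
} else {
    rc = opal_value_unload(kv, (void**)&hostname, OPAL_STRING);
    OBJ_RELEASE(kv);
}
OPAL_LIST_DESTRUCT(&ilist);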
/**
* Provide a simplified macro for retrieving modex data
* from another process:
@@ -128,7 +165,7 @@ extern int opal_pmix_verbose_output;
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
__FILE__, __LINE__, \
OPAL_NAME_PRINT(*(p)), (s))); \
if (OPAL_SUCCESS != ((r) = opal_pmix.get((p), (s), &(_kv)))) { \
if (OPAL_SUCCESS != ((r) = opal_pmix.get((p), (s), NULL, &(_kv)))) { \
*(d) = NULL; \
} else { \
(r) = opal_value_unload(_kv, (void**)(d), (t)); \
@@ -157,7 +194,7 @@ extern int opal_pmix_verbose_output;
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
__FILE__, __LINE__, \
OPAL_NAME_PRINT(*(p)), (s))); \
if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), &(_kv))) && \
if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), NULL, &(_kv))) && \
NULL != _kv) { \
*(d) = _kv->data.bo.bytes; \
*(sz) = _kv->data.bo.size; \
@@ -301,23 +338,24 @@ typedef int (*opal_pmix_base_module_put_fn_t)(opal_pmix_scope_t scope,
opal_value_t *val);
/* Retrieve information for the specified _key_ as published by the rank
* and jobid in the provided opal_process_name, returning a pointer to the value in the
* given address.
* and jobid in the provided opal_process_name, and subject to any provided
* constraints, returning a pointer to the value in the given address.
*
* This is a blocking operation - the caller will block until
* the specified data has been _PMIx_Put_ by the specified rank. The caller is
* responsible for freeing all memory associated with the returned value when
* no longer required. */
typedef int (*opal_pmix_base_module_get_fn_t)(const opal_process_name_t *proc,
const char *key,
const char *key, opal_list_t *info,
opal_value_t **val);
/* Retrieve information for the specified _key_ as published by the given rank
* and jobid in the opal_process_name_t. This is a non-blocking operation - the
* and jobid in the opal_process_name_t, and subject to any provided
* constraints. This is a non-blocking operation - the
* callback function will be executed once the specified data has been _PMIx_Put_
* by the specified proc and retrieved by the local server. */
typedef int (*opal_pmix_base_module_get_nb_fn_t)(const opal_process_name_t *proc,
const char *key,
const char *key, opal_list_t *info,
opal_pmix_value_cbfunc_t cbfunc, void *cbdata);
/* Publish the given data to the "universal" nspace

View file

@@ -160,12 +160,15 @@ BEGIN_C_DECLS
/* request-related info */
#define PMIX_COLLECT_DATA "pmix.collect" // (bool) collect data and return it at the end of the operation
#define PMIX_TIMEOUT "pmix.timeout" // (int) time in sec before specified operation should time out
#define PMIX_WAIT "pmix.wait" // (int) caller requests that the server wait until the specified #values are found
#define PMIX_WAIT "pmix.wait" // (int) caller requests that the server wait until at least the specified
// #values are found (0 => all and is the default)
#define PMIX_COLLECTIVE_ALGO "pmix.calgo" // (char*) comma-delimited list of algorithms to use for collective
#define PMIX_COLLECTIVE_ALGO_REQD "pmix.calreqd" // (bool) if true, indicates that the requested choice of algo is mandatory
#define PMIX_NOTIFY_COMPLETION "pmix.notecomp" // (bool) notify parent process upon termination of child job
#define PMIX_RANGE "pmix.range" // (int) pmix_data_range_t value for calls to publish/lookup/unpublish
#define PMIX_PERSISTENCE "pmix.persist" // (int) pmix_persistence_t value for calls to publish
#define PMIX_OPTIONAL "pmix.optional" // (bool) look only in the immediate data store for the requested value - do
// not request data from the server if not found
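
Editorial note: a direct PMIx client can pass this directive straight to PMIx_Get(). A minimal sketch, assuming <pmix.h> and <string.h> are included; the key "myapp.blob" and the nspace string are hypothetical placeholders.

/* Sketch only - not part of the commit. */
pmix_proc_t peer;
pmix_info_t info;
pmix_value_t *val = NULL;
pmix_status_t rc;

memset(&peer, 0, sizeof(peer));
(void)strncpy(peer.nspace, "some.nspace", PMIX_MAX_NSLEN);   /* placeholder nspace */
peer.rank = 0;

memset(&info, 0, sizeof(info));
(void)strncpy(info.key, PMIX_OPTIONAL, PMIX_MAX_KEYLEN);
info.value.type = PMIX_BOOL;
info.value.data.flag = true;          /* look only in the local data store */

rc = PMIx_Get(&peer, "myapp.blob", &info, 1, &val);
if (PMIX_ERR_NOT_FOUND == rc) {
    /* nothing cached locally - and no request was sent to the server */
} else if (PMIX_SUCCESS == rc && NULL != val) {
    /* data was already available locally; release val when done */
}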
/* attributes used by host server to pass data to the server convenience library - the
* data will then be parsed and provided to the local clients */

View file

@@ -117,6 +117,7 @@ pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key,
pmix_status_t rc;
char *nm;
pmix_nspace_t *ns, *nptr;
size_t n;
if (NULL == proc) {
return PMIX_ERR_BAD_PARAM;
@@ -252,7 +253,7 @@ pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key,
* key to eventually be found, so all we can do is return
* the error */
pmix_output_verbose(2, pmix_globals.debug_output,
"Error requesting key=%s for rank = %d, namespace = %s\n",
"Error requesting key=%s for rank = %d, namespace = %s",
key, proc->rank, nm);
return rc;
}
@@ -265,6 +266,18 @@ pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const char *key,
return PMIX_ERR_NOT_FOUND;
}
/* we also have to check the user's directives to see if they do not want
* us to attempt to retrieve it from the server */
for (n=0; n < ninfo; n++) {
if (0 == strcmp(info[n].key, PMIX_OPTIONAL) &&
info[n].value.data.flag) {
/* they don't want us to try and retrieve it */
pmix_output_verbose(2, pmix_globals.debug_output,
"PMIx_Get key=%s for rank = %d, namespace = %s was not found - request was optional",
key, proc->rank, nm);
return PMIX_ERR_NOT_FOUND;
}
}
/* see if we already have a request in place with the server for data from
* this nspace:rank. If we do, then no need to ask again as the
* request will return _all_ data from that proc */

View file

@@ -77,18 +77,18 @@ OPAL_MODULE_DECLSPEC int pmix1_client_init(void);
OPAL_MODULE_DECLSPEC int pmix1_client_finalize(void);
OPAL_MODULE_DECLSPEC int pmix1_initialized(void);
OPAL_MODULE_DECLSPEC int pmix1_abort(int flag, const char *msg,
opal_list_t *procs);
opal_list_t *procs);
OPAL_MODULE_DECLSPEC int pmix1_commit(void);
OPAL_MODULE_DECLSPEC int pmix1_fence(opal_list_t *procs, int collect_data);
OPAL_MODULE_DECLSPEC int pmix1_fencenb(opal_list_t *procs, int collect_data,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
OPAL_MODULE_DECLSPEC int pmix1_put(opal_pmix_scope_t scope,
opal_value_t *val);
OPAL_MODULE_DECLSPEC int pmix1_get(const opal_process_name_t *proc,
const char *key, opal_value_t **val);
OPAL_MODULE_DECLSPEC int pmix1_getnb(const opal_process_name_t *proc,
const char *key,
opal_pmix_value_cbfunc_t cbfunc, void *cbdata);
OPAL_MODULE_DECLSPEC int pmix1_get(const opal_process_name_t *proc, const char *key,
opal_list_t *info, opal_value_t **val);
OPAL_MODULE_DECLSPEC int pmix1_getnb(const opal_process_name_t *proc, const char *key,
opal_list_t *info,
opal_pmix_value_cbfunc_t cbfunc, void *cbdata);
OPAL_MODULE_DECLSPEC int pmix1_publish(opal_list_t *info);
OPAL_MODULE_DECLSPEC int pmix1_publishnb(opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
@@ -100,17 +100,17 @@ OPAL_MODULE_DECLSPEC int pmix1_unpublishnb(char **keys, opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
OPAL_MODULE_DECLSPEC int pmix1_spawn(opal_list_t *job_info, opal_list_t *apps, opal_jobid_t *jobid);
OPAL_MODULE_DECLSPEC int pmix1_spawnnb(opal_list_t *job_info, opal_list_t *apps,
opal_pmix_spawn_cbfunc_t cbfunc, void *cbdata);
opal_pmix_spawn_cbfunc_t cbfunc, void *cbdata);
OPAL_MODULE_DECLSPEC int pmix1_connect(opal_list_t *procs);
OPAL_MODULE_DECLSPEC int pmix1_connectnb(opal_list_t *procs,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata);
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata);
OPAL_MODULE_DECLSPEC int pmix1_disconnect(opal_list_t *procs);
OPAL_MODULE_DECLSPEC int pmix1_disconnectnb(opal_list_t *procs,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata);
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata);
OPAL_MODULE_DECLSPEC int pmix1_resolve_peers(const char *nodename, opal_jobid_t jobid,
opal_list_t *procs);
opal_list_t *procs);
OPAL_MODULE_DECLSPEC int pmix1_resolve_nodes(opal_jobid_t jobid, char **nodelist);
/**** COMMON FUNCTIONS ****/
@@ -123,32 +123,32 @@ OPAL_MODULE_DECLSPEC int pmix1_server_finalize(void);
OPAL_MODULE_DECLSPEC int pmix1_server_gen_regex(const char *input, char **regex);
OPAL_MODULE_DECLSPEC int pmix1_server_gen_ppn(const char *input, char **ppn);
OPAL_MODULE_DECLSPEC int pmix1_server_register_nspace(opal_jobid_t jobid,
int nlocalprocs,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata);
int nlocalprocs,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata);
OPAL_MODULE_DECLSPEC int pmix1_server_register_client(const opal_process_name_t *proc,
uid_t uid, gid_t gid,
void *server_object,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata);
uid_t uid, gid_t gid,
void *server_object,
opal_pmix_op_cbfunc_t cbfunc,
void *cbdata);
OPAL_MODULE_DECLSPEC int pmix1_server_setup_fork(const opal_process_name_t *proc, char ***env);
OPAL_MODULE_DECLSPEC int pmix1_server_dmodex(const opal_process_name_t *proc,
opal_pmix_modex_cbfunc_t cbfunc, void *cbdata);
opal_pmix_modex_cbfunc_t cbfunc, void *cbdata);
OPAL_MODULE_DECLSPEC int pmix1_server_notify_error(int status,
opal_list_t *procs,
opal_list_t *error_procs,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
opal_list_t *procs,
opal_list_t *error_procs,
opal_list_t *info,
opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
/**** COMPONENT UTILITY FUNCTIONS ****/
OPAL_MODULE_DECLSPEC pmix_status_t pmix1_convert_opalrc(int rc);
OPAL_MODULE_DECLSPEC int pmix1_convert_rc(pmix_status_t rc);
OPAL_MODULE_DECLSPEC void pmix1_value_load(pmix_value_t *v,
opal_value_t *kv);
opal_value_t *kv);
OPAL_MODULE_DECLSPEC int pmix1_value_unload(opal_value_t *kv,
const pmix_value_t *v);
const pmix_value_t *v);
END_C_DECLS

View file

@@ -290,13 +290,16 @@ int pmix1_put(opal_pmix_scope_t scope,
return pmix1_convert_rc(rc);
}
int pmix1_get(const opal_process_name_t *proc,
const char *key, opal_value_t **val)
int pmix1_get(const opal_process_name_t *proc, const char *key,
opal_list_t *info, opal_value_t **val)
{
int ret;
pmix_value_t *kv;
pmix_status_t rc;
pmix_proc_t p, *pptr;
size_t ninfo, n;
pmix_info_t *pinfo;
opal_value_t *ival;
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
"%s PMIx_client get on proc %s key %s",
@@ -326,8 +329,25 @@ int pmix1_get(const opal_process_name_t *proc,
pptr = NULL;
}
if (NULL != info) {
ninfo = opal_list_get_size(info);
if (0 < ninfo) {
PMIX_INFO_CREATE(pinfo, ninfo);
n=0;
OPAL_LIST_FOREACH(ival, info, opal_value_t) {
(void)strncpy(pinfo[n].key, ival->key, PMIX_MAX_KEYLEN);
pmix1_value_load(&pinfo[n].value, ival);
}
} else {
pinfo = NULL;
}
} else {
pinfo = NULL;
ninfo = 0;
}
/* pass the request down */
rc = PMIx_Get(pptr, key, NULL, 0, &kv);
rc = PMIx_Get(pptr, key, pinfo, ninfo, &kv);
if (PMIX_SUCCESS == rc) {
if (NULL == kv) {
ret = OPAL_SUCCESS;
@@ -339,6 +359,7 @@ int pmix1_get(const opal_process_name_t *proc,
} else {
ret = pmix1_convert_rc(rc);
}
PMIX_INFO_FREE(pinfo, ninfo);
return ret;
}
@@ -362,11 +383,14 @@ static void val_cbfunc(pmix_status_t status,
}
int pmix1_getnb(const opal_process_name_t *proc, const char *key,
opal_list_t *info,
opal_pmix_value_cbfunc_t cbfunc, void *cbdata)
{
pmix1_opcaddy_t *op;
pmix_status_t rc;
char *tmp;
size_t n;
opal_value_t *ival;
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
"%s PMIx_client get_nb on proc %s key %s",
@@ -389,9 +413,20 @@ int pmix1_getnb(const opal_process_name_t *proc, const char *key,
op->p.rank = PMIX_RANK_WILDCARD;
}
if (NULL != info) {
op->sz = opal_list_get_size(info);
if (0 < op->sz) {
PMIX_INFO_CREATE(op->info, op->sz);
n=0;
OPAL_LIST_FOREACH(ival, info, opal_value_t) {
(void)strncpy(op->info[n].key, ival->key, PMIX_MAX_KEYLEN);
pmix1_value_load(&op->info[n].value, ival);
}
}
}
/* call the library function */
rc = PMIx_Get_nb(&op->p, key, NULL, 0, val_cbfunc, op);
rc = PMIx_Get_nb(&op->p, key, op->info, op->sz, val_cbfunc, op);
if (PMIX_SUCCESS != rc) {
OBJ_RELEASE(op);
}

View file

@@ -98,12 +98,15 @@ BEGIN_C_DECLS
/* request-related info */
#define OPAL_PMIX_COLLECT_DATA "pmix.collect" // (bool) collect data and return it at the end of the operation
#define OPAL_PMIX_TIMEOUT "pmix.timeout" // (int) time in sec before specified operation should time out
#define OPAL_PMIX_WAIT "pmix.wait" // (int) caller requests that the server wait until the specified #values are found
#define OPAL_PMIX_WAIT "pmix.wait" // (int) caller requests that the server wait until at least the specified
// #values are found (0 => all and is the default)
#define OPAL_PMIX_COLLECTIVE_ALGO "pmix.calgo" // (char*) comma-delimited list of algorithms to use for collective
#define OPAL_PMIX_COLLECTIVE_ALGO_REQD "pmix.calreqd" // (bool) if true, indicates that the requested choice of algo is mandatory
#define OPAL_PMIX_NOTIFY_COMPLETION "pmix.notecomp" // (bool) notify parent process upon termination of child job
#define OPAL_PMIX_RANGE "pmix.range" // (int) opal_pmix_data_range_t value for calls to publish/lookup/unpublish
#define OPAL_PMIX_PERSISTENCE "pmix.persist" // (int) opal_pmix_persistence_t value for calls to publish
#define OPAL_PMIX_OPTIONAL "pmix.optional" // (bool) look only in the immediate data store for the requested value - do
// not request data from the server if not found
/* attribute used by host server to pass data to the server convenience library - the
* data will then be parsed and provided to the local clients */

View file

@@ -39,7 +39,7 @@ static int s1_fence(opal_list_t *procs, int collect_data);
static int s1_put(opal_pmix_scope_t scope,
opal_value_t *kv);
static int s1_get(const opal_process_name_t *id,
const char *key,
const char *key, opal_list_t *info,
opal_value_t **kv);
static int s1_publish(opal_list_t *info);
static int s1_lookup(opal_list_t *data, opal_list_t *info);
@@ -588,7 +588,7 @@ static int s1_fence(opal_list_t *procs, int collect_data)
}
static int s1_get(const opal_process_name_t *id,
const char *key,
const char *key, opal_list_t *info,
opal_value_t **kv)
{
int rc;

View file

@@ -46,7 +46,7 @@ static int s2_fence(opal_list_t *procs, int collect_data);
static int s2_put(opal_pmix_scope_t scope,
opal_value_t *kv);
static int s2_get(const opal_process_name_t *id,
const char *key,
const char *key, opal_list_t *info,
opal_value_t **kv);
static int s2_publish(opal_list_t *info);
static int s2_lookup(opal_list_t *data, opal_list_t *info);
@@ -607,7 +607,7 @@ static int s2_fence(opal_list_t *procs, int collect_data)
}
static int s2_get(const opal_process_name_t *id,
const char *key,
const char *key, opal_list_t *info,
opal_value_t **kv)
{
int rc;

View file

@@ -88,10 +88,9 @@ static int rte_init(void)
char *rmluri;
opal_value_t *kv;
char *val;
size_t sz;
int u32, *u32ptr;
uint16_t u16, *u16ptr;
char **peers=NULL, *mycpuset;
char **peers=NULL, *mycpuset, **cpusets=NULL;
opal_process_name_t name;
size_t i;
@@ -153,8 +152,8 @@ static int rte_init(void)
/* get our app number from PMI - ok if not found */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_APPNUM,
ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32);
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_APPNUM,
ORTE_PROC_MY_NAME, &u32ptr, OPAL_UINT32);
if (OPAL_SUCCESS == ret) {
orte_process_info.app_num = u32;
} else {
@@ -190,8 +189,8 @@ static int rte_init(void)
}
/* retrieve our topology */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_TOPO,
ORTE_PROC_MY_NAME, &val, OPAL_STRING);
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_TOPO,
ORTE_PROC_MY_NAME, &val, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != val) {
/* load the topology */
if (0 != hwloc_topology_init(&opal_hwloc_topology)) {
@@ -259,84 +258,77 @@ static int rte_init(void)
/* get our local peers */
if (0 < orte_process_info.num_local_peers) {
/* if my local rank is too high, then that's an error */
if (orte_process_info.num_local_peers < orte_process_info.my_local_rank) {
ret = ORTE_ERR_BAD_PARAM;
error = "num local peers";
goto error;
}
/* retrieve the local peers */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCAL_PEERS,
ORTE_PROC_MY_NAME, &val, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != val) {
peers = opal_argv_split(val, ',');
free(val);
/* and their cpusets, if available */
OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCAL_CPUSETS, ORTE_PROC_MY_NAME, &val, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != val) {
cpusets = opal_argv_split(val, ':');
free(val);
} else {
cpusets = NULL;
}
} else {
peers = NULL;
cpusets = NULL;
}
} else {
peers = NULL;
}
/* get our cpuset */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_CPUSET, ORTE_PROC_MY_NAME, &val, OPAL_STRING);
if (OPAL_SUCCESS != ret || NULL == val) {
/* if we don't have a cpuset, or it is NULL, then we declare our local
* peers to be on the same node and everyone else to be non-local */
mycpuset = NULL;
} else {
mycpuset = val;
cpusets = NULL;
}
/* set the locality */
name.jobid = ORTE_PROC_MY_NAME->jobid;
for (sz=0; sz < orte_process_info.num_procs; sz++) {
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCALITY);
kv->type = OPAL_UINT16;
name.vpid = sz;
if (sz == ORTE_PROC_MY_NAME->vpid) {
/* we are fully local to ourselves */
u16 = OPAL_PROC_ALL_LOCAL;
} else if (NULL == peers) {
/* nobody is local to us */
u16 = OPAL_PROC_NON_LOCAL;
if (NULL != peers) {
/* identify our cpuset */
if (NULL != cpusets) {
mycpuset = cpusets[orte_process_info.my_local_rank];
} else {
for (i=0; NULL != peers[i]; i++) {
if (sz == strtoul(peers[i], NULL, 10)) {
break;
}
}
if (NULL == peers[i]) {
/* not a local peer */
u16 = OPAL_PROC_NON_LOCAL;
} else if (NULL == mycpuset) {
mycpuset = NULL;
}
name.jobid = ORTE_PROC_MY_NAME->jobid;
for (i=0; NULL != peers[i]; i++) {
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCALITY);
kv->type = OPAL_UINT16;
name.vpid = strtoul(peers[i], NULL, 10);
if (name.vpid == ORTE_PROC_MY_NAME->vpid) {
/* we are fully local to ourselves */
u16 = OPAL_PROC_ALL_LOCAL;
} else if (NULL == mycpuset || NULL == cpusets[i] ||
0 == strcmp(cpusets[i], "UNBOUND")) {
/* all we can say is that it shares our node */
u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
} else {
/* attempt to get their cpuset */
OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_CPUSET, &name, &val, OPAL_STRING);
if (OPAL_SUCCESS == ret && NULL != val) {
/* we have it, so compute the locality */
u16 = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
mycpuset, val);
free(val);
} else {
/* all we can say is that it shares our node */
u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
}
/* we have it, so compute the locality */
u16 = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, mycpuset, cpusets[i]);
}
}
kv->data.uint16 = u16;
ret = opal_pmix.store_local(&name, kv);
if (OPAL_SUCCESS != ret) {
error = "local store of locality";
if (NULL != mycpuset) {
free(mycpuset);
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
"%s ess:pmi:locality: proc %s locality %x",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&name), u16));
kv->data.uint16 = u16;
ret = opal_pmix.store_local(&name, kv);
if (OPAL_SUCCESS != ret) {
error = "local store of locality";
opal_argv_free(peers);
opal_argv_free(cpusets);
goto error;
}
opal_argv_free(peers);
goto error;
OBJ_RELEASE(kv);
}
OBJ_RELEASE(kv);
opal_argv_free(peers);
opal_argv_free(cpusets);
}
if (NULL != mycpuset){
free(mycpuset);
}
opal_argv_free(peers);
/* now that we have all required info, complete the setup */
if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(false))) {
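
Editorial note: to make the shape of the fix explicit, the daemon publishes the local peers as a comma-delimited vpid list and their cpusets as a colon-delimited list in the same order, so entry i of one list corresponds to entry i of the other, and the stored vpid now comes from the peer entry itself rather than from a loop index over every proc in the job. A sketch with hypothetical data (not from the commit):

char **peers   = opal_argv_split("0,2,5", ',');           /* vpids on this node */
char **cpusets = opal_argv_split("0-3:4-7:UNBOUND", ':'); /* matching cpusets */
char *mycpuset = strdup("0-3");                           /* my own binding */
opal_process_name_t name;
uint16_t u16;
int i;

name.jobid = ORTE_PROC_MY_NAME->jobid;
for (i = 0; NULL != peers[i]; i++) {
    /* the "correctly computing the vpid of the local peer" part of the fix */
    name.vpid = strtoul(peers[i], NULL, 10);
    if (name.vpid == ORTE_PROC_MY_NAME->vpid) {
        u16 = OPAL_PROC_ALL_LOCAL;
    } else if (NULL == mycpuset || NULL == cpusets[i] ||
               0 == strcmp(cpusets[i], "UNBOUND")) {
        /* all we can say is that the peer shares our node */
        u16 = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
    } else {
        u16 = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
                                                    mycpuset, cpusets[i]);
    }
    /* ... store u16 under OPAL_PMIX_LOCALITY for "name" via
     * opal_pmix.store_local(), as the hunk above does ... */
}
free(mycpuset);
opal_argv_free(peers);
opal_argv_free(cpusets);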

View file

@@ -40,7 +40,7 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata)
bool msg_sent;
mca_oob_base_component_t *component;
bool reachable;
opal_value_t *kv;
char *uri;
/* done with this. release it now */
OBJ_RELEASE(cd);
@@ -61,11 +61,14 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata)
ORTE_NAME_PRINT(&msg->dst));
/* for direct launched procs, the URI might be in the database,
* so check there next - if it is, the peer object will be added
* to our hash table
* to our hash table. However, we don't want to chase up to the
* server after it, so indicate it is optional
*/
if (OPAL_SUCCESS == opal_pmix.get(&msg->dst, OPAL_PMIX_PROC_URI, &kv)) {
if (NULL != kv) {
process_uri(kv->data.string);
OPAL_MODEX_RECV_VALUE_OPTIONAL(rc, OPAL_PMIX_PROC_URI, &msg->dst,
(char**)&uri, OPAL_STRING);
if (OPAL_SUCCESS == rc ) {
if (NULL != uri) {
process_uri(uri);
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
ui64, (void**)&pr) ||
NULL == pr) {

View file

@@ -481,6 +481,7 @@ static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender,
/* ask our local pmix server for the data */
if (OPAL_SUCCESS != (rc = opal_pmix.server_dmodex_request(&idreq, modex_resp, req))) {
ORTE_ERROR_LOG(rc);
opal_hotel_checkout(&orte_pmix_server_globals.reqs, req->room_num);
OBJ_RELEASE(req);
send_error(rc, &idreq, sender);

View file

@@ -236,6 +236,7 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
/* construct the list of local peers, while adding
* each proc's locality info */
list = NULL;
procs = NULL;
vpid = ORTE_VPID_MAX;
for (i=0; i < node->procs->size; i++) {
if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
@@ -249,13 +250,15 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
/* note that we have to pass the cpuset for each local
* peer so locality can be computed */
tmp = NULL;
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_CPUSET);
kv->type = OPAL_STRING;
if (orte_get_attribute(&pptr->attributes, ORTE_PROC_CPU_BITMAP, (void**)&tmp, OPAL_STRING)) {
kv->data.string = tmp;
if (NULL != tmp) {
opal_argv_append_nosize(&procs, tmp);
} else {
opal_argv_append_nosize(&procs, "UNBOUND");
}
} else {
opal_argv_append_nosize(&procs, "UNBOUND");
}
opal_list_append(info, &kv->super);
/* go ahead and register this client */
if (OPAL_SUCCESS != (rc = opal_pmix.server_register_client(&pptr->name, uid, gid,
(void*)pptr, NULL, NULL))) {
@@ -276,6 +279,18 @@ int orte_pmix_server_register_nspace(orte_job_t *jdata)
kv->data.string = tmp;
opal_list_append(info, &kv->super);
}
/* construct the list of cpusets for transmission */
if (NULL != procs) {
tmp = opal_argv_join(procs, ':');
opal_argv_free(procs);
procs = NULL;
/* pass the list of cpusets */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCAL_CPUSETS);
kv->type = OPAL_STRING;
kv->data.string = tmp;
opal_list_append(info, &kv->super);
}
/* pass the local ldr */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_LOCALLDR);