Clean up the pidmap decoding for apps to avoid confusion
This commit was SVN r26498.
Этот коммит содержится в:
родитель
d1e91e9372
Коммит
c69a04e16b
@ -819,13 +819,13 @@ int orte_util_encode_pidmap(opal_byte_object_t *boptr)
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* only APPS call this function - daemons have their own */
|
||||||
int orte_util_decode_pidmap(opal_byte_object_t *bo)
|
int orte_util_decode_pidmap(opal_byte_object_t *bo)
|
||||||
{
|
{
|
||||||
orte_jobid_t jobid;
|
orte_jobid_t jobid;
|
||||||
orte_vpid_t i, num_procs;
|
orte_vpid_t i, num_procs;
|
||||||
orte_pmap_t *pmap;
|
orte_pmap_t *pmap;
|
||||||
int32_t *nodes=NULL, my_node = 0;
|
int32_t *nodes=NULL;
|
||||||
orte_local_rank_t *local_rank=NULL;
|
orte_local_rank_t *local_rank=NULL;
|
||||||
orte_node_rank_t *node_rank=NULL;
|
orte_node_rank_t *node_rank=NULL;
|
||||||
#if OPAL_HAVE_HWLOC
|
#if OPAL_HAVE_HWLOC
|
||||||
@ -887,7 +887,9 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo)
|
|||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
/* set mine */
|
/* set mine */
|
||||||
orte_process_info.bind_level = bind_level;
|
if (jobid == ORTE_PROC_MY_NAME->jobid) {
|
||||||
|
orte_process_info.bind_level = bind_level;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* allocate memory for the node info */
|
/* allocate memory for the node info */
|
||||||
@ -926,7 +928,7 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo)
|
|||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
if (ORTE_PROC_IS_APP) {
|
if (jobid == ORTE_PROC_MY_NAME->jobid) {
|
||||||
/* set mine */
|
/* set mine */
|
||||||
orte_process_info.bind_idx = bind_idx[ORTE_PROC_MY_NAME->vpid];
|
orte_process_info.bind_idx = bind_idx[ORTE_PROC_MY_NAME->vpid];
|
||||||
}
|
}
|
||||||
@ -1009,37 +1011,37 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo)
|
|||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
if (ORTE_PROC_IS_APP) {
|
if (jobid == ORTE_PROC_MY_NAME->jobid) {
|
||||||
/* track my node */
|
/* track my node */
|
||||||
my_node = nodes[ORTE_PROC_MY_NAME->vpid];
|
orte_process_info.my_node = nodes[ORTE_PROC_MY_NAME->vpid];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* xfer the data */
|
/* xfer the data */
|
||||||
for (i=0; i < num_procs; i++) {
|
for (i=0; i < num_procs; i++) {
|
||||||
pmap = OBJ_NEW(orte_pmap_t);
|
pmap = OBJ_NEW(orte_pmap_t);
|
||||||
pmap->node = nodes[i];
|
pmap->node = nodes[i];
|
||||||
pmap->local_rank = local_rank[i];
|
pmap->local_rank = local_rank[i];
|
||||||
pmap->node_rank = node_rank[i];
|
pmap->node_rank = node_rank[i];
|
||||||
/* if I am an app, record the locality of this proc
|
/* record the locality of this proc
|
||||||
* relative to me - daemons don't need this info
|
* relative to me
|
||||||
*/
|
*/
|
||||||
if (ORTE_PROC_IS_APP) {
|
if (ORTE_PROC_MY_NAME->vpid == i &&
|
||||||
if (ORTE_PROC_MY_NAME->vpid == i) {
|
jobid == ORTE_PROC_MY_NAME->jobid) {
|
||||||
/* this is me */
|
/* this is me */
|
||||||
pmap->locality = OPAL_PROC_ALL_LOCAL;
|
pmap->locality = OPAL_PROC_ALL_LOCAL;
|
||||||
} else if (pmap->node == my_node) {
|
} else if (pmap->node == orte_process_info.my_node) {
|
||||||
#if OPAL_HAVE_HWLOC
|
#if OPAL_HAVE_HWLOC
|
||||||
/* we share a node - see what else we share */
|
/* we share a node - see what else we share */
|
||||||
pmap->locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
|
pmap->locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
|
||||||
orte_process_info.bind_level,
|
orte_process_info.bind_level,
|
||||||
orte_process_info.bind_idx,
|
orte_process_info.bind_idx,
|
||||||
jmap->bind_level,
|
jmap->bind_level,
|
||||||
bind_idx[i]);
|
bind_idx[i]);
|
||||||
#else
|
#else
|
||||||
pmap->locality = OPAL_PROC_ON_NODE;
|
pmap->locality = OPAL_PROC_ON_NODE;
|
||||||
#endif
|
#endif
|
||||||
} else {
|
} else {
|
||||||
pmap->locality = OPAL_PROC_NON_LOCAL;
|
pmap->locality = OPAL_PROC_NON_LOCAL;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
/* add the pidmap entry at the specific site corresponding
|
/* add the pidmap entry at the specific site corresponding
|
||||||
* to the proc's vpid
|
* to the proc's vpid
|
||||||
|
@ -69,6 +69,7 @@ ORTE_DECLSPEC orte_proc_info_t orte_process_info = {
|
|||||||
/* .bind_level = */ OPAL_HWLOC_NODE_LEVEL,
|
/* .bind_level = */ OPAL_HWLOC_NODE_LEVEL,
|
||||||
/* .bind_idx = */ 0,
|
/* .bind_idx = */ 0,
|
||||||
#endif
|
#endif
|
||||||
|
/* .my_node = */ -1,
|
||||||
/* .app_rank = */ -1,
|
/* .app_rank = */ -1,
|
||||||
/* .peer_modex = */ -1,
|
/* .peer_modex = */ -1,
|
||||||
/* .peer_init_barrier = */ -1,
|
/* .peer_init_barrier = */ -1,
|
||||||
|
@ -121,7 +121,8 @@ struct orte_proc_info_t {
|
|||||||
opal_hwloc_level_t bind_level;
|
opal_hwloc_level_t bind_level;
|
||||||
unsigned int bind_idx;
|
unsigned int bind_idx;
|
||||||
#endif
|
#endif
|
||||||
int32_t app_rank;
|
int32_t my_node; /**< index in the node array of the node I am on */
|
||||||
|
int32_t app_rank; /**< rank within my app_context */
|
||||||
orte_grpcomm_coll_id_t peer_modex; /**< modex collective id */
|
orte_grpcomm_coll_id_t peer_modex; /**< modex collective id */
|
||||||
orte_grpcomm_coll_id_t peer_init_barrier; /**< barrier id during init */
|
orte_grpcomm_coll_id_t peer_init_barrier; /**< barrier id during init */
|
||||||
orte_grpcomm_coll_id_t peer_fini_barrier; /**< barrier id during finalize */
|
orte_grpcomm_coll_id_t peer_fini_barrier; /**< barrier id during finalize */
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user