1
1

Clean up the pidmap decoding for apps to avoid confusion

This commit was SVN r26498.
Этот коммит содержится в:
Ralph Castain 2012-05-27 16:21:38 +00:00
родитель d1e91e9372
Коммит c69a04e16b
3 изменённых файла: 28 добавлений и 24 удаления

Просмотреть файл

@@ -819,13 +819,13 @@ int orte_util_encode_pidmap(opal_byte_object_t *boptr)
return rc; return rc;
} }
/* only APPS call this function - daemons have their own */
int orte_util_decode_pidmap(opal_byte_object_t *bo) int orte_util_decode_pidmap(opal_byte_object_t *bo)
{ {
orte_jobid_t jobid; orte_jobid_t jobid;
orte_vpid_t i, num_procs; orte_vpid_t i, num_procs;
orte_pmap_t *pmap; orte_pmap_t *pmap;
int32_t *nodes=NULL, my_node = 0; int32_t *nodes=NULL;
orte_local_rank_t *local_rank=NULL; orte_local_rank_t *local_rank=NULL;
orte_node_rank_t *node_rank=NULL; orte_node_rank_t *node_rank=NULL;
#if OPAL_HAVE_HWLOC #if OPAL_HAVE_HWLOC
@@ -887,7 +887,9 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo)
goto cleanup; goto cleanup;
} }
/* set mine */ /* set mine */
orte_process_info.bind_level = bind_level; if (jobid == ORTE_PROC_MY_NAME->jobid) {
orte_process_info.bind_level = bind_level;
}
#endif #endif
/* allocate memory for the node info */ /* allocate memory for the node info */
@@ -926,7 +928,7 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo)
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto cleanup; goto cleanup;
} }
if (ORTE_PROC_IS_APP) { if (jobid == ORTE_PROC_MY_NAME->jobid) {
/* set mine */ /* set mine */
orte_process_info.bind_idx = bind_idx[ORTE_PROC_MY_NAME->vpid]; orte_process_info.bind_idx = bind_idx[ORTE_PROC_MY_NAME->vpid];
} }
@@ -1009,37 +1011,37 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo)
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
if (ORTE_PROC_IS_APP) { if (jobid == ORTE_PROC_MY_NAME->jobid) {
/* track my node */ /* track my node */
my_node = nodes[ORTE_PROC_MY_NAME->vpid]; orte_process_info.my_node = nodes[ORTE_PROC_MY_NAME->vpid];
} }
/* xfer the data */ /* xfer the data */
for (i=0; i < num_procs; i++) { for (i=0; i < num_procs; i++) {
pmap = OBJ_NEW(orte_pmap_t); pmap = OBJ_NEW(orte_pmap_t);
pmap->node = nodes[i]; pmap->node = nodes[i];
pmap->local_rank = local_rank[i]; pmap->local_rank = local_rank[i];
pmap->node_rank = node_rank[i]; pmap->node_rank = node_rank[i];
/* if I am an app, record the locality of this proc /* record the locality of this proc
* relative to me - daemons don't need this info * relative to me
*/ */
if (ORTE_PROC_IS_APP) { if (ORTE_PROC_MY_NAME->vpid == i &&
if (ORTE_PROC_MY_NAME->vpid == i) { jobid == ORTE_PROC_MY_NAME->jobid) {
/* this is me */ /* this is me */
pmap->locality = OPAL_PROC_ALL_LOCAL; pmap->locality = OPAL_PROC_ALL_LOCAL;
} else if (pmap->node == my_node) { } else if (pmap->node == orte_process_info.my_node) {
#if OPAL_HAVE_HWLOC #if OPAL_HAVE_HWLOC
/* we share a node - see what else we share */ /* we share a node - see what else we share */
pmap->locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, pmap->locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
orte_process_info.bind_level, orte_process_info.bind_level,
orte_process_info.bind_idx, orte_process_info.bind_idx,
jmap->bind_level, jmap->bind_level,
bind_idx[i]); bind_idx[i]);
#else #else
pmap->locality = OPAL_PROC_ON_NODE; pmap->locality = OPAL_PROC_ON_NODE;
#endif #endif
} else { } else {
pmap->locality = OPAL_PROC_NON_LOCAL; pmap->locality = OPAL_PROC_NON_LOCAL;
}
} }
/* add the pidmap entry at the specific site corresponding /* add the pidmap entry at the specific site corresponding
* to the proc's vpid * to the proc's vpid

Просмотреть файл

@@ -69,6 +69,7 @@ ORTE_DECLSPEC orte_proc_info_t orte_process_info = {
/* .bind_level = */ OPAL_HWLOC_NODE_LEVEL, /* .bind_level = */ OPAL_HWLOC_NODE_LEVEL,
/* .bind_idx = */ 0, /* .bind_idx = */ 0,
#endif #endif
/* .my_node = */ -1,
/* .app_rank = */ -1, /* .app_rank = */ -1,
/* .peer_modex = */ -1, /* .peer_modex = */ -1,
/* .peer_init_barrier = */ -1, /* .peer_init_barrier = */ -1,

Просмотреть файл

@@ -121,7 +121,8 @@ struct orte_proc_info_t {
opal_hwloc_level_t bind_level; opal_hwloc_level_t bind_level;
unsigned int bind_idx; unsigned int bind_idx;
#endif #endif
int32_t app_rank; int32_t my_node; /**< index in the node array of the node I am on */
int32_t app_rank; /**< rank within my app_context */
orte_grpcomm_coll_id_t peer_modex; /**< modex collective id */ orte_grpcomm_coll_id_t peer_modex; /**< modex collective id */
orte_grpcomm_coll_id_t peer_init_barrier; /**< barrier id during init */ orte_grpcomm_coll_id_t peer_init_barrier; /**< barrier id during init */
orte_grpcomm_coll_id_t peer_fini_barrier; /**< barrier id during finalize */ orte_grpcomm_coll_id_t peer_fini_barrier; /**< barrier id during finalize */