1
1

Fix a bug in the way we computed local_rank. A proc's local_rank must be its rank among the procs of its own job on a node.

We were mistakenly computing the local_rank across all jobs with procs on that node. The two definitions are equivalent for an initial launch, where only the initial job has procs on each node, but comm_spawn'd procs would get the wrong local_rank. In particular, there would be no local_rank=0 proc in the comm_spawn'd job on any node that was shared with the initial job.

This commit was SVN r18263.
Этот коммит содержится в:
Ralph Castain 2008-04-23 17:42:59 +00:00
родитель 4d1ae7b05f
Коммит eece9f88f0

Просмотреть файл

@ -359,11 +359,10 @@ int orte_rmaps_base_compute_usage(orte_job_t *jdata)
/* for each node in the map... */ /* for each node in the map... */
nodes = (orte_node_t**)map->nodes->addr; nodes = (orte_node_t**)map->nodes->addr;
for (i=0; i < map->nodes->size; i++) { for (i=0; i < map->num_nodes; i++) {
if (NULL != nodes[i]) { /* cycle through the array of procs IN THIS JOB on this node, looking for
/* cycle through the array of procs on this node, looking for
* the minimum vpid one and setting that local rank, until we * the minimum vpid one and setting that local rank, until we
* have done so for all procs on the node * have done so for all procs on the node and/or in the job
*/ */
/* init search values */ /* init search values */
@ -372,11 +371,21 @@ int orte_rmaps_base_compute_usage(orte_job_t *jdata)
while (local_rank < nodes[i]->num_procs) { while (local_rank < nodes[i]->num_procs) {
minv = ORTE_VPID_MAX; minv = ORTE_VPID_MAX;
/* find the minimum vpid proc */ psave = NULL;
/* find the minimum vpid proc IN THIS JOB */
for (j=0; j < nodes[i]->procs->size; j++) { for (j=0; j < nodes[i]->procs->size; j++) {
if (NULL != procs[j]) { if (NULL == procs[j]) {
/* the array is left justified, so this
* means we are done
*/
break;
}
if (procs[j]->name.jobid != jdata->jobid) {
/* not in our job */
continue;
}
if (ORTE_VPID_INVALID != procs[j]->local_rank) { if (ORTE_VPID_INVALID != procs[j]->local_rank) {
/* already done this one */ /* already did this one */
continue; continue;
} }
if (procs[j]->name.vpid < minv) { if (procs[j]->name.vpid < minv) {
@ -384,13 +393,16 @@ int orte_rmaps_base_compute_usage(orte_job_t *jdata)
psave = procs[j]; psave = procs[j];
} }
} }
if (NULL == psave) {
/* we must have processed them all! */
goto DONE;
} }
psave->local_rank = local_rank; psave->local_rank = local_rank;
++local_rank; ++local_rank;
} }
} }
}
DONE:
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }