1
1

Fix a bug in the way we computed local_rank. A proc's local_rank must be its rank among the procs of its own job on a node.

We were mistakenly computing the local_rank across all jobs with procs on that node. The two definitions are equivalent for an initial launch, but comm_spawn'd procs would get the wrong local_rank: on any node that was shared with the initial job, the comm_spawn'd job would have no proc with local_rank=0.

This commit was SVN r18263.
Этот коммит содержится в:
Ralph Castain 2008-04-23 17:42:59 +00:00
родитель 4d1ae7b05f
Коммит eece9f88f0

Просмотреть файл

@@ -359,38 +359,50 @@ int orte_rmaps_base_compute_usage(orte_job_t *jdata)
/* for each node in the map... */ /* for each node in the map... */
nodes = (orte_node_t**)map->nodes->addr; nodes = (orte_node_t**)map->nodes->addr;
for (i=0; i < map->nodes->size; i++) { for (i=0; i < map->num_nodes; i++) {
if (NULL != nodes[i]) { /* cycle through the array of procs IN THIS JOB on this node, looking for
/* cycle through the array of procs on this node, looking for * the minimum vpid one and setting that local rank, until we
* the minimum vpid one and setting that local rank, until we * have done so for all procs on the node and/or in the job
* have done so for all procs on the node */
*/
/* init search values */
/* init search values */ procs = (orte_proc_t**)nodes[i]->procs->addr;
procs = (orte_proc_t**)nodes[i]->procs->addr; local_rank = 0;
local_rank = 0;
while (local_rank < nodes[i]->num_procs) {
while (local_rank < nodes[i]->num_procs) { minv = ORTE_VPID_MAX;
minv = ORTE_VPID_MAX; psave = NULL;
/* find the minimum vpid proc */ /* find the minimum vpid proc IN THIS JOB */
for (j=0; j < nodes[i]->procs->size; j++) { for (j=0; j < nodes[i]->procs->size; j++) {
if (NULL != procs[j]) { if (NULL == procs[j]) {
if (ORTE_VPID_INVALID != procs[j]->local_rank) { /* the array is left justified, so this
/* already done this one */ * means we are done
continue; */
} break;
if (procs[j]->name.vpid < minv) { }
minv = procs[j]->name.vpid; if (procs[j]->name.jobid != jdata->jobid) {
psave = procs[j]; /* not in our job */
} continue;
} }
if (ORTE_VPID_INVALID != procs[j]->local_rank) {
/* already did this one */
continue;
}
if (procs[j]->name.vpid < minv) {
minv = procs[j]->name.vpid;
psave = procs[j];
} }
psave->local_rank = local_rank;
++local_rank;
} }
if (NULL == psave) {
/* we must have processed them all! */
goto DONE;
}
psave->local_rank = local_rank;
++local_rank;
} }
} }
DONE:
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }