Fix a bug in the way we computed local_rank. The local_rank must be a proc's rank among the peers of its own job on a node.
We were mistakenly computing the local_rank across all jobs with procs on that node. The two definitions are equivalent for an initial launch, but comm_spawn'd procs would get the wrong local_rank: in particular, there would be no local_rank=0 proc in the comm_spawn'd job on any node that was shared with the initial job. This commit was SVN r18263.
Этот коммит содержится в:
родитель
4d1ae7b05f
Коммит
eece9f88f0
@@ -359,38 +359,50 @@ int orte_rmaps_base_compute_usage(orte_job_t *jdata)
|
||||
|
||||
/* for each node in the map... */
|
||||
nodes = (orte_node_t**)map->nodes->addr;
|
||||
for (i=0; i < map->nodes->size; i++) {
|
||||
if (NULL != nodes[i]) {
|
||||
/* cycle through the array of procs on this node, looking for
|
||||
* the minimum vpid one and setting that local rank, until we
|
||||
* have done so for all procs on the node
|
||||
*/
|
||||
|
||||
/* init search values */
|
||||
procs = (orte_proc_t**)nodes[i]->procs->addr;
|
||||
local_rank = 0;
|
||||
|
||||
while (local_rank < nodes[i]->num_procs) {
|
||||
minv = ORTE_VPID_MAX;
|
||||
/* find the minimum vpid proc */
|
||||
for (j=0; j < nodes[i]->procs->size; j++) {
|
||||
if (NULL != procs[j]) {
|
||||
if (ORTE_VPID_INVALID != procs[j]->local_rank) {
|
||||
/* already done this one */
|
||||
continue;
|
||||
}
|
||||
if (procs[j]->name.vpid < minv) {
|
||||
minv = procs[j]->name.vpid;
|
||||
psave = procs[j];
|
||||
}
|
||||
}
|
||||
for (i=0; i < map->num_nodes; i++) {
|
||||
/* cycle through the array of procs IN THIS JOB on this node, looking for
|
||||
* the minimum vpid one and setting that local rank, until we
|
||||
* have done so for all procs on the node and/or in the job
|
||||
*/
|
||||
|
||||
/* init search values */
|
||||
procs = (orte_proc_t**)nodes[i]->procs->addr;
|
||||
local_rank = 0;
|
||||
|
||||
while (local_rank < nodes[i]->num_procs) {
|
||||
minv = ORTE_VPID_MAX;
|
||||
psave = NULL;
|
||||
/* find the minimum vpid proc IN THIS JOB */
|
||||
for (j=0; j < nodes[i]->procs->size; j++) {
|
||||
if (NULL == procs[j]) {
|
||||
/* the array is left justified, so this
|
||||
* means we are done
|
||||
*/
|
||||
break;
|
||||
}
|
||||
if (procs[j]->name.jobid != jdata->jobid) {
|
||||
/* not in our job */
|
||||
continue;
|
||||
}
|
||||
if (ORTE_VPID_INVALID != procs[j]->local_rank) {
|
||||
/* already did this one */
|
||||
continue;
|
||||
}
|
||||
if (procs[j]->name.vpid < minv) {
|
||||
minv = procs[j]->name.vpid;
|
||||
psave = procs[j];
|
||||
}
|
||||
psave->local_rank = local_rank;
|
||||
++local_rank;
|
||||
}
|
||||
if (NULL == psave) {
|
||||
/* we must have processed them all! */
|
||||
goto DONE;
|
||||
}
|
||||
psave->local_rank = local_rank;
|
||||
++local_rank;
|
||||
}
|
||||
}
|
||||
|
||||
DONE:
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user