Fix the proc assignment into the job data object during assignment of vpids, as comm_spawned procs were being overwritten by their parents with the same vpid.
Add a little debug output when updating proc state. This commit was SVN r22042.
Этот коммит содержится в:
родитель
b04a42ba3b
Коммит
a15c58c583
@ -329,6 +329,12 @@ void process_msg(int fd, short event, void *data)
|
||||
continue;
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:receive updating state for proc %s current state %x new state %x",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&proc->name),
|
||||
(unsigned int)proc->state, (unsigned int)state));
|
||||
|
||||
/* update the termination counter IFF the state is changing to something
|
||||
* indicating terminated
|
||||
*/
|
||||
|
@ -406,6 +406,11 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
|
||||
continue;
|
||||
}
|
||||
/* ignore procs from other jobs */
|
||||
if (proc->name.jobid != jdata->jobid) {
|
||||
continue;
|
||||
}
|
||||
/* if the vpid is already defined, then update start */
|
||||
if (ORTE_VPID_INVALID != proc->name.vpid &&
|
||||
vpid_start < proc->name.vpid) {
|
||||
vpid_start = proc->name.vpid;
|
||||
@ -429,6 +434,10 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
|
||||
continue;
|
||||
}
|
||||
/* ignore procs from other jobs */
|
||||
if (proc->name.jobid != jdata->jobid) {
|
||||
continue;
|
||||
}
|
||||
if (ORTE_VPID_INVALID == proc->name.vpid) {
|
||||
proc->name.vpid = vpid++;
|
||||
}
|
||||
@ -453,6 +462,10 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
|
||||
continue;
|
||||
}
|
||||
/* ignore procs from other jobs */
|
||||
if (proc->name.jobid != jdata->jobid) {
|
||||
continue;
|
||||
}
|
||||
if (ORTE_VPID_INVALID == proc->name.vpid) {
|
||||
proc->name.vpid = vpid;
|
||||
vpid += map->num_nodes;
|
||||
@ -518,6 +531,9 @@ int orte_rmaps_base_compute_local_ranks(orte_job_t *jdata)
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
|
||||
continue;
|
||||
}
|
||||
/* only look at procs for this job when
|
||||
* determining local rank
|
||||
*/
|
||||
if (proc->name.jobid == jdata->jobid &&
|
||||
ORTE_LOCAL_RANK_INVALID == proc->local_rank &&
|
||||
proc->name.vpid < minv) {
|
||||
@ -598,6 +614,10 @@ retry_lr:
|
||||
if (NULL == (proc = (orte_proc_t *) opal_pointer_array_get_item(newnode->procs, k))) {
|
||||
continue;
|
||||
}
|
||||
/* ignore procs from other jobs */
|
||||
if (proc->name.jobid != jdata->jobid) {
|
||||
continue;
|
||||
}
|
||||
if (local_rank == proc->local_rank) {
|
||||
local_rank++;
|
||||
goto retry_lr;
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user