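Keep the system functioning when multiple launches occur simultaneously.
This is a bit of a hack, but it does seem to allow the system to work: when cancelling the lingering app-launch-callback recv, an ORTE_ERR_NOT_FOUND result is no longer treated as an error, and orte_plm_base_reset_job now resets num_launched and num_reported to zero instead of decrementing them. A better solution is being discussed. This commit was SVN r21705.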
Этот коммит содержится в:
родитель
03a0b04ab8
Коммит
4c1eb040b0
@ -980,7 +980,8 @@ int orte_plm_base_report_launched(orte_jobid_t job)
|
|||||||
ORTE_PROGRESSED_WAIT(app_launch_failed, jdata->num_launched, jdata->num_procs);
|
ORTE_PROGRESSED_WAIT(app_launch_failed, jdata->num_launched, jdata->num_procs);
|
||||||
|
|
||||||
/* cancel the lingering recv */
|
/* cancel the lingering recv */
|
||||||
if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_APP_LAUNCH_CALLBACK))) {
|
if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_APP_LAUNCH_CALLBACK)) &&
|
||||||
|
ORTE_ERR_NOT_FOUND != rc) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
@ -381,6 +381,7 @@ void orte_plm_base_recv(int status, orte_process_name_t* sender,
|
|||||||
NULL))) {
|
NULL))) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1394,8 +1394,6 @@ void orte_plm_base_reset_job(orte_job_t *jdata)
|
|||||||
}
|
}
|
||||||
/* adjust job accounting */
|
/* adjust job accounting */
|
||||||
jdata->num_terminated--;
|
jdata->num_terminated--;
|
||||||
jdata->num_launched--;
|
|
||||||
jdata->num_reported--;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* clear the info on who aborted */
|
/* clear the info on who aborted */
|
||||||
@ -1404,4 +1402,7 @@ void orte_plm_base_reset_job(orte_job_t *jdata)
|
|||||||
OBJ_RELEASE(jdata->aborted_proc); /* maintain reference count */
|
OBJ_RELEASE(jdata->aborted_proc); /* maintain reference count */
|
||||||
jdata->aborted_proc = NULL;
|
jdata->aborted_proc = NULL;
|
||||||
}
|
}
|
||||||
|
/* since every daemon will be reporting status for every proc, reset these to zero */
|
||||||
|
jdata->num_launched = 0;
|
||||||
|
jdata->num_reported = 0;
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user