1
1

Enable the system to keep functioning even when multiple launches are occurring simultaneously.

This is a bit of a hack, but it does seem to allow the system to work. A better solution is being discussed.

This commit was SVN r21705.
Этот коммит содержится в:
Ralph Castain 2009-07-17 02:28:47 +00:00
родитель 03a0b04ab8
Коммит 4c1eb040b0
3 изменённых файла: 6 добавлений и 3 удаления

Просмотреть файл

@@ -980,7 +980,8 @@ int orte_plm_base_report_launched(orte_jobid_t job)
ORTE_PROGRESSED_WAIT(app_launch_failed, jdata->num_launched, jdata->num_procs);
/* cancel the lingering recv */
-if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_APP_LAUNCH_CALLBACK))) {
+if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_APP_LAUNCH_CALLBACK)) &&
+ORTE_ERR_NOT_FOUND != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}

Просмотреть файл

@@ -381,6 +381,7 @@ void orte_plm_base_recv(int status, orte_process_name_t* sender,
NULL))) {
ORTE_ERROR_LOG(rc);
}
return;
}

Просмотреть файл

@@ -1394,8 +1394,6 @@ void orte_plm_base_reset_job(orte_job_t *jdata)
}
/* adjust job accounting */
jdata->num_terminated--;
-jdata->num_launched--;
-jdata->num_reported--;
}
}
/* clear the info on who aborted */
@@ -1404,4 +1402,7 @@ void orte_plm_base_reset_job(orte_job_t *jdata)
OBJ_RELEASE(jdata->aborted_proc); /* maintain reference count */
jdata->aborted_proc = NULL;
}
+/* since every daemon will be reporting status for every proc, reset these to zero */
+jdata->num_launched = 0;
+jdata->num_reported = 0;
}