A few minor cleanups in where threads are unlocked.
Reset mpirun's exit code when we restart failed procs. This commit was SVN r21955.
Этот коммит содержится в:
родитель
8ae4b55d16
Коммит
51b13b3d5c
@ -1059,6 +1059,10 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
|
|||||||
if (orted_spin_flag) {
|
if (orted_spin_flag) {
|
||||||
opal_argv_append(argc, argv, "--spin");
|
opal_argv_append(argc, argv, "--spin");
|
||||||
}
|
}
|
||||||
|
if (orte_leave_session_attached) {
|
||||||
|
opal_argv_append(argc, argv, "--leave-session-attached");
|
||||||
|
}
|
||||||
|
|
||||||
if ((int)ORTE_VPID_INVALID != orted_debug_failure) {
|
if ((int)ORTE_VPID_INVALID != orted_debug_failure) {
|
||||||
opal_argv_append(argc, argv, "--debug-failure");
|
opal_argv_append(argc, argv, "--debug-failure");
|
||||||
asprintf(&param, "%d", orted_debug_failure);
|
asprintf(&param, "%d", orted_debug_failure);
|
||||||
|
@ -149,6 +149,10 @@ void process_msg(int fd, short event, void *data)
|
|||||||
|
|
||||||
OPAL_THREAD_LOCK(&lock);
|
OPAL_THREAD_LOCK(&lock);
|
||||||
|
|
||||||
|
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||||
|
"%s plm:base:receive processing msg",
|
||||||
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||||
|
|
||||||
/* tag that we are processing the list */
|
/* tag that we are processing the list */
|
||||||
processing = true;
|
processing = true;
|
||||||
|
|
||||||
|
@ -1406,4 +1406,6 @@ void orte_plm_base_reset_job(orte_job_t *jdata)
|
|||||||
/* since every daemon will be reporting status for every proc, reset these to zero */
|
/* since every daemon will be reporting status for every proc, reset these to zero */
|
||||||
jdata->num_launched = 0;
|
jdata->num_launched = 0;
|
||||||
jdata->num_reported = 0;
|
jdata->num_reported = 0;
|
||||||
|
/* since we are restarting the failed proc, reset the exit status */
|
||||||
|
ORTE_RESET_EXIT_STATUS();
|
||||||
}
|
}
|
||||||
|
@ -973,10 +973,9 @@ int orte_plm_rsh_launch(orte_job_t *jdata)
|
|||||||
opal_condition_wait(&orte_plm_globals.spawn_in_progress_cond, &orte_plm_globals.spawn_lock);
|
opal_condition_wait(&orte_plm_globals.spawn_in_progress_cond, &orte_plm_globals.spawn_lock);
|
||||||
}
|
}
|
||||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, "released to spawn"));
|
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, "released to spawn"));
|
||||||
OPAL_THREAD_UNLOCK(&orte_plm_globals.spawn_lock);
|
|
||||||
|
|
||||||
orte_plm_globals.spawn_in_progress = true;
|
orte_plm_globals.spawn_in_progress = true;
|
||||||
orte_plm_globals.spawn_status = ORTE_ERR_FATAL;
|
orte_plm_globals.spawn_status = ORTE_ERR_FATAL;
|
||||||
|
OPAL_THREAD_UNLOCK(&orte_plm_globals.spawn_lock);
|
||||||
|
|
||||||
if (jdata->controls & ORTE_JOB_CONTROL_LOCAL_SLAVE) {
|
if (jdata->controls & ORTE_JOB_CONTROL_LOCAL_SLAVE) {
|
||||||
/* if this is a request to launch a local slave,
|
/* if this is a request to launch a local slave,
|
||||||
@ -987,7 +986,9 @@ int orte_plm_rsh_launch(orte_job_t *jdata)
|
|||||||
* including the target hosts
|
* including the target hosts
|
||||||
*/
|
*/
|
||||||
rc = orte_plm_base_local_slave_launch(jdata);
|
rc = orte_plm_base_local_slave_launch(jdata);
|
||||||
|
OPAL_THREAD_LOCK(&orte_plm_globals.spawn_lock);
|
||||||
orte_plm_globals.spawn_in_progress = false;
|
orte_plm_globals.spawn_in_progress = false;
|
||||||
|
OPAL_THREAD_UNLOCK(&orte_plm_globals.spawn_lock);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user