1
1

A few minor cleanups to where thread locks are released.

Reset mpirun's exit code when we restart failed procs

This commit was SVN r21955.
Этот коммит содержится в:
Ralph Castain 2009-09-09 05:31:06 +00:00
родитель 8ae4b55d16
Коммит 51b13b3d5c
4 изменённых файла: 13 добавлений и 2 удаления

Просмотреть файл

@@ -1059,6 +1059,10 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
if (orted_spin_flag) {
opal_argv_append(argc, argv, "--spin");
}
if (orte_leave_session_attached) {
opal_argv_append(argc, argv, "--leave-session-attached");
}
if ((int)ORTE_VPID_INVALID != orted_debug_failure) {
opal_argv_append(argc, argv, "--debug-failure");
asprintf(&param, "%d", orted_debug_failure);

Просмотреть файл

@@ -149,6 +149,10 @@ void process_msg(int fd, short event, void *data)
OPAL_THREAD_LOCK(&lock);
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:receive processing msg",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* tag that we are processing the list */
processing = true;

Просмотреть файл

@@ -1406,4 +1406,6 @@ void orte_plm_base_reset_job(orte_job_t *jdata)
/* since every daemon will be reporting status for every proc, reset these to zero */
jdata->num_launched = 0;
jdata->num_reported = 0;
/* since we are restarting the failed proc, reset the exit status */
ORTE_RESET_EXIT_STATUS();
}

Просмотреть файл

@@ -973,10 +973,9 @@ int orte_plm_rsh_launch(orte_job_t *jdata)
opal_condition_wait(&orte_plm_globals.spawn_in_progress_cond, &orte_plm_globals.spawn_lock);
}
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, "released to spawn"));
OPAL_THREAD_UNLOCK(&orte_plm_globals.spawn_lock);
orte_plm_globals.spawn_in_progress = true;
orte_plm_globals.spawn_status = ORTE_ERR_FATAL;
OPAL_THREAD_UNLOCK(&orte_plm_globals.spawn_lock);
if (jdata->controls & ORTE_JOB_CONTROL_LOCAL_SLAVE) {
/* if this is a request to launch a local slave,
@@ -987,7 +986,9 @@ int orte_plm_rsh_launch(orte_job_t *jdata)
* including the target hosts
*/
rc = orte_plm_base_local_slave_launch(jdata);
OPAL_THREAD_LOCK(&orte_plm_globals.spawn_lock);
orte_plm_globals.spawn_in_progress = false;
OPAL_THREAD_UNLOCK(&orte_plm_globals.spawn_lock);
return rc;
}