1
1

Ensure we have a correct, non-zero exit status when daemons or procs abort or fail to launch

This commit was SVN r20478.
Этот коммит содержится в:
Ralph Castain 2009-02-07 00:57:17 +00:00
родитель 8924e00e4c
Коммит 5bfd1f3fd0

Просмотреть файл

@ -295,6 +295,7 @@ void orte_plm_base_launch_failed(orte_jobid_t job, pid_t pid,
{
orte_job_t *jdata;
char *pidstr;
int sts;
if (!opal_atomic_trylock(&orte_abort_inprogress_lock)) { /* returns 1 if already locked */
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
@ -305,10 +306,17 @@ void orte_plm_base_launch_failed(orte_jobid_t job, pid_t pid,
}
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
"%s plm:base:launch_failed for job %s",
"%s plm:base:launch_failed for job %s, status %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(job)));
ORTE_JOBID_PRINT(job), status));
/* no matter what, we must exit with a non-zero status */
if (0 == status) {
sts = ORTE_ERROR_DEFAULT_EXIT_CODE;
} else {
sts = status;
}
/* if we didn't even attempt to launch, then just quietly update
* the job record and leave
*/
@ -339,17 +347,21 @@ void orte_plm_base_launch_failed(orte_jobid_t job, pid_t pid,
if (WCOREDUMP(status)) {
orte_show_help("help-plm-base.txt", "daemon-died-signal-core", true,
pidstr, WTERMSIG(status));
sts = WTERMSIG(status);
} else {
orte_show_help("help-plm-base.txt", "daemon-died-signal", true,
pidstr, WTERMSIG(status));
sts = WTERMSIG(status);
}
#else
orte_show_help("help-plm-base.txt", "daemon-died-signal", true,
pidstr, WTERMSIG(status));
sts = WTERMSIG(status);
#endif /* WCOREDUMP */
} else {
orte_show_help("help-plm-base.txt", "daemon-died-no-signal", true,
pidstr, WEXITSTATUS(status));
sts = WEXITSTATUS(status);
}
orted_failed_launch = true;
free(pidstr);
@ -370,7 +382,7 @@ PROCESS:
WAKEUP:
/* set orterun's exit code and wakeup so it can exit */
ORTE_UPDATE_EXIT_STATUS(status);
ORTE_UPDATE_EXIT_STATUS(sts);
orte_trigger_event(&orte_exit);
}
@ -789,6 +801,10 @@ void orte_plm_base_app_report_launch(int fd, short event, void *data)
jdata->aborted_proc = procs[vpid]; /* only store this once */
jdata->state = ORTE_JOB_STATE_FAILED_TO_START; /* update the job state */
}
/* ensure we have a non-zero exit code */
if (0 == jdata->aborted_proc->exit_code) {
jdata->aborted_proc->exit_code = ORTE_ERROR_DEFAULT_EXIT_CODE;
}
app_launch_failed = true;
goto CLEANUP;
}