Ensure we have a correct, non-zero exit status when daemons or procs abort or fail to launch
This commit was SVN r20478.
Этот коммит содержится в:
родитель
8924e00e4c
Коммит
5bfd1f3fd0
@ -295,6 +295,7 @@ void orte_plm_base_launch_failed(orte_jobid_t job, pid_t pid,
|
||||
{
|
||||
orte_job_t *jdata;
|
||||
char *pidstr;
|
||||
int sts;
|
||||
|
||||
if (!opal_atomic_trylock(&orte_abort_inprogress_lock)) { /* returns 1 if already locked */
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
|
||||
@ -305,10 +306,17 @@ void orte_plm_base_launch_failed(orte_jobid_t job, pid_t pid,
|
||||
}
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_plm_globals.output,
|
||||
"%s plm:base:launch_failed for job %s",
|
||||
"%s plm:base:launch_failed for job %s, status %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(job)));
|
||||
ORTE_JOBID_PRINT(job), status));
|
||||
|
||||
/* no matter what, we must exit with a non-zero status */
|
||||
if (0 == status) {
|
||||
sts = ORTE_ERROR_DEFAULT_EXIT_CODE;
|
||||
} else {
|
||||
sts = status;
|
||||
}
|
||||
|
||||
/* if we didn't even attempt to launch, then just quietly update
|
||||
* the job record and leave
|
||||
*/
|
||||
@ -339,17 +347,21 @@ void orte_plm_base_launch_failed(orte_jobid_t job, pid_t pid,
|
||||
if (WCOREDUMP(status)) {
|
||||
orte_show_help("help-plm-base.txt", "daemon-died-signal-core", true,
|
||||
pidstr, WTERMSIG(status));
|
||||
sts = WTERMSIG(status);
|
||||
} else {
|
||||
orte_show_help("help-plm-base.txt", "daemon-died-signal", true,
|
||||
pidstr, WTERMSIG(status));
|
||||
sts = WTERMSIG(status);
|
||||
}
|
||||
#else
|
||||
orte_show_help("help-plm-base.txt", "daemon-died-signal", true,
|
||||
pidstr, WTERMSIG(status));
|
||||
sts = WTERMSIG(status);
|
||||
#endif /* WCOREDUMP */
|
||||
} else {
|
||||
orte_show_help("help-plm-base.txt", "daemon-died-no-signal", true,
|
||||
pidstr, WEXITSTATUS(status));
|
||||
sts = WEXITSTATUS(status);
|
||||
}
|
||||
orted_failed_launch = true;
|
||||
free(pidstr);
|
||||
@ -370,7 +382,7 @@ PROCESS:
|
||||
|
||||
WAKEUP:
|
||||
/* set orterun's exit code and wakeup so it can exit */
|
||||
ORTE_UPDATE_EXIT_STATUS(status);
|
||||
ORTE_UPDATE_EXIT_STATUS(sts);
|
||||
orte_trigger_event(&orte_exit);
|
||||
}
|
||||
|
||||
@ -789,6 +801,10 @@ void orte_plm_base_app_report_launch(int fd, short event, void *data)
|
||||
jdata->aborted_proc = procs[vpid]; /* only store this once */
|
||||
jdata->state = ORTE_JOB_STATE_FAILED_TO_START; /* update the job state */
|
||||
}
|
||||
/* ensure we have a non-zero exit code */
|
||||
if (0 == jdata->aborted_proc->exit_code) {
|
||||
jdata->aborted_proc->exit_code = ORTE_ERROR_DEFAULT_EXIT_CODE;
|
||||
}
|
||||
app_launch_failed = true;
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user