1
1

Since there is no defined behavior for the case where all application procs exit normally but some or all of them return non-zero exit codes, just output a warning telling the user how many procs meet that criterion. Let the return code of mpirun in that scenario reflect any errors in OMPI/ORTE itself.

This is clearly a temporary solution until a defined behavior can be established.

This commit was SVN r23075.
Этот коммит содержится в:
Ralph Castain 2010-04-30 19:01:10 +00:00
родитель 29f02d88c6
Коммит c93af95351

Просмотреть файл

@ -130,7 +130,7 @@ static int update_state(orte_jobid_t job,
orte_proc_state_to_str(state), exit_code)); orte_proc_state_to_str(state), exit_code));
/* /*
* if orterun is trying to shutdown, just let it * if orte is trying to shutdown, just let it
*/ */
if (orte_errmgr_base.shutting_down) { if (orte_errmgr_base.shutting_down) {
return ORTE_SUCCESS; return ORTE_SUCCESS;
@ -580,7 +580,7 @@ static void check_job_complete(orte_job_t *jdata)
orte_job_map_t *map; orte_job_map_t *map;
orte_std_cntr_t index; orte_std_cntr_t index;
bool one_still_alive; bool one_still_alive;
orte_exit_code_t first_non_zero=0; orte_vpid_t non_zero=0;
#if 0 #if 0
/* Check if FileM is active. If so then keep processing. */ /* Check if FileM is active. If so then keep processing. */
@ -595,8 +595,8 @@ static void check_job_complete(orte_job_t *jdata)
continue; continue;
} }
if (0 == first_non_zero && 0 != proc->exit_code) { if (0 != proc->exit_code) {
first_non_zero = proc->exit_code; non_zero++;
} }
/* /*
@ -732,8 +732,15 @@ static void check_job_complete(orte_job_t *jdata)
/* turn off any sensor monitors on this job */ /* turn off any sensor monitors on this job */
orte_sensor.stop(jdata->jobid); orte_sensor.stop(jdata->jobid);
#endif #endif
/* update our exit code */ if (0 < non_zero) {
ORTE_UPDATE_EXIT_STATUS(first_non_zero); /* warn user */
opal_output(orte_clean_output,
"-----------------------------------------------------\n\n"
"While job %s terminated normally, %s processes returned\n"
"non-zero exit codes. Further examination may be required.\n\n"
"-----------------------------------------------------",
ORTE_JOBID_PRINT(jdata->jobid), ORTE_VPID_PRINT(non_zero));
}
OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base.output, OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base.output,
"%s errmgr:hnp:check_job_completed declared job %s normally terminated - checking all jobs", "%s errmgr:hnp:check_job_completed declared job %s normally terminated - checking all jobs",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),