One more correction to mpirun exit codes - clean up the application proc's exit codes in the orted so that non-zero exit codes generated by mpirun itself don't get "munged".
Modify the multi_abort program so that each aborting process returns a different exit code - this allows us to tell which one was being reported. This commit was SVN r17895.
Этот коммит содержится в:
родитель
27a73ad9ee
Коммит
6bb139e4f2
@ -1436,9 +1436,6 @@ GOTCHILD:
|
|||||||
goto MOVEON;
|
goto MOVEON;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* save the exit code */
|
|
||||||
child->exit_code = status;
|
|
||||||
|
|
||||||
/* If this child was the (vpid==0), we hooked it up to orterun's
|
/* If this child was the (vpid==0), we hooked it up to orterun's
|
||||||
STDIN SOURCE earlier (do not change this without also changing
|
STDIN SOURCE earlier (do not change this without also changing
|
||||||
odsl_default_fork_local_proc()). So we have to tell the SOURCE
|
odsl_default_fork_local_proc()). So we have to tell the SOURCE
|
||||||
@ -1481,6 +1478,9 @@ GOTCHILD:
|
|||||||
|
|
||||||
/* determine the state of this process */
|
/* determine the state of this process */
|
||||||
if(WIFEXITED(status)) {
|
if(WIFEXITED(status)) {
|
||||||
|
/* set the exit status appropriately */
|
||||||
|
child->exit_code = WEXITSTATUS(status);
|
||||||
|
|
||||||
/* even though the process exited "normally", it is quite
|
/* even though the process exited "normally", it is quite
|
||||||
* possible that this happened via an orte_abort call - in
|
* possible that this happened via an orte_abort call - in
|
||||||
* which case, we need to indicate this was an "abnormal"
|
* which case, we need to indicate this was an "abnormal"
|
||||||
@ -1556,6 +1556,13 @@ GOTCHILD:
|
|||||||
* abnormal, so indicate that condition
|
* abnormal, so indicate that condition
|
||||||
*/
|
*/
|
||||||
child->state = ORTE_PROC_STATE_ABORTED_BY_SIG;
|
child->state = ORTE_PROC_STATE_ABORTED_BY_SIG;
|
||||||
|
/* If a process was killed by a signal, then make the
|
||||||
|
* exit code of orterun be "signo + 128" so that "prog"
|
||||||
|
* and "orterun prog" will both set the same status
|
||||||
|
* value for the shell
|
||||||
|
*/
|
||||||
|
child->exit_code = WTERMSIG(status) + 128;
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
|
||||||
"%s odls:wait_local_proc child process %s terminated with signal",
|
"%s odls:wait_local_proc child process %s terminated with signal",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
|
@ -18,7 +18,7 @@ int main(int argc, char* argv[])
|
|||||||
|
|
||||||
printf("Hello, World, I am %d of %d\n", rank, size);
|
printf("Hello, World, I am %d of %d\n", rank, size);
|
||||||
|
|
||||||
if (0 != rank) MPI_Abort(MPI_COMM_WORLD, 2);
|
if (0 != rank) MPI_Abort(MPI_COMM_WORLD, rank);
|
||||||
|
|
||||||
MPI_Finalize();
|
MPI_Finalize();
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -580,19 +580,6 @@ static void job_completed(int trigpipe, short event, void *arg)
|
|||||||
num_killed, ((num_killed > 1) ? "es" : ""), orterun_basename);
|
num_killed, ((num_killed > 1) ? "es" : ""), orterun_basename);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* Make sure we propagate the exit code */
|
|
||||||
if (WIFEXITED(orte_exit_status)) {
|
|
||||||
orte_exit_status = WEXITSTATUS(orte_exit_status);
|
|
||||||
} else if (ORTE_JOB_STATE_FAILED_TO_START == exit_state ||
|
|
||||||
ORTE_JOB_STATE_ABORTED_WO_SYNC == exit_state) {
|
|
||||||
/* ensure we don't treat this like a signal */
|
|
||||||
} else {
|
|
||||||
/* If a process was killed by a signal, then make the
|
|
||||||
* exit code of orterun be "signo + 128" so that "prog"
|
|
||||||
* and "orterun prog" will both set the same status
|
|
||||||
* value for the shell */
|
|
||||||
orte_exit_status = WTERMSIG(orte_exit_status) + 128;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* the job is complete - now setup an event that will
|
/* the job is complete - now setup an event that will
|
||||||
* trigger when the orteds are gone and tell the orteds that it is
|
* trigger when the orteds are gone and tell the orteds that it is
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user