1
1

Fix the sigkill timeout sleep so that a SIGCHLD cannot keep it from completing.

* The user can set `-mca odls_base_sigkill_timeout 30` to have ORTE wait
   30 seconds before sending SIGTERM, and then another 30 seconds before
   sending SIGKILL to any remaining processes. This sequence usually runs on
   an abnormal termination. Sometimes the user wants to delay the cleanup to
   give the system time to write out a core file or run other diagnostics.
 * The problem is that child processes may be completing while ORTE is
   waiting out this timeout. The resulting SIGCHLD interrupts the `sleep`
   system call, which then returns early. Without a loop that resumes the
   sleep, the timeout could effectively be ignored in this case.
   - `sleep` returns the number of seconds remaining to sleep. If it was
     interrupted by a signal, the return value is a positive number less
     than or equal to the argument passed to it. If it slept the whole
     time, it returns 0.

Signed-off-by: Joshua Hursey <jhursey@us.ibm.com>
Этот коммит содержится в:
Joshua Hursey 2019-10-01 17:32:39 -04:00
родитель 7ddfa6950b
Коммит 0e8a97c598

Просмотреть файл

@ -1767,7 +1767,7 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
orte_proc_t *child; orte_proc_t *child;
opal_list_t procs_killed; opal_list_t procs_killed;
orte_proc_t *proc, proctmp; orte_proc_t *proc, proctmp;
int i, j; int i, j, ret;
opal_pointer_array_t procarray, *procptr; opal_pointer_array_t procarray, *procptr;
bool do_cleanup; bool do_cleanup;
orte_odls_quick_caddy_t *cd; orte_odls_quick_caddy_t *cd;
@ -1913,7 +1913,17 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
/* if we are issuing signals, then we need to wait a little /* if we are issuing signals, then we need to wait a little
* and send the next in sequence */ * and send the next in sequence */
if (0 < opal_list_get_size(&procs_killed)) { if (0 < opal_list_get_size(&procs_killed)) {
sleep(orte_odls_globals.timeout_before_sigkill); /* Wait a little. Do so in a loop since sleep() can be interrupted by a
* signal. Most likely SIGCHLD in this case */
ret = orte_odls_globals.timeout_before_sigkill;
while( ret > 0 ) {
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
"%s Sleep %d sec (total = %d)",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ret, orte_odls_globals.timeout_before_sigkill));
ret = sleep(ret);
}
/* issue a SIGTERM to all */ /* issue a SIGTERM to all */
OPAL_LIST_FOREACH(cd, &procs_killed, orte_odls_quick_caddy_t) { OPAL_LIST_FOREACH(cd, &procs_killed, orte_odls_quick_caddy_t) {
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
@ -1922,8 +1932,18 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
ORTE_NAME_PRINT(&cd->child->name))); ORTE_NAME_PRINT(&cd->child->name)));
kill_local(cd->child->pid, SIGTERM); kill_local(cd->child->pid, SIGTERM);
} }
/* wait a little again */
sleep(orte_odls_globals.timeout_before_sigkill); /* Wait a little. Do so in a loop since sleep() can be interrupted by a
* signal. Most likely SIGCHLD in this case */
ret = orte_odls_globals.timeout_before_sigkill;
while( ret > 0 ) {
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
"%s Sleep %d sec (total = %d)",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ret, orte_odls_globals.timeout_before_sigkill));
ret = sleep(ret);
}
/* issue a SIGKILL to all */ /* issue a SIGKILL to all */
OPAL_LIST_FOREACH(cd, &procs_killed, orte_odls_quick_caddy_t) { OPAL_LIST_FOREACH(cd, &procs_killed, orte_odls_quick_caddy_t) {
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,