From 88313debc270ae4adf132a296b035bc524ce94bd Mon Sep 17 00:00:00 2001
From: Ralph Castain
Date: Fri, 2 Dec 2016 03:36:22 -0800
Subject: [PATCH] Per discussion on email thread, restore placement of child
 procs in their own process group so that any signal sent to one of our
 children is automatically propagated to any child process they might have
 spawned.

Signed-off-by: Ralph Castain
---
 orte/mca/odls/base/odls_base_default_fns.c  |  2 +-
 orte/mca/odls/default/odls_default_module.c | 23 ++++++++++++++++++++-
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c
index 5e6704964e..edc7345e94 100644
--- a/orte/mca/odls/base/odls_base_default_fns.c
+++ b/orte/mca/odls/base/odls_base_default_fns.c
@@ -1466,7 +1466,7 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
         if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(procptr, i))) {
             continue;
         }
-        for(j=0; j < orte_local_children->size; j++) {
+        for (j=0; j < orte_local_children->size; j++) {
             if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, j))) {
                 continue;
             }
diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c
index 605b790845..635f7f8a20 100644
--- a/orte/mca/odls/default/odls_default_module.c
+++ b/orte/mca/odls/default/odls_default_module.c
@@ -167,6 +167,22 @@ orte_odls_base_module_t orte_odls_default_module = {
 /* deliver a signal to a specified pid. */
 static int odls_default_kill_local(pid_t pid, int signum)
 {
+#if HAVE_SETPGID
+    pid_t pgrp;
+
+    pgrp = getpgid(pid);
+    if (-1 != pgrp) {
+        /* target the entire process group: a
+         * negative pid makes kill() deliver the
+         * signal to every member of that group. This
+         * ensures that the signal is seen by any
+         * child processes our child may have
+         * started
+         */
+        pid = -pgrp;
+    }
+#endif
+
     if (0 != kill(pid, signum)) {
         if (ESRCH != errno) {
             OPAL_OUTPUT_VERBOSE((2, orte_odls_base_framework.framework_output,
@@ -313,6 +329,12 @@ static int do_child(orte_app_context_t* context,
     long fd, fdmax = sysconf(_SC_OPEN_MAX);
     char *param, *msg;
 
+#if HAVE_SETPGID
+    /* Set a new process group for this child, so that any
+     * signals we send to it will reach any children it spawns */
+    setpgid(0, 0);
+#endif
+
     /* Setup the pipe to be close-on-exec */
     opal_fd_set_cloexec(write_fd);
 
@@ -717,4 +739,3 @@ static int orte_odls_default_restart_proc(orte_proc_t *child)
     }
     return rc;
 }
-