From 3da579139be36cdca20cb84b25002b8990566223 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Tue, 18 Mar 2014 21:31:01 +0000 Subject: [PATCH] More corrections w.r.t. process groups To accompany r31092 and r310924, also make sure to create a new process group in the child right after the orted forks. Add a trivial configure check for setpgid, and only call setpgid/getpgid when setpgid is available. Without this commit, killing the entire process group can do unexpected things (e.g., kill the orted, mpirun, and even mpirun's parent!). cmr=v1.7.5:reviewer=rhc This commit was SVN r31132. The following SVN revision numbers were found above: r31092 --> open-mpi/ompi@99c9ecaed0d047271f0542f5928e3d0d095f03ea The following SVN revisions from the original message are invalid or inconsistent and therefore were not cross-referenced: r310924 --- configure.ac | 2 +- orte/mca/odls/default/odls_default_module.c | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index aaec09e93d..c804eb7968 100644 --- a/configure.ac +++ b/configure.ac @@ -854,7 +854,7 @@ OMPI_CHECK_FUNC_LIB([dirname], [gen]) # Darwin doesn't need -lm, as it's a symlink to libSystem.dylib OMPI_CHECK_FUNC_LIB([ceil], [m]) -AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf openpty isatty getpwuid fork waitpid execve pipe ptsname setsid mmap tcgetpgrp posix_memalign strsignal sysconf syslog vsyslog regcmp regexec regfree _NSGetEnviron socketpair strncpy_s _strdup usleep mkfifo dbopen dbm_open statfs statvfs]) +AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf openpty isatty getpwuid fork waitpid execve pipe ptsname setsid mmap tcgetpgrp posix_memalign strsignal sysconf syslog vsyslog regcmp regexec regfree _NSGetEnviron socketpair strncpy_s _strdup usleep mkfifo dbopen dbm_open statfs statvfs setpgid]) # Sanity check: ensure that we got at least one of statfs or statvfs. 
if test $ac_cv_func_statfs = no -a $ac_cv_func_statvfs = no; then diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index 49c1765a1e..88d6231f13 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -257,6 +257,7 @@ static int odls_default_kill_local(pid_t pid, int signum) { pid_t pgrp; +#if HAVE_SETPGID pgrp = getpgid(pid); if (-1 != pgrp) { /* target the lead process of the process @@ -268,6 +269,7 @@ static int odls_default_kill_local(pid_t pid, int signum) */ pid = pgrp; } +#endif if (0 != kill(pid, signum)) { if (ESRCH != errno) { OPAL_OUTPUT_VERBOSE((2, orte_odls_base_framework.framework_output, @@ -908,6 +910,9 @@ static int odls_default_fork_local_proc(orte_app_context_t* context, if (pid == 0) { close(p[0]); +#if HAVE_SETPGID + setpgid(0, 0); +#endif do_child(context, child, environ_copy, jobdat, p[1], opts); /* Does not return */ }