From b5bf0a7f1d5e75d66b902b40976e0ea891a17a85 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 30 Nov 2017 07:18:46 -0800 Subject: [PATCH] Add a new posix_spawn component to the ODLS framework. Only selectable when specifically requested via "-mca odls pspawn" Note that there are several concerns: * we aren't getting SIGCHLD calls when the procs terminate * we aren't seeing the IO pipes close on termination, though we are getting output forwarded to mpirun * I haven't found a way to bind the child process prior to exec. If we want to use this method, we probably need someone to implement a cgroup component for the orte/rtc framework Signed-off-by: Ralph Castain --- .gitignore | 1 + .../mca/odls/default/odls_default_component.c | 3 +- orte/mca/odls/pspawn/Makefile.am | 49 ++ orte/mca/odls/pspawn/configure.m4 | 33 ++ .../mca/odls/pspawn/help-orte-odls-pspawn.txt | 140 ++++++ orte/mca/odls/pspawn/odls_pspawn.c | 465 ++++++++++++++++++ orte/mca/odls/pspawn/odls_pspawn.h | 42 ++ orte/mca/odls/pspawn/odls_pspawn_component.c | 103 ++++ orte/mca/odls/pspawn/owner.txt | 7 + orte/test/system/Makefile | 2 +- orte/test/system/pspawn.c | 45 ++ 11 files changed, 888 insertions(+), 2 deletions(-) create mode 100644 orte/mca/odls/pspawn/Makefile.am create mode 100644 orte/mca/odls/pspawn/configure.m4 create mode 100644 orte/mca/odls/pspawn/help-orte-odls-pspawn.txt create mode 100644 orte/mca/odls/pspawn/odls_pspawn.c create mode 100644 orte/mca/odls/pspawn/odls_pspawn.h create mode 100644 orte/mca/odls/pspawn/odls_pspawn_component.c create mode 100644 orte/mca/odls/pspawn/owner.txt create mode 100644 orte/test/system/pspawn.c diff --git a/.gitignore b/.gitignore index 6c678878f3..a41542010d 100644 --- a/.gitignore +++ b/.gitignore @@ -474,6 +474,7 @@ orte/test/system/orte_sensor orte/test/system/event-threads orte/test/system/test-time orte/test/system/psm_keygen +orte/test/system/pspawn orte/test/system/regex orte/test/system/orte_errors orte/test/system/evthread-test 
diff --git a/orte/mca/odls/default/odls_default_component.c b/orte/mca/odls/default/odls_default_component.c index 663e674acd..c911043923 100644 --- a/orte/mca/odls/default/odls_default_component.c +++ b/orte/mca/odls/default/odls_default_component.c @@ -12,6 +12,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -85,7 +86,7 @@ int orte_odls_default_component_query(mca_base_module_t **module, int *priority) * if we do. Hence, we only get here if we CAN build - in which * case, we definitely should be considered for selection */ - *priority = 1; /* let others override us - we are the default */ + *priority = 10; /* let others override us - we are the default */ *module = (mca_base_module_t *) &orte_odls_default_module; return ORTE_SUCCESS; } diff --git a/orte/mca/odls/pspawn/Makefile.am b/orte/mca/odls/pspawn/Makefile.am new file mode 100644 index 0000000000..dbef87569c --- /dev/null +++ b/orte/mca/odls/pspawn/Makefile.am @@ -0,0 +1,49 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 IBM Corporation. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +dist_ortedata_DATA = help-orte-odls-pspawn.txt + +sources = \ + odls_pspawn.h \ + odls_pspawn_component.c \ + odls_pspawn.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if MCA_BUILD_orte_odls_pspawn_DSO +component_noinst = +component_install = mca_odls_pspawn.la +else +component_noinst = libmca_odls_pspawn.la +component_install = +endif + +mcacomponentdir = $(ortelibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_odls_pspawn_la_SOURCES = $(sources) +mca_odls_pspawn_la_LDFLAGS = -module -avoid-version +mca_odls_pspawn_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la + +noinst_LTLIBRARIES = $(component_noinst) +libmca_odls_pspawn_la_SOURCES =$(sources) +libmca_odls_pspawn_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/odls/pspawn/configure.m4 b/orte/mca/odls/pspawn/configure.m4 new file mode 100644 index 0000000000..a4c0d320ed --- /dev/null +++ b/orte/mca/odls/pspawn/configure.m4 @@ -0,0 +1,33 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2011 Los Alamos National Security, LLC. +# All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_orte_odls_pspawn_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_orte_odls_pspawn_CONFIG],[ + AC_CONFIG_FILES([orte/mca/odls/pspawn/Makefile]) + + AC_CHECK_FUNC([posix_spawn], [odls_pspawn_happy="yes"], [odls_pspawn_happy="no"]) + + AS_IF([test "$odls_pspawn_happy" = "yes"], [$1], [$2]) + +])dnl diff --git a/orte/mca/odls/pspawn/help-orte-odls-pspawn.txt b/orte/mca/odls/pspawn/help-orte-odls-pspawn.txt new file mode 100644 index 0000000000..06181b7c96 --- /dev/null +++ b/orte/mca/odls/pspawn/help-orte-odls-pspawn.txt @@ -0,0 +1,140 @@ +# -*- text -*- +# +# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. +# Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is a US/English help file. +# +[execve error] +Open MPI tried to fork a new process via the "execve" system call but +failed. Open MPI checks many things before attempting to launch a +child process, but nothing is perfect. This error may be indicative +of another problem on the target host, or even something as silly as +having specified a directory for your application. Your job will now +abort. 
+ + Local host: %s + Working dir: %s + Application name: %s + Error: %s +# +[binding not supported] +Open MPI tried to bind a new process, but process binding is not +supported on the host where it was launched. The process was killed +without launching the target application. Your job will now abort. + + Local host: %s + Application name: %s +# +[binding generic error] +Open MPI tried to bind a new process, but something went wrong. The +process was killed without launching the target application. Your job +will now abort. + + Local host: %s + Application name: %s + Error message: %s + Location: %s:%d +# +[bound to everything] +Open MPI tried to bind a new process to a specific set of processors, +but ended up binding it to *all* processors. This means that the new +process is effectively unbound. + +This is only a warning -- your job will continue. You can suppress +this warning in the future by setting the odls_warn_if_not_bound MCA +parameter to 0. + + Local host: %s + Application name: %s + Location: %s:%d +# +[slot list and paffinity_alone] +Open MPI detected that both a slot list was specified and the MCA +parameter "paffinity_alone" was set to true. Only one of these can be +used at a time. Your job will now abort. + + Local host: %s + Application name: %s +# +[iof setup failed] +Open MPI tried to launch a child process but the "IOF child setup" +failed. This should not happen. Your job will now abort. + + Local host: %s + Application name: %s +# +[not bound] +WARNING: Open MPI tried to bind a process but failed. This is a +warning only; your job will continue. + + Local host: %s + Application name: %s + Error message: %s + Location: %s:%d +# +[syscall fail] +A system call failed that should not have. In this particular case, +a warning or error message was not displayed that should have been. +Your job may behave unpredictably after this, or abort. 
+ + Local host: %s + Application name: %s + Function: %s + Location: %s:%d +# +[memory not bound] +WARNING: Open MPI tried to bind a process but failed. This is a +warning only; your job will continue, though performance may +be degraded. + + Local host: %s + Application name: %s + Error message: %s + Location: %s:%d + +# +[memory binding error] +Open MPI tried to bind memory for a new process but something went +wrong. The process was killed without launching the target +application. Your job will now abort. + + Local host: %s + Application name: %s + Error message: %s + Location: %s:%d +# +[set limit] +Error message received from: + + Local host: %s + Application name: %s + Location: %s:%d + +Message: + +%s +# +[incorrectly-bound] +WARNING: Open MPI incorrectly bound a process to the daemon's cores. +This is a warning only; your job will continue. + + Local host: %s + Application name: %s + Location: %s:%d diff --git a/orte/mca/odls/pspawn/odls_pspawn.c b/orte/mca/odls/pspawn/odls_pspawn.c new file mode 100644 index 0000000000..6e4083d6d1 --- /dev/null +++ b/orte/mca/odls/pspawn/odls_pspawn.c @@ -0,0 +1,465 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2008 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2010 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2007 Evergrid, Inc. All rights reserved. + * Copyright (c) 2008-2017 Cisco Systems, Inc. All rights reserved + * Copyright (c) 2010 IBM Corporation. All rights reserved. + * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights + * reserved. 
+ * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Rutgers, The State University of New Jersey. + * All rights reserved. + * Copyright (c) 2017 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/* + * There is a complicated sequence of events that occurs when the + * parent forks a child process that is intended to launch the target + * executable. + * + * Before the child process exec's the target executable, it might try + * to set the affinity of that new child process according to a + * complex series of rules. This binding may fail in a myriad of + * different ways. A lot of this code deals with reporting that error + * accurately to the end user. This is a complex task in itself + * because the child process is not "really" an ORTE process -- all + * error reporting must be proxied up to the parent who can use normal + * ORTE error reporting mechanisms. + * + * Here's a high-level description of what is occurring in this file: + * + * - parent opens a pipe + * - parent forks a child + * - parent blocks reading on the pipe: the pipe will either close + * (indicating that the child successfully exec'ed) or the child will + * write some proxied error data up the pipe + * + * - the child tries to set affinity and do other housekeeping in + * preparation of exec'ing the target executable + * - if the child fails anywhere along the way, it sends a message up + * the pipe to the parent indicating what happened -- including a + * rendered error message detailing the problem (i.e., human-readable). + * - it is important that the child renders the error message: there + * are so many errors that are possible that the child is really the + * only entity that has enough information to make an accurate error string + * to report back to the user. 
+ * - the parent reads this message + rendered string in and uses ORTE + * reporting mechanisms to display it to the user + * - if the problem was only a warning, the child continues processing + * (potentially eventually exec'ing the target executable). + * - if the problem was an error, the child exits and the parent + * handles the death of the child as appropriate (i.e., this ODLS + * simply reports the error -- other things decide what to do). + */ + +#include "orte_config.h" +#include "orte/constants.h" +#include "orte/types.h" + +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#ifdef HAVE_SYS_TYPES_H +#include +#endif +#ifdef HAVE_SYS_WAIT_H +#include +#endif +#include +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_SYS_TIME_H +#include +#endif +#ifdef HAVE_SYS_PARAM_H +#include +#endif +#ifdef HAVE_NETDB_H +#include +#endif +#include +#ifdef HAVE_SYS_STAT_H +#include +#endif /* HAVE_SYS_STAT_H */ +#include +#ifdef HAVE_SYS_SELECT_H +#include +#endif +#ifdef HAVE_DIRENT_H +#include +#endif +#include +#ifdef HAVE_UTIL_H +#include +#endif +#ifdef HAVE_PTY_H +#include +#endif +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_TERMIOS_H +#include +# ifdef HAVE_TERMIO_H +# include +# endif +#endif +#ifdef HAVE_LIBUTIL_H +#include +#endif + +#include + +#include "opal/mca/hwloc/hwloc-internal.h" +#include "opal/mca/hwloc/base/base.h" +#include "opal/class/opal_pointer_array.h" +#include "opal/util/opal_environ.h" +#include "opal/util/show_help.h" +#include "opal/util/sys_limits.h" +#include "opal/util/fd.h" + +#include "orte/util/show_help.h" +#include "orte/runtime/orte_wait.h" +#include "orte/runtime/orte_globals.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/ess/ess.h" +#include "orte/mca/iof/base/iof_base_setup.h" +#include "orte/mca/plm/plm.h" +#include "orte/mca/rtc/rtc.h" +#include "orte/util/name_fns.h" +#include "orte/util/threads.h" + +#include "orte/mca/odls/base/base.h" +#include 
"orte/mca/odls/base/odls_private.h" +#include "orte/mca/odls/pspawn/odls_pspawn.h" +#include "orte/orted/pmix/pmix_server.h" + +/* + * Module functions (function pointers used in a struct) + */ +static int orte_odls_pspawn_launch_local_procs(opal_buffer_t *data); +static int orte_odls_pspawn_kill_local_procs(opal_pointer_array_t *procs); +static int orte_odls_pspawn_signal_local_procs(const orte_process_name_t *proc, int32_t signal); +static int orte_odls_pspawn_restart_proc(orte_proc_t *child); + + +/* + * Module + */ +orte_odls_base_module_t orte_odls_pspawn_module = { + .get_add_procs_data = orte_odls_base_default_get_add_procs_data, + .launch_local_procs = orte_odls_pspawn_launch_local_procs, + .kill_local_procs = orte_odls_pspawn_kill_local_procs, + .signal_local_procs = orte_odls_pspawn_signal_local_procs, + .restart_proc = orte_odls_pspawn_restart_proc +}; + + +/* deliver a signal to a specified pid. */ +static int odls_pspawn_kill_local(pid_t pid, int signum) +{ + pid_t pgrp; + +#if HAVE_SETPGID + pgrp = getpgid(pid); + if (-1 != pgrp) { + /* target the lead process of the process + * group so we ensure that the signal is + * seen by all members of that group. 
This + * ensures that the signal is seen by any + * child processes our child may have + * started + */ + pid = -pgrp; + } +#endif + + if (0 != kill(pid, signum)) { + if (ESRCH != errno) { + OPAL_OUTPUT_VERBOSE((2, orte_odls_base_framework.framework_output, + "%s odls:pspawn:SENT KILL %d TO PID %d GOT ERRNO %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), signum, (int)pid, errno)); + return errno; + } + } + OPAL_OUTPUT_VERBOSE((2, orte_odls_base_framework.framework_output, + "%s odls:pspawn:SENT KILL %d TO PID %d SUCCESS", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), signum, (int)pid)); + return 0; +} + +int orte_odls_pspawn_kill_local_procs(opal_pointer_array_t *procs) +{ + int rc; + + if (ORTE_SUCCESS != (rc = orte_odls_base_default_kill_local_procs(procs, + odls_pspawn_kill_local))) { + ORTE_ERROR_LOG(rc); + return rc; + } + return ORTE_SUCCESS; +} + + + +/* close all open file descriptors w/ exception of stdin/stdout/stderr + and the pipe up to the parent. */ +static int close_open_file_descriptors(posix_spawn_file_actions_t *factions) +{ + DIR *dir = opendir("/proc/self/fd"); + if (NULL == dir) { + return ORTE_ERR_FILE_OPEN_FAILURE; + } + struct dirent *files; + while (NULL != (files = readdir(dir))) { + if (!isdigit(files->d_name[0])) { + continue; + } + int fd = strtol(files->d_name, NULL, 10); + if (errno == EINVAL || errno == ERANGE) { + closedir(dir); + return ORTE_ERR_TYPE_MISMATCH; + } + if (fd >=3) { + posix_spawn_file_actions_addclose(factions, fd); + } + } + closedir(dir); + return ORTE_SUCCESS; +} + +/** + * posix_spawn the specified processes + */ +static int odls_pspawn_fork_local_proc(void *cdptr) +{ + orte_odls_spawn_caddy_t *cd = (orte_odls_spawn_caddy_t*)cdptr; + pid_t pid; + orte_proc_t *child = cd->child; + posix_spawn_file_actions_t factions; + posix_spawnattr_t attrs; + sigset_t sigs; + int rc; + orte_iof_base_io_conf_t *opts = &cd->opts; + + ORTE_FLAG_UNSET(cd->child, ORTE_PROC_FLAG_ALIVE); + + /* setup the attrs object */ + rc = 
posix_spawnattr_init(&attrs); + if (0 != rc) { + child->state = ORTE_PROC_STATE_FAILED_TO_START; + child->exit_code = 1; + return ORTE_ERROR; + } + /* set the signal mask in the child process */ + sigprocmask(0, 0, &sigs); + sigprocmask(SIG_UNBLOCK, &sigs, 0); + posix_spawnattr_setsigmask(&attrs, &sigs); + + /* setup to close all fd's other than stdin/out/err */ + rc = posix_spawn_file_actions_init(&factions); + if (0 != rc) { + posix_spawnattr_destroy(&attrs); + child->state = ORTE_PROC_STATE_FAILED_TO_START; + child->exit_code = 1; + return ORTE_ERROR; + } + if (ORTE_SUCCESS != close_open_file_descriptors(&factions)) { + posix_spawn_file_actions_destroy(&factions); + posix_spawnattr_destroy(&attrs); + child->state = ORTE_PROC_STATE_FAILED_TO_START; + child->exit_code = 1; + return ORTE_ERROR; + } + /* close the parent end of the pipes in the child */ + if (opts->connect_stdin) { + posix_spawn_file_actions_addclose(&factions, opts->p_stdin[1]); + } + posix_spawn_file_actions_addclose(&factions, opts->p_stdout[0]); + if( !orte_iof_base.redirect_app_stderr_to_stdout ) { + posix_spawn_file_actions_addclose(&factions, opts->p_stderr[0]); + } + /* dup the stdin/stdout/stderr descriptors */ + if (opts->usepty) { + /* disable echo */ + struct termios term_attrs; + if (tcgetattr(opts->p_stdout[1], &term_attrs) < 0) { + return ORTE_ERR_PIPE_SETUP_FAILURE; + } + term_attrs.c_lflag &= ~ (ECHO | ECHOE | ECHOK | + ECHOCTL | ECHOKE | ECHONL); + term_attrs.c_iflag &= ~ (ICRNL | INLCR | ISTRIP | INPCK | IXON); + term_attrs.c_oflag &= ~ ( +#ifdef OCRNL + /* OS X 10.3 does not have this + value defined */ + OCRNL | +#endif + ONLCR); + if (tcsetattr(opts->p_stdout[1], TCSANOW, &term_attrs) == -1) { + return ORTE_ERR_PIPE_SETUP_FAILURE; + } + posix_spawn_file_actions_adddup2(&factions, fileno(stdout), opts->p_stdout[1]); + if (orte_iof_base.redirect_app_stderr_to_stdout) { + posix_spawn_file_actions_adddup2(&factions, fileno(stderr), opts->p_stdout[1]); + } + } else { + if 
(opts->p_stdout[1] != fileno(stdout)) { + posix_spawn_file_actions_adddup2(&factions, fileno(stdout), opts->p_stdout[1]); + } + if (orte_iof_base.redirect_app_stderr_to_stdout) { + posix_spawn_file_actions_adddup2(&factions, fileno(stderr), opts->p_stdout[1]); + } + } + if (opts->connect_stdin) { + if (opts->p_stdin[0] != fileno(stdin)) { + posix_spawn_file_actions_adddup2(&factions, fileno(stdin), opts->p_stdin[0]); + } + } + if (opts->p_stderr[1] != fileno(stderr) && !orte_iof_base.redirect_app_stderr_to_stdout) { + posix_spawn_file_actions_adddup2(&factions, fileno(stderr), opts->p_stderr[1]); + } + + /* Fork off the child */ + rc = posix_spawn(&pid, cd->app->app, &factions, &attrs, cd->argv, cd->env); + posix_spawn_file_actions_destroy(&factions); + posix_spawnattr_destroy(&attrs); + + /* as the parent, close the other ends of the pipes */ + if (cd->opts.connect_stdin) { + close(cd->opts.p_stdin[0]); + } + close(cd->opts.p_stdout[1]); + if( !orte_iof_base.redirect_app_stderr_to_stdout ) { + close(cd->opts.p_stderr[1]); + } + + if (rc < 0) { + ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN); + child->state = ORTE_PROC_STATE_FAILED_TO_START; + child->exit_code = ORTE_ERR_SYS_LIMITS_CHILDREN; + return ORTE_ERR_SYS_LIMITS_CHILDREN; + } + + cd->child->state = ORTE_PROC_STATE_RUNNING; + ORTE_FLAG_SET(cd->child, ORTE_PROC_FLAG_ALIVE); + return ORTE_SUCCESS; +} + + +/** + * Launch all processes allocated to the current node. 
+ */ + +int orte_odls_pspawn_launch_local_procs(opal_buffer_t *data) +{ + int rc; + orte_jobid_t job; + + /* construct the list of children we are to launch */ + if (ORTE_SUCCESS != (rc = orte_odls_base_default_construct_child_list(data, &job))) { + OPAL_OUTPUT_VERBOSE((2, orte_odls_base_framework.framework_output, + "%s odls:pspawn:launch:local failed to construct child list on error %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc))); + return rc; + } + + /* launch the local procs */ + ORTE_ACTIVATE_LOCAL_LAUNCH(job, odls_pspawn_fork_local_proc); + + return ORTE_SUCCESS; +} + + +/** + * Send a signal to a pid. Note that if we get an error, we set the + * return value and let the upper layer print out the message. + */ +static int send_signal(pid_t pd, int signal) +{ + int rc = ORTE_SUCCESS; + pid_t pid; + + if (orte_odls_globals.signal_direct_children_only) { + pid = pd; + } else { +#if HAVE_SETPGID + /* send to the process group so that any children of our children + * also receive the signal*/ + pid = -pd; +#else + pid = pd; +#endif + } + + OPAL_OUTPUT_VERBOSE((1, orte_odls_base_framework.framework_output, + "%s sending signal %d to pid %ld", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + signal, (long)pid)); + + if (kill(pid, signal) != 0) { + switch(errno) { + case EINVAL: + rc = ORTE_ERR_BAD_PARAM; + break; + case ESRCH: + /* This case can occur when we deliver a signal to a + process that is no longer there. This can happen if + we deliver a signal while the job is shutting down. + This does not indicate a real problem, so just + ignore the error. 
*/ + break; + case EPERM: + rc = ORTE_ERR_PERM; + break; + default: + rc = ORTE_ERROR; + } + } + + return rc; +} + +static int orte_odls_pspawn_signal_local_procs(const orte_process_name_t *proc, int32_t signal) +{ + int rc; + + if (ORTE_SUCCESS != (rc = orte_odls_base_default_signal_local_procs(proc, signal, send_signal))) { + ORTE_ERROR_LOG(rc); + return rc; + } + return ORTE_SUCCESS; +} + +static int orte_odls_pspawn_restart_proc(orte_proc_t *child) +{ + int rc; + + /* restart the local proc */ + if (ORTE_SUCCESS != (rc = orte_odls_base_default_restart_proc(child, odls_pspawn_fork_local_proc))) { + OPAL_OUTPUT_VERBOSE((2, orte_odls_base_framework.framework_output, + "%s odls:pspawn:restart_proc failed to launch on error %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_ERROR_NAME(rc))); + } + return rc; +} diff --git a/orte/mca/odls/pspawn/odls_pspawn.h b/orte/mca/odls/pspawn/odls_pspawn.h new file mode 100644 index 0000000000..7f12d51038 --- /dev/null +++ b/orte/mca/odls/pspawn/odls_pspawn.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file: + */ + +#ifndef ORTE_ODLS_PSPAWN_H +#define ORTE_ODLS_PSPAWN_H + +#include "orte_config.h" + +#include "orte/mca/mca.h" + +#include "orte/mca/odls/odls.h" + +BEGIN_C_DECLS + +/* + * ODLS Pspawn module + */ +extern orte_odls_base_module_t orte_odls_pspawn_module; +ORTE_MODULE_DECLSPEC extern orte_odls_base_component_t mca_odls_pspawn_component; + +END_C_DECLS + +#endif /* ORTE_ODLS_PSPAWN_H */ diff --git a/orte/mca/odls/pspawn/odls_pspawn_component.c b/orte/mca/odls/pspawn/odls_pspawn_component.c new file mode 100644 index 0000000000..b55917087b --- /dev/null +++ b/orte/mca/odls/pspawn/odls_pspawn_component.c @@ -0,0 +1,103 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include + +#include "orte/mca/mca.h" +#include "opal/mca/base/base.h" + +#include "orte/mca/odls/odls.h" +#include "orte/mca/odls/base/odls_private.h" +#include "orte/mca/odls/pspawn/odls_pspawn.h" + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ + +static int component_open(void); +static int component_close(void); +static int component_query(mca_base_module_t **module, int *priority); + + +orte_odls_base_component_t mca_odls_pspawn_component = { + /* First, the mca_component_t struct containing meta information + about the component itself */ + .version = { + ORTE_ODLS_BASE_VERSION_2_0_0, + /* Component name and version */ + .mca_component_name = "pspawn", + MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, + ORTE_RELEASE_VERSION), + + /* Component open and close functions */ + .mca_open_component = component_open, + .mca_close_component = component_close, + .mca_query_component = component_query, + }, + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, +}; + + + +static int component_open(void) +{ + return ORTE_SUCCESS; +} + +static int component_query(mca_base_module_t **module, int *priority) +{ + /* the base open/select logic protects us against operation when + * we are NOT in a daemon, so we don't have to check that here + */ + + /* we have built some logic into the configure.m4 file that checks + * to see if we have "posix_spawn" support and only builds this component + * if we do. 
Hence, we only get here if we CAN build - in which + * case, we only should be considered for selection if specified + */ + *priority = 1; /* let others override us */ + *module = (mca_base_module_t *) &orte_odls_pspawn_module; + return ORTE_SUCCESS; +} + + +static int component_close(void) +{ + return ORTE_SUCCESS; +} diff --git a/orte/mca/odls/pspawn/owner.txt b/orte/mca/odls/pspawn/owner.txt new file mode 100644 index 0000000000..4ad6f408ca --- /dev/null +++ b/orte/mca/odls/pspawn/owner.txt @@ -0,0 +1,7 @@ +# +# owner/status file +# owner: institution that is responsible for this package +# status: e.g. active, maintenance, unmaintained +# +owner: INTEL +status: maintenance diff --git a/orte/test/system/Makefile b/orte/test/system/Makefile index 980f42f012..aac087d98f 100644 --- a/orte/test/system/Makefile +++ b/orte/test/system/Makefile @@ -1,7 +1,7 @@ PROGS = no_op sigusr_trap spin orte_nodename orte_spawn orte_loop_spawn orte_loop_child orte_abort get_limits \ orte_tool orte_no_op binom oob_stress iof_stress iof_delay radix opal_interface orte_spin segfault \ orte_exit test-time event-threads psm_keygen regex orte_errors evpri-test opal-evpri-test evpri-test2 \ - mapper reducer opal_hotel orte_dfs ulfm pmixtool threads + mapper reducer opal_hotel orte_dfs ulfm pmixtool threads pspawn all: $(PROGS) diff --git a/orte/test/system/pspawn.c b/orte/test/system/pspawn.c new file mode 100644 index 0000000000..96014912d9 --- /dev/null +++ b/orte/test/system/pspawn.c @@ -0,0 +1,45 @@ +/* -*- C -*- + * + * $HEADER$ + * + * The most basic of MPI applications + */ + +#include +#include +#include +#include +#include + +#include "opal/util/argv.h" + +int main(int argc, char* argv[]) +{ + int rc; + char **pargv = NULL; + pid_t pid; + posix_spawn_file_actions_t factions; + posix_spawnattr_t attrs; + + rc = posix_spawnattr_init(&attrs); + if (0 != rc) { + fprintf(stderr, "ERROR INIT ATTRS: %d\n", errno); + exit(1); + } + + rc = posix_spawn_file_actions_init(&factions); + if (0 
!= rc) { + fprintf(stderr, "ERROR INIT FACTIONS: %d\n", errno); + exit(1); + } + posix_spawn_file_actions_addclose(&factions, fileno(stdin)); + + opal_argv_append_nosize(&pargv, "hostname"); + + rc = posix_spawn(&pid, "/usr/bin/hostname", NULL, NULL, pargv, NULL); + posix_spawn_file_actions_destroy(&factions); + posix_spawnattr_destroy(&attrs); + + sleep(1); + return 0; +}