/*
 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation. All rights reserved.
 * Copyright (c) 2004-2007 The University of Tennessee and The University
 *                         of Tennessee Research Foundation. All rights
 *                         reserved.
 * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2007      Los Alamos National Security, LLC. All rights
 *                         reserved.
 * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 * These symbols are in a file by themselves to provide nice linker
 * semantics. Since linkers generally pull in symbols by object
 * files, keeping these symbols as the only symbols in this file
 * prevents utility programs such as "ompi_info" from having to import
 * entire components just to query their version and parameters.
 */

#include "orte_config.h"
#include "orte/constants.h"

/* NOTE(review): the system header names were missing from the text under
 * review.  The guarded ones are restored from their HAVE_*_H guard; the
 * unguarded ones are inferred from what this file calls (exit, errno,
 * strerror, ...) -- confirm against the upstream file. */
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <errno.h>
#include <string.h>
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif
#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_SYS_WAIT_H
#include <sys/wait.h>
#endif
#include <fcntl.h>
#include <signal.h>
#ifdef HAVE_PWD_H
#include <pwd.h>
#endif

#include "opal/mca/installdirs/installdirs.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/output.h"
#include "opal/event/event.h"
#include "opal/util/argv.h"
#include "opal/util/opal_environ.h"
#include "opal/util/basename.h"
#include "opal/util/bit_ops.h"
#include "opal/class/opal_pointer_array.h"

#include "orte/util/show_help.h"
#include "orte/runtime/orte_wait.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/name_fns.h"
#include "orte/util/nidmap.h"
#include "orte/util/proc_info.h"

#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/ess/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/routed/routed.h"
#include "orte/mca/rml/base/rml_contact.h"

#include "orte/mca/plm/plm.h"
#include "orte/mca/plm/base/base.h"
#include "orte/mca/plm/base/plm_private.h"
#include "orte/mca/plm/rsh/plm_rsh.h"

#if OPAL_HAVE_POSIX_THREADS && OPAL_THREADS_HAVE_DIFFERENT_PIDS && OPAL_ENABLE_PROGRESS_THREADS
static int orte_plm_rsh_launch_threaded(orte_job_t *jdata);
#endif

static int remote_spawn(opal_buffer_t *launch);

/*
 * Module function table.  The launch entry is chosen at compile time:
 * the threaded wrapper when all three OPAL thread macros are true,
 * the plain launcher otherwise.
 */
orte_plm_base_module_t orte_plm_rsh_module = {
    orte_plm_rsh_init,
    orte_plm_base_set_hnp_name,
#if OPAL_HAVE_POSIX_THREADS && OPAL_THREADS_HAVE_DIFFERENT_PIDS && OPAL_ENABLE_PROGRESS_THREADS
    orte_plm_rsh_launch_threaded,
#else
    orte_plm_rsh_launch,
#endif
    remote_spawn,
    orte_plm_base_orted_terminate_job,
    orte_plm_rsh_terminate_orteds,
    orte_plm_base_orted_kill_local_procs,
    orte_plm_rsh_signal_job,
    orte_plm_rsh_finalize
};

typedef enum {
    ORTE_PLM_RSH_SHELL_BASH = 0,
    ORTE_PLM_RSH_SHELL_ZSH,
    ORTE_PLM_RSH_SHELL_TCSH,
    ORTE_PLM_RSH_SHELL_CSH,
    ORTE_PLM_RSH_SHELL_KSH,
    ORTE_PLM_RSH_SHELL_SH,
    ORTE_PLM_RSH_SHELL_UNKNOWN
} orte_plm_rsh_shell_t;

/* These strings *must* follow the same order as the enum
   ORTE_PLM_RSH_SHELL_* */
static const char * orte_plm_rsh_shell_name[] = {
    "bash",
    "zsh",
    "tcsh",       /* tcsh has to be first otherwise strstr finds csh */
    "csh",
    "ksh",
    "sh",
    "unknown"
};

/*
 * Local functions
 */
static void set_handler_default(int sig);
static orte_plm_rsh_shell_t find_shell(char *shell);
static int find_children(int rank, int parent, int me, int num_procs);

/* local global storage of timing variables */
static struct timeval joblaunchstart, joblaunchstop;

/* local global storage */
static orte_jobid_t active_job=ORTE_JOBID_INVALID;
static orte_jobid_t local_slaves;

/**
 * Init the module
 *
 * Starts the PLM base communication and resets the local-slave job
 * family.  A failure of orte_plm_base_comm_start() is logged and its
 * code is returned, but local_slaves is still reset.
 *
 * @return the return code of orte_plm_base_comm_start()
 */
int orte_plm_rsh_init(void)
{
    int rc;

    if (ORTE_SUCCESS != (rc = orte_plm_base_comm_start())) {
        ORTE_ERROR_LOG(rc);
    }

    /* we set the local slaves up to have a job family of zero.
     * this provides a convenient way of checking whether or
     * not a process is a local slave
     */
    local_slaves = 0;

    return rc;
}


/**
 * Check the Shell variable on the specified node
 *
 * Forks a child that runs the rsh agent with "echo $SHELL" on the
 * node, reads the child's stdout through a pipe, and matches the
 * basename of the reported path against orte_plm_rsh_shell_name[].
 *
 * @param nodename  node to probe
 * @param shell     (OUT) detected shell; ORTE_PLM_RSH_SHELL_UNKNOWN when
 *                  the output is empty, has no '/', or names no known shell
 *
 * @return ORTE_SUCCESS, or ORTE_ERR_IN_ERRNO when pipe/fork/close/read
 *         fails (errno describes the failure)
 */
static int orte_plm_rsh_probe(char *nodename,
                              orte_plm_rsh_shell_t *shell)
{
    char ** argv;
    int argc, rc = ORTE_SUCCESS, i;
    int fd[2];
    pid_t pid;
    char outbuf[4096];

    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                         "%s plm:rsh: going to check SHELL variable on node %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         nodename));

    *shell = ORTE_PLM_RSH_SHELL_UNKNOWN;
    if (pipe(fd)) {
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:rsh: pipe failed with errno=%d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             errno));
        return ORTE_ERR_IN_ERRNO;
    }
    if ((pid = fork()) < 0) {
        /* keep fork's errno for the caller; close() may overwrite it */
        int saved_errno = errno;

        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:rsh: fork failed with errno=%d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             errno));
        /* do not leak the pipe when no child was created */
        close(fd[0]);
        close(fd[1]);
        errno = saved_errno;
        return ORTE_ERR_IN_ERRNO;
    }
    else if (pid == 0) {          /* child */
        if (dup2(fd[1], 1) < 0) {
            OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                 "%s plm:rsh: dup2 failed with errno=%d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 errno));
            exit(01);
        }
        /* Build argv array */
        argv = opal_argv_copy(orte_plm_globals.rsh_agent_argv);
        argc = opal_argv_count(orte_plm_globals.rsh_agent_argv);
        opal_argv_append(&argc, &argv, nodename);
        opal_argv_append(&argc, &argv, "echo $SHELL");

        execvp(argv[0], argv);
        /* only reached when execvp failed */
        exit(errno);
    }
    if (close(fd[1])) {
        int saved_errno = errno;

        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:rsh: close failed with errno=%d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             errno));
        /* the read end is of no use any more; do not leak it */
        close(fd[0]);
        errno = saved_errno;
        return ORTE_ERR_IN_ERRNO;
    }

    {
        /* one byte of outbuf is reserved for the terminating NUL */
        size_t remaining = sizeof(outbuf) - 1;
        char *ptr = outbuf;

        /* read until EOF, a hard error, or a full buffer */
        while (remaining > 0) {
            ssize_t ret = read(fd[0], ptr, remaining);

            if (ret < 0) {
                if (errno == EINTR) {
                    continue;   /* interrupted by a signal: retry */
                }
                OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                     "%s plm:rsh: Unable to detect the remote shell (error %s)",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     strerror(errno)));
                rc = ORTE_ERR_IN_ERRNO;
                break;
            }
            if (0 == ret) {
                break;          /* EOF */
            }
            ptr += ret;
            remaining -= (size_t)ret;
        }
        *ptr = '\0';
    }
    close(fd[0]);

    if( outbuf[0] != '\0' ) {
        char *sh_name = strrchr(outbuf, '/');
        if( NULL != sh_name ) {
            size_t len;

            sh_name++; /* skip '/' */
            /* We cannot use "echo -n $SHELL" because -n is not portable. Therefore
             * we have to remove the "\n" */
            len = strlen(sh_name);
            /* len may be 0 when the output ends in '/'; never index [-1] */
            if ( len > 0 && sh_name[len - 1] == '\n' ) {
                sh_name[len - 1] = '\0';
            }
            /* Compare against the known shell names.  The trailing
             * "unknown" table entry is skipped: matching it would only
             * yield ORTE_PLM_RSH_SHELL_UNKNOWN, which is already set. */
            for (i = 0; i < (int)ORTE_PLM_RSH_SHELL_UNKNOWN; i++) {
                if ( 0 == strcmp(sh_name, orte_plm_rsh_shell_name[i]) ) {
                    *shell = (orte_plm_rsh_shell_t)i;
                    break;
                }
            }
        }
    }

    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                         "%s plm:rsh: node %s has SHELL: %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         nodename,
                         (ORTE_PLM_RSH_SHELL_UNKNOWN == *shell) ? "UNHANDLED" : (char*)orte_plm_rsh_shell_name[*shell]));

    return rc;
}


/**
 * Callback on daemon exit.
 *
 * On an abnormal exit (killed, or non-zero exit status) the failure is
 * reported: a non-HNP process sends a message to the HNP, the HNP marks
 * the daemon as failed to start.  In every case the child counter is
 * decremented and the component condition is signalled when a launch
 * slot was freed or no children remain.
 *
 * @param pid     pid of the process that exited
 * @param status  wait status of that process
 * @param cbdata  orte_vpid_t* when we are not the HNP, orte_proc_t*
 *                when we are the HNP
 */
static void orte_plm_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
{
    orte_std_cntr_t cnt=1;
    uint8_t flag;
    orte_job_t *jdata;

    /* abnormal exit: did not exit on its own, or exited non-zero */
    if (! WIFEXITED(status) || 0 != WEXITSTATUS(status)) {
        /* if we are not the HNP, send a message to the HNP alerting it
         * to the failure
         */
        if (!ORTE_PROC_IS_HNP) {
            opal_buffer_t buf;
            orte_vpid_t *vpid=(orte_vpid_t*)cbdata;
            OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                 "%s daemon %d failed with status %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 (int)*vpid, WEXITSTATUS(status)));
            OBJ_CONSTRUCT(&buf, opal_buffer_t);
            opal_dss.pack(&buf, &cnt, 1, ORTE_STD_CNTR);
            flag = 1;
            opal_dss.pack(&buf, &flag, 1, OPAL_UINT8);
            opal_dss.pack(&buf, vpid, 1, ORTE_VPID);
            orte_rml.send_buffer(ORTE_PROC_MY_HNP, &buf, ORTE_RML_TAG_REPORT_REMOTE_LAUNCH, 0);
            OBJ_DESTRUCT(&buf);
        } else {
            orte_proc_t *daemon=(orte_proc_t*)cbdata;
            jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);

            OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                                 "%s daemon %d failed with status %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 (int)daemon->name.vpid, WEXITSTATUS(status)));
            /* note that this daemon failed */
            daemon->state = ORTE_PROC_STATE_FAILED_TO_START;
            /* increment the #daemons terminated so we will exit properly;
             * skipped if the job object cannot be found */
            if (NULL != jdata) {
                jdata->num_terminated++;
            }
            /* report that the daemon has failed so we can exit */
            orte_plm_base_launch_failed(ORTE_PROC_MY_NAME->jobid, pid, status, ORTE_JOB_STATE_FAILED_TO_START);
        }
    }

    /* release any waiting threads */
    OPAL_THREAD_LOCK(&mca_plm_rsh_component.lock);

    /* the left operand tests the count *before* the decrement, the
     * right operand the count *after* it */
    if (mca_plm_rsh_component.num_children-- >=
        mca_plm_rsh_component.num_concurrent ||
        mca_plm_rsh_component.num_children == 0) {
        opal_condition_signal(&mca_plm_rsh_component.cond);
    }

    OPAL_THREAD_UNLOCK(&mca_plm_rsh_component.lock);

}

/**
 * Determine the local and remote shells and, when the remote shell is
 * sh or ksh, append a command prefix that sources ./.profile.
 *
 * @param rshell    (OUT) remote shell
 * @param lshell    (OUT) local shell
 * @param nodename  node to probe when orte_assume_same_shell is false
 * @param argc      (IN/OUT) count of the argv being built
 * @param argv      (IN/OUT) argv being built
 *
 * @return ORTE_SUCCESS; ORTE_ERR_FATAL when the current uid has no
 *         passwd entry; ORTE_ERR_OUT_OF_RESOURCE when the prefix cannot
 *         be split; otherwise the error of orte_plm_rsh_probe().
 *         The OUT parameters are written only on success.
 */
static int setup_shell(orte_plm_rsh_shell_t *rshell,
                       orte_plm_rsh_shell_t *lshell,
                       char *nodename, int *argc, char ***argv)
{
    orte_plm_rsh_shell_t remote_shell, local_shell;
    struct passwd *p;
    char *param;
    int rc;

    /* What is our local shell? */
    local_shell = ORTE_PLM_RSH_SHELL_UNKNOWN;
    p = getpwuid(getuid());
    if( NULL == p ) {
        /* This user is unknown to the system. Therefore, there is no reason we
         * spawn whatsoever in his name. Give up with a HUGE error message.
         */
        orte_show_help( "help-plm-rsh.txt", "unknown-user", true, (int)getuid() );
        return ORTE_ERR_FATAL;
    }
    param = p->pw_shell;
    local_shell = find_shell(p->pw_shell);

    /* If we didn't find it in getpwuid(), try looking at the $SHELL
       environment variable (see
       https://svn.open-mpi.org/trac/ompi/ticket/1060)
    */
    if (ORTE_PLM_RSH_SHELL_UNKNOWN == local_shell &&
        NULL != (param = getenv("SHELL"))) {
        local_shell = find_shell(param);
    }

    if (ORTE_PLM_RSH_SHELL_UNKNOWN == local_shell) {
        /* param is NULL here when $SHELL was consulted but is unset */
        opal_output(0, "WARNING: local probe returned unhandled shell:%s assuming bash\n",
                    (NULL != param) ? param : "unknown");
        local_shell = ORTE_PLM_RSH_SHELL_BASH;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                         "%s plm:rsh: local shell: %d (%s)",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         local_shell, orte_plm_rsh_shell_name[local_shell]));

    /* What is our remote shell? */
    if (orte_assume_same_shell) {
        remote_shell = local_shell;
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:rsh: assuming same remote shell as local shell",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    } else {
        rc = orte_plm_rsh_probe(nodename, &remote_shell);

        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }

        if (ORTE_PLM_RSH_SHELL_UNKNOWN == remote_shell) {
            opal_output(0, "WARNING: rsh probe returned unhandled shell; assuming bash\n");
            remote_shell = ORTE_PLM_RSH_SHELL_BASH;
        }
    }

    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                         "%s plm:rsh: remote shell: %d (%s)",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         remote_shell, orte_plm_rsh_shell_name[remote_shell]));

    /* Do we need to source .profile on the remote side?
       - sh: yes (see bash(1))
       - ksh: yes (see ksh(1))
       - bash: no (see bash(1))
       - [t]csh: no (see csh(1) and tcsh(1))
       - zsh: no (see http://zsh.sourceforge.net/FAQ/zshfaq03.html#l19)
    */

    if (ORTE_PLM_RSH_SHELL_SH == remote_shell ||
        ORTE_PLM_RSH_SHELL_KSH == remote_shell) {
        int i;
        char **tmp;
        tmp = opal_argv_split("( test ! -r ./.profile || . ./.profile;", ' ');
        if (NULL == tmp) {
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        for (i = 0; NULL != tmp[i]; ++i) {
            opal_argv_append(argc, argv, tmp[i]);
        }
        opal_argv_free(tmp);
    }

    /* pass results back */
    *rshell = remote_shell;
    *lshell = local_shell;

    return ORTE_SUCCESS;
}

static int setup_launch(int *argcptr, char ***argvptr,
                        char *nodename,
                        int *node_name_index1,
                        int *proc_vpid_index, char *prefix_dir)
{
    int argc;
    char **argv;
    char *param;
    orte_plm_rsh_shell_t remote_shell, local_shell;
    char *lib_base, *bin_base;
    int orted_argc;
    char **orted_argv;
    char *orted_cmd, *orted_prefix, *final_cmd;
    int orted_index;
    int rc;

    /* Figure out the basenames for the libdir and bindir.  This
       requires some explanation:

       - Use opal_install_dirs.libdir and opal_install_dirs.bindir.

       - After a discussion on the devel-core mailing list, the
       developers decided that we should use the local directory
       basenames as the basis for the prefix on the remote node.
       This does not handle a few notable cases (e.g., if the
       libdir/bindir is not simply a subdir under the prefix, if the
       libdir/bindir basename is not the same on the remote node as
       it is here on the local node, etc.), but we decided that
       --prefix was meant to handle "the common case".  If you need
       something more complex than this, a) edit your shell startup
       files to set PATH/LD_LIBRARY_PATH properly on the remote
       node, or b) use some new/to-be-defined options that
       explicitly allow setting the bindir/libdir on the remote
       node.  We decided to implement these options (e.g.,
       --remote-bindir and --remote-libdir) to orterun when it
       actually becomes a problem for someone (vs. a hypothetical
       situation).

       Hence, for now, we simply take the basename of this install's
       libdir and bindir and use it to append this install's prefix
       and use that on the remote node.
    */

    lib_base = opal_basename(opal_install_dirs.libdir);
    bin_base = opal_basename(opal_install_dirs.bindir);

    /*
     * Build argv array
     */
    argv = opal_argv_copy(orte_plm_globals.rsh_agent_argv);
    argc = opal_argv_count(orte_plm_globals.rsh_agent_argv);
    *node_name_index1 = argc;
    opal_argv_append(&argc, &argv, "