/*
 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2007 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006-2007 Cisco Systems, Inc.  All rights reserved.
 * Copyright (c) 2007      Los Alamos National Security, LLC.  All rights
 *                         reserved.
 * Copyright (c) 2008-2009 Sun Microsystems, Inc.  All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 * These symbols are in a file by themselves to provide nice linker
 * semantics.  Since linkers generally pull in symbols by object
 * files, keeping these symbols as the only symbols in this file
 * prevents utility programs such as "ompi_info" from having to import
 * entire components just to query their version and parameters.
 */

#include "orte_config.h"
#include "orte/constants.h"

/*
 * NOTE(review): every bare "#include" below has lost its operand (the
 * angle-bracket header name appears to have been stripped when this
 * text was extracted).  The surrounding HAVE_* guards suggest which
 * system header each guarded one named (e.g. HAVE_UNISTD_H -> unistd.h),
 * but the unguarded ones cannot be recovered from this text - restore
 * them from the upstream file before building.
 */
#include
#ifdef HAVE_UNISTD_H
#include
#endif
#include
#include
#ifdef HAVE_STRINGS_H
#include
#endif
#ifdef HAVE_SYS_SELECT_H
#include
#endif
#ifdef HAVE_SYS_TIME_H
#include
#endif
#ifdef HAVE_SYS_TYPES_H
#include
#endif
#ifdef HAVE_SYS_STAT_H
#include
#endif
#ifdef HAVE_SYS_WAIT_H
#include
#endif
#include
#include
#ifdef HAVE_PWD_H
#include
#endif

#include "opal/mca/installdirs/installdirs.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/output.h"
#include "opal/util/opal_sos.h"
#include "opal/mca/event/event.h"
#include "opal/util/argv.h"
#include "opal/util/opal_environ.h"
#include "opal/util/basename.h"
#include "opal/util/bit_ops.h"

#include "orte/util/show_help.h"
#include "orte/runtime/orte_wait.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_quit.h"
#include "orte/util/name_fns.h"
#include "orte/util/nidmap.h"
#include "orte/util/proc_info.h"

#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/ess/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/rmaps.h"
#include "orte/mca/routed/routed.h"

#include "orte/mca/plm/plm.h"
#include "orte/mca/plm/base/base.h"
#include "orte/mca/plm/base/plm_private.h"
#include "orte/mca/plm/base/plm_base_rsh_support.h"
#include "orte/mca/plm/rshd/plm_rshd.h"

/*
 * Forward declaration of the routine run in the forked child.  It is
 * tagged __opal_attribute_noreturn__ (presumably a "does not return"
 * attribute - confirm against the opal headers).
 */
static void ssh_child(char *cmd, char **argv) __opal_attribute_noreturn__;

/*
 * Function table exported by this module.  The initializer is
 * positional, so the order of entries must match the field order of
 * orte_plm_base_module_t (declared elsewhere).  The fourth and seventh
 * slots are left NULL; which operations those slots correspond to is
 * not visible here - check the struct definition.
 */
orte_plm_base_module_t orte_plm_rshd_module = {
    orte_plm_rshd_init,
    orte_plm_base_set_hnp_name,
    orte_plm_rshd_launch,
    NULL,
    orte_plm_rshd_terminate_job,
    orte_plm_rshd_terminate_orteds,
    NULL,
    orte_plm_rshd_signal_job,
    orte_plm_rshd_finalize
};

/*
 * Local functions
 */
static void set_handler_default(int sig);

/**
 * Init the module
 *
 * Sets up the rsh launch agent support and then starts the PLM base
 * communication support.
 *
 * @return ORTE_SUCCESS if both steps succeed; otherwise the error code
 *         of the first step that failed (which is also logged via
 *         ORTE_ERROR_LOG).
 */
int orte_plm_rshd_init(void)
{
    int rc;

    /* since I was selected, setup the rsh launch agent support */
    if (ORTE_SUCCESS != (rc = orte_plm_base_rsh_launch_agent_setup(NULL, NULL))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* a failure here is logged and then returned through rc below */
    if (ORTE_SUCCESS != (rc = orte_plm_base_comm_start())) {
        ORTE_ERROR_LOG(rc);
    }
    return rc;
}

/**
 * Callback on daemon exit.
 *
 * Registered through orte_wait_cb() for each forked child.
 *
 * @param pid     pid of the child that changed state (not used in the body)
 * @param status  wait status of that child
 * @param cbdata  the orte_proc_t* that was passed when the callback was
 *                registered
 */
static void wait_cb(pid_t pid, int status, void* cbdata)
{
    orte_proc_t *proc = (orte_proc_t*)cbdata;
    orte_job_t *jdata;

    /* get the associated job object */
    /* NOTE(review): jdata is assigned here but never read in this function */
    jdata = orte_get_job_data_object(proc->name.jobid);

    /*
     * NOTE(review): "! WEXITSTATUS(status) == 0" parses as
     * "(!WEXITSTATUS(status)) == 0", which is true exactly when the
     * exit status is non-zero.  The result matches the "abnormal exit"
     * intent, but "WEXITSTATUS(status) != 0" would say so directly.
     */
    if (! WIFEXITED(status) || ! WEXITSTATUS(status) == 0) { /* if abnormal exit */
        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s proc %d failed with status %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (int)proc->name.vpid, WEXITSTATUS(status)));
    }

    /* note that this daemon failed */
    /*
     * NOTE(review): this call sits outside the abnormal-exit branch
     * above, so the FAILED_TO_START state is reported on every
     * invocation of the callback, including a clean exit.  Confirm
     * that this is intended.
     */
    orte_errmgr.update_state(proc->name.jobid, ORTE_JOB_STATE_FAILED_TO_START,
                             NULL, ORTE_PROC_STATE_FAILED_TO_START, 0, status);

    /* release any waiting threads */
    OPAL_THREAD_LOCK(&mca_plm_rshd_component.lock);
    /* decrement our #children */
    mca_plm_rshd_component.num_children--;
    /* see if we can allow launching to continue */
    if (mca_plm_rshd_component.num_children <= mca_plm_rshd_component.num_concurrent ||
        mca_plm_rshd_component.num_children == 0) {
        opal_condition_signal(&mca_plm_rshd_component.cond);
    }
    OPAL_THREAD_UNLOCK(&mca_plm_rshd_component.lock);
}

/* actually ssh the child */
/*
 * @param cmd   command passed in by the parent right after fork()
 * @param argv  argument vector passed in by the parent
 *
 * NOTE(review): only the beginning of this function survives in this
 * text - see the note at the "for" statement below.
 */
static void ssh_child(char *cmd, char **argv)
{
    char** env;
    char* var;
    long fd, fdmax = sysconf(_SC_OPEN_MAX);
    int fdin;
    sigset_t sigs;

    /* setup environment */
    env = opal_argv_copy(orte_launch_environ);

    /* Don't let ssh slurp all of our stdin! */
    /* NOTE(review): the results of open() and dup2() are not checked */
    fdin = open("/dev/null", O_RDWR);
    dup2(fdin, 0);
    close(fdin);

    /* close all file descriptors w/ exception of stdin/stdout/stderr */
    /*
     * NOTE(review): SOURCE IS DAMAGED HERE.  The text between "fd" and
     * "controls" is missing (it looks as if everything from a '<' to
     * the next '>' was stripped during extraction).  The lost span
     * presumably held the rest of ssh_child() plus the signature, local
     * declarations and opening statements of orte_plm_rshd_launch().
     * Everything from this point to the closing brace after "return rc;"
     * reads as the tail of orte_plm_rshd_launch(): it uses jdata, rc, i,
     * proc, app, node, argv, cmd, param, pid, failed_job, failed_launch
     * and job_state, none of which are declared in the surviving text.
     * Restore this region from the upstream file; the tokens below are
     * kept exactly as found.
     */
    for(fd=3; fdcontrols & ORTE_JOB_CONTROL_LOCAL_SLAVE) {
        /* if this is a request to launch a local slave,
         * then we will not be launching an orted - we will
         * directly ssh the slave process itself. No mapping
         * is performed to support this - the caller must
         * provide all the info required to launch the job,
         * including the target hosts
         */
        return orte_plm_base_local_slave_launch(jdata);
    }

    /* setup the job */
    if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                         "%s plm:rshd: launching job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid)));

    /* default to declaring the job launch as having failed */
    failed_job = jdata->jobid;

    /* launch each proc */
    for (i=0; i < jdata->procs->size; i++) {
        /* empty slots in the pointer array are skipped */
        if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, i))) {
            continue;
        }
        /* only launch this proc if it isn't already running */
        if (ORTE_PROC_STATE_LAUNCHED <= proc->state) {
            continue;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output,
                             "%s plm:rshd: launching proc %s on node %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&proc->name), proc->nodename));

        /* a proc whose app context cannot be found is silently skipped */
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, proc->app_idx))) {
            continue;
        }

        node = (orte_node_t*)proc->node;

        /* setup the launch */
        /* fills in argv and cmd; both are released later in this iteration or at cleanup */
        if (ORTE_SUCCESS != (rc = orte_plm_base_setup_slave_launch(proc->nodename, app,
                                                                   "orte-bootproxy.sh",
                                                                   &argv, &cmd))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* add the bootproxy cmd line options */
        if (ORTE_SUCCESS != (rc = orte_plm_base_append_bootproxy_args(app, &argv,
                                                                      proc->name.jobid, proc->name.vpid,
                                                                      jdata->map->num_nodes, jdata->num_procs,
                                                                      proc->node_rank, proc->local_rank,
                                                                      node->num_procs, jdata->total_slots_alloc,
                                                                      false))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* final cmd */
        /* printed only when the plm output stream has non-zero verbosity */
        if (0 < opal_output_get_verbosity(orte_plm_globals.output)) {
            param = opal_argv_join(argv, ' ');
            opal_output(0, "%s plm:rshd: final cmd:\n\t%s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        (NULL == param) ? "NULL" : param);
            if (NULL != param) free(param);
        }

        /* fork a child to exec the rsh/ssh session */
        pid = fork();
        if (pid < 0) {
            ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN);
            rc = ORTE_ERR_SYS_LIMITS_CHILDREN;
            goto cleanup;
        }

        /* child */
        if (pid == 0) {
            /* do the ssh launch - this will exit if it fails */
            ssh_child(cmd, argv);
        }

        /* father */
        /* declare the child launched */
        proc->state = ORTE_PROC_STATE_LAUNCHED;

        /* track number launched */
        /*
         * The post-increment means the comparison uses the child count
         * from before this launch was added.
         * NOTE(review): the wait is guarded by a single "if", so the
         * condition is not re-tested after wake-up - confirm that is
         * acceptable for opal_condition_wait().
         */
        OPAL_THREAD_LOCK(&mca_plm_rshd_component.lock);
        if (mca_plm_rshd_component.num_children++ >=
            mca_plm_rshd_component.num_concurrent) {
            opal_condition_wait(&mca_plm_rshd_component.cond, &mca_plm_rshd_component.lock);
        }
        OPAL_THREAD_UNLOCK(&mca_plm_rshd_component.lock);

        /* cleanup */
        /* pointers are reset to NULL so the cleanup label below does not free them again */
        opal_argv_free(argv);
        argv = NULL;
        free(cmd);
        cmd = NULL;

        /* setup callback on sigchild - wait until setup above is complete
         * as the callback can occur in the call to orte_wait_cb
         */
        orte_wait_cb(pid, wait_cb, (void*)proc);
    }

    /* flag the launch as successful */
    failed_launch = false;
    if (jdata->state < ORTE_JOB_STATE_UNTERMINATED) {
        jdata->state = ORTE_JOB_STATE_LAUNCHED;
    }

    /* reached both on success (fall-through) and from every "goto cleanup" above */
cleanup:
    if (NULL != argv) {
        opal_argv_free(argv);
    }
    if (NULL != cmd) {
        free(cmd);
    }

    /* check for failed launch - if so, force terminate */
    /* failed_launch and job_state are set in the part of the function that is missing from this text */
    if (failed_launch) {
        orte_errmgr.update_state(failed_job, job_state,
                                 NULL, ORTE_PROC_STATE_UNDEF,
                                 0, ORTE_ERROR_DEFAULT_EXIT_CODE);
    }

    return rc;
}

/**
 * Terminate all processes for a given job
 *
 * Not supported by this module: the jobid is ignored.
 *
 * @return always ORTE_ERR_NOT_IMPLEMENTED
 */
int orte_plm_rshd_terminate_job(orte_jobid_t jobid)
{
    return ORTE_ERR_NOT_IMPLEMENTED;
}

/**
 * No orteds to terminate
 *
 * Calls orte_quit() and then reports success.
 *
 * @return always ORTE_SUCCESS
 */
int orte_plm_rshd_terminate_orteds(void)
{
    orte_quit();
    return ORTE_SUCCESS;
}

/**
 * Signal a job - a no-op in this module.
 *
 * Both arguments are ignored.
 * NOTE(review): success is returned even though nothing is delivered;
 * callers cannot tell the signal was dropped.
 *
 * @return always ORTE_SUCCESS
 */
int orte_plm_rshd_signal_job(orte_jobid_t jobid, int32_t signal)
{
    /* no way to do this */
    return ORTE_SUCCESS;
}

/**
 * Finalize the module by stopping the PLM base communication support.
 *
 * @return the result of orte_plm_base_comm_stop(); a failure is also
 *         logged via ORTE_ERROR_LOG.
 */
int orte_plm_rshd_finalize(void)
{
    int rc;

    /* cleanup any pending recvs */
    if (ORTE_SUCCESS != (rc = orte_plm_base_comm_stop())) {
        ORTE_ERROR_LOG(rc);
    }
    return rc;
}

/*
 * Install SIG_DFL as the handler for the given signal, with no flags
 * and an empty signal mask.  The previous action is not retrieved and
 * the return value of sigaction() is ignored.  No call to this helper
 * is visible in the surviving text of this file.
 */
static void set_handler_default(int sig)
{
    struct sigaction act;

    act.sa_handler = SIG_DFL;
    act.sa_flags = 0;
    sigemptyset(&act.sa_mask);

    sigaction(sig, &act, (struct sigaction *)0);
}