/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. * Copyright (c) 2007 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ * * These symbols are in a file by themselves to provide nice linker * semantics. Since linkers generally pull in symbols by object * files, keeping these symbols as the only symbols in this file * prevents utility programs such as "ompi_info" from having to import * entire components just to query their version and parameters. */ /** * @file: * Part of the gridengine launcher. * See plm_gridengine.h for an overview of how it works. */ #include "orte_config.h" #include "orte/constants.h" #include #ifdef HAVE_UNISTD_H #include #endif #include #include #ifdef HAVE_SYS_SELECT_H #include #endif #ifdef HAVE_SYS_TIME_H #include #endif #ifdef HAVE_SYS_TYPES_H #include #endif #ifdef HAVE_SYS_STAT_H #include #endif #ifdef HAVE_SYS_WAIT_H #include #endif #include #include #ifdef HAVE_PWD_H #include #endif #include "opal/mca/installdirs/installdirs.h" #include "opal/mca/base/mca_base_param.h" #include "opal/util/if.h" #include "opal/util/os_path.h" #include "opal/util/path.h" #include "opal/event/event.h" #include "opal/util/show_help.h" #include "opal/util/argv.h" #include "opal/util/opal_environ.h" #include "opal/util/output.h" #include "opal/util/basename.h" #include "opal/util/opal_environ.h" #include "orte/util/name_fns.h" #include "orte/runtime/orte_globals.h" #include "orte/util/session_dir.h" #include "orte/runtime/orte_wait.h" #include "orte/runtime/orte_wakeup.h" #include "orte/mca/rml/rml.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rmaps/rmaps.h" #include "orte/mca/plm/plm.h" #include "orte/mca/plm/base/base.h" #include "orte/mca/plm/base/plm_private.h" #include "orte/mca/plm/gridengine/plm_gridengine.h" static int plm_gridengine_init(void); static int plm_gridengine_launch_job(orte_job_t *jdata); static int plm_gridengine_terminate_job(orte_jobid_t jobid); static int plm_gridengine_terminate_orteds(void); static int plm_gridengine_signal_job(orte_jobid_t jobid, int32_t signal); static int plm_gridengine_finalize(void); orte_plm_base_module_t orte_plm_gridengine_module = { plm_gridengine_init, orte_plm_base_set_hnp_name, plm_gridengine_launch_job, NULL, plm_gridengine_terminate_job, plm_gridengine_terminate_orteds, plm_gridengine_signal_job, plm_gridengine_finalize }; static void set_handler_default(int sig); /* global storage of active jobid being launched */ static orte_jobid_t active_job=ORTE_JOBID_INVALID; /** * Init the module */ int plm_gridengine_init(void) { int rc; if (ORTE_SUCCESS != (rc = orte_plm_base_comm_start())) { ORTE_ERROR_LOG(rc); } return rc; } /** * Fill the orted_path variable with the directory to the orted */ static int orte_plm_gridengine_fill_orted_path(char** orted_path) { struct stat buf; asprintf(orted_path, "%s/orted", opal_install_dirs.bindir); if (0 != stat(*orted_path, &buf)) { char *path = getenv("PATH"); if (NULL == path) { path = ("PATH is empty!"); } opal_show_help("help-plm-gridengine.txt", "no-local-orted", true, path, opal_install_dirs.bindir); return ORTE_ERR_NOT_FOUND; } return ORTE_SUCCESS; } /** * Callback on daemon exit. */ static void orte_plm_gridengine_wait_daemon(pid_t pid, int status, void* cbdata) { if (! WIFEXITED(status) || ! WEXITSTATUS(status) == 0) { /* Need to catch SIGUSR1/2 for "qrsh/qsub -notify" to work. * With "-notify" set, SIGUSR1/2 becomes the precursor for any pending * SIGSTOP/SIGKILL. So just return and ignore the daemon_failed * at the end as that would kill off the user processes */ if (SIGUSR1 == status || SIGUSR2 == status) { opal_output(0, "The daemon received a signal %d", status); return; } /* Otherwise, tell the user something went wrong. */ opal_output(0, "ERROR: A daemon failed to start as expected."); opal_output(0, "ERROR: There may be more information available from"); opal_output(0, "ERROR: the 'qstat -t' command on the Grid Engine tasks."); opal_output(0, "ERROR: If the problem persists, please restart the"); opal_output(0, "ERROR: Grid Engine PE job"); if (WIFEXITED(status)) { opal_output(0, "ERROR: The daemon exited unexpectedly with status %d.", WEXITSTATUS(status)); } else if (WIFSIGNALED(status)) { #ifdef WCOREDUMP if (WCOREDUMP(status)) { opal_output(0, "The daemon received a signal %d (with core).", WTERMSIG(status)); } else { opal_output(0, "The daemon received a signal %d.", WTERMSIG(status)); } #else opal_output(0, "The daemon received a signal %d.", WTERMSIG(status)); #endif /* WCOREDUMP */ } else { opal_output(0, "No extra status information is available: %d.", status); } /* report that the daemon has failed so we break out of the daemon * callback receive and can exit */ orte_plm_base_launch_failed(active_job, true, pid, status, ORTE_JOB_STATE_FAILED_TO_START); } } /** * Launch a daemon (bootproxy) on each node. The daemon will be responsible * for launching the application. */ /* When working in this function, ALWAYS jump to "cleanup" if * you encounter an error so that orterun will be woken up and * the job can cleanly terminate */ static int plm_gridengine_launch_job(orte_job_t *jdata) { orte_job_map_t *map; int node_name_index1; int node_name_index2; int proc_vpid_index; int orted_index; char *prefix_dir; char *param; char **argv=NULL; char **env=NULL; int argc; int rc; sigset_t sigs; char *lib_base = NULL, *bin_base = NULL; char *sge_root, *sge_arch; bool failed_launch = true; orte_app_context_t **apps; orte_node_t **nodes; orte_std_cntr_t nnode; /* create a jobid for this job */ if (ORTE_SUCCESS != (rc = orte_plm_base_create_jobid(&jdata->jobid))) { ORTE_ERROR_LOG(rc); goto cleanup; } OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, "%s plm:gridengine: launching job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid))); /* setup the job */ if (ORTE_SUCCESS != (rc = orte_plm_base_setup_job(jdata))) { ORTE_ERROR_LOG(rc); goto cleanup; } /* set the active jobid */ active_job = jdata->jobid; /* Get the map for this job */ if (NULL == (map = orte_rmaps.get_job_map(active_job))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); rc = ORTE_ERR_NOT_FOUND; goto cleanup; } apps = (orte_app_context_t**)jdata->apps->addr; nodes = (orte_node_t**)map->nodes->addr; if (map->num_new_daemons == 0) { /* have all the daemons we need - launch app */ OPAL_OUTPUT_VERBOSE((1, orte_plm_globals.output, "%s plm:gridengine: no new daemons to launch", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); goto launch_apps; } /* * Build argv array */ argv = opal_argv_split("qrsh", ' '); argc = opal_argv_count(argv); /* gridengine specific flags */ opal_argv_append(&argc, &argv, "-inherit");/*run tasks within curr job*/ opal_argv_append(&argc, &argv, "-noshell");/*execute w/o wrapping shell*/ opal_argv_append(&argc, &argv, "-nostdin");/*suppress input stream stdin*/ opal_argv_append(&argc, &argv, "-V"); /*task to have the env as job*/ if (mca_plm_gridengine_component.verbose) { opal_argv_append(&argc, &argv, "-verbose"); } node_name_index1 = argc; opal_argv_append(&argc, &argv, "