From a34be856aafceef9966115674ee62dc7255b3705 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sat, 7 Apr 2012 13:36:24 +0000 Subject: [PATCH] Now that we have PMI support, this is no longer needed This commit was SVN r26254. --- orte/mca/ess/slurmd/Makefile.am | 47 -- orte/mca/ess/slurmd/configure.m4 | 44 -- orte/mca/ess/slurmd/ess_slurmd.h | 35 - orte/mca/ess/slurmd/ess_slurmd_component.c | 99 --- orte/mca/ess/slurmd/ess_slurmd_module.c | 730 --------------------- orte/mca/ess/slurmd/help-ess-slurmd.txt | 41 -- 6 files changed, 996 deletions(-) delete mode 100644 orte/mca/ess/slurmd/Makefile.am delete mode 100644 orte/mca/ess/slurmd/configure.m4 delete mode 100644 orte/mca/ess/slurmd/ess_slurmd.h delete mode 100644 orte/mca/ess/slurmd/ess_slurmd_component.c delete mode 100644 orte/mca/ess/slurmd/ess_slurmd_module.c delete mode 100644 orte/mca/ess/slurmd/help-ess-slurmd.txt diff --git a/orte/mca/ess/slurmd/Makefile.am b/orte/mca/ess/slurmd/Makefile.am deleted file mode 100644 index 9ea54cbca8..0000000000 --- a/orte/mca/ess/slurmd/Makefile.am +++ /dev/null @@ -1,47 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_pkgdata_DATA = help-ess-slurmd.txt - -sources = \ - ess_slurmd.h \ - ess_slurmd_component.c \ - ess_slurmd_module.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_orte_ess_slurmd_DSO -component_noinst = -component_install = mca_ess_slurmd.la -else -component_noinst = libmca_ess_slurmd.la -component_install = -endif - -mcacomponentdir = $(pkglibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_ess_slurmd_la_SOURCES = $(sources) -mca_ess_slurmd_la_LDFLAGS = -module -avoid-version $(ess_slurmd_LDFLAGS) -mca_ess_slurmd_la_LIBADD = $(ess_slurmd_LIBS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_ess_slurmd_la_SOURCES =$(sources) -libmca_ess_slurmd_la_LDFLAGS = -module -avoid-version $(ess_slurmd_LDFLAGS) -libmca_ess_slurmd_la_LIBADD = $(ess_slurmd_LIBS) diff --git a/orte/mca/ess/slurmd/configure.m4 b/orte/mca/ess/slurmd/configure.m4 deleted file mode 100644 index f9350c01ab..0000000000 --- a/orte/mca/ess/slurmd/configure.m4 +++ /dev/null @@ -1,44 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2011 Los Alamos National Security, LLC. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -AC_DEFUN([MCA_orte_ess_slurmd_PRIORITY], [10]) - -# MCA_ess_slurmd_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_orte_ess_slurmd_CONFIG],[ - AC_CONFIG_FILES([orte/mca/ess/slurmd/Makefile]) - - ORTE_CHECK_SLURM([ess_slurmd], [ess_slurmd_good=1], [ess_slurmd_good=0]) - - # if check worked, set wrapper flags if so. - # Evaluate succeed / fail - AS_IF([test "$ess_slurmd_good" = "1" -a "$orte_without_full_support" = 0], - [ess_slurmd_WRAPPER_EXTRA_LDFLAGS="$ess_slurmd_LDFLAGS" - ess_slurmd_WRAPPER_EXTRA_LIBS="$ess_slurmd_LIBS" - $1], - [$2]) - - # set build flags to use in makefile - AC_SUBST([ess_slurmd_CPPFLAGS]) - AC_SUBST([ess_slurmd_LDFLAGS]) - AC_SUBST([ess_slurmd_LIBS]) - -])dnl diff --git a/orte/mca/ess/slurmd/ess_slurmd.h b/orte/mca/ess/slurmd/ess_slurmd.h deleted file mode 100644 index 409d23c88f..0000000000 --- a/orte/mca/ess/slurmd/ess_slurmd.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef ORTE_ESS_SLURMD_H -#define ORTE_ESS_SLURMD_H - -BEGIN_C_DECLS - -ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_slurmd_component; - -/* - * Module open / close - */ -int orte_ess_slurmd_component_open(void); -int orte_ess_slurmd_component_close(void); -int orte_ess_slurmd_component_query(mca_base_module_t **module, int *priority); - -END_C_DECLS - -#endif /* ORTE_ESS_SLURMD_H */ diff --git a/orte/mca/ess/slurmd/ess_slurmd_component.c b/orte/mca/ess/slurmd/ess_slurmd_component.c deleted file mode 100644 index e345d18304..0000000000 --- a/orte/mca/ess/slurmd/ess_slurmd_component.c +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * These symbols are in a file by themselves to provide nice linker - * semantics. Since linkers generally pull in symbols by object - * files, keeping these symbols as the only symbols in this file - * prevents utility programs such as "ompi_info" from having to import - * entire components just to query their version and parameters. - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "orte/util/proc_info.h" - -#include "orte/mca/ess/ess.h" -#include "orte/mca/ess/slurmd/ess_slurmd.h" - -extern orte_ess_base_module_t orte_ess_slurmd_module; - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ -orte_ess_base_component_t mca_ess_slurmd_component = { - { - ORTE_ESS_BASE_VERSION_2_0_0, - - /* Component name and version */ - "slurmd", - ORTE_MAJOR_VERSION, - ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION, - - /* Component open and close functions */ - orte_ess_slurmd_component_open, - orte_ess_slurmd_component_close, - orte_ess_slurmd_component_query - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - } -}; - - -int -orte_ess_slurmd_component_open(void) -{ - return ORTE_SUCCESS; -} - - -int orte_ess_slurmd_component_query(mca_base_module_t **module, int *priority) -{ - /* Are we an MPI proc running under a SLURM job? Were - * we given a path back to the HNP? If the - * answer to the first is "yes" and the second - * is "no", then we were not launched - * by mpirun but are in a slurm world - */ - - if (ORTE_PROC_IS_MPI && - NULL != getenv("SLURM_JOBID") && - NULL != getenv("SLURM_STEPID") && - NULL == orte_process_info.my_hnp_uri) { - *priority = 30; - *module = (mca_base_module_t *)&orte_ess_slurmd_module; - return ORTE_SUCCESS; - } - - /* Sadly, no */ - *priority = -1; - *module = NULL; - return ORTE_ERROR; -} - - -int -orte_ess_slurmd_component_close(void) -{ - return ORTE_SUCCESS; -} - diff --git a/orte/mca/ess/slurmd/ess_slurmd_module.c b/orte/mca/ess/slurmd/ess_slurmd_module.c deleted file mode 100644 index 90f59e0dba..0000000000 --- a/orte/mca/ess/slurmd/ess_slurmd_module.c +++ /dev/null @@ -1,730 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#ifdef HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ -#ifdef HAVE_STRING_H -#include -#endif /* HAVE_STRING_H */ -#include -#ifdef HAVE_NETDB_H -#include -#endif -#ifdef HAVE_IFADDRS_H -#include -#endif - -#include "opal/util/opal_environ.h" -#include "opal/util/output.h" -#include "opal/mca/base/mca_base_param.h" -#include "opal/util/argv.h" -#include "opal/class/opal_pointer_array.h" -#include "opal/mca/hwloc/base/base.h" -#include "opal/util/printf.h" - -#include "orte/util/proc_info.h" -#include "orte/util/show_help.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/util/name_fns.h" -#include "orte/runtime/orte_globals.h" -#include "orte/util/nidmap.h" -#include "orte/util/pre_condition_transports.h" -#include "orte/util/regex.h" -#include "orte/runtime/orte_wait.h" - -#include "orte/mca/ess/ess.h" -#include "orte/mca/ess/base/base.h" -#include "orte/mca/ess/slurmd/ess_slurmd.h" - -static int rte_init(void); -static int rte_finalize(void); -static void rte_abort(int error_code, bool report); - -orte_ess_base_module_t orte_ess_slurmd_module = { - rte_init, - rte_finalize, - rte_abort, - orte_ess_base_proc_get_locality, - orte_ess_base_proc_get_daemon, - orte_ess_base_proc_get_hostname, - orte_ess_base_proc_get_local_rank, - orte_ess_base_proc_get_node_rank, - orte_ess_base_update_pidmap, - orte_ess_base_update_nidmap, - NULL /* ft_event */ -}; - -/* Local globals */ -static bool app_init_complete; -static bool slurm20; - -/* Local functions */ -static int discover_nodes(char *regexp, char*** nodelist); -static int parse_ranges(char *base, char *ranges, char ***names); -static int parse_range(char *base, char *range, char ***names); - -/**** MODULE FUNCTIONS ****/ - -static int rte_init(void) -{ - int ret; - char *error = NULL; - int32_t jobfam, stepid; - char **nodes = NULL; - char *envar; - int i, j; - orte_nid_t *node; - orte_jmap_t *jmap; - orte_pmap_t *pmap; - orte_vpid_t vpid; - int local_rank; - int nodeid; - int num_nodes; - int cpus_per_task; - char *regexp, *tasks_per_node; - int *ppn; - bool block=false, cyclic=false; - uint64_t unique_key[2]; - char *cs_env, *string_key; - - /* init flag */ - app_init_complete = false; - slurm20 = false; - - /* run the prolog */ - if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { - error = "orte_ess_base_std_prolog"; - goto error; - } - - /* Only application procs can use this module. Since we - * were directly launched by srun, we need to bootstrap - * our own global info so we can startup. Srun will have - * provided that info in our environment, so get it from there - */ - - /* declare ourselves to be standalone - i.e., not launched by orted */ - orte_standalone_operation = true; - -#if OPAL_HAVE_HWLOC - /* get the topology */ - if (NULL == opal_hwloc_topology) { - if (OPAL_SUCCESS != opal_hwloc_base_get_topology()) { - error = "topology discovery"; - goto error; - } - } -#endif - - /* get the slurm jobid - this will be our job family */ - envar = getenv("SLURM_JOBID"); - /* don't need to check this for NULL - if it was, we would - * never have been selected anyway - */ - jobfam = strtol(envar, NULL, 10); - /* get the slurm stepid - this will be our local jobid */ - if (NULL == (envar = getenv("SLURM_STEPID"))) { - error = "could not get SLURM_STEPID"; - goto error; - } - /* because the stepid could be zero, and we want the local - * jobid to be unique, increment it by one so the system - * doesn't think that we are a bunch of daemons! - */ - stepid = strtol(envar, NULL, 10) + 1; - /* now build the jobid */ - ORTE_PROC_MY_NAME->jobid = ORTE_CONSTRUCT_LOCAL_JOBID(jobfam << 16, stepid); - - /* setup transport keys in case the MPI layer needs them - - * we can use the SLURM jobid and stepid as unique keys - * because they are unique values assigned by the RM - */ - unique_key[0] = (uint64_t)jobfam; - unique_key[1] = (uint64_t)stepid; - if (NULL == (string_key = orte_pre_condition_transports_print(unique_key))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (NULL == (cs_env = mca_base_param_environ_variable("orte_precondition_transports",NULL,NULL))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - asprintf(&envar, "%s=%s", cs_env, string_key); - putenv(envar); - /* cannot free the envar as that messes of our environ */ - free(cs_env); - free(string_key); - - /* get my local nodeid */ - if (NULL == (envar = getenv("SLURM_NODEID"))) { - error = "could not get SLURM_NODEID"; - goto error; - } - nodeid = strtol(envar, NULL, 10); - ORTE_PROC_MY_DAEMON->jobid = 0; - ORTE_PROC_MY_DAEMON->vpid = nodeid; - - /* get the node list */ - if (NULL == (regexp = getenv("SLURM_STEP_NODELIST"))) { - error = "could not get SLURM_STEP_NODELIST"; - goto error; - } - /* break that down into a list of nodes */ - if (ORTE_SUCCESS != (ret = discover_nodes(regexp, &nodes))) { - error = "could not parse node list"; - goto error; - } - num_nodes = opal_argv_count(nodes); - orte_process_info.num_nodes = num_nodes; - - /* setup the nidmap arrays */ - if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(NULL))) { - ORTE_ERROR_LOG(ret); - error = "orte_util_nidmap_init"; - goto error; - } - - /* set the size of the nidmap storage so we minimize realloc's */ - if (ORTE_SUCCESS != (ret = opal_pointer_array_set_size(&orte_nidmap, orte_process_info.num_nodes))) { - error = "could not set pointer array size for nidmap"; - goto error; - } - - /* get the slurm procid - this will be our vpid */ - if (NULL == (envar = getenv("SLURM_PROCID"))) { - error = "could not get SLURM_PROCID"; - goto error; - } - ORTE_PROC_MY_NAME->vpid = strtol(envar, NULL, 10); - /* get the number of procs in this job */ - if (NULL == (envar = getenv("SLURM_STEP_NUM_TASKS"))) { - error = "could not get SLURM_STEP_NUM_TASKS"; - goto error; - } - orte_process_info.num_procs = strtol(envar, NULL, 10); - /* set the app_num so that MPI attributes get set correctly */ - orte_process_info.app_num = 1; - /* if this is SLURM 2.0 or above, get our port - * assignments for use in the OOB - */ - if (NULL != (envar = getenv("SLURM_STEP_RESV_PORTS"))) { - /* convert this to an MCA param that will be - * picked up by the OOB - */ - orte_oob_static_ports = strdup(envar); - slurm20 = true; - OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, - "%s using SLURM-reserved ports %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - envar)); - } - - /* get the number of tasks/node */ - if (NULL == (tasks_per_node = getenv("SLURM_STEP_TASKS_PER_NODE"))) { - error = "could not get SLURM_STEP_TASKS_PER_NODE"; - goto error; - } - - /* get the number of CPUs per task that the user provided to slurm */ - if (NULL != (envar = getenv("SLURM_CPUS_PER_TASK"))) { - cpus_per_task = strtol(envar, NULL, 10); - if(0 >= cpus_per_task) { - error = "got bad value from SLURM_CPUS_PER_TASK"; - goto error; - } - } else { - cpus_per_task = 1; - } - - /* compute the ppn */ - if (ORTE_SUCCESS != (ret = orte_regex_extract_ppn(num_nodes, tasks_per_node, &ppn))) { - error = "could not determine #procs on each node"; - goto error; - } - /* for slurm, we have to normalize the ppn by the cpus_per_task */ - for (i=0; i < num_nodes; i++) { - ppn[i] /= cpus_per_task; - } - - /* get the distribution (i.e., mapping) mode */ - if (NULL == (envar = getenv("SLURM_DISTRIBUTION")) || - 0 == strcmp(envar, "block")) { - /* assume byslot mapping */ - block = true; - } else if (0 == strcmp(envar, "cyclic")) { - /* bynode mapping */ - cyclic = true; - } else { - /* cannot currently support other mapping modes */ - error = "distribution/mapping mode not supported"; - goto error; - } - - /* construct the nidmap */ - for (i=0; i < num_nodes; i++) { - node = OBJ_NEW(orte_nid_t); - node->name = strdup(nodes[i]); - node->daemon = i; - node->index = i; - opal_pointer_array_set_item(&orte_nidmap, i, node); - } - opal_argv_free(nodes); - - /* create a job map for this job */ - jmap = OBJ_NEW(orte_jmap_t); - jmap->job = ORTE_PROC_MY_NAME->jobid; - opal_pointer_array_add(&orte_jobmap, jmap); - /* update the num procs */ - jmap->num_procs = orte_process_info.num_procs; - - /* set the size of the pidmap storage so we minimize realloc's */ - if (ORTE_SUCCESS != (ret = opal_pointer_array_set_size(&jmap->pmap, jmap->num_procs))) { - ORTE_ERROR_LOG(ret); - error = "could not set array size for pidmap"; - goto error; - } - - /* construct the pidmap */ - if (block) { - /* for each node, cycle through the ppn */ - vpid = 0; - for (i=0; i < orte_nidmap.size; i++) { - if (NULL == (node = (orte_nid_t*)opal_pointer_array_get_item(&orte_nidmap, i))) { - continue; - } - /* compute the vpid for each proc on this node - * and add a pmap entry for it - */ - for (j=0; j < ppn[i]; j++) { - pmap = OBJ_NEW(orte_pmap_t); - pmap->node = node->index; - pmap->local_rank = j; - pmap->node_rank = j; - if (ORTE_SUCCESS != (ret = opal_pointer_array_set_item(&jmap->pmap, vpid, pmap))) { - ORTE_ERROR_LOG(ret); - error = "could not set pmap values"; - goto error; - } - OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, - "%s node %d name %s rank %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (int) node->index, node->name, (int)vpid)); - vpid++; - } - } - } else if (cyclic) { - /* cycle across the nodes */ - vpid = 0; - while (vpid < orte_process_info.num_procs) { - for (i=0; i < num_nodes && vpid < orte_process_info.num_procs; i++) { - if (0 < ppn[i]) { - if (NULL == (node = (orte_nid_t*)opal_pointer_array_get_item(&orte_nidmap, i))) { - /* this is an error */ - error = "error initializing process map"; - goto error; - } - pmap = OBJ_NEW(orte_pmap_t); - pmap->node = node->index; - pmap->local_rank = ppn[i]-1; - pmap->node_rank = ppn[i]-1; - if (ORTE_SUCCESS != (ret = opal_pointer_array_set_item(&jmap->pmap, vpid, pmap))) { - ORTE_ERROR_LOG(ret); - error = "could not set pmap values"; - goto error; - } - OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, - "%s node %d name %s rank %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (int) node->index, node->name, (int)vpid)); - vpid++; - --ppn[i]; - } - } - } - } - free(ppn); - /* ensure we pick the correct critical components */ - putenv("OMPI_MCA_grpcomm=hier"); - putenv("OMPI_MCA_routed=direct"); - - /* get our local rank */ - if (NULL == (envar = getenv("SLURM_LOCALID"))) { - error = "could not get SLURM_LOCALID"; - goto error; - } - local_rank = strtol(envar, NULL, 10); - - OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, - "%s local rank %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - local_rank)); - - /* set max procs */ - if (orte_process_info.max_procs < orte_process_info.num_procs) { - orte_process_info.max_procs = orte_process_info.num_procs; - } - - /* now use the default procedure to finish my setup */ - if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) { - ORTE_ERROR_LOG(ret); - error = "orte_ess_base_app_setup"; - goto error; - } - - /* flag that we completed init */ - app_init_complete = true; - - return ORTE_SUCCESS; - -error: - if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { - orte_show_help("help-orte-runtime.txt", - "orte_init:startup:internal-failure", - true, error, ORTE_ERROR_NAME(ret), ret); - } - - return ret; -} - -static int rte_finalize(void) -{ - int ret = ORTE_SUCCESS; - - if (app_init_complete) { - /* use the default procedure to finish */ - if (ORTE_SUCCESS != (ret = orte_ess_base_app_finalize())) { - ORTE_ERROR_LOG(ret); - } - } - - /* remove the envars that we pushed into environ - * so we leave that structure intact - */ - unsetenv("OMPI_MCA_grpcomm"); - unsetenv("OMPI_MCA_routed"); - unsetenv("OMPI_MCA_orte_precondition_transports"); - - /* deconstruct my nidmap and jobmap arrays - this - * function protects itself from being called - * before things were initialized - */ - orte_util_nidmap_finalize(); - -#if OPAL_HAVE_HWLOC - if (NULL != opal_hwloc_topology) { - opal_hwloc_base_free_topology(opal_hwloc_topology); - opal_hwloc_topology = NULL; - } -#endif - - return ret; -} - -static void rte_abort(int error_code, bool report) -{ - if (ORTE_ERR_SOCKET_NOT_AVAILABLE == error_code && slurm20) { - /* exit silently with a special error code for slurm 2.0 */ - orte_ess_base_app_abort(108, false); - } else { - orte_ess_base_app_abort(error_code, report); - } -} - -/** - * Discover the available resources. - * - * In order to fully support slurm, we need to be able to handle - * node regexp/task_per_node strings such as: - * foo,bar 5,3 - * foo 5 - * foo[2-10,12,99-105],bar,foobar[3-11] 2(x10),5,100(x16) - * - * @param *regexp A node regular expression from SLURM (i.e. SLURM_NODELIST) - * @param **nodelist argv array to return the found nodes in - */ -static int discover_nodes(char *regexp, char*** names) -{ - int i, j, len, ret; - char *base; - char *orig; - bool found_range = false; - bool more_to_come = false; - - orig = base = strdup(regexp); - if (NULL == base) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, - "%s ess:slurmd:discover: checking nodelist: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - regexp)); - - do { - /* Find the base */ - len = strlen(base); - for (i = 0; i <= len; ++i) { - if (base[i] == '[') { - /* we found a range. this gets dealt with below */ - base[i] = '\0'; - found_range = true; - break; - } - if (base[i] == ',') { - /* we found a singleton node, and there are more to come */ - base[i] = '\0'; - found_range = false; - more_to_come = true; - break; - } - if (base[i] == '\0') { - /* we found a singleton node */ - found_range = false; - more_to_come = false; - break; - } - } - if(i == 0) { - /* we found a special character at the beginning of the string */ - orte_show_help("help-ras-slurm.txt", "slurm-env-var-bad-value", 1, regexp); - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - free(orig); - return ORTE_ERR_BAD_PARAM; - } - - if (found_range) { - /* If we found a range, now find the end of the range */ - for (j = i; j < len; ++j) { - if (base[j] == ']') { - base[j] = '\0'; - break; - } - } - if (j >= len) { - /* we didn't find the end of the range */ - orte_show_help("help-ess-slurdm.txt", "slurm-env-var-bad-value", 1, regexp); - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - free(orig); - return ORTE_ERR_BAD_PARAM; - } - - ret = parse_ranges(base, base + i + 1, names); - if(ORTE_SUCCESS != ret) { - orte_show_help("help-ras-slurm.txt", "slurm-env-var-bad-value", 1, regexp); - ORTE_ERROR_LOG(ret); - free(orig); - return ret; - } - if(base[j + 1] == ',') { - more_to_come = true; - base = &base[j + 2]; - } else { - more_to_come = false; - } - } else { - /* If we didn't find a range, just add the node */ - - OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, - "%s ess:slurmd:discover: found node %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - base)); - - if(ORTE_SUCCESS != (ret = opal_argv_append_nosize(names, base))) { - ORTE_ERROR_LOG(ret); - free(orig); - return ret; - } - /* set base equal to the (possible) next base to look at */ - base = &base[i + 1]; - } - } while(more_to_come); - - free(orig); - - /* All done */ - return ret; -} - - -/* - * Parse one or more ranges in a set - * - * @param base The base text of the node name - * @param *ranges A pointer to a range. This can contain multiple ranges - * (i.e. "1-3,10" or "5" or "9,0100-0130,250") - * @param ***names An argv array to add the newly discovered nodes to - */ -static int parse_ranges(char *base, char *ranges, char ***names) -{ - int i, len, ret; - char *start, *orig; - - /* Look for commas, the separator between ranges */ - - len = strlen(ranges); - for (orig = start = ranges, i = 0; i < len; ++i) { - if (',' == ranges[i]) { - ranges[i] = '\0'; - ret = parse_range(base, start, names); - if (ORTE_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - return ret; - } - start = ranges + i + 1; - } - } - - /* Pick up the last range, if it exists */ - - if (start < orig + len) { - - OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output, - "%s ess:slurmd:discover: parse range %s (2)", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - start)); - - ret = parse_range(base, start, names); - if (ORTE_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - /* All done */ - return ORTE_SUCCESS; -} - - -/* - * Parse a single range in a set and add the full names of the nodes - * found to the names argv - * - * @param base The base text of the node name - * @param *ranges A pointer to a single range. (i.e. "1-3" or "5") - * @param ***names An argv array to add the newly discovered nodes to - */ -static int parse_range(char *base, char *range, char ***names) -{ - char *str, temp1[BUFSIZ]; - size_t i, j, start, end; - size_t base_len, len, num_len; - size_t str_start, str_end; - size_t num_str_len; - bool found; - int ret; - - len = strlen(range); - base_len = strlen(base); - /* Silence compiler warnings; start and end are always assigned - properly, below */ - start = end = 0; - - /* Look for the beginning of the first number */ - - for (found = false, i = 0; i < len; ++i) { - if (isdigit((int) range[i])) { - if (!found) { - str_start = i; - start = atoi(range + i); - found = true; - break; - } - } - } - if (!found) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - - /* Look for the end of the first number */ - - for (found = false, num_str_len = 0; i < len; ++i, ++num_str_len) { - if (!isdigit((int) range[i])) { - break; - } - } - - /* Was there no range, just a single number? */ - - if (i >= len) { - str_end = len; - end = start; - found = true; - } - - /* Nope, there was a range. Look for the beginning of the second - number */ - - else { - str_end = i - 1; - for (; i < len; ++i) { - if (isdigit((int) range[i])) { - end = atoi(range + i); - found = true; - break; - } - } - } - if (!found) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - - /* Make strings for all values in the range */ - - len = base_len + num_str_len + 32; - str = malloc(len); - if (NULL == str) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - strcpy(str, base); - for (i = start; i <= end; ++i) { - str[base_len] = '\0'; - snprintf(temp1, BUFSIZ - 1, "%lu", (long) i); - - /* Do we need zero pading? */ - - if ((num_len = strlen(temp1)) < num_str_len) { - for (j = base_len; j < base_len + (num_str_len - num_len); ++j) { - str[j] = '0'; - } - str[j] = '\0'; - } - strcat(str, temp1); - ret = opal_argv_append_nosize(names, str); - if(ORTE_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - free(str); - return ret; - } - } - free(str); - - /* All done */ - return ORTE_SUCCESS; -} diff --git a/orte/mca/ess/slurmd/help-ess-slurmd.txt b/orte/mca/ess/slurmd/help-ess-slurmd.txt deleted file mode 100644 index 8e9bf8400f..0000000000 --- a/orte/mca/ess/slurmd/help-ess-slurmd.txt +++ /dev/null @@ -1,41 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English help file for Open MPI MCA error messages. -# -[slurm-env-var-not-found] -While trying to determine what resources are available, ORTE -expects to find the following environment variables: - - SLURM_NODELIST - SLURM_TASKS_PER_NODE - -However, it was unable to find the following environment variable: - - %s - -#This is a fatal error. -[slurm-env-var-bad-value] -While trying to determine what nodes are being used, ORTE -uses the following environment variable: - - SLURM_NODELIST value: %s - -However, an error was encountered when trying to parse it - -This is a fatal error.