1
1

Enable direct launch of applications under SLURM. Compute all required nidmap and pidmap info from publicly available SLURM environment variables so that no linkage to SLURM libraries is required.

Note: this requires that nodes not be shared by jobs/users. SLURM developers are working on an enhancement to remove this constraint.


Note 2: yes, the direct routed module has returned! However, it is vastly different from the old one and has zero support for such things as comm_spawn. It exists solely to support non-daemon, direct-launch environments.

This commit was SVN r20601.
Этот коммит содержится в:
Ralph Castain 2009-02-19 21:39:54 +00:00
родитель 76fc406b08
Коммит ca97f315fe
11 изменённых файлов: 1168 добавлений и 0 удалений

43
orte/mca/ess/slurmd/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,43 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Files that make up the slurmd ess component.
sources = \
ess_slurmd.h \
ess_slurmd_component.c \
ess_slurmd_module.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).  Only one of the two variables assigned below
# is non-empty, so only one of the two libraries is ever requested.
if OMPI_BUILD_ess_slurmd_DSO
component_noinst =
component_install = mca_ess_slurmd.la
else
component_noinst = libmca_ess_slurmd.la
component_install =
endif
# DSO case: the component library is listed for installation, with
# $(pkglibdir) as its install directory.
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_ess_slurmd_la_SOURCES = $(sources)
mca_ess_slurmd_la_LDFLAGS = -module -avoid-version
# Static case: the library is built from the same sources but is
# listed under noinst, i.e. it is not installed.
noinst_LTLIBRARIES = $(component_noinst)
libmca_ess_slurmd_la_SOURCES =$(sources)
libmca_ess_slurmd_la_LDFLAGS = -module -avoid-version

37
orte/mca/ess/slurmd/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,37 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_ess_slurmd_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
# Run the SLURM check on behalf of this component.  On success, copy
# the resulting LDFLAGS and LIBS into the ess_slurmd_WRAPPER_EXTRA_*
# variables and run action-if-found; otherwise run action-if-not-found.
AC_DEFUN([MCA_ess_slurmd_CONFIG],[
OMPI_CHECK_SLURM([ess_slurmd], [ess_slurmd_good=1], [ess_slurmd_good=0])
# If the check worked, set the wrapper flags and run the success
# action; if it did not, run the failure action.
AS_IF([test "$ess_slurmd_good" = "1"],
[ess_slurmd_WRAPPER_EXTRA_LDFLAGS="$ess_slurmd_LDFLAGS"
ess_slurmd_WRAPPER_EXTRA_LIBS="$ess_slurmd_LIBS"
$1],
[$2])
# Substitute the build flags so the Makefile can use them
AC_SUBST([ess_slurmd_CPPFLAGS])
AC_SUBST([ess_slurmd_LDFLAGS])
AC_SUBST([ess_slurmd_LIBS])
])dnl

27
orte/mca/ess/slurmd/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,27 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Files to be generated for this component
# (NOTE(review): presumably read by the MCA configure machinery - confirm)
PARAM_CONFIG_FILES="Makefile"
#
# Set the config priority so that, if we can build,
# all the SLURM and supporting components will build
PARAM_CONFIG_PRIORITY=10

35
orte/mca/ess/slurmd/ess_slurmd.h Обычный файл
Просмотреть файл

@ -0,0 +1,35 @@
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef ORTE_ESS_SLURMD_H
#define ORTE_ESS_SLURMD_H
BEGIN_C_DECLS
ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_slurmd_component;
/*
* Module open / close
*/
int orte_ess_slurmd_component_open(void);
int orte_ess_slurmd_component_close(void);
int orte_ess_slurmd_component_query(mca_base_module_t **module, int *priority);
END_C_DECLS
#endif /* ORTE_ESS_SLURMD_H */

96
orte/mca/ess/slurmd/ess_slurmd_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,96 @@
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* These symbols are in a file by themselves to provide nice linker
* semantics. Since linkers generally pull in symbols by object
* files, keeping these symbols as the only symbols in this file
* prevents utility programs such as "ompi_info" from having to import
* entire components just to query their version and parameters.
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/util/proc_info.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/ess/slurmd/ess_slurmd.h"
/* The module itself lives in ess_slurmd_module.c; query() hands it out */
extern orte_ess_base_module_t orte_ess_slurmd_module;
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/
orte_ess_base_component_t mca_ess_slurmd_component = {
{
/* ess framework version this component is written against */
ORTE_ESS_BASE_VERSION_2_0_0,
/* Component name and version */
"slurmd",
ORTE_MAJOR_VERSION,
ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION,
/* Component open and close functions */
orte_ess_slurmd_component_open,
orte_ess_slurmd_component_close,
/* Query function: decides whether this component is selectable */
orte_ess_slurmd_component_query
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
};
/*
 * Component open.  There is nothing to set up at this point, so this
 * always reports success.
 */
int orte_ess_slurmd_component_open(void)
{
    const int status = ORTE_SUCCESS;

    return status;
}
/*
 * Decide whether this component should be used.
 *
 * We want to be selected only for a process that is inside a SLURM job
 * (SLURM_JOBID is set) but was NOT given a way back to an HNP (no HNP
 * uri) - i.e., it was launched directly rather than by mpirun.
 *
 * On selection: *module is our module, *priority is 30, returns
 * ORTE_SUCCESS.  Otherwise: *module is NULL, *priority is -1, returns
 * ORTE_ERROR.
 */
int orte_ess_slurmd_component_query(mca_base_module_t **module, int *priority)
{
    /* not in a slurm world, or mpirun launched us: decline */
    if (NULL == getenv("SLURM_JOBID") ||
        NULL != orte_process_info.my_hnp_uri) {
        *priority = -1;
        *module = NULL;
        return ORTE_ERROR;
    }

    /* directly launched under SLURM - this one is ours */
    *priority = 30;
    *module = (mca_base_module_t *)&orte_ess_slurmd_module;
    return ORTE_SUCCESS;
}
/*
 * Component close.  Nothing was acquired in open, so there is nothing
 * to release; always reports success.
 */
int orte_ess_slurmd_component_close(void)
{
    const int status = ORTE_SUCCESS;

    return status;
}

558
orte/mca/ess/slurmd/ess_slurmd_module.c Обычный файл
Просмотреть файл

@ -0,0 +1,558 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "orte_config.h"
#include "orte/constants.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include <ctype.h>
#ifdef HAVE_NETDB_H
#include <netdb.h>
#endif
#ifdef HAVE_IFADDRS_H
#include <ifaddrs.h>
#endif
#include "opal/util/opal_environ.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/argv.h"
#include "opal/class/opal_pointer_array.h"
#include "opal/util/if.h"
#include "opal/util/net.h"
#include "opal/dss/dss.h"
#include "opal/mca/paffinity/paffinity.h"
#include "orte/util/proc_info.h"
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/nidmap.h"
#include "orte/util/regex.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/runtime/orte_wait.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/ess/base/base.h"
#include "orte/mca/ess/slurmd/ess_slurmd.h"
/* Forward declarations of the module entry points defined below */
static int rte_init(char flags);
static int rte_finalize(void);
static uint8_t proc_get_locality(orte_process_name_t *proc);
static orte_vpid_t proc_get_daemon(orte_process_name_t *proc);
static char* proc_get_hostname(orte_process_name_t *proc);
static uint32_t proc_get_arch(orte_process_name_t *proc);
static orte_local_rank_t proc_get_local_rank(orte_process_name_t *proc);
static orte_node_rank_t proc_get_node_rank(orte_process_name_t *proc);
static int update_arch(orte_process_name_t *proc, uint32_t arch);
static int update_pidmap(opal_byte_object_t *bo);
static int update_nidmap(opal_byte_object_t *bo);
/*
 * The slurmd ess module: the table of entry points handed out by the
 * component's query function.  All entries are local to this file
 * except abort, which uses the shared orte_ess_base_app_abort; no
 * ft_event handler is provided.
 */
orte_ess_base_module_t orte_ess_slurmd_module = {
rte_init,
rte_finalize,
orte_ess_base_app_abort,
proc_get_locality,
proc_get_daemon,
proc_get_hostname,
proc_get_arch,
proc_get_local_rank,
proc_get_node_rank,
update_arch,
update_pidmap,
update_nidmap,
NULL /* ft_event */
};
/**** MODULE FUNCTIONS ****/
static int rte_init(char flags)
{
int ret;
char *error = NULL;
int32_t jobfam, stepid;
char **nodes = NULL;
char *envar;
int i, j;
orte_nid_t *node;
orte_jmap_t *jmap;
orte_pmap_t pmap;
orte_vpid_t vpid;
int local_rank;
int nodeid;
int num_nodes;
int cpus_per_task;
char *regexp, *tasks_per_node;
int *ppn;
bool block=false, cyclic=false;
/* run the prolog */
if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
error = "orte_ess_base_std_prolog";
goto error;
}
/* Only application procs can use this module. Since we
* were directly launched by srun, we need to bootstrap
* our own global info so we can startup. Srun will have
* provided that info in our environment, so get it from there
*/
/* get the slurm jobid - this will be our job family */
envar = getenv("SLURM_JOBID");
/* don't need to check this for NULL - if it was, we would
* never have been selected anyway
*/
jobfam = strtol(envar, NULL, 10);
/* get the slurm stepid - this will be our local jobid */
if (NULL == (envar = getenv("SLURM_STEPID"))) {
error = "could not get SLURM_STEPID";
goto error;
}
stepid = strtol(envar, NULL, 10);
/* now build the jobid */
ORTE_PROC_MY_NAME->jobid = ORTE_CONSTRUCT_LOCAL_JOBID(jobfam << 16, stepid);
/* get the slurm procid - this will be our vpid */
if (NULL == (envar = getenv("SLURM_PROCID"))) {
error = "could not get SLURM_PROCID";
goto error;
}
ORTE_PROC_MY_NAME->vpid = strtol(envar, NULL, 10);
/* get our local rank */
if (NULL == (envar = getenv("SLURM_LOCALID"))) {
error = "could not get SLURM_LOCALID";
goto error;
}
local_rank = strtol(envar, NULL, 10);
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
"%s local rank %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
local_rank));
/* get the number of procs in this job */
if (NULL == (envar = getenv("SLURM_STEP_NUM_TASKS"))) {
error = "could not get SLURM_STEP_NUM_TASKS";
goto error;
}
orte_process_info.num_procs = strtol(envar, NULL, 10);
/* get my local nodeid */
if (NULL == (envar = getenv("SLURM_NODEID"))) {
error = "could not get SLURM_NODEID";
goto error;
}
nodeid = strtol(envar, NULL, 10);
ORTE_PROC_MY_DAEMON->jobid = 0;
ORTE_PROC_MY_DAEMON->vpid = nodeid;
/* get the number of ppn */
if (NULL == (tasks_per_node = getenv("SLURM_STEP_TASKS_PER_NODE"))) {
error = "could not get SLURM_STEP_TASKS_PER_NODE";
goto error;
}
/* get the number of CPUs per task that the user provided to slurm */
if (NULL != (envar = getenv("SLURM_CPUS_PER_TASK"))) {
cpus_per_task = strtol(envar, NULL, 10);
if(0 >= cpus_per_task) {
error = "got bad value from SLURM_CPUS_PER_TASK";
goto error;
}
} else {
cpus_per_task = 1;
}
/* get the node list */
if (NULL == (regexp = getenv("SLURM_STEP_NODELIST"))) {
error = "could not get SLURM_STEP_NODELIST";
goto error;
}
/* break that down into a list of nodes */
if (ORTE_SUCCESS != (ret = orte_regex_extract_node_names(regexp, &nodes))) {
error = "could not parse node list";
goto error;
}
num_nodes = opal_argv_count(nodes);
orte_process_info.num_nodes = num_nodes;
/* compute the ppn */
if (ORTE_SUCCESS != (ret = orte_regex_extract_ppn(num_nodes, tasks_per_node, &ppn))) {
error = "could not determine #procs on each node";
goto error;
}
/* for slurm, we have to normalize the ppn by the cpus_per_task */
for (i=0; i < num_nodes; i++) {
ppn[i] /= cpus_per_task;
}
/* get the distribution (i.e., mapping) mode */
if (NULL == (envar = getenv("SLURM_DISTRIBUTION")) ||
0 == strcmp(envar, "block")) {
/* assume byslot mapping */
block = true;
} else if (0 == strcmp(envar, "cyclic")) {
/* bynode mapping */
cyclic = true;
} else {
/* cannot currently support other mapping modes */
error = "distribution/mapping mode not supported";
goto error;
}
#if 0
SLURM_DIST_PLANESIZE=0
SLURM_DIST_LLLP=
#endif
/* setup the nidmap arrays */
if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(NULL))) {
ORTE_ERROR_LOG(ret);
error = "orte_util_nidmap_init";
goto error;
}
/* set the size of the nidmap storage so we minimize realloc's */
if (ORTE_SUCCESS != (ret = opal_pointer_array_set_size(&orte_nidmap, orte_process_info.num_nodes))) {
error = "could not set pointer array size for nidmap";
goto error;
}
/* construct the nidmap */
for (i=0; i < num_nodes; i++) {
node = OBJ_NEW(orte_nid_t);
node->name = strdup(nodes[i]);
node->daemon = i;
node->index = opal_pointer_array_add(&orte_nidmap, node);
}
opal_argv_free(nodes);
/* create a job map for this job */
jmap = OBJ_NEW(orte_jmap_t);
jmap->job = ORTE_PROC_MY_NAME->jobid;
opal_pointer_array_add(&orte_jobmap, jmap);
/* update the num procs */
jmap->num_procs = orte_process_info.num_procs;
/* set the size of the pidmap storage so we minimize realloc's */
if (ORTE_SUCCESS != (ret = opal_value_array_set_size(&jmap->pmap, jmap->num_procs))) {
ORTE_ERROR_LOG(ret);
error = "could not set value array size for pidmap";
goto error;
}
/* construct the pidmap */
OBJ_CONSTRUCT(&pmap, orte_pmap_t);
if (block) {
/* for each node, cycle through the ppn */
vpid = 0;
for (i=0; i < num_nodes; i++) {
node = (orte_nid_t*)orte_nidmap.addr[i];
/* compute the vpid for each proc on this node
* and add a pmap entry for it
*/
for (j=0; j < ppn[i]; j++) {
pmap.node = node->index;
pmap.local_rank = j;
pmap.node_rank = j;
if (ORTE_SUCCESS != (ret = opal_value_array_set_item(&jmap->pmap, vpid, &pmap))) {
ORTE_ERROR_LOG(ret);
error = "could not set pmap values";
goto error;
}
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
"%s node %d name %s rank %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(int) node->index, node->name, (int)vpid));
vpid++;
}
}
} else if (cyclic) {
/* cycle across the nodes */
vpid = 0;
while (vpid < orte_process_info.num_procs) {
for (i=0; i < num_nodes && vpid < orte_process_info.num_procs; i++) {
if (0 < ppn[i]) {
node = (orte_nid_t*)orte_nidmap.addr[i];
pmap.node = node->index;
pmap.local_rank = ppn[i]-1;
pmap.node_rank = ppn[i]-1;
if (ORTE_SUCCESS != (ret = opal_value_array_set_item(&jmap->pmap, vpid, &pmap))) {
ORTE_ERROR_LOG(ret);
error = "could not set pmap values";
goto error;
}
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
"%s node %d name %s rank %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(int) node->index, node->name, (int)vpid));
vpid++;
--ppn[i];
}
}
}
}
OBJ_DESTRUCT(&pmap);
free(ppn);
/* ensure we pick the correct critical components */
putenv("OMPI_MCA_grpcomm=hier");
putenv("OMPI_MCA_routed=direct");
/* now use the default procedure to finish my setup */
if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) {
ORTE_ERROR_LOG(ret);
error = "orte_ess_base_app_setup";
goto error;
}
return ORTE_SUCCESS;
error:
orte_show_help("help-orte-runtime.txt",
"orte_init:startup:internal-failure",
true, error, ORTE_ERROR_NAME(ret), ret);
return ret;
}
/*
 * Tear down the runtime: run the standard application finalize, then
 * release the nidmap and jobmap arrays.  The status of the standard
 * finalize is what gets returned (and logged, if it is an error).
 */
static int rte_finalize(void)
{
    int status = orte_ess_base_app_finalize();

    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }

    /* the maps are released regardless of the status above */
    orte_util_nidmap_finalize();

    return status;
}
/*
 * Report how "close" the given proc is to us.
 *
 * A proc whose node entry carries the same daemon vpid as ours is on
 * our node (and hence on our CU and cluster); anything else - including
 * a proc we cannot find, which is also logged as an error - is reported
 * as non-local.
 */
static uint8_t proc_get_locality(orte_process_name_t *proc)
{
    orte_nid_t *host = orte_util_lookup_nid(proc);

    if (NULL == host) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return OPAL_PROC_NON_LOCAL;
    }

    if (ORTE_PROC_MY_DAEMON->vpid != host->daemon) {
        OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                             "%s ess:slurmd: proc %s is REMOTE",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(proc)));
        return OPAL_PROC_NON_LOCAL;
    }

    OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                         "%s ess:slurmd: proc %s is LOCAL",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc)));
    return (OPAL_PROC_ON_NODE | OPAL_PROC_ON_CU | OPAL_PROC_ON_CLUSTER);
}
/*
 * Return the daemon vpid recorded for the node hosting the given proc,
 * or ORTE_VPID_INVALID if the proc is unknown.
 */
static orte_vpid_t proc_get_daemon(orte_process_name_t *proc)
{
    orte_nid_t *host = orte_util_lookup_nid(proc);

    /* Stay quiet on a miss: this may simply be a request for a route
     * to a proc in a job we know nothing about, and it is up to the
     * caller to decide whether that deserves an error message.
     */
    if (NULL == host) {
        return ORTE_VPID_INVALID;
    }

    OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                         "%s ess:slurmd: proc %s is hosted by daemon %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc),
                         ORTE_VPID_PRINT(host->daemon)));

    return host->daemon;
}
/*
 * Return the name of the node hosting the given proc.  The returned
 * pointer is the string stored in the node entry itself (not a copy).
 * An unknown proc is logged as an error and yields NULL.
 */
static char* proc_get_hostname(orte_process_name_t *proc)
{
    orte_nid_t *host = orte_util_lookup_nid(proc);

    if (NULL == host) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return NULL;
    }

    OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                         "%s ess:slurmd: proc %s is on host %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc),
                         host->name));

    return host->name;
}
/*
 * Return the architecture value stored for the node hosting the given
 * proc.  An unknown proc is logged as an error and yields 0.
 */
static uint32_t proc_get_arch(orte_process_name_t *proc)
{
    orte_nid_t *host = orte_util_lookup_nid(proc);

    if (NULL == host) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return 0;
    }

    OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                         "%s ess:slurmd: proc %s has arch %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc),
                         host->arch));

    return host->arch;
}
/*
 * Store a new architecture value in the node entry of the given proc.
 *
 * Returns ORTE_SUCCESS, or ORTE_ERR_NOT_FOUND (also logged) when the
 * proc has no node entry.
 */
static int update_arch(orte_process_name_t *proc, uint32_t arch)
{
    orte_nid_t *host = orte_util_lookup_nid(proc);

    if (NULL == host) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }

    OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                         "%s ess:slurmd: updating proc %s to arch %0x",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc),
                         arch));

    host->arch = arch;

    return ORTE_SUCCESS;
}
/*
 * Return the local rank stored in the pidmap entry of the given proc.
 * An unknown proc is logged as an error and yields
 * ORTE_LOCAL_RANK_INVALID.
 */
static orte_local_rank_t proc_get_local_rank(orte_process_name_t *proc)
{
    orte_pmap_t *entry = orte_util_lookup_pmap(proc);

    if (NULL == entry) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_LOCAL_RANK_INVALID;
    }

    OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                         "%s ess:slurmd: proc %s has local rank %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc),
                         (int)entry->local_rank));

    return entry->local_rank;
}
/*
 * Return the node rank stored in the pidmap entry of the given proc,
 * or ORTE_NODE_RANK_INVALID if the proc is unknown.  Unlike the local
 * rank lookup, a miss here is not logged.
 */
static orte_node_rank_t proc_get_node_rank(orte_process_name_t *proc)
{
    orte_pmap_t *entry = orte_util_lookup_pmap(proc);

    if (NULL == entry) {
        return ORTE_NODE_RANK_INVALID;
    }

    OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                         "%s ess:slurmd: proc %s has node rank %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc),
                         (int)entry->node_rank));

    return entry->node_rank;
}
/*
 * Rebuild the pidmap from the encoded data in the given byte object.
 * Returns the status of the decode, logging it when it is an error.
 */
static int update_pidmap(opal_byte_object_t *bo)
{
    int status;

    OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                         "%s ess:slurmd: updating pidmap",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* build the pmap */
    status = orte_util_decode_pidmap(bo);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }

    return status;
}
/*
 * Rebuild the nidmap from the encoded data in the given byte object.
 * Returns the status of the decode, logging it when it is an error.
 */
static int update_nidmap(opal_byte_object_t *bo)
{
    /* decode the nidmap - the util will know what to do */
    int status = orte_util_decode_nodemap(bo);

    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }

    return status;
}
#if 0
/*** AVAILABLE SLURM ENVARS ***/
SLURM_JOB_ID=38749
SLURM_JOB_NUM_NODES=1
SLURM_JOB_NODELIST=odin097
SLURM_JOB_CPUS_PER_NODE=4
SLURM_JOBID=38749
SLURM_NNODES=1
SLURM_NODELIST=odin097
SLURM_TASKS_PER_NODE=2
SLURM_PRIO_PROCESS=0
SLURM_UMASK=0022
SLURM_NPROCS=2
SLURM_CPUS_PER_TASK=1
SLURM_STEPID=1
SLURM_SRUN_COMM_PORT=33650
SLURM_STEP_ID=1
SLURM_STEP_NODELIST=odin097
SLURM_STEP_NUM_NODES=1
SLURM_STEP_NUM_TASKS=2
SLURM_STEP_TASKS_PER_NODE=2
SLURM_STEP_LAUNCHER_HOSTNAME=(null)
SLURM_STEP_LAUNCHER_PORT=33650
SLURM_SRUN_COMM_HOST=129.79.240.100
SLURM_TASK_PID=5528
SLURM_CPUS_ON_NODE=4
SLURM_NODEID=0
SLURM_PROCID=1
SLURM_LOCALID=1
SLURM_LAUNCH_NODE_IPADDR=129.79.240.100
SLURM_GTIDS=0,1
SLURM_CHECKPOINT_PATH=/nfs/rinfs/san/homedirs/rhc
SLURMD_NODENAME=odin097
#endif

36
orte/mca/routed/direct/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,36 @@
#
# Copyright (c) 2007 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Files that make up the direct routed component.
sources = \
routed_direct.h \
routed_direct.c \
routed_direct_component.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).  Only one of the two variables assigned below
# is non-empty, so only one of the two libraries is ever requested.
if OMPI_BUILD_routed_direct_DSO
component_noinst =
component_install = mca_routed_direct.la
else
component_noinst = libmca_routed_direct.la
component_install =
endif
# DSO case: the component library is listed for installation, with
# $(pkglibdir) as its install directory.
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_routed_direct_la_SOURCES = $(sources)
mca_routed_direct_la_LDFLAGS = -module -avoid-version
# Static case: the library is built from the same sources but is
# listed under noinst, i.e. it is not installed.
noinst_LTLIBRARIES = $(component_noinst)
libmca_routed_direct_la_SOURCES = $(sources)
libmca_routed_direct_la_LDFLAGS = -module -avoid-version

14
orte/mca/routed/direct/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,14 @@
# -*- shell-script -*-
#
# Copyright (c) 2007 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module: the files to be generated for this component
# (NOTE(review): presumably read by the MCA configure machinery - confirm)
PARAM_CONFIG_FILES="Makefile"

236
orte/mca/routed/direct/routed_direct.c Обычный файл
Просмотреть файл

@ -0,0 +1,236 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "opal/threads/condition.h"
#include "opal/runtime/opal_progress.h"
#include "opal/dss/dss_types.h"
#include "opal/util/output.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/routed/base/base.h"
#include "routed_direct.h"
/* Forward declarations of the module entry points defined below */
static int init(void);
static int finalize(void);
static int delete_route(orte_process_name_t *proc);
static int update_route(orte_process_name_t *target,
orte_process_name_t *route);
static orte_process_name_t get_route(orte_process_name_t *target);
static int init_routes(orte_jobid_t job, opal_buffer_t *ndat);
static int route_lost(const orte_process_name_t *route);
static bool route_is_defined(const orte_process_name_t *target);
static int update_routing_tree(void);
static orte_vpid_t get_routing_tree(opal_list_t *children);
static int get_wireup_info(opal_buffer_t *buf);
static int set_lifeline(orte_process_name_t *proc);
/* The fault-tolerance handler exists only in FT-enabled builds */
#if OPAL_ENABLE_FT == 1
static int direct_ft_event(int state);
#endif
/*
 * The direct routed module: the table of entry points handed out by
 * the component's query function.  Note that the order of the entries
 * here differs from the order of the declarations above
 * (set_lifeline precedes the routing-tree functions).
 */
orte_routed_module_t orte_routed_direct_module = {
init,
finalize,
delete_route,
update_route,
get_route,
init_routes,
route_lost,
route_is_defined,
set_lifeline,
update_routing_tree,
get_routing_tree,
get_wireup_info,
/* last slot: the ft_event handler, or NULL when FT is not enabled */
#if OPAL_ENABLE_FT == 1
direct_ft_event
#else
NULL
#endif
};
/* file-scope state: constructed in init(), destructed in finalize() */
static opal_condition_t cond;
static opal_mutex_t lock;

/*
 * Module init: construct the file-scope condition variable and mutex.
 * Always returns ORTE_SUCCESS.
 */
static int init(void)
{
    OBJ_CONSTRUCT(&cond, opal_condition_t);
    OBJ_CONSTRUCT(&lock, opal_mutex_t);

    return ORTE_SUCCESS;
}
/*
 * Module finalize: destruct the file-scope condition variable and
 * mutex that init() constructed.  Always returns ORTE_SUCCESS.
 */
static int finalize(void)
{
    OBJ_DESTRUCT(&cond);
    OBJ_DESTRUCT(&lock);

    return ORTE_SUCCESS;
}
/*
 * Delete the route to a proc.  No route table is kept by this module,
 * so apart from the verbose trace this is a no-op that always returns
 * ORTE_SUCCESS.
 */
static int delete_route(orte_process_name_t *proc)
{
    OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
                         "%s routed_direct_delete_route for %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc)));

    /* nothing is stored, so nothing needs removing */
    return ORTE_SUCCESS;
}
/*
 * Record that messages for "target" should go via "route".  No route
 * table is kept by this module, so apart from the verbose trace this
 * is a no-op that always returns ORTE_SUCCESS.
 */
static int update_route(orte_process_name_t *target,
                        orte_process_name_t *route)
{
    OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
                         "%s routed_direct_update: %s --> %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(target),
                         ORTE_NAME_PRINT(route)));

    /* nothing is stored, so nothing needs updating */
    return ORTE_SUCCESS;
}
/*
 * Return the next hop for a message addressed to "target".
 *
 * Every route is direct, so the answer is the target itself - unless
 * the target's jobid or vpid is invalid, in which case the invalid
 * name is returned.
 */
static orte_process_name_t get_route(orte_process_name_t *target)
{
    /* start from the direct route and override it for a bad target */
    orte_process_name_t *hop = target;

    if (ORTE_JOBID_INVALID == target->jobid ||
        ORTE_VPID_INVALID == target->vpid) {
        hop = ORTE_NAME_INVALID;
    }

    OPAL_OUTPUT_VERBOSE((2, orte_routed_base_output,
                         "%s routed_direct_get(%s) --> %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(target),
                         ORTE_NAME_PRINT(hop)));

    return *hop;
}
/*
 * Initialize routes for a job.
 *
 * With ndat == NULL (the call made during orte_init) there is nothing
 * to do.  With ndat != NULL we are being asked to set up a route to a
 * process outside of our job family; since everything goes direct,
 * all that is needed is to feed the supplied data to the RML contact
 * info update.
 *
 * Returns ORTE_SUCCESS, or the (logged) error from the contact info
 * update.
 */
static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
{
    int status;

    if (NULL != ndat) {
        OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output,
                             "%s routed_direct: init routes w/non-NULL data",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

        status = orte_rml_base_update_contact_info(ndat);
        if (ORTE_SUCCESS != status) {
            ORTE_ERROR_LOG(status);
            return status;
        }
    }

    return ORTE_SUCCESS;
}
/*
 * Notification that a route has been lost.  This module has no
 * lifeline whose loss would matter, so the event is ignored and
 * ORTE_SUCCESS is returned.
 */
static int route_lost(const orte_process_name_t *route)
{
    const int status = ORTE_SUCCESS;

    return status;
}
/*
 * Report whether a route to "target" exists.  Since every route is
 * direct, the answer is unconditionally true.
 */
static bool route_is_defined(const orte_process_name_t *target)
{
    const bool defined = true;

    return defined;
}
/*
 * Set the lifeline proc.  This module does not track a lifeline, so
 * the request is accepted and ignored; always returns ORTE_SUCCESS.
 */
static int set_lifeline(orte_process_name_t *proc)
{
    const int status = ORTE_SUCCESS;

    return status;
}
/*
 * Update the routing tree.  This module is not allowed to route on
 * behalf of others, so there is no tree to update: always returns
 * ORTE_ERR_NOT_SUPPORTED.
 */
static int update_routing_tree(void)
{
    const int status = ORTE_ERR_NOT_SUPPORTED;

    return status;
}
/*
 * Get the routing tree.  This module is not allowed to route on behalf
 * of others, so there is no tree: the children list is left untouched
 * and ORTE_VPID_INVALID is returned.
 */
static orte_vpid_t get_routing_tree(opal_list_t *children)
{
    const orte_vpid_t none = ORTE_VPID_INVALID;

    return none;
}
/*
 * Get wireup info.  This module is not allowed to route on behalf of
 * others, so it has nothing to provide: the buffer is left untouched
 * and ORTE_ERR_NOT_SUPPORTED is returned.
 */
static int get_wireup_info(opal_buffer_t *buf)
{
    const int status = ORTE_ERR_NOT_SUPPORTED;

    return status;
}
#if OPAL_ENABLE_FT == 1
/*
 * Fault-tolerance event handler (FT-enabled builds only).
 *
 * Of all the states handled - checkpoint prep, continue, restart,
 * term, and anything else - only a restart requires work: the routes
 * are re-initialized for our own job.  Every other state is a no-op.
 *
 * Returns ORTE_SUCCESS, or the error from re-initializing the routes.
 */
static int direct_ft_event(int state)
{
    int status;

    /* checkpoint, continue, term and error states: nothing to do */
    if (OPAL_CRS_CHECKPOINT == state || OPAL_CRS_CONTINUE == state ||
        OPAL_CRS_RESTART != state) {
        return ORTE_SUCCESS;
    }

    /* restart recovery: re-exchange the routes */
    status = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL);
    if (ORTE_SUCCESS != status) {
        return status;
    }

    return ORTE_SUCCESS;
}
#endif

27
orte/mca/routed/direct/routed_direct.h Обычный файл
Просмотреть файл

@ -0,0 +1,27 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/* Public declarations of the direct routed component and its module */
#ifndef MCA_ROUTED_DIRECT_H
#define MCA_ROUTED_DIRECT_H
#include "orte_config.h"
#include "orte/types.h"
#include "orte/mca/routed/routed.h"
BEGIN_C_DECLS
/* The component descriptor (defined in routed_direct_component.c) */
ORTE_MODULE_DECLSPEC extern orte_routed_component_t mca_routed_direct_component;
/* The module function table (defined in routed_direct.c) */
extern orte_routed_module_t orte_routed_direct_module;
END_C_DECLS
#endif

Просмотреть файл

@ -0,0 +1,59 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2004-2008 The Trustees of Indiana University.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"
#include "opal/class/opal_hash_table.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/runtime/orte_globals.h"
#include "orte/mca/routed/base/base.h"
#include "routed_direct.h"
/* Forward declaration: referenced by the component struct below */
static int orte_routed_direct_component_query(mca_base_module_t **module, int *priority);
/**
* component definition
*/
orte_routed_component_t mca_routed_direct_component = {
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
/* routed framework version this component is written against */
ORTE_ROUTED_BASE_VERSION_2_0_0,
"direct", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
/* NOTE(review): the next two slots are presumably the component
open and close functions (none provided) - confirm against the
mca_base_component_t definition */
NULL,
NULL,
orte_routed_direct_component_query
},
{
/* This component can be checkpointed */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
};
/*
 * Component query: always offers the direct module, at priority 0.
 * The intent of that priority is that this component is selected only
 * when it has been specifically requested.  Always returns
 * ORTE_SUCCESS.
 */
static int orte_routed_direct_component_query(mca_base_module_t **module, int *priority)
{
    mca_base_module_t *offered = (mca_base_module_t *) &orte_routed_direct_module;

    *priority = 0;
    *module = offered;

    return ORTE_SUCCESS;
}