1
1

Next round of LSF commits. Getting farther, but it still doesn't

fully work yet (everything is still .ompi_ignore'ed for everyone).

This commit was SVN r15398.
Этот коммит содержится в:
Jeff Squyres 2007-07-13 11:57:17 +00:00
родитель b9db0a4c2d
Коммит b20248709a
13 изменённых файлов: 545 добавлений и 48 удалений

Просмотреть файл

@ -10,6 +10,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Cisco, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -22,15 +23,73 @@
# --------------------------------------------------------
AC_DEFUN([OMPI_CHECK_LSF],[
AC_ARG_WITH([lsf],
[AC_HELP_STRING([--with-lsf],
[Directory where the LSF software is installed])])
[AC_HELP_STRING([--with-lsf(=DIR)],
[Build LSF support])])
AC_ARG_WITH([lsf-libdir],
[AC_HELP_STRING([--with-lsf-libdir=DIR],
[Search for LSF libraries in DIR])])
# Defaults
ompi_check_lsf_dir_msg="compiler default"
ompi_check_lsf_libdir_msg="linker default"
# Save directory names if supplied
AS_IF([test ! -z "$with_lsf" -a "$with_lsf" != "yes"],
[ompi_check_lsf_dir="$with_lsf"
ompi_check_lsf_dir_msg="$ompi_check_lsf_dir (from --with-lsf)"])
AS_IF([test ! -z "$with_lsf_libdir" -a "$with_lsf_libdir" != "yes"],
[ompi_check_lsf_libdir="$with_lsf_libdir"
ompi_check_lsf_libdir_msg="$ompi_check_lsf_libdir (from --with-lsf-libdir)"])
# If no directories were specified, look for LSF_LIBDIR,
# LSF_INCLUDEDIR, and/or LSF_ENVDIR.
AS_IF([test -z "$ompi_check_lsf_dir" -a -z "$ompi_check_lsf_libdir"],
[AS_IF([test ! -z "$LSF_ENVDIR" -a -z "$LSF_LIBDIR" -a -f "$LSF_ENVDIR/lsf.conf"],
[LSF_LIBDIR=`egrep ^LSF_LIBDIR= $LSF_ENVDIR/lsf.conf | cut -d= -f2-`])
AS_IF([test ! -z "$LSF_ENVDIR" -a -z "$LSF_INCLUDEDIR" -a -f "$LSF_ENVDIR/lsf.conf"],
[LSF_INCLUDEDIR=`egrep ^LSF_INCLUDEDIR= $LSF_ENVDIR/lsf.conf | cut -d= -f2-`])
AS_IF([test ! -z "$LSF_LIBDIR"],
[ompi_check_lsf_libdir=$LSF_LIBDIR
ompi_check_lsf_libdir_msg="$LSF_LIBDIR (from \$LSF_LIBDIR)"])
AS_IF([test ! -z "$LSF_INCLUDEDIR"],
[ompi_check_lsf_dir=`dirname $LSF_INCLUDEDIR`
ompi_check_lsf_dir_msg="$ompi_check_lsf_dir (from \$LSF_INCLUDEDIR)"])])
ompi_check_lsf_found=no
AS_IF([test "$with_lsf" = "no"],
[ompi_check_lsf_happy="no"],
[ompi_check_lsf_happy="yes"
AS_IF([test ! -z "$with_lsf" -a "$with_lsf" != "yes"],
[ompi_check_lsf_dir="$with_lsf"],
[ompi_check_lsf_dir=""])])
[ompi_check_lsf_happy="yes"])
ompi_check_lsf_$1_save_CPPFLAGS="$CPPFLAGS"
ompi_check_lsf_$1_save_LDFLAGS="$LDFLAGS"
ompi_check_lsf_$1_save_LIBS="$LIBS"
AS_IF([test "$ompi_check_lsf_happy" = "yes"],
[AC_MSG_CHECKING([for LSF dir])
AC_MSG_RESULT([$ompi_check_lsf_dir_msg])
AC_MSG_CHECKING([for LSF library dir])
AC_MSG_RESULT([$ompi_check_lsf_libdir_msg])
OMPI_CHECK_PACKAGE([$1],
[lsf/lsbatch.h],
[bat],
[lsb_launch],
[-llsf],
[$ompi_check_lsf_dir],
[$ompi_check_lsf_libdir],
[ompi_check_lsf_happy="yes"],
[ompi_check_lsf_happy="no"])])
CPPFLAGS="$ompi_check_lsf_$1_save_CPPFLAGS"
LDFLAGS="$ompi_check_lsf_$1_save_LDFLAGS"
LIBS="$ompi_check_lsf_$1_save_LIBS"
# Reset for the next time we're called
ompi_check_lsf_dir=
ompi_check_lsf_libdir=
AS_IF([test "$ompi_check_lsf_happy" = "yes"],
[$2],
[AS_IF([test ! -z "$with_lsf" -a "$with_lsf" != "no"],
[AC_MSG_WARN([LSF support requested (via --with-lsf) but not found.])
AC_MSG_ERROR([Aborting.])])
$3])
])

Просмотреть файл

@ -9,6 +9,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Cisco, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -20,6 +21,12 @@ AM_CPPFLAGS = $(pls_lsf_CPPFLAGS)
dist_pkgdata_DATA = help-pls-lsf.txt
bin_PROGRAMS = mytest
mytest_SOURCES = mytest.c
mytest_LDFLAGS = $(pls_lsf_LDFLAGS)
mytest_LDADD = $(pls_lsf_LIBS)
sources = \
pls_lsf.h \
pls_lsf_component.c \

Просмотреть файл

@ -114,8 +114,8 @@ static int pls_lsf_open(void)
&mca_pls_lsf_component.orted);
tmp = mca_base_param_reg_int_name("orte", "timing",
"Request that critical timing loops be measured",
false, false, 0, &value);
"Request that critical timing loops be measured",
false, false, 0, &value);
if (value != 0) {
mca_pls_lsf_component.timing = true;
} else {

Просмотреть файл

@ -88,9 +88,6 @@ static int pls_lsf_signal_job(orte_jobid_t jobid, int32_t signal, opal_list_t *a
static int pls_lsf_signal_proc(const orte_process_name_t *name, int32_t signal);
static int pls_lsf_finalize(void);
static int pls_lsf_start_proc(int argc, char **argv, char **env,
char *prefix);
/*
* Global variable
@ -124,21 +121,19 @@ static int pls_lsf_launch_job(orte_jobid_t jobid)
char **argv = NULL;
int argc;
int rc;
char *tmp;
char** env = NULL;
char* var;
char *nodelist_flat;
char **nodelist_argv;
int nodelist_argc;
orte_process_name_t name;
char *name_string;
char **custom_strings;
int num_args, i;
int i;
char *cur_prefix;
struct timeval joblaunchstart, launchstart, launchstop;
int proc_name_index = 0;
bool failed_launch = true;
printf("pls lsf being used to launch!\n");
if (mca_pls_lsf_component.timing) {
if (0 != gettimeofday(&joblaunchstart, NULL)) {
opal_output(0, "pls_lsf: could not obtain job start time");
@ -296,7 +291,22 @@ static int pls_lsf_launch_job(orte_jobid_t jobid)
* orterun can do the rest of its stuff. Instead, we'll catch any
* failures and deal with them elsewhere
*/
if (0 > lsb_launch(nodelist_argv, argv, LSF_DJOB_NOWAIT, env)) {
argv = NULL;
argc = 0;
opal_argv_append(&argc, &argv, "env");
opal_output(0, "launching on: %s", opal_argv_join(nodelist_argv, ' '));
opal_output(0, "launching: %s", opal_argv_join(argv, ' '));
if (lsb_launch(nodelist_argv, argv, LSF_DJOB_NOWAIT, env) < 0) {
ORTE_ERROR_LOG(ORTE_ERR_FAILED_TO_START);
opal_output(0, "got nonzero: %d", rc);
rc = ORTE_ERR_FAILED_TO_START;
goto cleanup;
}
opal_output(0, "launched ok");
sleep(5);
exit(0);
if (lsb_launch(nodelist_argv, argv, LSF_DJOB_NOWAIT, env) < 0) {
ORTE_ERROR_LOG(ORTE_ERR_FAILED_TO_START);
rc = ORTE_ERR_FAILED_TO_START;
goto cleanup;
@ -336,7 +346,9 @@ cleanup:
/* check for failed launch - if so, force terminate */
if (failed_launch) {
if (ORTE_SUCCESS != (rc = orte_smr.set_job_state(jobid, ORTE_JOB_STATE_FAILED_TO_START))) {
if (ORTE_SUCCESS !=
(rc = orte_smr.set_job_state(jobid,
ORTE_JOB_STATE_FAILED_TO_START))) {
ORTE_ERROR_LOG(rc);
}
@ -354,7 +366,8 @@ static int pls_lsf_terminate_job(orte_jobid_t jobid, struct timeval *timeout, op
int rc;
/* order them to kill their local procs for this job */
if (ORTE_SUCCESS != (rc = orte_pls_base_orted_kill_local_procs(jobid, timeout, attrs))) {
if (ORTE_SUCCESS !=
(rc = orte_pls_base_orted_kill_local_procs(jobid, timeout, attrs))) {
ORTE_ERROR_LOG(rc);
}
@ -419,29 +432,3 @@ static int pls_lsf_finalize(void)
return ORTE_SUCCESS;
}
static void lsf_wait_cb(pid_t pid, int status, void* cbdata)
{
/* not sure yet about how this will be used */
int rc;
if (0 != status) {
/* we have a problem */
opal_output(0, "ERROR: lsb_launch failed to start the required daemons.");
opal_output(0, "ERROR: This could be due to an inability to find the orted binary");
opal_output(0, "ERROR: on one or more remote nodes, lack of authority to execute");
opal_output(0, "ERROR: on one or more specified nodes, or other factors.");
/* set the job state so we know it failed to start */
if (ORTE_SUCCESS != (rc = orte_smr.set_job_state(active_job, ORTE_JOB_STATE_FAILED_TO_START))) {
ORTE_ERROR_LOG(rc);
}
/* force termination of the job */
if (ORTE_SUCCESS != (rc = orte_wakeup(active_job))) {
ORTE_ERROR_LOG(rc);
}
}
}

Просмотреть файл

@ -42,7 +42,7 @@ static int orte_ras_lsf_allocate(orte_jobid_t jobid, opal_list_t *attributes)
opal_list_item_t *item;
orte_ras_node_t *node;
int i, rc, num_nodes;
/* get the list of allocated nodes */
if ((num_nodes = lsb_getalloc(&nodelist)) < 0) {
opal_show_help("help-ras-lsf.txt", "nodelist-failed", true);
@ -54,6 +54,7 @@ static int orte_ras_lsf_allocate(orte_jobid_t jobid, opal_list_t *attributes)
/* step through the list */
for (i=0; i < num_nodes; i++) {
printf("lsf got node: %s\n", nodelist[i]);
/* is this a repeat of the current node? */
if (NULL != node && 0 == strcmp(nodelist[i], node->node_name)) {
/* it is a repeat - just bump the slot count */
@ -66,7 +67,6 @@ static int orte_ras_lsf_allocate(orte_jobid_t jobid, opal_list_t *attributes)
node->node_name = strdup(nodelist[i]);
node->node_slots = 1;
opal_list_append(&nodes, &node->super);
}
/* add any newly discovered nodes to the registry */
@ -97,7 +97,7 @@ cleanup:
/* release the nodelist from lsf */
opal_argv_free(nodelist);
return rc;
}

0
orte/mca/sds/lsf/.ompi_ignore Обычный файл
Просмотреть файл

2
orte/mca/sds/lsf/.ompi_unignore Обычный файл
Просмотреть файл

@ -0,0 +1,2 @@
rhc
jsquyres

51
orte/mca/sds/lsf/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,51 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Cisco, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Preprocessor flags for this component.  These come from the sds_lsf_*
# variables substituted by this component's configure.m4 (not from the
# pls/lsf component's variables).
AM_CPPFLAGS = $(sds_lsf_CPPFLAGS)

sources = \
    sds_lsf.h \
    sds_lsf_component.c \
    sds_lsf_module.c

# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).

if OMPI_BUILD_sds_lsf_DSO
component_noinst =
component_install = mca_sds_lsf.la
else
component_noinst = libmca_sds_lsf.la
component_install =
endif

# DSO build: installed into $(pkglibdir) and linked against the LSF
# libraries plus the ORTE and OPAL libraries.
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_sds_lsf_la_SOURCES = $(sources)
mca_sds_lsf_la_LDFLAGS = -module -avoid-version $(sds_lsf_LDFLAGS)
mca_sds_lsf_la_LIBADD = \
    $(sds_lsf_LIBS) \
    $(top_ompi_builddir)/orte/libopen-rte.la \
    $(top_ompi_builddir)/opal/libopen-pal.la

# Static build: convenience library that is not installed.
noinst_LTLIBRARIES = $(component_noinst)
libmca_sds_lsf_la_SOURCES = $(sources)
libmca_sds_lsf_la_LDFLAGS = -module -avoid-version $(sds_lsf_LDFLAGS)
libmca_sds_lsf_la_LIBADD = $(sds_lsf_LIBS)

38
orte/mca/sds/lsf/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,38 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Cisco, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_sds_lsf_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_sds_lsf_CONFIG],[
# Probe for LSF using the sds_lsf variable prefix.  The result is
# recorded in sds_lsf_good as 1 for found and 0 for not found.
OMPI_CHECK_LSF([sds_lsf], [sds_lsf_good=1], [sds_lsf_good=0])
# If the check succeeded then copy the LSF linker flags and libraries
# into the wrapper variables and run the caller-supplied success action.
# Otherwise run the caller-supplied failure action.
AS_IF([test "$sds_lsf_good" = "1"],
[sds_lsf_WRAPPER_EXTRA_LDFLAGS="$sds_lsf_LDFLAGS"
sds_lsf_WRAPPER_EXTRA_LIBS="$sds_lsf_LIBS"
$1],
[$2])
# Substitute the build flags so that the component Makefile can use them
AC_SUBST([sds_lsf_CPPFLAGS])
AC_SUBST([sds_lsf_LDFLAGS])
AC_SUBST([sds_lsf_LIBS])
])dnl

22
orte/mca/sds/lsf/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,22 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# NOTE(review): presumably the list of files that configure generates for
# this component -- confirm against the build system documentation.
PARAM_CONFIG_FILES="Makefile"

46
orte/mca/sds/lsf/sds_lsf.h Обычный файл
Просмотреть файл

@ -0,0 +1,46 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/*
 * Public declarations for the LSF sds component.
 *
 * This header includes nothing itself; the sources in this component
 * include "orte/mca/sds/sds.h" before it.
 * NOTE(review): presumably that is where orte_sds_base_component_t and
 * orte_sds_base_module_t are declared -- confirm.
 */
#ifndef ORTE_SDS_LSF_H
#define ORTE_SDS_LSF_H
BEGIN_C_DECLS
/* Component descriptor; defined in sds_lsf_component.c */
ORTE_MODULE_DECLSPEC extern orte_sds_base_component_t mca_sds_lsf_component;
/*
 * Component open / close / init
 *
 * open and close always return ORTE_SUCCESS.  init returns the LSF sds
 * module and stores its priority in *priority, or returns NULL (leaving
 * *priority untouched) when this component should not be used.
 */
int orte_sds_lsf_component_open(void);
int orte_sds_lsf_component_close(void);
orte_sds_base_module_t* orte_sds_lsf_component_init(int *priority);
/*
 * Shutdown: always returns ORTE_SUCCESS
 */
int orte_sds_lsf_finalize(void);
/*
 * Module functions
 *
 * set_name establishes orte_process_info.my_name and
 * orte_system_info.nodename for this process; returns ORTE_SUCCESS or an
 * ORTE error code.
 */
int orte_sds_lsf_set_name(void);
END_C_DECLS
#endif /* ORTE_SDS_LSF_H */

102
orte/mca/sds/lsf/sds_lsf_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,102 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include <lsf/lsbatch.h>
#include "orte/orte_constants.h"
#include "orte/mca/sds/sds.h"
#include "orte/mca/sds/lsf/sds_lsf.h"
#include "opal/mca/base/mca_base_param.h"
extern orte_sds_base_module_t orte_sds_lsf_module;
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/
orte_sds_base_component_t mca_sds_lsf_component = {
/* First block: version, identity and open/close entry points */
{
/* Indicate that we are an sds v1.0.0 component (which also
implies a specific MCA version) */
ORTE_SDS_BASE_VERSION_1_0_0,
/* Component name ("lsf") followed by the ORTE major, minor and
release version numbers */
"lsf",
ORTE_MAJOR_VERSION,
ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION,
/* Component open and close functions (both defined below) */
orte_sds_lsf_component_open,
orte_sds_lsf_component_close
},
/* Second block: the MCA v1.0.0 component meta data */
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* Initialization / querying function: returns the module to use, or
NULL if this component declines */
orte_sds_lsf_component_init
};
/*
 * Component open function.  There is nothing to set up, so this always
 * returns ORTE_SUCCESS.
 */
int orte_sds_lsf_component_open(void)
{
return ORTE_SUCCESS;
}
/*
 * Decide whether the LSF sds module should be used for this process.
 *
 * The module is offered only when lsb_init() succeeds AND the "ns_nds"
 * MCA string parameter is set to exactly "lsf".
 *
 * priority: on success receives the module priority (20); it is not
 *           written when NULL is returned.
 *
 * Returns a pointer to orte_sds_lsf_module, or NULL if this component
 * declines.
 */
orte_sds_base_module_t *orte_sds_lsf_component_init(int *priority)
{
    char *requested;
    int param_index;
    int selected;

    /* lsb_init() fails when LSF is not available here */
    if (lsb_init("ORTE launcher") < 0) {
        return NULL;
    }

    param_index = mca_base_param_register_string("ns", "nds", NULL, NULL, NULL);
    mca_base_param_lookup_string(param_index, &requested);

    /* Work out the answer first so the looked-up string can be released
       in exactly one place; free(NULL) is a harmless no-op */
    selected = (NULL != requested) && (0 == strcmp("lsf", requested));
    free(requested);

    if (!selected) {
        return NULL;
    }

    *priority = 20;
    return &orte_sds_lsf_module;
}
/*
 * Component close function.  There is nothing to tear down, so this
 * always returns ORTE_SUCCESS.
 */
int orte_sds_lsf_component_close(void)
{
return ORTE_SUCCESS;
}

183
orte/mca/sds/lsf/sds_lsf_module.c Обычный файл
Просмотреть файл

@ -0,0 +1,183 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "orte_config.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include <ctype.h>
#include <lsf/lsbatch.h>
#include "orte/orte_constants.h"
#include "orte/mca/sds/sds.h"
#include "orte/mca/sds/base/base.h"
#include "orte/mca/sds/lsf/sds_lsf.h"
#include "orte/util/proc_info.h"
#include "opal/util/opal_environ.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ns/base/base.h"
#include "orte/util/sys_info.h"
#include "opal/util/argv.h"
/*
 * The LSF sds module: a table of three function pointers.  The first
 * entry reuses a function from the sds base; the other two are the
 * set_name and finalize functions defined in this file.
 * NOTE(review): the slot names are not visible here -- presumably
 * contact_universe / set_name / finalize; confirm against
 * orte_sds_base_module_t.
 */
orte_sds_base_module_t orte_sds_lsf_module = {
orte_sds_base_basic_contact_universe,
orte_sds_lsf_set_name,
orte_sds_lsf_finalize,
};
static char *get_lsf_nodename(int nodeid);
/*
 * Establish this process's name and node name when launched under LSF.
 *
 * The base name is taken from the "ns_nds_name" MCA parameter if it is
 * set; otherwise it is assembled from the separate "ns_nds_cellid",
 * "ns_nds_jobid" and "ns_nds_vpid" parameters.  The value of the
 * LSB_JOBINDEX environment variable is then added to the vpid, and the
 * same index is used to pick this node's name out of the LSF node list.
 *
 * Returns ORTE_SUCCESS on success, ORTE_ERR_NOT_FOUND if a required
 * parameter or LSB_JOBINDEX is missing, or the error code of the first
 * failing conversion / helper call.
 */
int orte_sds_lsf_set_name(void)
{
    int rc;
    int id;
    char *name_string = NULL;
    char *jobindex_string;
    char *lsf_nodename;
    int lsf_nodeid;

    /* start by getting our cellid, jobid, and vpid (which is the
       starting vpid for the list of daemons) */
    id = mca_base_param_register_string("ns", "nds", "name", NULL, NULL);
    mca_base_param_lookup_string(id, &name_string);

    if (name_string != NULL) {
        rc = orte_ns.convert_string_to_process_name(&(orte_process_info.my_name),
                                                    name_string);
        free(name_string);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    } else {
        orte_cellid_t cellid;
        orte_jobid_t jobid;
        orte_vpid_t vpid;
        char *cellid_string = NULL;
        char *jobid_string = NULL;
        char *vpid_string = NULL;

        /* Each looked-up string is owned by us, so release it as soon
           as it has been converted (on both success and failure). */
        id = mca_base_param_register_string("ns", "nds", "cellid", NULL, NULL);
        mca_base_param_lookup_string(id, &cellid_string);
        if (NULL == cellid_string) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return ORTE_ERR_NOT_FOUND;
        }
        rc = orte_ns.convert_string_to_cellid(&cellid, cellid_string);
        free(cellid_string);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }

        id = mca_base_param_register_string("ns", "nds", "jobid", NULL, NULL);
        mca_base_param_lookup_string(id, &jobid_string);
        if (NULL == jobid_string) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return ORTE_ERR_NOT_FOUND;
        }
        rc = orte_ns.convert_string_to_jobid(&jobid, jobid_string);
        free(jobid_string);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }

        id = mca_base_param_register_string("ns", "nds", "vpid", NULL, NULL);
        mca_base_param_lookup_string(id, &vpid_string);
        if (NULL == vpid_string) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return ORTE_ERR_NOT_FOUND;
        }
        rc = orte_ns.convert_string_to_vpid(&vpid, vpid_string);
        free(vpid_string);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }

        if (ORTE_SUCCESS !=
            (rc = orte_ns.create_process_name(&(orte_process_info.my_name),
                                              cellid, jobid, vpid))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }

    /* fix up the base name and make it the "real" name.  getenv()
       returns NULL when the variable is unset, so check before parsing
       instead of handing NULL to atoi(). */
    jobindex_string = getenv("LSB_JOBINDEX");
    if (NULL == jobindex_string) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }
    lsf_nodeid = atoi(jobindex_string);
    orte_process_info.my_name->vpid += lsf_nodeid;

    /* fix up the system info nodename to match exactly what lsf
       returned.  Only replace the existing name when the lookup actually
       produced one, so a failed lookup does not leave nodename NULL. */
    lsf_nodename = get_lsf_nodename(lsf_nodeid);
    if (NULL != lsf_nodename) {
        free(orte_system_info.nodename);
        orte_system_info.nodename = lsf_nodename;
    }

    /* get the non-name common environmental variables */
    if (ORTE_SUCCESS != (rc = orte_sds_env_get())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    return ORTE_SUCCESS;
}
/*
 * Module finalize function.  There is nothing to release here, so this
 * always returns ORTE_SUCCESS.
 */
int orte_sds_lsf_finalize(void)
{
return ORTE_SUCCESS;
}
/*
 * Look up the name of node number `nodeid` in the comma-separated list
 * held in the OMPI_MCA_orte_lsf_nodelist environment variable.
 *
 * nodeid: zero-based index into the node list.
 *
 * Returns a newly allocated copy of the node name (the caller must
 * free() it), or NULL if the variable is unset, the list cannot be
 * split, the index is out of range, or the copy cannot be allocated.
 */
static char *get_lsf_nodename(int nodeid)
{
    char **names = NULL;
    char *lsf_nodelist;
    char *ret = NULL;

    lsf_nodelist = getenv("OMPI_MCA_orte_lsf_nodelist");
    if (NULL == lsf_nodelist) {
        return NULL;
    }

    /* split the node list into an argv array */
    names = opal_argv_split(lsf_nodelist, ',');
    if (NULL == names) {  /* got an error */
        return NULL;
    }

    /* Valid indices are 0 .. count-1.  An index equal to the count would
       select the terminating NULL entry, and a negative one would read
       before the array, so both are rejected. */
    if (nodeid >= 0 && nodeid < opal_argv_count(names)) {
        ret = strdup(names[nodeid]);
    }

    /* release the split list on every path, including out-of-range */
    opal_argv_free(names);

    /* All done */
    return ret;
}