1
1

Restore bproc code - if someone wants to maintain it, then more power to them...but it would definitely be easier if the old code is in the trunk. This is all .ompi_ignore'd except for me so I can play with making it compile again in my copious free time.

This commit was SVN r18716.
Этот коммит содержится в:
Ralph Castain 2008-06-24 01:27:22 +00:00
родитель 3e61a3f92e
Коммит 17fcd72b5d
46 изменённых файлов: 5555 добавлений и 0 удалений

67
config/ompi_check_bproc.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,67 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# OMPI_CHECK_BPROC(prefix, [action-if-new-bproc], [action-if-old-bproc],
# [action-if-not-found])
# --------------------------------------------------------
# Probe for BProc by checking for the sys/bproc.h header and for the
# bproc_numnodes symbol in libbproc.
#
# "new" bproc is LANL versions >= 3.2.0
# "old" bproc is all Scyld versions and LANL versions < 3.2.0
# The two are told apart by whether sys/bproc_common.h is usable.
#
# When BProc is found, -lbproc is appended to <prefix>_LIBS and, if a
# directory was given with --with-bproc, its include/ and lib/
# subdirectories are appended to <prefix>_CPPFLAGS and <prefix>_LDFLAGS.
# The caller's CPPFLAGS, LDFLAGS and LIBS are restored before returning.
#
# action-if-not-found runs when --without-bproc was given or when the
# probe fails without BProc having been requested.  If --with-bproc was
# given and the probe fails, configure aborts with an error instead.
AC_DEFUN([OMPI_CHECK_BPROC],[
AC_ARG_WITH([bproc],
[AC_HELP_STRING([--with-bproc],
[Directory where the BProc software is installed])])
AS_IF([test ! -z "$with_bproc" -a "$with_bproc" = "no"],[$4], [
# Save the global flags so the probe below cannot leak into other tests
ompi_check_bproc_save_CPPFLAGS="$CPPFLAGS"
ompi_check_bproc_save_LDFLAGS="$LDFLAGS"
ompi_check_bproc_save_LIBS="$LIBS"
# A value other than yes is treated as the install prefix
AS_IF([test ! -z "$with_bproc" -a "$with_bproc" != "yes"],
[CPPFLAGS="$CPPFLAGS -I$with_bproc/include"
LDFLAGS="$LDFLAGS -L$with_bproc/lib"])
AC_CHECK_HEADERS([sys/bproc.h],
[AC_CHECK_LIB([bproc],
[bproc_numnodes],
[ompi_check_bproc_happy="yes"],
[ompi_check_bproc_happy="no"])],
[ompi_check_bproc_happy="no"])
# Check for Scyld bproc or an old version of LANL Bproc (pre 3.2.0)
AS_IF([test "$ompi_check_bproc_happy" = "yes"],
[AC_CHECK_HEADERS([sys/bproc_common.h],[ompi_check_bproc_happy="new"],
[ompi_check_bproc_happy="old"],
[#include <stdint.h>
#include <sys/socket.h>])])
# Put the global flags back - only the prefixed variables carry results
CPPFLAGS="$ompi_check_bproc_save_CPPFLAGS"
LDFLAGS="$ompi_check_bproc_save_LDFLAGS"
LIBS="$ompi_check_bproc_save_LIBS"
# At this point the happy variable is one of: no / old / new
AS_IF([test "$ompi_check_bproc_happy" != "no"],
[AS_IF([test ! -z "$with_bproc" -a "$with_bproc" != "yes"],
[$1_CPPFLAGS="$$1_CPPFLAGS -I$with_bproc/include"
$1_LDFLAGS="$$1_LDFLAGS -L$with_bproc/lib"])
$1_LIBS="$$1_LIBS -lbproc"
AS_IF([test "$ompi_check_bproc_happy" = "new"], [$2], [$3])],
[AS_IF([test ! -z "$with_bproc"],
[AC_MSG_ERROR([BProc support request but not found. Perhaps
you need to specify the location of the BProc libraries.])])
$4])
])
])

0
orte/mca/ess/bproc/.ompi_ignore Обычный файл
Просмотреть файл

1
orte/mca/ess/bproc/.ompi_unignore Обычный файл
Просмотреть файл

@ -0,0 +1 @@
rhc

54
orte/mca/ess/bproc/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,54 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Automake input for the ess/bproc component.
#
# ess_bproc_CPPFLAGS, ess_bproc_LDFLAGS and ess_bproc_LIBS are substituted
# by this component's configure.m4.
AM_CPPFLAGS = $(ess_bproc_CPPFLAGS)
# NOTE(review): ess_bproc_put.c is added to this directory by the same
# commit but is not listed below - confirm whether that is intentional.
sources = \
ess_bproc.h \
ess_bproc_component.c \
ess_bproc_module.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_ess_bproc_DSO
component_noinst =
component_install = mca_ess_bproc.la
else
component_noinst = libmca_ess_bproc.la
component_install =
endif
# DSO flavour: installed into $(libdir)/openmpi and linked against the
# ORTE and OPAL libraries.
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_ess_bproc_la_SOURCES = $(sources)
mca_ess_bproc_la_LDFLAGS = -module -avoid-version $(ess_bproc_LDFLAGS)
mca_ess_bproc_la_LIBADD = \
$(ess_bproc_LIBS) \
$(top_ompi_builddir)/orte/libopen-rte.la \
$(top_ompi_builddir)/opal/libopen-pal.la
# Static flavour: a non-installed convenience library that only pulls in
# the BProc libraries.
noinst_LTLIBRARIES = $(component_noinst)
libmca_ess_bproc_la_SOURCES =$(sources)
libmca_ess_bproc_la_LDFLAGS = -module -avoid-version $(ess_bproc_LDFLAGS)
libmca_ess_bproc_la_LIBADD = $(ess_bproc_LIBS)

36
orte/mca/ess/bproc/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,36 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_ess_bproc_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
# Decide whether the ess/bproc component can be built.  Both the new and
# the old BProc flavours reported by OMPI_CHECK_BPROC count as found.
AC_DEFUN([MCA_ess_bproc_CONFIG],[
OMPI_CHECK_BPROC([ess_bproc], [ess_bproc_good=1],
[ess_bproc_good=1], [ess_bproc_good=0])
# If the check succeeded, copy the discovered link flags into the
# wrapper-compiler variables and run the caller's success action;
# otherwise run the failure action.
AS_IF([test "$ess_bproc_good" = "1"],
[ess_bproc_WRAPPER_EXTRA_LDFLAGS="$ess_bproc_LDFLAGS"
ess_bproc_WRAPPER_EXTRA_LIBS="$ess_bproc_LIBS"
$1],
[$2])
# set build flags to use in makefile
AC_SUBST([ess_bproc_CPPFLAGS])
AC_SUBST([ess_bproc_LDFLAGS])
AC_SUBST([ess_bproc_LIBS])
])dnl

22
orte/mca/ess/bproc/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,22 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module.
# Presumably the list of files configure generates in this component
# directory - TODO confirm against the autogen machinery.
PARAM_CONFIG_FILES="Makefile"

48
orte/mca/ess/bproc/ess_bproc.h Обычный файл
Просмотреть файл

@ -0,0 +1,48 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/*
 * Public declarations of the bproc component and module entry points.
 *
 * NOTE(review): this header is stored as orte/mca/ess/bproc/ess_bproc.h but
 * its include guard and every symbol still carry the "sds" name.  It also
 * uses orte_sds_base_module_t without including the header that declares
 * it, so it has to be included after that declaration is in scope.
 */
#ifndef ORTE_SDS_BPROC_H
#define ORTE_SDS_BPROC_H
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/*
* Module open / close
*/
/* Component open hook; the implementation just returns ORTE_SUCCESS. */
int orte_sds_bproc_component_open(void);
/* Component close hook; the implementation just returns ORTE_SUCCESS. */
int orte_sds_bproc_component_close(void);
/*
 * Component selection hook.  Returns the bproc module and sets *priority
 * when the "ns"/"nds" parameter equals "bproc"; returns NULL otherwise.
 */
orte_sds_base_module_t* orte_sds_bproc_component_init(int *priority);
/*
* Startup / Shutdown
*/
/* Module finalize hook; the implementation just returns ORTE_SUCCESS. */
int orte_sds_bproc_finalize(void);
/*
* Module functions
*/
/*
 * Fill in orte_process_info.my_name from the parameters and the
 * BPROC_RANK environment variable set up for a bproc launch.
 */
int orte_sds_bproc_set_name(void);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* ORTE_SDS_BPROC_H */

97
orte/mca/ess/bproc/ess_bproc_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,97 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* These symbols are in a file by themselves to provide nice linker
* semantics. Since linkers generally pull in symbols by object
* files, keeping these symbols as the only symbols in this file
* prevents utility programs such as "ompi_info" from having to import
* entire components just to query their version and parameters.
*/
#include "orte_config.h"

#include <stdlib.h>
#include <string.h>

#include "orte/orte_constants.h"
#include "orte/mca/sds/sds.h"
#include "orte/mca/sds/bproc/sds_bproc.h"
#include "opal/mca/base/mca_base_param.h"
/* The module itself is defined in the companion module source file. */
extern orte_sds_base_module_t orte_sds_bproc_module;
/*
 * Instantiate the public struct with all of our public information
 * and pointers to our public functions in it.
 *
 * NOTE(review): this file is stored under orte/mca/ess/bproc but the
 * component is still declared with the sds types, version macro and
 * symbol name.
 */
orte_sds_base_component_t mca_sds_bproc_component = {
/* First, the mca_component_t struct containing meta information
   about the component itself */
{
/* Indicate that we are a sds v1.0.0 component (which also
   implies a specific MCA version) */
ORTE_SDS_BASE_VERSION_1_0_0,
/* Component name and version */
"bproc",
ORTE_MAJOR_VERSION,
ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION,
/* Component open and close functions */
orte_sds_bproc_component_open,
orte_sds_bproc_component_close
},
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
true
},
/* Initialization / querying functions */
orte_sds_bproc_component_init
};
/**
 * Component open hook.
 *
 * There is nothing to set up when the component is opened.
 *
 * @retval ORTE_SUCCESS always
 */
int orte_sds_bproc_component_open(void)
{
    const int status = ORTE_SUCCESS;

    return status;
}
/**
 * Component selection hook.
 *
 * Looks up the "ns"/"nds" string parameter and offers this component only
 * when its value is exactly "bproc".
 *
 * @param priority [out] set to 20 when the component is selected; left
 *                 untouched otherwise
 * @return pointer to the bproc module, or NULL when the component should
 *         not be used
 */
orte_sds_base_module_t *
orte_sds_bproc_component_init(int *priority)
{
    int id;
    int selected;
    /* start from NULL so an unset / failed lookup is seen as "no mode" */
    char *mode = NULL;

    /* okay, not seed/singleton attempt another approach */
    id = mca_base_param_register_string("ns", "nds", NULL, NULL, NULL);
    mca_base_param_lookup_string(id, &mode);

    selected = (NULL != mode && 0 == strcmp("bproc", mode));

    /* the looked-up value belongs to us (the other users of this lookup
     * in the bproc code free theirs as well), so release it on every path */
    free(mode);

    if (!selected) {
        return NULL;
    }

    *priority = 20;
    return &orte_sds_bproc_module;
}
/**
 * Component close hook.
 *
 * Nothing was acquired when the component was opened, so there is nothing
 * to release here.
 *
 * @retval ORTE_SUCCESS always
 */
int orte_sds_bproc_component_close(void)
{
    const int status = ORTE_SUCCESS;

    return status;
}

178
orte/mca/ess/bproc/ess_bproc_module.c Обычный файл
Просмотреть файл

@ -0,0 +1,178 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "orte_config.h"
#include <sys/bproc.h>
#include "orte/orte_constants.h"
#include "orte/util/sys_info.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/sds/sds.h"
#include "orte/mca/sds/base/base.h"
#include "orte/mca/sds/bproc/sds_bproc.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/errmgr/base/base.h"
/*
 * Function table handed out by the component when it is selected.
 *
 * NOTE(review): the meaning of each slot is defined by
 * orte_sds_base_module_t, which is not visible in this file; going by the
 * names the entries are contact-universe, set-name and finalize.
 */
orte_sds_base_module_t orte_sds_bproc_module = {
orte_sds_base_basic_contact_universe,
orte_sds_bproc_set_name,
orte_sds_bproc_finalize,
};
/**
 * Sets up the process name from the information put into the environment
 * by the bproc launcher and orte_ns_nds_bproc_put.
 *
 * If the "ns"/"nds"/"name" parameter is set, the name is parsed from that
 * single string.  Otherwise it is assembled from the separate cellid,
 * jobid and vpid_start parameters, with
 *
 *     vpid = vpid_start + BPROC_RANK * stride
 *
 * and orte_process_info.num_procs, orte_process_info.vpid_start and
 * orte_system_info.nodename are filled in as well.
 *
 * @retval ORTE_SUCCESS
 * @retval ORTE_ERR_NOT_FOUND a required parameter or BPROC_RANK is missing
 * @retval error any error returned by the name service conversions
 */
int orte_sds_bproc_set_name(void)
{
    int rc = ORTE_SUCCESS;
    int id;
    /* Every looked-up string starts out NULL so the cleanup code can free
     * them unconditionally, no matter which step bailed out. */
    char *name_string = NULL;
    char *cellid_string = NULL;
    char *jobid_string = NULL;
    char *vpid_string = NULL;
    char *global_vpid_string = NULL;
    char *bproc_rank_string;
    orte_cellid_t cellid;
    orte_jobid_t jobid;
    orte_vpid_t vpid;
    orte_vpid_t vpid_start;
    /* -1 matches the registered defaults, i.e. "not provided" */
    int num_procs = -1;
    int stride = -1;
    int bproc_rank;

    id = mca_base_param_register_string("ns", "nds", "name", NULL, NULL);
    mca_base_param_lookup_string(id, &name_string);
    if (NULL != name_string) {
        /* the complete name was handed to us in one piece */
        rc = orte_ns.convert_string_to_process_name(
                 &(orte_process_info.my_name), name_string);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
        }
        goto cleanup;
    }

    id = mca_base_param_register_string("ns", "nds", "cellid", NULL, NULL);
    mca_base_param_lookup_string(id, &cellid_string);
    if (NULL == cellid_string) {
        rc = ORTE_ERR_NOT_FOUND;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    rc = orte_ns.convert_string_to_cellid(&cellid, cellid_string);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    id = mca_base_param_register_string("ns", "nds", "jobid", NULL, NULL);
    mca_base_param_lookup_string(id, &jobid_string);
    if (NULL == jobid_string) {
        rc = ORTE_ERR_NOT_FOUND;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    rc = orte_ns.convert_string_to_jobid(&jobid, jobid_string);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* BPROC_RANK is set by bproc when we do a parallel launch */
    bproc_rank_string = getenv("BPROC_RANK");
    if (NULL == bproc_rank_string) {
        opal_output(0, "orte_ns_nds_bproc_get: Error: Environment variable "
                    "BPROC_RANK not found.\n");
        rc = ORTE_ERR_NOT_FOUND;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    /* NOTE(review): a value with no digits (e.g. the XXXXXXX placeholder
     * written by orte_ns_nds_bproc_put) silently becomes rank 0 here;
     * confirm whether that is relied upon before tightening it. */
    bproc_rank = (int)strtol(bproc_rank_string, NULL, 10);

    /* to compute our process name, we need to know two other things: the
     * stride (i.e., the size of the step between vpids in this launch
     * wave) and the starting vpid of this launch. Get those values here
     */
    id = mca_base_param_register_int("pls", "bproc", "stride", NULL, -1);
    mca_base_param_lookup_int(id, &stride);
    if (stride < 0) {
        rc = ORTE_ERR_NOT_FOUND;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    id = mca_base_param_register_string("ns", "nds", "vpid_start", NULL, NULL);
    mca_base_param_lookup_string(id, &vpid_string);
    if (NULL == vpid_string) {
        rc = ORTE_ERR_NOT_FOUND;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    rc = orte_ns.convert_string_to_vpid(&vpid_start, vpid_string);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* compute our vpid */
    vpid = vpid_start + (bproc_rank * stride);

    /* create our name */
    rc = orte_ns.create_process_name(&(orte_process_info.my_name),
                                     cellid, jobid, vpid);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    id = mca_base_param_register_int("ns", "nds", "num_procs", NULL, -1);
    mca_base_param_lookup_int(id, &num_procs);
    if (num_procs < 0) {
        rc = ORTE_ERR_NOT_FOUND;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    orte_process_info.num_procs = (size_t)num_procs;

    /* kept in its own variable so the vpid_start string above is not
     * overwritten (and thereby leaked) by this second lookup */
    id = mca_base_param_register_string("ns", "nds", "global_vpid_start", NULL, NULL);
    mca_base_param_lookup_string(id, &global_vpid_string);
    if (NULL == global_vpid_string) {
        rc = ORTE_ERR_NOT_FOUND;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    rc = orte_ns.convert_string_to_vpid(&orte_process_info.vpid_start,
                                        global_vpid_string);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* on a bproc node the node name is the bproc node number */
    if (NULL != orte_system_info.nodename) {
        free(orte_system_info.nodename);
        /* do not keep a pointer to freed memory around if the
         * formatting below fails */
        orte_system_info.nodename = NULL;
    }
    /* NOTE(review): the result of asprintf is still not checked, as in
     * the original code. */
    asprintf(&orte_system_info.nodename, "%d", bproc_currnode());

cleanup:
    free(name_string);
    free(cellid_string);
    free(jobid_string);
    free(vpid_string);
    free(global_vpid_string);
    return rc;
}
/**
 * Module finalize hook.
 *
 * The module does no teardown work here.
 *
 * @retval ORTE_SUCCESS always
 */
int orte_sds_bproc_finalize(void)
{
    const int status = ORTE_SUCCESS;

    return status;
}

151
orte/mca/ess/bproc/ess_bproc_put.c Обычный файл
Просмотреть файл

@ -0,0 +1,151 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <stdlib.h>
#include <errno.h>
#include "opal/util/opal_environ.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/orte_constants.h"
#include "orte/mca/sds/base/base.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/errmgr/base/base.h"
/**
* sets up the environment so that a process launched with the bproc launcher can
* figure out its name
* @param cell the cell that the process belongs to.
* @param job the job the process belongs to
* @param vpid_start the starting vpid for the current parallel launch
* @param global_vpid_start the starting vpid for the job
* @param num_procs the number of user processes in the job
* @param env a pointer to the environment to setup
* @retval ORTE_SUCCESS
* @retval error
*/
int orte_ns_nds_bproc_put(orte_cellid_t cell, orte_jobid_t job,
orte_vpid_t vpid_start, orte_vpid_t global_vpid_start,
int num_procs, char ***env) {
char* param;
char* value;
int rc;
/* set the mode to bproc */
if(NULL == (param = mca_base_param_environ_variable("ns","nds",NULL))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
opal_setenv(param, "bproc", true, env);
free(param);
/* not a seed */
if(NULL == (param = mca_base_param_environ_variable("seed",NULL,NULL))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
opal_unsetenv(param, env);
free(param);
/* since we want to pass the name as separate components, make sure
* that the "name" environmental variable is cleared!
*/
if(NULL == (param = mca_base_param_environ_variable("ns","nds","name"))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
opal_unsetenv(param, env);
free(param);
/* setup the name */
if(ORTE_SUCCESS != (rc = orte_ns.convert_cellid_to_string(&value, cell))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if(NULL == (param = mca_base_param_environ_variable("ns","nds","cellid"))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
opal_setenv(param, value, true, env);
free(param);
free(value);
if(ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&value, job))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if(NULL == (param = mca_base_param_environ_variable("ns","nds","jobid"))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
opal_setenv(param, value, true, env);
free(param);
free(value);
rc = orte_ns.convert_vpid_to_string(&value, vpid_start);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return(rc);
}
if(NULL == (param = mca_base_param_environ_variable("ns","nds","vpid_start"))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
opal_setenv(param, value, true, env);
free(param);
free(value);
rc = orte_ns.convert_vpid_to_string(&value, global_vpid_start);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return(rc);
}
if(NULL == (param = mca_base_param_environ_variable("ns","nds","global_vpid_start"))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
opal_setenv(param, value, true, env);
free(param);
free(value);
asprintf(&value, "%d", num_procs);
if(NULL == (param = mca_base_param_environ_variable("ns","nds","num_procs")))
{
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
opal_setenv(param, value, true, env);
free(param);
free(value);
/* we have to set this environmental variable so bproc will give us our rank
* after the launch */
putenv("BPROC_RANK=XXXXXXX");
opal_setenv("BPROC_RANK", "XXXXXXX", true, env);
return ORTE_SUCCESS;
}

0
orte/mca/odls/bproc/.ompi_ignore Обычный файл
Просмотреть файл

1
orte/mca/odls/bproc/.ompi_unignore Обычный файл
Просмотреть файл

@ -0,0 +1 @@
rhc

51
orte/mca/odls/bproc/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,51 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Automake input for the odls/bproc component.
#
# odls_bproc_CPPFLAGS, odls_bproc_LDFLAGS and odls_bproc_LIBS are
# substituted by this component's configure.m4.
# NOTE(review): the src/include path below looks like a leftover from an
# older source layout - confirm that the directory still exists.
AM_CPPFLAGS = -I$(top_ompi_builddir)/src/include $(odls_bproc_CPPFLAGS)
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_odls_bproc_DSO
component_noinst =
component_install = mca_odls_bproc.la
else
component_noinst = libmca_odls_bproc.la
component_install =
endif
sources = \
odls_bproc.h \
odls_bproc.c \
odls_bproc_component.c
# DSO flavour: installed into $(libdir)/openmpi and linked against the
# ORTE and OPAL libraries.
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_odls_bproc_la_SOURCES = $(sources)
mca_odls_bproc_la_LIBADD = \
$(odls_bproc_LIBS) \
$(top_ompi_builddir)/orte/libopen-rte.la \
$(top_ompi_builddir)/opal/libopen-pal.la
mca_odls_bproc_la_LDFLAGS = -module -avoid-version $(odls_bproc_LDFLAGS)
# Static flavour: a non-installed convenience library that only pulls in
# the BProc libraries.
noinst_LTLIBRARIES = $(component_noinst)
libmca_odls_bproc_la_SOURCES = $(sources)
libmca_odls_bproc_la_LIBADD = $(odls_bproc_LIBS)
libmca_odls_bproc_la_LDFLAGS = -module -avoid-version $(odls_bproc_LDFLAGS)

38
orte/mca/odls/bproc/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,38 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_odls_bproc_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
# Decide whether the odls/bproc component can be built.  Both the new and
# the old BProc flavours reported by OMPI_CHECK_BPROC count as found.
AC_DEFUN([MCA_odls_bproc_CONFIG],[
OMPI_CHECK_BPROC([odls_bproc], [odls_bproc_good=1],
[odls_bproc_good=1], [odls_bproc_good=0])
# If the check succeeded, copy the discovered link flags into the
# wrapper-compiler variables and run the caller's success action;
# otherwise run the failure action.
AS_IF([test "$odls_bproc_good" = "1"],
[odls_bproc_WRAPPER_EXTRA_LDFLAGS="$odls_bproc_LDFLAGS"
odls_bproc_WRAPPER_EXTRA_LIBS="$odls_bproc_LIBS"
$1],
[$2])
# set build flags to use in makefile
AC_SUBST([odls_bproc_CPPFLAGS])
AC_SUBST([odls_bproc_LDFLAGS])
AC_SUBST([odls_bproc_LIBS])
])dnl

24
orte/mca/odls/bproc/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,24 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module.
# Presumably the list of files configure generates in this component
# directory - TODO confirm against the autogen machinery.
PARAM_CONFIG_FILES="Makefile"

697
orte/mca/odls/bproc/odls_bproc.c Обычный файл
Просмотреть файл

@ -0,0 +1,697 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file:
* Part of the bproc launcher.
* See odls_bproc.h for an overview of how it works.
*/
#include "orte_config.h"
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <fcntl.h>
#include <pty.h>
#include <dirent.h>
#include "opal/mca/base/mca_base_param.h"
#include "opal/runtime/opal_progress.h"
#include "opal/threads/condition.h"
#include "opal/util/os_dirpath.h"
#include "opal/util/os_path.h"
#include "opal/util/output.h"
#include "orte/dss/dss.h"
#include "orte/util/sys_info.h"
#include "orte/orte_constants.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/iof/iof.h"
#include "orte/mca/iof/base/iof_base_setup.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/util/session_dir.h"
#include "orte/util/univ_info.h"
#include "odls_bproc.h"
/**
 * Initialization of the bproc_orted module with all the needed function pointers
 *
 * NOTE(review): the meaning of each slot is defined by
 * orte_odls_base_module_t, which is not visible in this file; the entries
 * are listed in the order that type declares them.
 */
orte_odls_base_module_t orte_odls_bproc_module = {
orte_odls_bproc_subscribe_launch_data,
orte_odls_bproc_get_add_procs_data,
orte_odls_bproc_launch_local_procs,
orte_odls_bproc_kill_local_procs,
orte_odls_bproc_signal_local_procs
};
/*
 * Forward declarations of the file-local helpers defined below.
 */
/* (re)create a directory, wiping anything already at that path */
static int odls_bproc_make_dir(char *directory);
/* build the per-process directory path that holds the stdio endpoints */
static char * odls_bproc_get_base_dir_name(int proc_rank, orte_jobid_t jobid,
orte_std_cntr_t app_context);
/* recursively delete a directory tree */
static void odls_bproc_delete_dir_tree(char * path);
/* delete the per-user bproc directory under /tmp */
static int odls_bproc_remove_dir(void);
/* send-completion callback that releases the sent buffer */
static void odls_bproc_send_cb(int status, orte_process_name_t * peer,
orte_buffer_t* buffer, int tag, void* cbdata);
/* create and publish the stdin/stdout/stderr endpoints for one process */
static int odls_bproc_setup_stdio(orte_process_name_t *proc_name,
int proc_rank, orte_jobid_t jobid,
orte_std_cntr_t app_context, bool connect_stdin);
/**
 * Placeholder for the "get add procs data" module entry point.
 *
 * Neither argument is examined.
 *
 * @param data unused
 * @param map  unused
 * @retval ORTE_ERR_NOT_IMPLEMENTED always
 */
int orte_odls_bproc_get_add_procs_data(orte_gpr_notify_data_t **data, orte_job_map_t *map)
{
    (void)data;
    (void)map;

    return ORTE_ERR_NOT_IMPLEMENTED;
}
/**
 * (Re)creates the passed directory.
 *
 * Anything that already exists at that path is removed first (together
 * with its contents), then the directory is created with read, write and
 * execute permission for the owner.
 *
 * @param directory The directory to be created.
 * @retval ORTE_SUCCESS
 * @retval error
 */
static int
odls_bproc_make_dir(char *directory)
{
    struct stat existing;

    /* a successful stat means something already lives here: wipe it */
    if (0 == stat(directory, &existing)) {
        odls_bproc_delete_dir_tree(directory);
    }

    /* at the least, the owner needs to be able to do anything */
    return opal_os_dirpath_create(directory, S_IRWXU);
}
/**
 * Returns a path of the form:
 * @code
 * /tmp/openmpi-bproc-<user>/<universe>/<jobid>-<app_context>/<proc_rank>/
 * @endcode
 * which is used to put links to the pty/pipes in
 *
 * The returned string is heap allocated; the caller must free it.
 *
 * @param proc_rank the process's rank on the node
 * @param jobid the jobid the proc belongs to
 * @param app_context the application context number within the job
 * @retval path on success
 * @retval NULL if the universe name or the user name is not known, or if
 *         the jobid conversion or the path allocation fails
 */
static char *
odls_bproc_get_base_dir_name(int proc_rank, orte_jobid_t jobid,
                             orte_std_cntr_t app_context)
{
    char *path = NULL, *user = NULL, *job = NULL;
    int rc;
    int id;

    /* ensure that system info is set */
    orte_sys_info();

    if (NULL == orte_universe_info.name) {  /* error condition */
        ORTE_ERROR_LOG(ORTE_ERROR);
        return NULL;
    }

    rc = orte_ns.convert_jobid_to_string(&job, jobid);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return NULL;
    }

    /* get the username set by the bproc pls. We need to get it from here
     * because on many bproc systems the method we use to get the username
     * from the system on the backend fails and we only get the uid. */
    id = mca_base_param_register_string("pls", "bproc", "username", NULL,
                                        orte_system_info.user);
    mca_base_param_lookup_string(id, &user);
    if (NULL == user) {
        /* a NULL must never reach the %s conversion below */
        ORTE_ERROR_LOG(ORTE_ERROR);
        free(job);
        return NULL;
    }

    if (0 > asprintf(&path, OPAL_PATH_SEP"tmp"OPAL_PATH_SEP"openmpi-bproc-%s"OPAL_PATH_SEP"%s"OPAL_PATH_SEP"%s-%d"OPAL_PATH_SEP"%d",
                     user, orte_universe_info.name,
                     job, (int) app_context, proc_rank)) {
        ORTE_ERROR_LOG(ORTE_ERROR);
        path = NULL;
    }

    /* only print the path when there is one to print */
    if (NULL != path && 0 < mca_odls_bproc_component.debug) {
        opal_output(0, "odls bproc io setup. Path: %s\n", path);
    }

    free(user);
    free(job);
    return path;
}
/**
 * deletes the passed directory tree recursively
 *
 * Entries are examined with lstat() so that symbolic links are removed
 * themselves and never followed: a link that points at a directory must
 * not cause the contents of its target to be deleted, and a dangling link
 * must still be removed so that the final rmdir() can succeed.
 *
 * This is best effort: errors from unlink() and rmdir() are ignored and
 * nothing is reported to the caller.
 *
 * @param path the path to the base directory to delete
 */
static void
odls_bproc_delete_dir_tree(char * path)
{
    DIR *dp;
    struct dirent *ep;
    char *filenm;
    struct stat buf;

    dp = opendir(path);
    if (NULL == dp) {
        return;
    }

    while (NULL != (ep = readdir(dp))) {
        /* skip: . and .. */
        if (0 == strcmp(ep->d_name, ".") || 0 == strcmp(ep->d_name, "..")) {
            continue;
        }

        filenm = opal_os_path(false, path, ep->d_name, NULL);
        if (NULL == filenm) {
            /* could not build the name; move on to the next entry */
            continue;
        }

        if (0 == lstat(filenm, &buf) && S_ISDIR(buf.st_mode)) {
            /* a real directory: empty it and remove it */
            odls_bproc_delete_dir_tree(filenm);
        } else {
            /* file, fifo, symlink, or something we could not examine */
            unlink(filenm);
        }
        free(filenm);
    }

    closedir(dp);
    rmdir(path);
}
/**
 * Removes the bproc directory
 * @code /tmp/openmpi-bproc-<user>/ @endcode and all of its contents
 *
 * Deleting the tree itself is best effort; only failures to build the
 * directory name are reported.
 *
 * @retval ORTE_SUCCESS
 * @retval ORTE_ERROR the directory name could not be built
 */
static int
odls_bproc_remove_dir(void)
{
    char *frontend = NULL, *user = NULL, *filename = NULL;
    int id;

    /* get the username set by the bproc pls. We need to get it from here
     * because on many bproc systems the method we use to get the username
     * from the system on the backend fails and we only get the uid. */
    id = mca_base_param_register_string("pls", "bproc", "username", NULL,
                                        orte_system_info.user);
    mca_base_param_lookup_string(id, &user);
    if (NULL == user) {
        /* a NULL must never reach the %s conversion below */
        ORTE_ERROR_LOG(ORTE_ERROR);
        return ORTE_ERROR;
    }

    /* filename is not usable if asprintf fails */
    if (0 > asprintf(&filename, "openmpi-bproc-%s", user)) {
        filename = NULL;
    }
    free(user);  /* the looked-up copy is ours to release */
    if (NULL == filename) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERROR;
    }

    frontend = opal_os_path(false, "tmp", filename, NULL);
    free(filename);  /* Always free the filename */
    if (NULL == frontend) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERROR;
    }

    /* we do our best to clean up the directory tree, but we ignore errors*/
    odls_bproc_delete_dir_tree(frontend);
    free(frontend);
    return ORTE_SUCCESS;
}
/**
 * Callback function for when we tell mpirun we are ready
 *
 * The only work done here is dropping our reference to the buffer that
 * was sent; all other arguments are ignored.
 *
 * @param status unused
 * @param peer   unused
 * @param buffer the buffer that was sent; released here
 * @param tag    unused
 * @param cbdata unused
 */
static void
odls_bproc_send_cb(int status, orte_process_name_t * peer,
                   orte_buffer_t* buffer, int tag, void* cbdata)
{
    (void)status;
    (void)peer;
    (void)tag;
    (void)cbdata;

    OBJ_RELEASE(buffer);
}
/**
 * Create the standard I/O endpoints in the filesystem for a given proc
 *
 * The endpoints are placed in:
 * @code
 * /tmp/openmpi-bproc-<user>/<universe>/<jobid>-<app_context>/<proc_rank>/
 * @endcode
 * under the names "0", "1" and "2":
 *  - "0" (stdin) is a FIFO when connect_stdin is true, otherwise a
 *    symlink to /dev/null
 *  - "1" (stdout) is a symlink to a pty when pty support is compiled in
 *    and openpty() succeeds, otherwise a FIFO
 *  - "2" (stderr) is always a FIFO
 *
 * Every FIFO (or pty master) opened here is published to the IOF under
 * proc_name.
 *
 * @param proc_name the name the endpoints are published under
 * @param proc_rank the process's rank on the node
 * @param jobid the jobid the proc belongs to
 * @param app_context the application context number within the job
 * @param connect_stdin if true, stdin will be connected, otherwise it will be
 *                      set to /dev/null
 *
 * @retval ORTE_SUCCESS
 * @retval error
 */
static int
odls_bproc_setup_stdio(orte_process_name_t *proc_name, int proc_rank,
                       orte_jobid_t jobid,
                       orte_std_cntr_t app_context, bool connect_stdin)
{
    char *path_prefix, *fd_link_path = NULL;
    int rc = ORTE_SUCCESS, fd;
#if defined(HAVE_OPENPTY) && (OMPI_ENABLE_PTY_SUPPORT != 0)
    int amaster, aslave;
    char pty_name[256];
    struct termios term_attrs;
#endif

    path_prefix = odls_bproc_get_base_dir_name(proc_rank, jobid, app_context);
    if (NULL == path_prefix) {
        rc = ORTE_ERROR;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* check for existence and access, or create it */
    if (ORTE_SUCCESS != (rc = odls_bproc_make_dir(path_prefix))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* setup the stdin FIFO.  Always use a fifo for the same reason we
       always use a pipe in the iof_setup code -- don't want to flush
       onto the floor during close */
    fd_link_path = opal_os_path( false, path_prefix, "0", NULL );
    if (NULL == fd_link_path) {
        rc = ORTE_ERROR;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (connect_stdin) {
        if (0 != mkfifo(fd_link_path, S_IRWXU)) {
            perror("odls_bproc mkfifo failed");
            rc = ORTE_ERROR;
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        fd = open(fd_link_path, O_RDWR);
        if (-1 == fd) {
            perror("odls_bproc open failed");
            rc = ORTE_ERROR;
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        orte_iof.iof_publish(proc_name, ORTE_IOF_SINK,
                             ORTE_IOF_STDIN, fd);
    } else {
        if(0 != symlink("/dev/null", fd_link_path)) {
            perror("odls_bproc could not create symlink");
            rc = ORTE_ERROR;
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
    }

    free(fd_link_path);
    fd_link_path = NULL;

    /* setup the stdout PTY / FIFO */
    fd_link_path = opal_os_path( false, path_prefix, "1", NULL );
    if (NULL == fd_link_path) {
        rc = ORTE_ERROR;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

#if defined(HAVE_OPENPTY) && (OMPI_ENABLE_PTY_SUPPORT != 0)
    if (0 != openpty(&amaster, &aslave, pty_name, NULL, NULL)) {
        opal_output(0, "odls_bproc: openpty failed, using pipes instead");
        goto stdout_fifo_setup;
    }

    /* From here until the master is published, both pty descriptors are
     * ours: close them on every failure so they do not leak. */
    if (0 != symlink(pty_name, fd_link_path)) {
        rc = ORTE_ERROR;
        ORTE_ERROR_LOG(rc);
        close(amaster);
        close(aslave);
        goto cleanup;
    }

    if (tcgetattr(aslave, &term_attrs) < 0) {
        rc = ORTE_ERROR;
        ORTE_ERROR_LOG(rc);
        close(amaster);
        close(aslave);
        goto cleanup;
    }
    /* no echo and no CR/NL translation or flow control on the pty */
    term_attrs.c_lflag &= ~ (ECHO | ECHOE | ECHOK |
                             ECHOCTL | ECHOKE | ECHONL);
    term_attrs.c_iflag &= ~ (ICRNL | INLCR | ISTRIP | INPCK | IXON);
    term_attrs.c_oflag &= ~ (OCRNL | ONLCR);
    if (tcsetattr(aslave, TCSANOW, &term_attrs) == -1) {
        rc = ORTE_ERROR;
        ORTE_ERROR_LOG(rc);
        close(amaster);
        close(aslave);
        goto cleanup;
    }

    /* NOTE(review): the slave side stays open after a successful setup,
     * as in the original code - confirm whether that is intentional. */
    orte_iof.iof_publish(proc_name, ORTE_IOF_SOURCE,
                         ORTE_IOF_STDOUT, amaster);

    goto stderr_fifo_setup;

stdout_fifo_setup:
#endif

    if (0 != mkfifo(fd_link_path, S_IRWXU)) {
        perror("odls_bproc mkfifo failed");
        rc = ORTE_ERROR;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    fd = open(fd_link_path, O_RDWR);
    if (-1 == fd) {
        perror("odls_bproc open failed");
        rc = ORTE_ERROR;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    orte_iof.iof_publish(proc_name, ORTE_IOF_SOURCE,
                         ORTE_IOF_STDOUT, fd);

#if defined(HAVE_OPENPTY) && (OMPI_ENABLE_PTY_SUPPORT != 0)
stderr_fifo_setup:
#endif

    free(fd_link_path);
    fd_link_path = NULL;

    /* setup the stderr FIFO.  Always a fifo */
    fd_link_path = opal_os_path( false, path_prefix, "2", NULL );
    if (NULL == fd_link_path) {
        rc = ORTE_ERROR;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    if (0 != mkfifo(fd_link_path, S_IRWXU)) {
        perror("odls_bproc mkfifo failed");
        rc = ORTE_ERROR;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    fd = open(fd_link_path, O_RDWR);
    if (-1 == fd) {
        perror("odls_bproc open failed");
        rc = ORTE_ERROR;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    orte_iof.iof_publish(proc_name, ORTE_IOF_SOURCE,
                         ORTE_IOF_STDERR, fd);

cleanup:
    if (NULL != path_prefix) {
        free(path_prefix);
    }
    if (NULL != fd_link_path) {
        free(fd_link_path);
    }
    return rc;
}
/*
 * Register a GPR subscription, attached to the standard orted launch
 * stage-gate trigger of the given job, that returns the proc name,
 * app-context index and node name entries of the job segment to cbfunc.
 *
 * This entire function gets called within a GPR compound command,
 * so the subscription actually doesn't get done until the orted
 * executes the compound command.
 */
int orte_odls_bproc_subscribe_launch_data(orte_jobid_t job, orte_gpr_notify_cb_fn_t cbfunc)
{
    char *launch_keys[] = {
        ORTE_PROC_NAME_KEY,
        ORTE_PROC_APP_CONTEXT_KEY,
        ORTE_NODE_NAME_KEY,
    };
    const int key_count = (int)(sizeof(launch_keys) / sizeof(launch_keys[0]));
    orte_gpr_subscription_t subscription = ORTE_GPR_SUBSCRIPTION_EMPTY;
    orte_gpr_subscription_t *subscription_ptr = &subscription;
    orte_gpr_trigger_t trigger = ORTE_GPR_TRIGGER_EMPTY;
    orte_gpr_trigger_t *trigger_ptr = &trigger;
    orte_gpr_value_t *value_slot[1];
    char *job_segment;
    int idx, rc;

    /* get the job segment name */
    rc = orte_schema.get_job_segment_name(&job_segment, job);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* attach ourselves to the "standard" orted trigger */
    rc = orte_schema.get_std_trigger_name(&(trigger.name),
                                          ORTED_LAUNCH_STAGE_GATE_TRIGGER, job);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto free_segment;
    }

    /* ask for return of all data required for launching local processes */
    subscription.action = ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG;
    rc = orte_schema.get_std_subscription_name(&(subscription.name),
                                               ORTED_LAUNCH_STG_SUB, job);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto free_trigger_name;
    }

    subscription.cnt = 1;
    subscription.values = value_slot;
    rc = orte_gpr.create_value(&(value_slot[0]),
                               ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR,
                               job_segment, key_count, 0);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto free_subscription_name;
    }

    /* one value-less keyval per key we want returned */
    idx = 0;
    while (idx < key_count) {
        rc = orte_gpr.create_keyval(&(value_slot[0]->keyvals[idx]),
                                    launch_keys[idx], ORTE_UNDEF, NULL);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            goto release_value;
        }
        idx++;
    }

    subscription.cbfunc = cbfunc;

    /* do the subscription; whatever it returns is handed back to the caller */
    rc = orte_gpr.subscribe(1, &subscription_ptr, 1, &trigger_ptr);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    /* the success path falls through every cleanup stage below */
release_value:
    OBJ_RELEASE(value_slot[0]);
free_subscription_name:
    free(subscription.name);
free_trigger_name:
    free(trigger.name);
free_segment:
    free(job_segment);
    return rc;
}
/**
 * Setup io for the current node, then tell orterun we are ready for the actual
 * processes.
 *
 * The returned GPR data is scanned for process containers whose node name
 * matches this node; each match becomes an odls_bproc_child_t appended to the
 * component's children list.  The stdio entries are then created for every
 * child on that list and an ack is sent to the HNP.
 *
 * @param data          notify data delivered for the launch subscription
 * @param base_environ  not used by this implementation
 * @retval ORTE_SUCCESS
 * @retval error
 */
int
orte_odls_bproc_launch_local_procs(orte_gpr_notify_data_t *data, char **base_environ)
{
    odls_bproc_child_t *child;
    opal_list_item_t* item;
    orte_gpr_value_t *value, **values;
    orte_gpr_keyval_t *kval;
    char *node_name;
    int rc;
    orte_std_cntr_t i, j, kv, kv2, *sptr;
    int src = 0;
    orte_buffer_t *ack;
    bool connect_stdin;
    orte_jobid_t jobid;
    int cycle = 0;

    /* first, retrieve the job number we are to launch from the
     * returned data - we can extract the jobid directly from the
     * subscription name we created
     */
    if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&jobid, data->target))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /**
     * hack for bproc4, change process group so that we do not receive signals
     * from the parent/front-end process, as bproc4 does not currently allow the
     * process to intercept the signal
     */
    setpgid(0,0);

    /* loop through the returned data to find the global info and
     * the info for processes going onto this node
     */
    values = (orte_gpr_value_t**)(data->values)->addr;
    for (j=0, i=0; i < data->cnt && j < (data->values)->size; j++) {  /* loop through all returned values */
        if (NULL == values[j]) {
            /* empty slot in the array - does not count towards data->cnt */
            continue;
        }
        i++;
        value = values[j];
        /* this must have come from one of the process containers, so it must
         * contain data for a proc structure - see if it belongs to this node
         */
        for (kv=0; kv < value->cnt; kv++) {
            kval = value->keyvals[kv];
            if (strcmp(kval->key, ORTE_NODE_NAME_KEY) != 0) {
                continue;
            }
            /* Most C-compilers will bark if we try to directly compare the string in the
             * kval data area against a regular string, so we need to "get" the data
             * so we can access it */
            if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&node_name, kval->value, ORTE_STRING))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            /* if this is our node...must also protect against a zero-length string */
            if (NULL == node_name || 0 != strcmp(node_name, orte_system_info.nodename)) {
                continue;
            }
            /* ...harvest the info into a new child structure */
            child = OBJ_NEW(odls_bproc_child_t);
            for (kv2 = 0; kv2 < value->cnt; kv2++) {
                kval = value->keyvals[kv2];
                if(strcmp(kval->key, ORTE_PROC_NAME_KEY) == 0) {
                    /* copy the name into the child object */
                    if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(child->name), kval->value->data, ORTE_NAME))) {
                        ORTE_ERROR_LOG(rc);
                        /* the child is not on the list yet, so nobody else will release it */
                        OBJ_RELEASE(child);
                        return rc;
                    }
                    continue;
                }
                if(strcmp(kval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
                    if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, kval->value, ORTE_STD_CNTR))) {
                        ORTE_ERROR_LOG(rc);
                        OBJ_RELEASE(child);
                        return rc;
                    }
                    child->app_idx = *sptr;  /* save the index into the app_context objects */
                    continue;
                }
            } /* kv2 */
            /* protect operation on the global list of children; the component
             * structure names this mutex "lock" */
            OPAL_THREAD_LOCK(&mca_odls_bproc_component.lock);
            opal_list_append(&mca_odls_bproc_component.children, &child->super);
            opal_condition_signal(&mca_odls_bproc_component.cond);
            OPAL_THREAD_UNLOCK(&mca_odls_bproc_component.lock);
        } /* for kv */
    } /* for j */

    /* set up the io files for our children */
    for(item =  opal_list_get_first(&mca_odls_bproc_component.children);
        item != opal_list_get_end(&mca_odls_bproc_component.children);
        item =  opal_list_get_next(item)) {
        child = (odls_bproc_child_t *) item;
        if(0 < mca_odls_bproc_component.debug) {
            opal_output(0, "orte_odls_bproc_launch: setting up io for "
                        "[%lu,%lu,%lu] proc rank %lu\n",
                        ORTE_NAME_ARGS((child->name)),
                        child->name->vpid);
        }
        /* only setup to forward stdin if it is rank 0, otherwise connect
         * to /dev/null */
        connect_stdin = (0 == child->name->vpid);

        /* "cycle" is the position of the child in the list and is what
         * the stdio setup receives as the proc's rank on this node */
        rc = odls_bproc_setup_stdio(child->name, cycle,
                                    jobid, child->app_idx,
                                    connect_stdin);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        cycle++;
    }

    /* message to indicate that we are ready */
    ack = OBJ_NEW(orte_buffer_t);
    rc = orte_dss.pack(ack, &src, 1, ORTE_INT);
    if(ORTE_SUCCESS != rc) {
        /* logged only: the ack is still sent, as it always has been */
        ORTE_ERROR_LOG(rc);
    }
    /* NOTE(review): ack is presumably released by odls_bproc_send_cb once the
     * send completes - confirm, and confirm who owns it when the send fails */
    rc = mca_oob_send_packed_nb(ORTE_PROC_MY_HNP, ack, ORTE_RML_TAG_BPROC, 0,
                                odls_bproc_send_cb, NULL);
    if (0 > rc) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    rc = ORTE_SUCCESS;

cleanup:
    return rc;
}
/**
 * Terminate-job entry point of the bproc odls module.
 *
 * No process is killed here; the only action taken is a flush of the
 * I/O forwarding subsystem.
 *
 * @param job        not consulted
 * @param set_state  not consulted
 * @retval ORTE_SUCCESS always
 */
int orte_odls_bproc_kill_local_procs(orte_jobid_t job, bool set_state)
{
    (void)job;
    (void)set_state;

    orte_iof.iof_flush();

    return ORTE_SUCCESS;
}
/**
 * Signal entry point of the bproc odls module.
 *
 * No signal is delivered here; the only action taken is a flush of the
 * I/O forwarding subsystem.
 *
 * @param proc   the process's name (not consulted)
 * @param signal the signal to send (not consulted)
 * @retval ORTE_SUCCESS always
 */
int orte_odls_bproc_signal_local_procs(const orte_process_name_t* proc, int32_t signal)
{
    (void)proc;
    (void)signal;

    orte_iof.iof_flush();

    return ORTE_SUCCESS;
}
/**
 * Shut the bproc odls module down.
 *
 * Flushes I/O forwarding, removes the directory used for the forwarded
 * stdio entries, and finalizes this process's session directory.
 *
 * @retval ORTE_SUCCESS always
 */
int orte_odls_bproc_finalize(void)
{
    /* drain pending forwarded I/O before the entries backing it go away */
    orte_iof.iof_flush();

    odls_bproc_remove_dir();

    orte_session_dir_finalize(orte_process_info.my_name);

    return ORTE_SUCCESS;
}

112
orte/mca/odls/bproc/odls_bproc.h Обычный файл
Просмотреть файл

@ -0,0 +1,112 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file:
* Part of the bproc launching system. This launching system is broken into 2
* parts: one runs under the PLS on the head node to launch the orteds, and the
* other serves as the orted's local launcher.
*
* The main job of this component is to setup ptys/pipes for IO forwarding.
* See pls_bproc.h for an overview of how the entire bproc launching system works.
*/
#ifndef ORTE_ODLS_BPROC_H_
#define ORTE_ODLS_BPROC_H_
#include "orte_config.h"
#include <sys/bproc.h>
#include "opal/mca/mca.h"
#include "opal/threads/condition.h"
#include "orte/mca/gpr/gpr_types.h"
#include "orte/mca/rmaps/rmaps_types.h"
#include "orte/mca/odls/odls.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/*
 * Component open / close
 */
int orte_odls_bproc_component_open(void);
int orte_odls_bproc_component_close(void);
/*
 * Startup / Shutdown
 *
 * orte_odls_bproc_finalize is declared exactly once, in this group.
 */
orte_odls_base_module_t* orte_odls_bproc_init(int *priority);
int orte_odls_bproc_finalize(void);
/*
 * Interface
 */
int orte_odls_bproc_subscribe_launch_data(orte_jobid_t job, orte_gpr_notify_cb_fn_t cbfunc);
int orte_odls_bproc_get_add_procs_data(orte_gpr_notify_data_t **data, orte_job_map_t *map);
int orte_odls_bproc_launch_local_procs(orte_gpr_notify_data_t *data, char **base_environ);
int orte_odls_bproc_kill_local_procs(orte_jobid_t job, bool set_state);
int orte_odls_bproc_signal_local_procs(const orte_process_name_t* proc_name, int32_t signal);
/**
* ODLS bproc component: the base component structure extended with the
* parameters and shared state of the bproc local launcher.
*/
struct orte_odls_bproc_component_t {
orte_odls_base_component_t super;
/**< The base class */
int debug;
/**< If greater than 0 print debugging information */
int priority;
/**< The priority of this component. This is the value reported through
* the init function when bproc is found to be running on this node. */
opal_mutex_t lock;
/**< Lock used to prevent some race conditions */
opal_condition_t cond;
/**< Condition used to wake up waiting threads */
opal_list_t children;
/**< list of children on this node */
};
/**
* Convenience typedef
*/
typedef struct orte_odls_bproc_component_t orte_odls_bproc_component_t;
/*
* List object to locally store the process names and pids of
* our children. This can subsequently be used to order termination
* or pass signals without looking the info up again.
*
* The name is owned by the object: the class destructor frees it.
*/
typedef struct odls_bproc_child_t {
opal_list_item_t super; /* required to place this on a list */
orte_process_name_t *name; /* the OpenRTE name of the proc */
pid_t pid; /* local pid of the proc */
orte_std_cntr_t app_idx; /* index of the app_context for this proc */
bool alive; /* is this proc alive? */
} odls_bproc_child_t;
OBJ_CLASS_DECLARATION(odls_bproc_child_t);
ORTE_MODULE_DECLSPEC extern orte_odls_bproc_component_t mca_odls_bproc_component;
extern orte_odls_base_module_t orte_odls_bproc_module;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* ORTE_ODLS_BPROC_H_ */

134
orte/mca/odls/bproc/odls_bproc_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,134 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
/**
* @file:
* Takes care of the component stuff for the MCA.
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/proc_info.h"
#include "orte/mca/odls/odls.h"
#include "odls_bproc.h"
/* instance the child list object */

/**
 * Constructor for odls_bproc_child_t: puts every member the class owns
 * into a defined "nothing recorded yet" state.
 */
static void odls_bproc_child_constructor(odls_bproc_child_t *ptr)
{
    ptr->name = NULL;
    /* give pid a defined value instead of leaving it uninitialized */
    ptr->pid = 0;
    ptr->app_idx = -1;
    ptr->alive = false;
}
/**
 * Destructor for odls_bproc_child_t: releases the process name held by
 * the object, if there is one.
 */
static void odls_bproc_child_destructor(odls_bproc_child_t *ptr)
{
    /* free() accepts NULL, so no explicit guard is required */
    free(ptr->name);
}
/* Class instance for odls_bproc_child_t: a list item with the
 * constructor and destructor defined above */
OBJ_CLASS_INSTANCE(odls_bproc_child_t,
opal_list_item_t,
odls_bproc_child_constructor,
odls_bproc_child_destructor);
/**
* The bproc component data structure used to store all the relevant data
* about this component. Only the base component part is initialized
* statically; the remaining members are set up by the component open
* function.
*/
orte_odls_bproc_component_t mca_odls_bproc_component = {
{
/* First, the mca_component_t struct containing meta information
about the component itself */
{
/* Indicate that we are a odls v1.3.0 component (which also
implies a specific MCA version) */
ORTE_ODLS_BASE_VERSION_1_3_0,
/* Component name and version */
"bproc",
ORTE_MAJOR_VERSION,
ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION,
/* Component open and close functions */
orte_odls_bproc_component_open,
orte_odls_bproc_component_close
},
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
},
/* Initialization / querying functions */
orte_odls_bproc_init,
orte_odls_bproc_finalize
}
};
/**
* Opens the pls_bproc component, setting all the needed mca parameters and
* finishes setting up the component struct.
*/
int orte_odls_bproc_component_open(void)
{
/* initialize globals */
OBJ_CONSTRUCT(&mca_odls_bproc_component.lock, opal_mutex_t);
OBJ_CONSTRUCT(&mca_odls_bproc_component.cond, opal_condition_t);
OBJ_CONSTRUCT(&mca_odls_bproc_component.children, opal_list_t);
/* lookup parameters */
mca_base_param_reg_int(&mca_odls_bproc_component.super.version,
"priority", NULL, false, false, 100,
&mca_odls_bproc_component.priority);
mca_base_param_reg_int(&mca_odls_bproc_component.super.version,
"debug", "If > 0 prints library debugging information",
false, false, 0, &mca_odls_bproc_component.debug);
return ORTE_SUCCESS;
}
/**
* Initializes the module. We do not want to run unless we are not the seed,
* bproc is running, and we are not on the master node.
*/
orte_odls_base_module_t *orte_odls_bproc_init(int *priority)
{
int ret;
struct bproc_version_t version;
/* the base open/select logic protects us against operation when
* we are NOT in a daemon, so we don't have to check that here
*/
/* check to see if BProc is running here */
ret = bproc_version(&version);
if (ret != 0) {
return NULL;
}
*priority = mca_odls_bproc_component.priority;
return &orte_odls_bproc_module;
}
/**
* Component close function.
*/
int orte_odls_bproc_component_close(void)
{
OBJ_DESTRUCT(&mca_odls_bproc_component.lock);
OBJ_DESTRUCT(&mca_odls_bproc_component.cond);
OBJ_DESTRUCT(&mca_odls_bproc_component.children);
return ORTE_SUCCESS;
}

0
orte/mca/plm/bproc/.ompi_ignore Обычный файл
Просмотреть файл

1
orte/mca/plm/bproc/.ompi_unignore Обычный файл
Просмотреть файл

@ -0,0 +1 @@
rhc

51
orte/mca/plm/bproc/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,51 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Help text shipped with the component
dist_pkgdata_DATA = help-plm-bproc.txt
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_plm_bproc_DSO
component_noinst =
component_install = mca_plm_bproc.la
else
component_noinst = libmca_plm_bproc.la
component_install =
endif
# Sources shared by the DSO and the static flavor of the component
sources = \
plm_bproc.h \
plm_bproc.c \
plm_bproc_state.c \
plm_bproc_component.c
# DSO build: the installed module, linked against the bproc libraries
# plus the ORTE and OPAL libraries
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_plm_bproc_la_SOURCES = $(sources)
mca_plm_bproc_la_LIBADD = \
$(plm_bproc_LIBS) \
$(top_ompi_builddir)/orte/libopen-rte.la \
$(top_ompi_builddir)/opal/libopen-pal.la
mca_plm_bproc_la_LDFLAGS = -module -avoid-version $(plm_bproc_LDFLAGS)
# Static build: a non-installed library, linked against the bproc
# libraries only
noinst_LTLIBRARIES = $(component_noinst)
libmca_plm_bproc_la_SOURCES = $(sources)
libmca_plm_bproc_la_LIBADD = $(plm_bproc_LIBS)
libmca_plm_bproc_la_LDFLAGS = -module -avoid-version $(plm_bproc_LDFLAGS)

41
orte/mca/plm/bproc/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,41 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_plm_bproc_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
# Runs the bproc check and records the outcome in plm_bproc_good:
# 2 for new bproc, 1 for old bproc, 0 when bproc was not found.
AC_DEFUN([MCA_plm_bproc_CONFIG],[
OMPI_CHECK_BPROC([plm_bproc], [plm_bproc_good=2],
[plm_bproc_good=1], [plm_bproc_good=0])
# Old bproc was detected: define MCA_plm_bproc_scyld for the C code.
# Evaluate succeed / fail
AS_IF([test "$plm_bproc_good" = "1"],
[AC_DEFINE_UNQUOTED([MCA_plm_bproc_scyld], [1],
[Defined if we are using Scyld bproc or pre 3.2.0 LANL bproc])])
# Any bproc was detected: export the wrapper flags and run the found
# action. Otherwise run the not-found action.
AS_IF([test "$plm_bproc_good" != "0"],
[plm_bproc_WRAPPER_EXTRA_LDFLAGS="$plm_bproc_LDFLAGS"
plm_bproc_WRAPPER_EXTRA_LIBS="$plm_bproc_LIBS"
$1],
[$2])
# set build flags to use in makefile
AC_SUBST([plm_bproc_CPPFLAGS])
AC_SUBST([plm_bproc_LDFLAGS])
AC_SUBST([plm_bproc_LIBS])
])dnl

24
orte/mca/plm/bproc/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,24 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module
PARAM_CONFIG_FILES="Makefile"

107
orte/mca/plm/bproc/help-plm-bproc.txt Обычный файл
Просмотреть файл

@ -0,0 +1,107 @@
# -*- text -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English help file for Open RTE's bproc launcher component.
#
[bproc-vexecmove-launch]
The bproc PLS component was not able to launch %s on node %d and therefore
cannot continue. Errno was set to %d.
[bproc-vexecmove-fork]
The bproc PLS component was not able to fork and therefore cannot continue.
Errno was set to %d.
[no-orted]
The bproc PLS component was not able to find the executable "%s" in the
current directory, your PATH, or in the directory where Open MPI was
initially installed, and therefore cannot continue.
For reference, we looked for
%s
Your current PATH is:
%s
We also looked for orted in the following directory:
%s
You may need to set your PATH properly, or set the MCA parameter
pls_bproc_orted to be the path to "orted".
[daemon-launch-number]
The bproc PLS component was not able to launch all the daemons on the remote
nodes and therefore cannot continue.
We attempted to launch %d daemons but only %d were actually launched.
For reference, we tried to launch %s
[daemon-launch-bad-pid]
The bproc PLS component was not able to launch all the daemons on the remote
nodes and therefore cannot continue.
On node %d the daemon pid was %d and errno was set to %d.
For reference, we tried to launch %s
[daemon-died-no-signal]
A daemon (pid %d) launched by the bproc PLS component on node %d died
unexpectedly so we are aborting.
This may be because the daemon was unable to find all the needed shared
libraries on the remote node. You may set your LD_LIBRARY_PATH to have the
location of the shared libraries on the remote nodes and this will
automatically be forwarded to the remote nodes.
[daemon-died-signal]
A daemon (pid %d) launched by the bproc PLS component on node %d died
unexpectedly on signal %d so we are aborting.
This may be because the daemon was unable to find all the needed shared
libraries on the remote node. You may set your LD_LIBRARY_PATH to have the
location of the shared libraries on the remote nodes and this will
automatically be forwarded to the remote nodes.
[proc-launch-number]
The bproc PLS component was not able to launch all the processes on the remote
nodes and therefore cannot continue.
We attempted to launch %d processes but only %d were actually launched.
For reference, we tried to launch %s
[proc-launch-bad-pid]
The bproc PLS component was not able to launch all the processes on the remote
nodes and therefore cannot continue.
On node %d the process pid was %d and errno was set to %d.
For reference, we tried to launch %s
[mismatched-slots]
The current bproc support requires that the number of available
slots on each node be the same. Note that this does -not- mean
that the number of processes you want to launch must be the same.
It only requires that you have access to the same number of process
slots on each node.
This is not something inherent to Open MPI, but rather a reported
characteristic of Bproc. We are in the process of confirming that
this requirement remains in effect. If we find that it has been
removed, then we will revise the system to support varying
numbers of slots on the allocated nodes.
Meantime, please revise your hostfile or other allocation so they
report the same number of process slots on each node. If you want
to force a particular mapping of numbers of processes to each node,
please use any of the other Open MPI mechanisms for doing so.

1430
orte/mca/plm/bproc/plm_bproc.c Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

150
orte/mca/plm/bproc/plm_bproc.h Обычный файл
Просмотреть файл

@ -0,0 +1,150 @@
/* -*- C -*-
*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*
*/
/**
* @file:
* Header file for the bproc launcher. This launcher is actually split into 2
* modules: pls_bproc & pls_bproc_orted. The general idea behind this launcher is:
* -# pls_bproc is called by orterun. It figures out the process mapping and
* launches orted's on the nodes
* -# pls_bproc_orted is called by orted. This module initializes either a pty or
* pipes, places symlinks to them at well-known points of the filesystem, and
* sets up the io forwarding. It then sends an ack back to orterun.
* -# pls_bproc waits for an ack to come back from the orteds, then does several
* parallel launches of the application processes. The number of launches is
* equal to the maximum number of processes on a node. For example, if there
* were 2 processes assigned to node 1, and 1 process assigned to node 2, we
* would do a parallel launch that launches one process on each node, then
* another which launches another process on node 1.
*/
#ifndef ORTE_PLS_BPROC_H_
#define ORTE_PLS_BPROC_H_
#include "orte_config.h"
#include "orte/orte_constants.h"
#include <sys/bproc.h>
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include "opal/threads/condition.h"
#include "orte/class/orte_pointer_array.h"
#include "orte/util/proc_info.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/pls/base/base.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/*
* Component open / close
*/
int orte_pls_bproc_component_open(void);
int orte_pls_bproc_component_close(void);
/*
* Startup / Shutdown
*/
orte_pls_base_module_t* orte_pls_bproc_init(int *priority);
int orte_pls_bproc_finalize(void);
/*
* Interface
*/
int orte_pls_bproc_launch(orte_jobid_t);
int orte_pls_bproc_terminate_job(orte_jobid_t, struct timeval *timeout, opal_list_t*);
int orte_pls_bproc_terminate_proc(const orte_process_name_t* proc_name);
int orte_pls_bproc_terminate_orteds(orte_jobid_t jobid, struct timeval *timeout, opal_list_t*);
int orte_pls_bproc_signal_job(orte_jobid_t, int32_t, opal_list_t*);
int orte_pls_bproc_signal_proc(const orte_process_name_t* proc_name, int32_t);
int orte_pls_bproc_cancel_operation(void);
/* Utility routines to get/set a process pid */
ORTE_DECLSPEC int orte_pls_bproc_set_proc_pid(const orte_process_name_t*, pid_t, int);
ORTE_DECLSPEC int orte_pls_bproc_get_proc_pid(const orte_process_name_t*, pid_t*);
/**
* Utility routine to retrieve all process pids within a specified job.
*/
ORTE_DECLSPEC int orte_pls_bproc_get_proc_pids(orte_jobid_t jobid, pid_t** pids,
orte_std_cntr_t* num_pids,
opal_list_t *attrs);
/**
* Utility routines to get/set daemon pids
*/
ORTE_DECLSPEC int orte_pls_bproc_set_node_pid(orte_cellid_t cellid, char* node_name, orte_jobid_t jobid, pid_t pid);
ORTE_DECLSPEC int orte_pls_bproc_get_node_pids(orte_jobid_t jobid, pid_t** pids, orte_std_cntr_t* num_pids);
/* utility functions for abort communications */
int orte_pls_bproc_comm_start(void);
int orte_pls_bproc_comm_stop(void);
void orte_pls_bproc_recv(int status, orte_process_name_t* sender,
orte_buffer_t* buffer, orte_rml_tag_t tag,
void* cbdata);
/**
* PLS bproc Component: the base component structure extended with the
* parameters and shared state of the bproc launcher.
*/
struct orte_pls_bproc_component_t {
orte_pls_base_component_t super;
/**< The base class */
char * orted;
/**< The orted executable. This can be an absolute path, or if not found
* we will look for it in the user's path */
int debug;
/**< If greater than 0 print debugging information */
int priority;
/**< The priority of this component. This is the value reported through
* the init function when bproc is found to be running on this node. */
int terminate_sig;
/**< The signal that gets sent to a process to kill it. */
opal_mutex_t lock;
/**< Lock used to prevent some race conditions */
opal_condition_t condition;
/**< Condition that is signaled when all the daemons have died */
bool recv_issued;
/**< Indicates that the comm recv for reporting abnormal proc termination
* has been issued
*/
bool do_not_launch;
/**< for test purposes, do everything but the actual launch */
orte_std_cntr_t num_daemons;
/**< track the number of daemons being launched so we can tell when
* all have reported in */
};
/**
* Convenience typedef
*/
typedef struct orte_pls_bproc_component_t orte_pls_bproc_component_t;
ORTE_DECLSPEC orte_pls_bproc_component_t mca_pls_bproc_component;
ORTE_DECLSPEC orte_pls_base_module_t orte_pls_bproc_module;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* ORTE_PLS_BPROC_H_ */

123
orte/mca/plm/bproc/plm_bproc_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,123 @@
/* -*- C -*-
*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
/**
* @file:
* Takes care of the component stuff for the MCA.
*/
#include "orte_config.h"
#include "orte/mca/errmgr/errmgr.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/mca_base_param.h"
#include "pls_bproc.h"
/**
* The bproc component data structure used to store all the relevant data about
* this component. Only the base component part is initialized statically; the
* remaining members are filled in by the component open function.
*/
orte_pls_bproc_component_t mca_pls_bproc_component = {
{
{
ORTE_PLS_BASE_VERSION_1_3_0,
"bproc", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
orte_pls_bproc_component_open, /* component open */
orte_pls_bproc_component_close /* component close */
},
{
false /* checkpoint / restart */
},
orte_pls_bproc_init /* component init */
}
};
/**
* Opens the pls_bproc component, setting all the needed mca parameters and
* finishes setting up the component struct.
*/
int orte_pls_bproc_component_open(void) {
int rc;
/* init parameters */
mca_base_component_t *c = &mca_pls_bproc_component.super.pls_version;
mca_base_param_reg_int(c, "priority", NULL, false, false, 100,
&mca_pls_bproc_component.priority);
mca_base_param_reg_int(c, "debug",
"If > 0 prints library debugging information",
false, false, 0, &mca_pls_bproc_component.debug);
mca_base_param_reg_int(c, "terminate_sig",
"Signal sent to processes to terminate them", false,
false, 9, &mca_pls_bproc_component.terminate_sig);
mca_base_param_reg_string(c, "orted", "Path to where orted is installed",
false, false, "orted", &mca_pls_bproc_component.orted);
mca_base_param_reg_int(c, "nolaunch", NULL, false, false, (int)false,
&rc);
if ((int)false == rc) {
mca_pls_bproc_component.do_not_launch = false;
} else {
mca_pls_bproc_component.do_not_launch = true;
}
mca_pls_bproc_component.recv_issued = false;
OBJ_CONSTRUCT(&mca_pls_bproc_component.lock, opal_mutex_t);
OBJ_CONSTRUCT(&mca_pls_bproc_component.condition, opal_condition_t);
return ORTE_SUCCESS;
}
/**
 * Close the pls_bproc component by destructing the lock and the condition
 * that orte_pls_bproc_component_open constructed.
 *
 * @return ORTE_SUCCESS (always)
 */
int orte_pls_bproc_component_close(void)
{
    /* tear down in the same order the original code uses: lock, then condition */
    OBJ_DESTRUCT(&mca_pls_bproc_component.lock);
    OBJ_DESTRUCT(&mca_pls_bproc_component.condition);

    return ORTE_SUCCESS;
}
/**
* Initializes the module. We do not want to run unless we are the seed, bproc
* is running, and we are the master node.
*/
orte_pls_base_module_t* orte_pls_bproc_init(int *priority) {
int ret;
struct bproc_version_t version;
/* are we the seed */
if(orte_process_info.seed == false)
return NULL;
/* okay, we are in an HNP - now check to see if BProc is running here */
if (!mca_pls_bproc_component.do_not_launch) {
ret = bproc_version(&version);
if (ret != 0) {
return NULL;
}
}
/* only launch from the master node */
if (bproc_currnode() != BPROC_NODE_MASTER) {
return NULL;
}
*priority = mca_pls_bproc_component.priority;
return &orte_pls_bproc_module;
}

402
orte/mca/plm/bproc/plm_bproc_state.c Обычный файл
Просмотреть файл

@ -0,0 +1,402 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "opal/util/output.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "orte/dss/dss.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/smr/smr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/schema/schema.h"
#include "orte/mca/pls/bproc/pls_bproc.h"
/**
 * Store the pid and the node number of a process in its job segment and then
 * mark the process as launched.
 *
 * @param name     process whose registry entry is written
 * @param pid      pid stored under ORTE_PROC_LOCAL_PID_KEY
 * @param nodenum  node number, stored as a decimal string under
 *                 ORTE_NODE_NAME_KEY
 * @return ORTE_SUCCESS, or the error code of the first step that failed
 */
int orte_pls_bproc_set_proc_pid(const orte_process_name_t *name, pid_t pid, int nodenum)
{
    orte_gpr_value_t *values[1];
    char *segment;
    char *nodename = NULL;
    int rc;

    if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, name->jobid))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* one value with two keyvals; the token array is supplied by
     * get_proc_tokens below, so no token slots are requested here */
    rc = orte_gpr.create_value(&values[0], ORTE_GPR_OVERWRITE, segment, 2, 0);
    free(segment);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&(values[0]->tokens),
                                                          &(values[0]->num_tokens),
                                                          (orte_process_name_t*)name))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(values[0]);
        return rc;
    }

    if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[0]->keyvals[0]),
                                                     ORTE_PROC_LOCAL_PID_KEY, ORTE_PID, &pid))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(values[0]);
        return rc;
    }

    /* asprintf leaves the output pointer undefined on failure, so it must be
     * checked before the string is used or freed */
    if (0 > asprintf(&nodename, "%ld", (long)nodenum)) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        OBJ_RELEASE(values[0]);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    rc = orte_gpr.create_keyval(&(values[0]->keyvals[1]), ORTE_NODE_NAME_KEY,
                                ORTE_STRING, nodename);
    free(nodename);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(values[0]);
        return rc;
    }

    rc = orte_gpr.put(1, values);
    OBJ_RELEASE(values[0]);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* set the process state to LAUNCHED */
    if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state((orte_process_name_t*)name,
                                                      ORTE_PROC_STATE_LAUNCHED, 0))) {
        ORTE_ERROR_LOG(rc);
    }
    return rc;
}
/**
 * Retrieve a specified process pid from the registry.
 *
 * @param name  process to look up in its job segment
 * @param pid   receives the pid stored under ORTE_PROC_LOCAL_PID_KEY
 * @return ORTE_SUCCESS, ORTE_ERR_NOT_FOUND when the registry does not hold
 *         exactly one matching value with exactly one keyval, or the error
 *         code of the failing registry/schema call
 */
int orte_pls_bproc_get_proc_pid(const orte_process_name_t* name, pid_t* pid)
{
    char *segment;
    char **tokens = NULL;
    orte_std_cntr_t num_tokens = 0;
    char *keys[2];
    orte_gpr_value_t** values = NULL;
    orte_std_cntr_t i, num_values = 0;
    pid_t *pptr;
    int rc;

    /* query the job segment on the registry */
    if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, name->jobid))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&tokens, &num_tokens,
                                                          (orte_process_name_t*)name))) {
        free(segment);
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    keys[0] = ORTE_PROC_LOCAL_PID_KEY;
    keys[1] = NULL;
    rc = orte_gpr.get(
        ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
        segment,
        tokens,
        keys,
        &num_values,
        &values
        );
    if (rc != ORTE_SUCCESS) {
        /* nothing was returned - only the lookup names need releasing */
        goto release_names;
    }

    /* exactly one value holding exactly one keyval is expected */
    if (1 != num_values || values[0]->cnt != 1) {
        rc = ORTE_ERR_NOT_FOUND;
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, values[0]->keyvals[0]->value, ORTE_PID))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }
    *pid = *pptr;

cleanup:
    if (NULL != values) {
        for (i = 0; i < num_values; i++) {
            if (NULL != values[i]) {
                OBJ_RELEASE(values[i]);
            }
        }
        free(values);
    }

release_names:
    /* the token strings and their array were handed to us by get_proc_tokens;
     * the original code never released them */
    if (NULL != tokens) {
        for (i = 0; i < num_tokens; i++) {
            free(tokens[i]);
        }
        free(tokens);
    }
    free(segment);
    return rc;
}
/**
 * Retrieve all process pids for the specified job.
 *
 * @param jobid     job whose segment is searched for ORTE_PROC_PID_KEY
 * @param pids      receives a malloc'ed array the caller must free, or NULL
 *                  when no value was found or an error occurred after the
 *                  registry query
 * @param num_pids  receives the number of entries in *pids
 * @param attrs     not used by this function
 * @return ORTE_SUCCESS, ORTE_ERR_OUT_OF_RESOURCE, or the error code of the
 *         failing registry/schema/dss call
 *
 * NOTE(review): orte_pls_bproc_set_proc_pid stores the pid under
 * ORTE_PROC_LOCAL_PID_KEY while this query uses ORTE_PROC_PID_KEY - confirm
 * that the two keys are meant to differ.
 */
int orte_pls_bproc_get_proc_pids(orte_jobid_t jobid, pid_t **pids, orte_std_cntr_t* num_pids, opal_list_t *attrs)
{
    char *segment;
    char *keys[2];
    orte_gpr_value_t** values = NULL;
    orte_std_cntr_t i, num_values = 0;
    pid_t *pptr;
    pid_t *found = NULL;
    int rc;

    /* query the job segment on the registry */
    if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    keys[0] = ORTE_PROC_PID_KEY;
    keys[1] = NULL;
    rc = orte_gpr.get(
        ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
        segment,
        NULL,
        keys,
        &num_values,
        &values
        );
    if (rc != ORTE_SUCCESS) {
        free(segment);
        return rc;
    }

    /* start from a consistent "nothing found" result so that an error below
     * never leaves a half-filled array behind */
    *pids = NULL;
    *num_pids = 0;

    if (0 < num_values) {
        found = (pid_t*)malloc(sizeof(pid_t) * num_values);
        if (NULL == found) {
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        for (i = 0; i < num_values; i++) {
            if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, values[i]->keyvals[0]->value, ORTE_PID))) {
                ORTE_ERROR_LOG(rc);
                free(found);
                goto cleanup;
            }
            found[i] = *pptr;
        }
    }
    *pids = found;
    *num_pids = num_values;

cleanup:
    if (NULL != values) {
        for (i = 0; i < num_values; i++) {
            if (NULL != values[i]) {
                OBJ_RELEASE(values[i]);
            }
        }
        free(values);
    }
    free(segment);
    return rc;
}
/**
 * Add a key-value to the node segment containing the process pid for
 * the daemons. The key is ORTE_PROC_PID_KEY followed by '-' and the jobid
 * string.
 *
 * @param cellid     cell of the node, used to build the node tokens
 * @param node_name  name of the node, used to build the node tokens
 * @param jobid      job the daemon belongs to (becomes part of the key)
 * @param pid        daemon pid to store
 * @return ORTE_SUCCESS, ORTE_ERR_OUT_OF_RESOURCE, or the error code of the
 *         failing registry/schema/ns call
 */
int orte_pls_bproc_set_node_pid(orte_cellid_t cellid, char* node_name, orte_jobid_t jobid, pid_t pid)
{
    orte_gpr_value_t *values[1];
    char *jobid_string;
    char *key = NULL;
    int rc;

    if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&values[0],
                                                    ORTE_GPR_OVERWRITE,
                                                    ORTE_NODE_SEGMENT,
                                                    1, 0))) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    if (ORTE_SUCCESS != (rc = orte_schema.get_node_tokens(&(values[0]->tokens),
                                                          &(values[0]->num_tokens),
                                                          cellid, node_name))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(values[0]);
        return rc;
    }

    if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, jobid))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(values[0]);
        return rc;
    }

    /* asprintf leaves the output pointer undefined on failure, so it must be
     * checked before the key is used or freed */
    if (0 > asprintf(&key, "%s-%s", ORTE_PROC_PID_KEY, jobid_string)) {
        free(jobid_string);
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        OBJ_RELEASE(values[0]);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    free(jobid_string);

    rc = orte_gpr.create_keyval(&(values[0]->keyvals[0]), key, ORTE_PID, &pid);
    free(key);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(values[0]);
        return rc;
    }

    rc = orte_gpr.put(1, values);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }
    OBJ_RELEASE(values[0]);
    return rc;
}
/**
 * Retrieve all daemon pids for the specified job.
 *
 * The node segment is searched for the key written by
 * orte_pls_bproc_set_node_pid (ORTE_PROC_PID_KEY, '-', jobid string).
 *
 * @param jobid     job whose daemon pids are wanted
 * @param pids      receives a malloc'ed array the caller must free, or NULL
 *                  when no value was found or an error occurred after the
 *                  registry query
 * @param num_pids  receives the number of entries in *pids
 * @return ORTE_SUCCESS, ORTE_ERR_OUT_OF_RESOURCE, or the error code of the
 *         failing registry/ns/dss call
 */
int orte_pls_bproc_get_node_pids(orte_jobid_t jobid, pid_t **pids, orte_std_cntr_t* num_pids)
{
    char *keys[2] = { NULL, NULL };
    orte_gpr_value_t** values = NULL;
    orte_std_cntr_t i, num_values = 0;
    int rc;
    char *jobid_string;
    pid_t *pptr;
    pid_t *found = NULL;

    /* nothing has been allocated yet, so a failure here simply returns (the
     * original jumped to cleanup and freed an uninitialized keys[0]) */
    if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, jobid))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    if (0 > asprintf(&keys[0], "%s-%s", ORTE_PROC_PID_KEY, jobid_string)) {
        free(jobid_string);
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    free(jobid_string);

    rc = orte_gpr.get(
        ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
        ORTE_NODE_SEGMENT,
        NULL,
        keys,
        &num_values,
        &values
        );
    if (rc != ORTE_SUCCESS) {
        free(keys[0]);
        return rc;
    }

    /* start from a consistent "nothing found" result so that an error below
     * never leaves a half-filled array behind */
    *pids = NULL;
    *num_pids = 0;

    if (0 < num_values) {
        found = (pid_t*)malloc(sizeof(pid_t) * num_values);
        if (NULL == found) {
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        for (i = 0; i < num_values; i++) {
            if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, values[i]->keyvals[0]->value, ORTE_PID))) {
                ORTE_ERROR_LOG(rc);
                free(found);
                goto cleanup;
            }
            /* index the returned array, not the caller's pointer-to-pointer
             * (the original wrote through pids[i]) */
            found[i] = *pptr;
        }
    }
    *pids = found;
    *num_pids = num_values;

cleanup:
    if (NULL != values) {
        for (i = 0; i < num_values; i++) {
            if (NULL != values[i]) {
                OBJ_RELEASE(values[i]);
            }
        }
        free(values);
    }
    free(keys[0]);
    return rc;
}
/*
* FUNCTIONS FOR DEALING WITH ABNORMAL TERMINATION OF BPROC
* APPLICATION PROCESSES
*/
/**
 * Post the persistent, non-blocking receive for ORTE_RML_TAG_BPROC_ABORT
 * messages (handled by orte_pls_bproc_recv). Calling this again while the
 * receive is posted is a no-op.
 *
 * @return ORTE_SUCCESS, or the error code from posting the receive
 */
int orte_pls_bproc_comm_start(void)
{
    int rc;

    if (mca_pls_bproc_component.recv_issued) {
        return ORTE_SUCCESS;
    }

    rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                 ORTE_RML_TAG_BPROC_ABORT,
                                 ORTE_RML_PERSISTENT,
                                 orte_pls_bproc_recv,
                                 NULL);
    if (ORTE_SUCCESS != rc) {
        /* leave recv_issued false so that a later call can try again instead
         * of silently reporting success without a posted receive */
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    mca_pls_bproc_component.recv_issued = true;
    return rc;
}
/**
 * Cancel the receive posted by orte_pls_bproc_comm_start. Does nothing when
 * no receive is recorded as posted. The recv_issued flag is cleared whether
 * or not the cancel succeeds.
 *
 * @return ORTE_SUCCESS, or the error code from cancelling the receive
 */
int orte_pls_bproc_comm_stop(void)
{
    int cancel_rc;

    if (false == mca_pls_bproc_component.recv_issued) {
        return ORTE_SUCCESS;
    }

    cancel_rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_BPROC_ABORT);
    if (ORTE_SUCCESS != cancel_rc) {
        ORTE_ERROR_LOG(cancel_rc);
    }

    mca_pls_bproc_component.recv_issued = false;
    return cancel_rc;
}
/*
 * Callback for messages arriving on ORTE_RML_TAG_BPROC_ABORT: the sender is
 * marked as ABORTED; the message contents are ignored.
 * NOTE: The incoming buffer "buffer" is OBJ_RELEASED by the calling program.
 * DO NOT RELEASE THIS BUFFER IN THIS CODE
 */
void orte_pls_bproc_recv(int status, orte_process_name_t* sender,
                         orte_buffer_t* buffer, orte_rml_tag_t tag,
                         void* cbdata)
{
    /* the payload is irrelevant - receiving the message is the signal */
    int state_rc = orte_smr.set_proc_state(sender, ORTE_PROC_STATE_ABORTED, 0);

    if (ORTE_SUCCESS != state_rc) {
        ORTE_ERROR_LOG(state_rc);
    }
}

460
orte/mca/plm/bproc/smr_bproc.c Обычный файл
Просмотреть файл

@ -0,0 +1,460 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include <pwd.h>
#include <grp.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/util/output.h"
#include "opal/class/opal_list.h"
#include "orte/util/proc_info.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/gpr/base/base.h"
#include "orte/mca/schema/schema_types.h"
#include "orte/mca/smr/base/smr_private.h"
#include "orte/mca/smr/bproc/smr_bproc.h"
/* Set of BIT_NODE_* flags describing which attributes of a node changed. */
typedef unsigned int bit_set;

/*
 * Mask with only the given bit set. The shift is performed on a bit_set
 * (unsigned) operand so that the top bit can be addressed without shifting
 * into the sign bit of an int, and the expansion is fully parenthesized.
 */
#define BIT_MASK(bit) ((bit_set)1 << (bit))
#define EMPTY_SET ((bit_set)0)

/* Bit positions, one per node attribute tracked in the registry. */
#define BIT_NODE_NAME 0
#define BIT_NODE_STATE 1
#define BIT_NODE_BPROC_STATUS 2
#define BIT_NODE_BPROC_MODE 3
#define BIT_NODE_BPROC_USER 4
#define BIT_NODE_BPROC_GROUP 5

/* Every attribute changed - used when there is no earlier snapshot. */
#define BIT_SET_ALL ( BIT_MASK(BIT_NODE_NAME) \
                    | BIT_MASK(BIT_NODE_STATE) \
                    | BIT_MASK(BIT_NODE_BPROC_STATUS) \
                    | BIT_MASK(BIT_NODE_BPROC_MODE) \
                    | BIT_MASK(BIT_NODE_BPROC_USER) \
                    | BIT_MASK(BIT_NODE_BPROC_GROUP))
/* Trackers (orte_smr_node_state_tracker_t) of the nodes being monitored. */
static opal_list_t active_node_list;

/* Becomes true once orte_smr_bproc_module_init has constructed the list. */
static bool initialized = false;
/* Turn on the given bit in *set. */
static inline void set_bit(bit_set *set, int bit)
{
    *set = *set | BIT_MASK(bit);
}
/* Return 1 when the given bit is on in set, 0 otherwise. */
static inline int is_set(bit_set set, int bit)
{
    const bit_set mask = BIT_MASK(bit);

    return (set & mask) == mask;
}
/*
 * Count the bits that are on in set.
 *
 * Each iteration clears the lowest bit that is still on, so the loop runs
 * once per set bit. Unlike probing every position with BIT_MASK, this never
 * shifts into the top bit and does not assume 8 bits per byte.
 */
static inline int num_bits(bit_set set)
{
    int cnt = 0;

    while (set != 0) {
        set &= set - 1;   /* drop the lowest set bit */
        cnt++;
    }
    return cnt;
}
/* Return 1 when no bit is on in set, 0 otherwise. */
static inline int empty_set(bit_set set)
{
    return EMPTY_SET == set;
}
/**
 * Translate a BProc status string into an ORTE node state:
 * "up" -> UP, "down" -> DOWN, "boot" -> REBOOT, anything else -> UNKNOWN.
 */
static int orte_smr_bproc_node_state(char *status)
{
    int node_state = ORTE_NODE_STATE_UNKNOWN;

    if (0 == strcmp(status, "up")) {
        node_state = ORTE_NODE_STATE_UP;
    } else if (0 == strcmp(status, "down")) {
        node_state = ORTE_NODE_STATE_DOWN;
    } else if (0 == strcmp(status, "boot")) {
        node_state = ORTE_NODE_STATE_REBOOT;
    }
    return node_state;
}
/*
 * Compare two snapshots of one node and return the set of BIT_NODE_* flags
 * for the attributes that differ. The ORTE state derived from the status
 * string and the raw status string are tracked separately.
 */
static bit_set find_changes(struct bproc_node_info_t *old, struct bproc_node_info_t *new)
{
    bit_set delta = EMPTY_SET;

    if (old->node != new->node) {
        set_bit(&delta, BIT_NODE_NAME);
    }
    if (orte_smr_bproc_node_state(old->status) != orte_smr_bproc_node_state(new->status)) {
        set_bit(&delta, BIT_NODE_STATE);
    }
    if (0 != strcmp(old->status, new->status)) {
        set_bit(&delta, BIT_NODE_BPROC_STATUS);
    }
    if (old->mode != new->mode) {
        set_bit(&delta, BIT_NODE_BPROC_MODE);
    }
    if (old->user != new->user) {
        set_bit(&delta, BIT_NODE_BPROC_USER);
    }
    if (old->group != new->group) {
        set_bit(&delta, BIT_NODE_BPROC_GROUP);
    }
    return delta;
}
/**
 * Process a BProc update notice for one node.
 *
 * Writes one keyval per changed attribute into ORTE_BPROC_NODE_SEGMENT under
 * the node's tokens. When monitoring is enabled and the node state changed,
 * the node is looked up in active_node_list; a transition to DOWN or REBOOT
 * increments ORTE_BPROC_NODE_ALERT_CNTR in the ORTE_BPROC_NODE_GLOBALS
 * container and the tracker's recorded state is updated.
 *
 * @param changes  BIT_NODE_* flags naming the attributes to write
 * @param ni       current BProc information for the node
 */
static void update_registry(bit_set changes, struct bproc_node_info_t *ni)
{
    int idx;
    int ret;
    int cnt;
    /* only read when BIT_NODE_STATE is set, in which case it is assigned below */
    orte_node_state_t state = ORTE_NODE_STATE_UNKNOWN;
    char *node_name;
    char *user;
    char *group;
    struct passwd *pwd;
    struct group *grp;
    orte_gpr_value_t *value;
    int rc;
    orte_smr_node_state_tracker_t *node;
    opal_list_item_t *item;

    cnt = num_bits(changes);

    /*
     * Check if there's anything to do
     */
    if (cnt == 0)
        return;

    /* check and update the general cluster status segment - this segment has entries
     * for every node in the cluster, not just the ones we want to monitor
     */
    if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_AND,
                                                    ORTE_BPROC_NODE_SEGMENT, cnt, 0))) {
        ORTE_ERROR_LOG(rc);
        return;
    }

    /* one keyval per changed attribute; idx counts how many were filled in */
    idx = 0;
    if (is_set(changes, BIT_NODE_STATE)) {
        state = orte_smr_bproc_node_state(ni->status);
        if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[idx]), ORTE_NODE_STATE_KEY, ORTE_NODE_STATE, &state))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(value);
            return;
        }
        idx++;
    }
    if (is_set(changes, BIT_NODE_BPROC_STATUS)) {
        if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[idx]), ORTE_SMR_BPROC_NODE_STATUS, ORTE_STRING, ni->status))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(value);
            return;
        }
        idx++;
    }
    if (is_set(changes, BIT_NODE_BPROC_MODE)) {
        if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[idx]), ORTE_SMR_BPROC_NODE_MODE, ORTE_UINT32, &(ni->mode)))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(value);
            return;
        }
        idx++;
    }
    if (is_set(changes, BIT_NODE_BPROC_USER)) {
        /* store the user name when it resolves, the numeric id otherwise */
        if ((pwd = getpwuid(ni->user)))
            user = strdup(pwd->pw_name);
        else
            asprintf(&user, "%d\n", ni->user);
        if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[idx]), ORTE_SMR_BPROC_NODE_USER, ORTE_STRING, user))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(value);
            free(user);
            return;
        }
        free(user);
        idx++;
    }
    if (is_set(changes, BIT_NODE_BPROC_GROUP)) {
        /* store the group name when it resolves, the numeric id otherwise */
        if ((grp = getgrgid(ni->group)))
            group = strdup(grp->gr_name);
        else
            asprintf(&group, "%d\n", ni->group);
        if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[idx]), ORTE_SMR_BPROC_NODE_GROUP, ORTE_STRING, group))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(value);
            free(group);
            return;
        }
        free(group);
        idx++;
    }

    /* the node name is needed from here on (tokens, monitoring lookup), so it
     * is built even when BIT_NODE_NAME is not set */
    asprintf(&node_name, "%d", ni->node);
    if (is_set(changes, BIT_NODE_NAME)) {
        if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[idx]), ORTE_NODE_NAME_KEY, ORTE_STRING, node_name))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(value);
            free(node_name);
            return;
        }
        idx++;
    }

    if (idx != cnt) {
        opal_output(0, "smr_bproc: internal error %d != %d\n", idx, cnt);
        free(node_name);
        OBJ_RELEASE(value);
        opal_event_del(&mca_smr_bproc_component.notify_event);
        return;
    }

    ret = orte_schema.get_node_tokens(&(value->tokens), &(value->num_tokens),
                                      ORTE_PROC_MY_NAME->cellid, node_name);
    if (ret != ORTE_SUCCESS) {
        ORTE_ERROR_LOG(ret);
        OBJ_RELEASE(value);
        free(node_name);
        opal_event_del(&mca_smr_bproc_component.notify_event);
        return;
    }

    if (mca_smr_bproc_component.debug)
        opal_output(0, "updating node %d to segment %s\n", ni->node, value->segment);

    if ((ret = orte_gpr.put(1, &value)) != ORTE_SUCCESS) {
        ORTE_ERROR_LOG(ret);
        opal_event_del(&mca_smr_bproc_component.notify_event);
    }
    OBJ_RELEASE(value);

    /* now let's see if this is one of the nodes we are monitoring and
     * update it IFF the state changed to specified conditions. This
     * action will trigger a callback to the right place to decide what
     * to do about it
     */
    if (mca_smr_bproc_component.monitoring &&
        is_set(changes, BIT_NODE_STATE)) {
        /* see if this is a node we are monitoring */
        for (item = opal_list_get_first(&active_node_list);
             item != opal_list_get_end(&active_node_list);
             item = opal_list_get_next(item)) {
            node = (orte_smr_node_state_tracker_t*)item;
            if (0 == strcmp(node->nodename, node_name)) {
                /* This is a node we are monitoring. If this is a state we care about,
                 * and the state has changed (so we only do this once) - trip the alert monitor
                 */
                if (state != node->state &&
                    (state == ORTE_NODE_STATE_DOWN || state == ORTE_NODE_STATE_REBOOT)) {
                    /* request one token slot: tokens[0] is assigned right
                     * below and needs storage (the original asked for 0) */
                    if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_AND,
                                                                    ORTE_BPROC_NODE_SEGMENT, 1, 1))) {
                        ORTE_ERROR_LOG(rc);
                        free(node_name);   /* was leaked on this path */
                        return;
                    }
                    value->tokens[0] = strdup(ORTE_BPROC_NODE_GLOBALS);
                    if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]),
                                                                     ORTE_BPROC_NODE_ALERT_CNTR,
                                                                     ORTE_UNDEF, NULL))) {
                        ORTE_ERROR_LOG(rc);
                        OBJ_RELEASE(value);
                        free(node_name);   /* was leaked on this path */
                        return;
                    }
                    if ((rc = orte_gpr.increment_value(value)) != ORTE_SUCCESS) {
                        ORTE_ERROR_LOG(rc);
                        opal_event_del(&mca_smr_bproc_component.notify_event);
                    }
                    OBJ_RELEASE(value);
                }
                /* update our local records */
                node->state = state;
                /* cleanup and return - no need to keep searching */
                free(node_name);
                return;
            }
        }
    }

    /* if this isn't someone we are monitoring, or it doesn't meet specified conditions,
     * then just cleanup and leave
     */
    free(node_name);
}
/*
 * Push a fresh node set into the registry.
 *
 * Each node is compared with the stored snapshot when one of the same size
 * exists; otherwise every attribute is treated as changed. When at least one
 * node changed, the stored snapshot is freed (if any) and replaced by *ns,
 * so the caller must not free *ns in that case.
 *
 * @return 1 when something changed (and *ns was kept), 0 otherwise
 */
static int do_update(struct bproc_node_set_t *ns)
{
    /* we assume the number of nodes does not change */
    const int have_baseline = (mca_smr_bproc_component.node_set.size > 0
                               && mca_smr_bproc_component.node_set.size == ns->size);
    int any_change = 0;
    int idx;

    for (idx = 0; idx < ns->size; idx++) {
        struct bproc_node_info_t *current = &ns->node[idx];
        bit_set delta;

        if (have_baseline) {
            delta = find_changes(&mca_smr_bproc_component.node_set.node[idx], current);
        } else {
            delta = BIT_SET_ALL;
        }

        if (empty_set(delta)) {
            continue;
        }
        update_registry(delta, current);
        any_change = 1;
    }

    if (any_change) {
        if (0 != mca_smr_bproc_component.node_set.size) {
            bproc_nodeset_free(&mca_smr_bproc_component.node_set);
        }
        /* the component now owns the new snapshot */
        mca_smr_bproc_component.node_set = *ns;
    }
    return any_change;
}
static void orte_smr_bproc_notify_handler(int fd, short flags, void *user)
{
struct bproc_node_set_t ns = BPROC_EMPTY_NODESET;
if (bproc_nodelist_(&ns, fd) < 0) {
/* bproc_nodelist_ error */
opal_event_del(&mca_smr_bproc_component.notify_event);
return;
}
if (!do_update(&ns))
bproc_nodeset_free(&ns);
}
/**
 * Prepare the module's local state: mark the stored node set as empty and
 * construct the list of monitored nodes.
 *
 * @return ORTE_SUCCESS (always)
 */
static int orte_smr_bproc_module_init(void)
{
    if (mca_smr_bproc_component.debug) {
        opal_output(0, "init smr_bproc_module\n");
    }

    /* size 0 tells do_update that there is no earlier snapshot */
    mca_smr_bproc_component.node_set.size = 0;

    /* construct the monitored node list so we can track who is being monitored */
    OBJ_CONSTRUCT(&active_node_list, opal_list_t);

    return ORTE_SUCCESS;
}
/*
 * Setup to begin monitoring a job.
 *
 * Adds a tracker for every node of the job map to the local list, defines
 * the alert monitor that invokes cbfunc, publishes the current BProc node
 * list and finally registers a persistent read event on the BProc notifier
 * descriptor.
 *
 * Returns ORTE_SUCCESS, or ORTE_ERROR when the node list or the notifier
 * descriptor cannot be obtained.
 *
 * NOTE(review): nothing visible in this file sets
 * mca_smr_bproc_component.monitoring to true, yet update_registry only raises
 * alerts when that flag is set - confirm where it is enabled.
 */
int orte_smr_bproc_begin_monitoring(orte_job_map_t *map, orte_gpr_trigger_cb_fn_t cbfunc, void *user_tag)
{
    struct bproc_node_set_t ns = BPROC_EMPTY_NODESET;
    opal_list_item_t *cursor;

    /* if our internal structures haven't been initialized, then
     * set them up
     */
    if (!initialized) {
        orte_smr_bproc_module_init();
        initialized = true;
    }

    /* setup the local monitoring list: one tracker per mapped node */
    cursor = opal_list_get_first(&map->nodes);
    while (cursor != opal_list_get_end(&map->nodes)) {
        orte_mapped_node_t *mapped = (orte_mapped_node_t*)cursor;
        orte_smr_node_state_tracker_t *tracker = OBJ_NEW(orte_smr_node_state_tracker_t);

        tracker->cell = mapped->cell;
        tracker->nodename = strdup(mapped->nodename);
        opal_list_append(&active_node_list, &tracker->super);

        cursor = opal_list_get_next(cursor);
    }

    /* define the alert monitor to call the cbfunc if we trigger the alert */
    orte_smr.define_alert_monitor(map->job, ORTE_BPROC_NODE_ALERT_TRIG,
                                  ORTE_BPROC_NODE_ALERT_CNTR,
                                  0, 1, true, cbfunc, user_tag);

    /*
     * Set initial node status for all nodes in the local cell. We will
     * receive reports from them all, but we will only provide alerts
     * on those we are actively monitoring
     */
    if (bproc_nodelist(&ns) < 0) {
        return ORTE_ERROR;
    }
    if (0 == do_update(&ns)) {
        /* nothing changed, so do_update did not keep the node set */
        bproc_nodeset_free(&ns);
    }

    /*
     * Now register notify event
     */
    mca_smr_bproc_component.notify_fd = bproc_notifier();
    if (mca_smr_bproc_component.notify_fd < 0) {
        return ORTE_ERROR;
    }

    memset(&mca_smr_bproc_component.notify_event, 0, sizeof(opal_event_t));
    opal_event_set(&mca_smr_bproc_component.notify_event,
                   mca_smr_bproc_component.notify_fd,
                   OPAL_EV_READ|OPAL_EV_PERSIST,
                   orte_smr_bproc_notify_handler,
                   0);
    opal_event_add(&mca_smr_bproc_component.notify_event, 0);

    return ORTE_SUCCESS;
}
/**
* Cleanup
*/
int orte_smr_bproc_finalize(void)
{
opal_event_del(&mca_smr_bproc_component.notify_event);
return ORTE_SUCCESS;
}

134
orte/mca/plm/bproc/smr_bproc_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,134 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/proc_info.h"
#include "opal/util/output.h"
#include "orte/mca/smr/base/smr_private.h"
#include "orte/mca/smr/bproc/smr_bproc.h"
/*
 * Component entry points, local to this file. They are published through
 * the mca_smr_bproc_component initializer.
 */
static int orte_smr_bproc_open(void);
static orte_smr_base_module_t* orte_smr_bproc_init(int *priority);
static int orte_smr_bproc_close(void);
/*
 * The bproc smr component descriptor. The initializer is positional: MCA
 * base record first, then the component meta data, then the component init
 * function. The members debug, priority and monitoring are not initialized
 * here; orte_smr_bproc_open assigns them.
 */
orte_smr_bproc_component_t mca_smr_bproc_component = {
{
/* First, the mca_base_module_t struct containing meta
information about the module itself */
{
/* Indicate that we are a bproc smr v1.3.0 module (which also
implies a specific MCA version) */
ORTE_SMR_BASE_VERSION_1_3_0,
"bproc", /* MCA module name */
ORTE_MAJOR_VERSION, /* MCA module major version */
ORTE_MINOR_VERSION, /* MCA module minor version */
ORTE_RELEASE_VERSION, /* MCA module release version */
orte_smr_bproc_open, /* component open */
orte_smr_bproc_close /* component close */
},
/* Next the MCA v1.0.0 module meta data */
{
/* Whether the module is checkpointable or not */
false
},
/* Component init: returns the module and its priority */
orte_smr_bproc_init
}
};
/*
 * The bproc smr module function table, returned by orte_smr_bproc_init.
 * The initializer is positional, so the order of the entries must match the
 * member order of orte_smr_base_module_t (declared elsewhere). Only
 * begin_monitoring and finalize are bproc-specific; every other slot uses
 * the orte_smr_base_* implementation.
 */
orte_smr_base_module_t orte_smr_bproc_module = {
orte_smr_base_get_proc_state,
orte_smr_base_set_proc_state,
orte_smr_base_get_node_state,
orte_smr_base_set_node_state,
orte_smr_base_get_job_state,
orte_smr_base_set_job_state,
/* bproc-specific: sets up node tracking and the BProc notifier event */
orte_smr_bproc_begin_monitoring,
orte_smr_base_init_job_stage_gates,
orte_smr_base_init_orted_stage_gates,
orte_smr_base_define_alert_monitor,
orte_smr_base_job_stage_gate_subscribe,
/* bproc-specific: removes the BProc notifier event */
orte_smr_bproc_finalize
};
/**
 * Utility function to register an integer "smr_bproc" MCA parameter and
 * look up its current value.
 *
 * @param param_name     name of the parameter
 * @param default_value  value registered as the default; also the value the
 *                       result starts from before the lookup
 * @return the value produced by the lookup
 */
static int orte_smr_bproc_param_register_int(
    const char* param_name,
    int default_value)
{
    int value = default_value;
    int index = mca_base_param_register_int("smr", "bproc", param_name, NULL, default_value);

    mca_base_param_lookup_int(index, &value);
    return value;
}
/**
 * Component open: read the "debug" (default 0) and "priority" (default 1)
 * parameters into the component and start with monitoring switched off.
 *
 * @return ORTE_SUCCESS (always)
 */
static int orte_smr_bproc_open(void)
{
    int debug_level = orte_smr_bproc_param_register_int("debug", 0);
    int select_priority = orte_smr_bproc_param_register_int("priority", 1);

    mca_smr_bproc_component.debug = debug_level;
    mca_smr_bproc_component.priority = select_priority;
    mca_smr_bproc_component.monitoring = false;

    return ORTE_SUCCESS;
}
/**
 * Component init: offer the bproc smr module only on the seed process.
 *
 * @param priority  receives the configured component priority on success
 * @return the bproc smr module, or NULL when this process is not the seed
 */
static orte_smr_base_module_t* orte_smr_bproc_init(int *priority)
{
    if (orte_process_info.seed) {
        *priority = mca_smr_bproc_component.priority;
        return &orte_smr_bproc_module;
    }
    return NULL;
}
/**
 * Component close: nothing to release at component level.
 *
 * @return ORTE_SUCCESS (always)
 */
static int orte_smr_bproc_close(void)
{
    const int status = ORTE_SUCCESS;

    return status;
}

0
orte/mca/ras/bjs/.ompi_ignore Обычный файл
Просмотреть файл

1
orte/mca/ras/bjs/.ompi_unignore Обычный файл
Просмотреть файл

@ -0,0 +1 @@
rhc

54
orte/mca/ras/bjs/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,54 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Use the top-level Makefile.options
# NOTE(review): nothing below includes Makefile.options, so the line above
# may be stale - confirm.
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
# Exactly one of the two variables below is non-empty, depending on the
# OMPI_BUILD_ras_bjs_DSO conditional.
if OMPI_BUILD_ras_bjs_DSO
component_noinst =
component_install = mca_ras_bjs.la
else
component_noinst = libmca_ras_bjs.la
component_install =
endif
# Preprocessor flags substituted by the component's configure.m4
AM_CPPFLAGS= $(ras_bjs_CPPFLAGS)
# Source list shared by the DSO and the static library below
proxy_SOURCES = \
ras_bjs.c \
ras_bjs.h \
ras_bjs_component.c
# DSO build: installed into $(libdir)/openmpi and linked against the ORTE and
# OPAL libraries
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_ras_bjs_la_SOURCES = $(proxy_SOURCES)
mca_ras_bjs_la_LIBADD = \
$(ras_bjs_LIBS) \
$(top_ompi_builddir)/orte/libopen-rte.la \
$(top_ompi_builddir)/opal/libopen-pal.la
mca_ras_bjs_la_LDFLAGS = -module -avoid-version $(ras_bjs_LDFLAGS)
# Static build: convenience library that is not installed
noinst_LTLIBRARIES = $(component_noinst)
libmca_ras_bjs_la_SOURCES = $(proxy_SOURCES)
libmca_ras_bjs_la_LIBADD = $(ras_bjs_LIBS)
libmca_ras_bjs_la_LDFLAGS = -module -avoid-version $(ras_bjs_LDFLAGS)

38
orte/mca/ras/bjs/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,38 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_ras_bjs_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
# Runs OMPI_CHECK_BPROC with the ras_bjs prefix. Both the new-BProc and the
# old-BProc outcome set ras_bjs_good=1, so either flavor runs
# action-if-found; only the not-found outcome runs action-if-not-found.
AC_DEFUN([MCA_ras_bjs_CONFIG],[
OMPI_CHECK_BPROC([ras_bjs], [ras_bjs_good=1], [ras_bjs_good=1],
[ras_bjs_good=0])
# If the check worked then copy the flags into the wrapper variables
# and run the success action. Otherwise run the failure action.
AS_IF([test "$ras_bjs_good" = "1"],
[ras_bjs_WRAPPER_EXTRA_LDFLAGS="$ras_bjs_LDFLAGS"
ras_bjs_WRAPPER_EXTRA_LIBS="$ras_bjs_LIBS"
$1],
[$2])
# set build flags to use in makefile
AC_SUBST([ras_bjs_CPPFLAGS])
AC_SUBST([ras_bjs_LDFLAGS])
AC_SUBST([ras_bjs_LIBS])
])dnl

24
orte/mca/ras/bjs/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,24 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module
# Presumably the list of files configure generates for this component
# (here only the Makefile) - confirm against the build system.
PARAM_CONFIG_FILES="Makefile"

291
orte/mca/ras/bjs/ras_bjs.c Обычный файл
Просмотреть файл

@ -0,0 +1,291 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <sys/bproc.h>
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "orte/dss/dss.h"
#include "orte/mca/rmgr/rmgr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/base/ras_private.h"
#include "ras_bjs.h"
/**
 * Query the bproc node status and translate it into an ORTE node state.
 *
 * With BPROC_API_VERSION >= 4 the status is a string ("up", "down", "boot");
 * otherwise it is one of the bproc_node_* enumerators. Anything else is
 * reported as ORTE_NODE_STATE_UNKNOWN.
 *
 * @param node  BProc node number
 * @return one of the ORTE_NODE_STATE_* values
 */
static int orte_ras_bjs_node_state(int node)
{
#if defined BPROC_API_VERSION && BPROC_API_VERSION >= 4
    /* start from an empty string so that a query which fails without writing
     * the buffer is reported as UNKNOWN instead of comparing indeterminate
     * bytes */
    char nodestatus[BPROC_STATE_LEN + 1] = "";

    bproc_nodestatus(node, nodestatus, sizeof(nodestatus));
    /* guarantee termination before the string comparisons */
    nodestatus[sizeof(nodestatus) - 1] = '\0';

    if (strcmp(nodestatus, "up") == 0)
        return ORTE_NODE_STATE_UP;
    if (strcmp(nodestatus, "down") == 0)
        return ORTE_NODE_STATE_DOWN;
    if (strcmp(nodestatus, "boot") == 0)
        return ORTE_NODE_STATE_REBOOT;
    return ORTE_NODE_STATE_UNKNOWN;
#else
    switch(bproc_nodestatus(node)) {
    case bproc_node_up:
        return ORTE_NODE_STATE_UP;
    case bproc_node_down:
        return ORTE_NODE_STATE_DOWN;
    case bproc_node_boot:
        return ORTE_NODE_STATE_REBOOT;
    default:
        return ORTE_NODE_STATE_UNKNOWN;
    }
#endif
}
/**
 * Count how often a node name occurs in the comma-separated NODELIST
 * environment variable; that count is used as the number of process slots
 * on the node. The split list is cached in a function-local static once it
 * is non-NULL.
 *
 * NOTE(review): orte_ras_bjs_discover reads the NODES variable while this
 * function reads NODELIST - confirm that both names are intended.
 *
 * @param node_name  name to look for
 * @return number of matching entries (0 when the list is unavailable)
 */
static size_t orte_ras_bjs_node_slots(char* node_name)
{
    static char** nodelist = NULL;
    size_t matches = 0;
    char** entry;

    if (NULL == nodelist) {
        nodelist = opal_argv_split(getenv("NODELIST"), ',');
    }

    for (entry = nodelist; NULL != entry && NULL != *entry; entry++) {
        if (0 == strcmp(*entry, node_name)) {
            matches++;
        }
    }
    return matches;
}
/**
 * Resolve the node name to node number. For now the name is expected to
 * start with the decimal node number.
 *
 * @param node_name  node name, may be NULL
 * @param node_num   receives the parsed number on success
 * @return ORTE_SUCCESS when a number was parsed, ORTE_ERROR otherwise
 */
static int orte_ras_bjs_node_resolve(char* node_name, int* node_num)
{
    if (NULL != node_name && 1 == sscanf(node_name, "%d", node_num)) {
        return ORTE_SUCCESS;
    }
    return ORTE_ERROR;
}
/**
 * Discover the available resources.
 * - validate any nodes specified via hostfile/commandline
 * - check for additional nodes that have already been allocated
 *
 * Nodes already in the registry are dropped from the list when their name
 * does not resolve to a node number, when they are not up, or when they are
 * not accessible. Nodes named in the NODES environment variable that are not
 * yet in the list are checked the same way (a failure there aborts the
 * discovery), inserted into the registry and appended to the list.
 *
 * @param nodelist     list that receives the usable nodes
 * @param context      not used by this function
 * @param num_context  not used by this function
 * @return ORTE_SUCCESS, ORTE_ERR_NOT_AVAILABLE when NODES is not set,
 *         ORTE_ERR_OUT_OF_RESOURCE, ORTE_ERROR when a node from NODES is
 *         down or inaccessible, or the error code of a failing registry call
 */
static int orte_ras_bjs_discover(
    opal_list_t* nodelist,
    orte_app_context_t** context,
    size_t num_context)
{
    char* nodes_env;
    char* nodes_copy;
    char* cursor;
    char* ptr;
    opal_list_item_t* item;
    opal_list_t new_nodes;
    int rc;

    /* query the nodelist from the registry */
    if (ORTE_SUCCESS != (rc = orte_ras_base_node_query(nodelist))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* validate that any user supplied nodes actually exist, etc. */
    item = opal_list_get_first(nodelist);
    while (item != opal_list_get_end(nodelist)) {
        opal_list_item_t* next = opal_list_get_next(item);
        int node_num;
        orte_ras_node_t* node = (orte_ras_node_t*)item;

        /* the checks run in order and stop at the first failure */
        if (ORTE_SUCCESS != orte_ras_bjs_node_resolve(node->node_name, &node_num) ||
            orte_ras_bjs_node_state(node_num) != ORTE_NODE_STATE_UP ||
            bproc_access(node_num, BPROC_X_OK) != 0) {
            opal_list_remove_item(nodelist, item);
            /* release the removed item; the original only ran its
             * destructor (OBJ_DESTRUCT) */
            OBJ_RELEASE(item);
            item = next;
            continue;
        }

        /* try and determine the number of available slots */
        if (node->node_slots == 0) {
            node->node_slots = orte_ras_bjs_node_slots(node->node_name);
        }
        item = next;
    }

    /* parse the node list and check node status/access */
    nodes_env = getenv("NODES");
    if (NULL == nodes_env) {
        return ORTE_ERR_NOT_AVAILABLE;
    }
    /* strsep writes into the string it splits, so work on a private copy
     * instead of the environment itself */
    nodes_copy = strdup(nodes_env);
    if (NULL == nodes_copy) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    cursor = nodes_copy;

    OBJ_CONSTRUCT(&new_nodes, opal_list_t);
    while (NULL != (ptr = strsep(&cursor, ","))) {
        orte_ras_node_t *node;
        orte_node_state_t node_state;
        int node_num;

        /* is this node already in the list */
        for (item = opal_list_get_first(nodelist);
             item != opal_list_get_end(nodelist);
             item = opal_list_get_next(item)) {
            node = (orte_ras_node_t*)item;
            if (strcmp(node->node_name, ptr) == 0)
                break;
        }
        if (item != opal_list_get_end(nodelist))
            continue;

        /* entries that do not start with a number are skipped */
        if (sscanf(ptr, "%d", &node_num) != 1) {
            continue;
        }

        if (ORTE_NODE_STATE_UP != (node_state = orte_ras_bjs_node_state(node_num))) {
            opal_output(0, "error: a specified node (%d) is not up.\n", node_num);
            rc = ORTE_ERROR;
            goto cleanup;
        }
        if (bproc_access(node_num, BPROC_X_OK) != 0) {
            opal_output(0, "error: a specified node (%d) is not accessible.\n", node_num);
            rc = ORTE_ERROR;
            goto cleanup;
        }

        /* create a new node entry */
        node = OBJ_NEW(orte_ras_node_t);
        node->node_name = strdup(ptr);
        node->node_state = node_state;
        node->node_slots = orte_ras_bjs_node_slots(node->node_name);
        opal_list_append(&new_nodes, &node->super);
    }

    /* add any newly discovered nodes to the registry */
    if (opal_list_get_size(&new_nodes)) {
        rc = orte_ras_base_node_insert(&new_nodes);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
        }
    }

    /* append them to the nodelist */
    while (NULL != (item = opal_list_remove_first(&new_nodes)))
        opal_list_append(nodelist, item);

cleanup:
    /* on the error paths new_nodes may still hold entries; release them so
     * they are not lost when the list itself is destructed */
    while (NULL != (item = opal_list_remove_first(&new_nodes))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&new_nodes);
    free(nodes_copy);
    return rc;
}
/**
 * Discover available (pre-allocated) nodes and allocate them to the job.
 *
 * Fetches the job's application contexts, runs node discovery and hands the
 * resulting node list to orte_ras_base_allocate_nodes(). The node list and
 * the application contexts are released before returning, on success and on
 * failure alike.
 *
 * @param jobid       Job the nodes are allocated to.
 * @param attributes  Not referenced by this function.
 * @return ORTE_SUCCESS, or the first error reported by one of the steps.
 */
static int orte_ras_bjs_allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
    orte_app_context_t **context = NULL;
    orte_std_cntr_t num_context = 0;
    orte_std_cntr_t idx;
    opal_list_item_t *entry;
    opal_list_t nodes;
    int rc;

    OBJ_CONSTRUCT(&nodes, opal_list_t);

    if (ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &context, &num_context))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    rc = orte_ras_bjs_discover(&nodes, context, num_context);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    if (ORTE_SUCCESS != (rc = orte_ras_base_allocate_nodes(jobid, &nodes))) {
        ORTE_ERROR_LOG(rc);
    }

cleanup:
    /* drain the node list, dropping our reference to every entry */
    for (entry = opal_list_remove_first(&nodes);
         NULL != entry;
         entry = opal_list_remove_first(&nodes)) {
        OBJ_RELEASE(entry);
    }
    OBJ_DESTRUCT(&nodes);

    /* num_context stays 0 unless get_app_context filled it in */
    idx = 0;
    while (idx < num_context) {
        OBJ_RELEASE(context[idx]);
        idx++;
    }
    if (NULL != context) {
        free(context);
    }
    return rc;
}
/**
 * Deallocate hook of the BJS module.
 *
 * Performs no work: jobid is not examined and ORTE_SUCCESS is always
 * returned.
 */
static int orte_ras_bjs_deallocate(orte_jobid_t jobid)
{
return ORTE_SUCCESS;
}
/**
 * Finalize hook of the BJS module.
 *
 * Performs no work and always returns ORTE_SUCCESS.
 */
static int orte_ras_bjs_finalize(void)
{
return ORTE_SUCCESS;
}
/**
 * Function table of the BJS RAS module.
 *
 * Only the allocate, deallocate and finalize entries are implemented in this
 * file; the node insert/query/lookup entries point at the shared
 * orte_ras_base_* functions. The initializer is positional, so the order of
 * the entries must follow the member order of orte_ras_base_module_t.
 */
orte_ras_base_module_t orte_ras_bjs_module = {
orte_ras_bjs_allocate,
orte_ras_base_node_insert,
orte_ras_base_node_query,
orte_ras_base_node_query_alloc,
orte_ras_base_node_lookup,
orte_ras_bjs_deallocate,
orte_ras_bjs_finalize
};

50
orte/mca/ras/bjs/ras_bjs.h Обычный файл
Просмотреть файл

@ -0,0 +1,50 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Resource Allocation (BJS over BPROC)
*/
#ifndef ORTE_RAS_BJS_H
#define ORTE_RAS_BJS_H
#include "orte/mca/ras/ras.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
 * RAS BJS component: the generic RAS component followed by the settings
 * that the component's open function reads from MCA parameters.
 */
typedef struct orte_ras_bjs_component_t {
    /** generic RAS component; kept as the first member */
    orte_ras_base_component_t super;
    /** value of the "debug" MCA parameter of ras/bjs */
    int debug;
    /** value of the "priority" MCA parameter of ras/bjs, reported by init */
    int priority;
    /** value of the "schedule_policy" MCA parameter of ras/base */
    char *schedule_policy;
} orte_ras_bjs_component_t;
ORTE_DECLSPEC extern orte_ras_bjs_component_t mca_ras_bjs_component;
ORTE_DECLSPEC extern orte_ras_base_module_t orte_ras_bjs_module;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

131
orte/mca/ras/bjs/ras_bjs_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,131 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/proc_info.h"
#include "opal/util/output.h"
#include "ras_bjs.h"
/*
* Local functions
*/
static int orte_ras_bjs_open(void);
static int orte_ras_bjs_close(void);
static orte_ras_base_module_t* orte_ras_bjs_init(int* priority);
/*
 * Component descriptor for the BJS RAS component.
 *
 * Only the embedded base component is initialized here (version
 * information, the name "bjs", the open/close hooks and the init hook).
 * The debug, priority and schedule_policy members are left
 * zero-initialized and are assigned by orte_ras_bjs_open().
 */
orte_ras_bjs_component_t mca_ras_bjs_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
/* Indicate that we are a ras v1.3.0 component (which also
implies a specific MCA version) */
ORTE_RAS_BASE_VERSION_1_3_0,
"bjs", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
orte_ras_bjs_open, /* component open */
orte_ras_bjs_close /* component close */
},
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
},
orte_ras_bjs_init
}
};
/**
 * Convenience function: register an integer MCA parameter of the ras/bjs
 * component and look up its current value.
 *
 * @param param_name     Name of the parameter below "ras"/"bjs".
 * @param default_value  Value registered as the default; also the value
 *                       returned when the lookup does not store anything.
 * @return The looked-up parameter value.
 */
static int orte_ras_bjs_param_register_int(
    const char* param_name,
    int default_value)
{
    int value = default_value;
    int index = mca_base_param_register_int("ras", "bjs", param_name,
                                            NULL, default_value);

    mca_base_param_lookup_int(index, &value);
    return value;
}
/**
 * Register a string MCA parameter and look up its current value.
 *
 * @param a              First name component, passed through to
 *                       mca_base_param_register_string() (the caller in this
 *                       file passes "ras").
 * @param b              Second name component (the caller passes "base").
 * @param c              Third name component (the caller passes
 *                       "schedule_policy").
 * @param default_value  Default registered for the parameter.
 * @return The pointer stored by mca_base_param_lookup_string(), or NULL when
 *         the lookup does not store one.
 */
static char* orte_ras_bjs_param_register_string(
    const char * a, const char *b, const char *c,
    const char* default_value)
{
    /* start from NULL so that a lookup which stores nothing cannot make
       us return an indeterminate pointer */
    char *param_value = NULL;
    int id = mca_base_param_register_string(a, b, c, NULL, default_value);

    mca_base_param_lookup_string(id, &param_value);
    return param_value;
}
/**
* component open/close/init function
*/
static int orte_ras_bjs_open(void)
{
mca_ras_bjs_component.debug = orte_ras_bjs_param_register_int("debug",1);
mca_ras_bjs_component.priority = orte_ras_bjs_param_register_int("priority",75);
/* JMS To be changed post-beta to LAM's C/N command line notation */
mca_ras_bjs_component.schedule_policy =
orte_ras_bjs_param_register_string("ras", "base", "schedule_policy", "slot");
return ORTE_SUCCESS;
}
/**
 * Component init function: offer the BJS module for selection.
 *
 * @param priority  Receives the component priority when the module is
 *                  offered; left untouched otherwise.
 * @return Pointer to orte_ras_bjs_module when orte_process_info.seed is set,
 *         NULL otherwise.
 */
static orte_ras_base_module_t *orte_ras_bjs_init(int* priority)
{
    /* if we are not an HNP, then we must not be selected */
    if (orte_process_info.seed) {
#if 0
        if(getenv("NODES") == NULL) {
            return NULL;
        }
#endif
        *priority = mca_ras_bjs_component.priority;
        return &orte_ras_bjs_module;
    }
    return NULL;
}
/**
 * Component close function.
 *
 * Performs no work and always returns ORTE_SUCCESS.
 */
static int orte_ras_bjs_close(void)
{
return ORTE_SUCCESS;
}

0
orte/mca/ras/lsf_bproc/.ompi_ignore Обычный файл
Просмотреть файл

1
orte/mca/ras/lsf_bproc/.ompi_unignore Обычный файл
Просмотреть файл

@ -0,0 +1 @@
rhc

54
orte/mca/ras/lsf_bproc/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,54 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Use the top-level Makefile.options
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
# Exactly one of component_install / component_noinst is non-empty,
# depending on whether this component is built as a DSO.
if OMPI_BUILD_ras_lsf_bproc_DSO
component_noinst =
component_install = mca_ras_lsf_bproc.la
else
component_noinst = libmca_ras_lsf_bproc.la
component_install =
endif
# Preprocessor flags substituted by this component's configure.m4.
AM_CPPFLAGS= $(ras_lsf_bproc_CPPFLAGS)
# Source list shared by both library flavours below.
proxy_SOURCES = \
ras_lsf_bproc.c \
ras_lsf_bproc.h \
ras_lsf_bproc_component.c
# DSO flavour: installed into $(libdir)/openmpi.
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_ras_lsf_bproc_la_SOURCES = $(proxy_SOURCES)
mca_ras_lsf_bproc_la_LIBADD = \
$(ras_lsf_bproc_LIBS) \
$(top_ompi_builddir)/orte/libopen-rte.la \
$(top_ompi_builddir)/opal/libopen-pal.la
mca_ras_lsf_bproc_la_LDFLAGS = -module -avoid-version $(ras_lsf_bproc_LDFLAGS)
# Static flavour: built but not installed.
noinst_LTLIBRARIES = $(component_noinst)
libmca_ras_lsf_bproc_la_SOURCES = $(proxy_SOURCES)
libmca_ras_lsf_bproc_la_LIBADD = $(ras_lsf_bproc_LIBS)
libmca_ras_lsf_bproc_la_LDFLAGS = -module -avoid-version $(ras_lsf_bproc_LDFLAGS)

38
orte/mca/ras/lsf_bproc/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,38 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_ras_lsf_bproc_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
# Configure check for the ras lsf_bproc component. The component is
# considered buildable when the BProc check reports either a new or an
# old BProc installation.
AC_DEFUN([MCA_ras_lsf_bproc_CONFIG],[
# Both the new-bproc and the old-bproc outcome mark the check as good.
OMPI_CHECK_BPROC([ras_lsf_bproc], [ras_lsf_bproc_good=1],
[ras_lsf_bproc_good=1], [ras_lsf_bproc_good=0])
# If the check succeeded, copy the link flags into the wrapper compiler
# variables and run the found action. Otherwise run the not-found action.
AS_IF([test "$ras_lsf_bproc_good" = "1"],
[ras_lsf_bproc_WRAPPER_EXTRA_LDFLAGS="$ras_lsf_bproc_LDFLAGS"
ras_lsf_bproc_WRAPPER_EXTRA_LIBS="$ras_lsf_bproc_LIBS"
$1],
[$2])
# Substitute the build flags so that the component Makefile can use them.
AC_SUBST([ras_lsf_bproc_CPPFLAGS])
AC_SUBST([ras_lsf_bproc_LDFLAGS])
AC_SUBST([ras_lsf_bproc_LIBS])
])dnl

24
orte/mca/ras/lsf_bproc/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,24 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module
PARAM_CONFIG_FILES="Makefile"

55
orte/mca/ras/lsf_bproc/ras_lsf_bproc.c Обычный файл
Просмотреть файл

@ -0,0 +1,55 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include "orte/orte_constants.h"
#include "orte/mca/ras/base/ras_private.h"
#include "ras_lsf_bproc.h"
/**
 * Allocate hook of the LSF/BProc module.
 *
 * Placeholder: neither jobid nor attributes is examined, nothing is
 * allocated, and ORTE_SUCCESS is always returned.
 */
static int orte_ras_lsf_bproc_allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
return ORTE_SUCCESS;
}
/**
 * Deallocate hook of the LSF/BProc module.
 *
 * Performs no work: jobid is not examined and ORTE_SUCCESS is always
 * returned.
 */
static int orte_ras_lsf_bproc_deallocate(orte_jobid_t jobid)
{
return ORTE_SUCCESS;
}
/**
 * Finalize hook of the LSF/BProc module.
 *
 * Performs no work and always returns ORTE_SUCCESS.
 */
static int orte_ras_lsf_bproc_finalize(void)
{
return ORTE_SUCCESS;
}
/**
 * Function table of the LSF/BProc RAS module.
 *
 * The allocate, deallocate and finalize entries are the no-op functions
 * defined in this file; the node insert/query/lookup entries point at the
 * shared orte_ras_base_* functions. The initializer is positional, so the
 * order of the entries must follow the member order of
 * orte_ras_base_module_t.
 */
orte_ras_base_module_t orte_ras_lsf_bproc_module = {
orte_ras_lsf_bproc_allocate,
orte_ras_base_node_insert,
orte_ras_base_node_query,
orte_ras_base_node_query_alloc,
orte_ras_base_node_lookup,
orte_ras_lsf_bproc_deallocate,
orte_ras_lsf_bproc_finalize
};

49
orte/mca/ras/lsf_bproc/ras_lsf_bproc.h Обычный файл
Просмотреть файл

@ -0,0 +1,49 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Resource Allocation (LSF over BPROC)
*/
#ifndef ORTE_RAS_LSF_BPROC_H
#define ORTE_RAS_LSF_BPROC_H
#include "orte/mca/ras/ras.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
 * RAS LSF/BProc component: the generic RAS component followed by the
 * settings that the component's open function reads from MCA parameters.
 */
typedef struct orte_ras_lsf_bproc_component_t {
    /** generic RAS component; kept as the first member */
    orte_ras_base_component_t super;
    /** value of the "debug" MCA parameter of ras/lsf_bproc */
    int debug;
    /** value of the "priority" MCA parameter of ras/lsf_bproc */
    int priority;
} orte_ras_lsf_bproc_component_t;
ORTE_DECLSPEC extern orte_ras_lsf_bproc_component_t mca_ras_lsf_bproc_component;
ORTE_DECLSPEC extern orte_ras_base_module_t orte_ras_lsf_bproc_module;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -0,0 +1,111 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/proc_info.h"
#include "opal/util/output.h"
#include "ras_lsf_bproc.h"
/*
* Local functions
*/
static int orte_ras_lsf_bproc_open(void);
static int orte_ras_lsf_bproc_close(void);
static orte_ras_base_module_t* orte_ras_lsf_bproc_init(int* priority);
/*
 * Component descriptor for the LSF/BProc RAS component.
 *
 * Only the embedded base component is initialized here (version
 * information, the name "lsf_bproc", the open/close hooks and the init
 * hook). The debug and priority members are left zero-initialized and are
 * assigned by orte_ras_lsf_bproc_open().
 */
orte_ras_lsf_bproc_component_t mca_ras_lsf_bproc_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
/* Indicate that we are a ras v1.3.0 component (which also
implies a specific MCA version) */
ORTE_RAS_BASE_VERSION_1_3_0,
"lsf_bproc", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
orte_ras_lsf_bproc_open, /* component open */
orte_ras_lsf_bproc_close /* component close */
},
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
},
orte_ras_lsf_bproc_init
}
};
/**
 * Convenience function: register an integer MCA parameter of the
 * ras/lsf_bproc component and look up its current value.
 *
 * @param param_name     Name of the parameter below "ras"/"lsf_bproc".
 * @param default_value  Value registered as the default; also the value
 *                       returned when the lookup does not store anything.
 * @return The looked-up parameter value.
 */
static int orte_ras_lsf_bproc_param_register_int(
    const char* param_name,
    int default_value)
{
    int value = default_value;
    int index = mca_base_param_register_int("ras", "lsf_bproc", param_name,
                                            NULL, default_value);

    mca_base_param_lookup_int(index, &value);
    return value;
}
/**
* component open/close/init function
*/
static int orte_ras_lsf_bproc_open(void)
{
mca_ras_lsf_bproc_component.debug = orte_ras_lsf_bproc_param_register_int("debug",1);
mca_ras_lsf_bproc_component.priority = orte_ras_lsf_bproc_param_register_int("priority",-1);
return ORTE_SUCCESS;
}
/**
 * Component init function.
 *
 * @param priority  Receives the component priority when
 *                  orte_process_info.seed is set; left untouched otherwise.
 * @return NULL on every path.
 */
static orte_ras_base_module_t *orte_ras_lsf_bproc_init(int* priority)
{
    /* if we are not an HNP, then we must not be selected */
    if (orte_process_info.seed) {
        *priority = mca_ras_lsf_bproc_component.priority;
    }

    /* NOTE(review): NULL is returned even for the HNP, so
       orte_ras_lsf_bproc_module is never offered from here - presumably
       the component is deliberately disabled; confirm before changing */
    return NULL;
}
/**
 * Component close function.
 *
 * Performs no work and always returns ORTE_SUCCESS.
 */
static int orte_ras_lsf_bproc_close(void)
{
return ORTE_SUCCESS;
}