Restore bproc code - if someone wants to maintain it, then more power to them...but it would definitely be easier if the old code is in the trunk. This is all .ompi_ignore'd except for me so I can play with making it compile again in my copious free time.
This commit was SVN r18716.
Этот коммит содержится в:
родитель
3e61a3f92e
Коммит
17fcd72b5d
67
config/ompi_check_bproc.m4
Обычный файл
67
config/ompi_check_bproc.m4
Обычный файл
@ -0,0 +1,67 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# new bproc is LANL versions >= 3.2.0
|
||||
# old bproc is all Scyld versions and LANL version < 3.2.0
|
||||
# OMPI_CHECK_BPROC(prefix, [action-if-new-bproc], [action-if-old-bproc],
|
||||
# [action-if-not-found])
|
||||
# --------------------------------------------------------
|
||||
AC_DEFUN([OMPI_CHECK_BPROC],[
    AC_ARG_WITH([bproc],
                [AS_HELP_STRING([--with-bproc],
                                [Directory where the BProc software is installed])])

    # An explicit --without-bproc skips every probe and takes the
    # not-found action directly.
    AS_IF([test "$with_bproc" = "no"], [$4], [
        # The probes below temporarily extend the global flags; remember
        # the incoming values so they can be restored afterwards.
        ompi_check_bproc_save_CPPFLAGS="$CPPFLAGS"
        ompi_check_bproc_save_LDFLAGS="$LDFLAGS"
        ompi_check_bproc_save_LIBS="$LIBS"

        # --with-bproc=DIR: also search DIR/include and DIR/lib.  Two
        # separate test invocations are used because "test ... -a ..."
        # is not portable.
        AS_IF([test -n "$with_bproc" && test "$with_bproc" != "yes"],
              [CPPFLAGS="$CPPFLAGS -I$with_bproc/include"
               LDFLAGS="$LDFLAGS -L$with_bproc/lib"])

        # BProc counts as present only when both the header and the
        # library are found.
        AC_CHECK_HEADERS([sys/bproc.h],
                         [AC_CHECK_LIB([bproc],
                                       [bproc_numnodes],
                                       [ompi_check_bproc_happy="yes"],
                                       [ompi_check_bproc_happy="no"])],
                         [ompi_check_bproc_happy="no"])

        # Check for Scyld bproc or an old version of LANL Bproc (pre 3.2.0):
        # finding sys/bproc_common.h marks the installation as new,
        # otherwise it is treated as old.
        AS_IF([test "$ompi_check_bproc_happy" = "yes"],
              [AC_CHECK_HEADERS([sys/bproc_common.h],
                                [ompi_check_bproc_happy="new"],
                                [ompi_check_bproc_happy="old"],
                                [#include <stdint.h>
#include <sys/socket.h>])])

        CPPFLAGS="$ompi_check_bproc_save_CPPFLAGS"
        LDFLAGS="$ompi_check_bproc_save_LDFLAGS"
        LIBS="$ompi_check_bproc_save_LIBS"

        # Publish the per-prefix flags and dispatch to the matching
        # action.  A missing BProc is fatal only when the user asked for
        # it with --with-bproc.
        AS_IF([test "$ompi_check_bproc_happy" != "no"],
              [AS_IF([test -n "$with_bproc" && test "$with_bproc" != "yes"],
                     [$1_CPPFLAGS="$$1_CPPFLAGS -I$with_bproc/include"
                      $1_LDFLAGS="$$1_LDFLAGS -L$with_bproc/lib"])
               $1_LIBS="$$1_LIBS -lbproc"
               AS_IF([test "$ompi_check_bproc_happy" = "new"], [$2], [$3])],
              [AS_IF([test -n "$with_bproc"],
                     [AC_MSG_ERROR([BProc support requested but not found. Perhaps
you need to specify the location of the BProc libraries.])])
               $4])
    ])
])
|
0
orte/mca/ess/bproc/.ompi_ignore
Обычный файл
0
orte/mca/ess/bproc/.ompi_ignore
Обычный файл
1
orte/mca/ess/bproc/.ompi_unignore
Обычный файл
1
orte/mca/ess/bproc/.ompi_unignore
Обычный файл
@ -0,0 +1 @@
|
||||
rhc
|
54
orte/mca/ess/bproc/Makefile.am
Обычный файл
54
orte/mca/ess/bproc/Makefile.am
Обычный файл
@ -0,0 +1,54 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Use the top-level Makefile.options
|
||||
|
||||
|
||||
|
||||
# Preprocessor flags that configure substituted for BProc.
AM_CPPFLAGS = $(ess_bproc_CPPFLAGS)

# Files that make up the component, whichever way it is built.
sources = \
    ess_bproc.h \
    ess_bproc_component.c \
    ess_bproc_module.c

# The output library is created in this directory.  It is named
# mca_<type>_<name>.la for DSO builds and libmca_<type>_<name>.la for
# static builds.

if OMPI_BUILD_ess_bproc_DSO
component_install = mca_ess_bproc.la
component_noinst =
else
component_install =
component_noinst = libmca_ess_bproc.la
endif

# DSO flavor: installed into $(libdir)/openmpi.
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_ess_bproc_la_SOURCES = $(sources)
mca_ess_bproc_la_LIBADD = \
    $(ess_bproc_LIBS) \
    $(top_ompi_builddir)/orte/libopen-rte.la \
    $(top_ompi_builddir)/opal/libopen-pal.la
mca_ess_bproc_la_LDFLAGS = -module -avoid-version $(ess_bproc_LDFLAGS)

# Static flavor: built but not installed.
noinst_LTLIBRARIES = $(component_noinst)
libmca_ess_bproc_la_SOURCES = $(sources)
libmca_ess_bproc_la_LIBADD = $(ess_bproc_LIBS)
libmca_ess_bproc_la_LDFLAGS = -module -avoid-version $(ess_bproc_LDFLAGS)
|
36
orte/mca/ess/bproc/configure.m4
Обычный файл
36
orte/mca/ess/bproc/configure.m4
Обычный файл
@ -0,0 +1,36 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_ess_bproc_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_ess_bproc_CONFIG],[
    # Both the new and the old BProc flavors count as success here;
    # only a missing BProc is recorded as failure.
    OMPI_CHECK_BPROC([ess_bproc],
                     [ess_bproc_good=1],
                     [ess_bproc_good=1],
                     [ess_bproc_good=0])

    # On success, copy the linker settings into the WRAPPER_EXTRA
    # variables and run the found action; otherwise run the not-found
    # action.
    AS_IF([test "$ess_bproc_good" = "1"],
          [ess_bproc_WRAPPER_EXTRA_LDFLAGS="$ess_bproc_LDFLAGS"
           ess_bproc_WRAPPER_EXTRA_LIBS="$ess_bproc_LIBS"
           $1],
          [$2])

    # Substitute the build flags so the Makefile can use them.
    AC_SUBST([ess_bproc_CPPFLAGS])
    AC_SUBST([ess_bproc_LDFLAGS])
    AC_SUBST([ess_bproc_LIBS])
])dnl
|
22
orte/mca/ess/bproc/configure.params
Обычный файл
22
orte/mca/ess/bproc/configure.params
Обычный файл
@ -0,0 +1,22 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# NOTE(review): presumably the list of files configure must generate for
# this component -- confirm against the MCA build system.
PARAM_CONFIG_FILES="Makefile"
|
48
orte/mca/ess/bproc/ess_bproc.h
Обычный файл
48
orte/mca/ess/bproc/ess_bproc.h
Обычный файл
@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef ORTE_SDS_BPROC_H
|
||||
#define ORTE_SDS_BPROC_H
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Module open / close
|
||||
*/
|
||||
int orte_sds_bproc_component_open(void);
|
||||
int orte_sds_bproc_component_close(void);
|
||||
orte_sds_base_module_t* orte_sds_bproc_component_init(int *priority);
|
||||
|
||||
/*
|
||||
* Startup / Shutdown
|
||||
*/
|
||||
int orte_sds_bproc_finalize(void);
|
||||
|
||||
/*
|
||||
* Module functions
|
||||
*/
|
||||
int orte_sds_bproc_set_name(void);
|
||||
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* ORTE_SDS_BPROC_H */
|
97
orte/mca/ess/bproc/ess_bproc_component.c
Обычный файл
97
orte/mca/ess/bproc/ess_bproc_component.c
Обычный файл
@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
* These symbols are in a file by themselves to provide nice linker
|
||||
* semantics. Since linkers generally pull in symbols by object
|
||||
* files, keeping these symbols as the only symbols in this file
|
||||
* prevents utility programs such as "ompi_info" from having to import
|
||||
* entire components just to query their version and parameters.
|
||||
*/
|
||||
|
||||
#include "orte_config.h"

#include <stdlib.h>
#include <string.h>

#include "orte/orte_constants.h"
#include "orte/mca/sds/sds.h"
#include "orte/mca/sds/bproc/sds_bproc.h"
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
extern orte_sds_base_module_t orte_sds_bproc_module;
|
||||
|
||||
/*
|
||||
* Instantiate the public struct with all of our public information
|
||||
* and pointers to our public functions in it
|
||||
*/
|
||||
orte_sds_base_component_t mca_sds_bproc_component = {
|
||||
/* First, the mca_component_t struct containing meta information
|
||||
about the component itself */
|
||||
{
|
||||
/* Indicate that we are a sds v1.0.0 component (which also
|
||||
implies a specific MCA version) */
|
||||
ORTE_SDS_BASE_VERSION_1_0_0,
|
||||
|
||||
/* Component name and version */
|
||||
"bproc",
|
||||
ORTE_MAJOR_VERSION,
|
||||
ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION,
|
||||
|
||||
/* Component open and close functions */
|
||||
orte_sds_bproc_component_open,
|
||||
orte_sds_bproc_component_close
|
||||
},
|
||||
|
||||
/* Next the MCA v1.0.0 component meta data */
|
||||
{
|
||||
/* Whether the component is checkpointable or not */
|
||||
true
|
||||
},
|
||||
|
||||
/* Initialization / querying functions */
|
||||
orte_sds_bproc_component_init
|
||||
};
|
||||
|
||||
|
||||
int
|
||||
orte_sds_bproc_component_open(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
orte_sds_base_module_t *
|
||||
orte_sds_bproc_component_init(int *priority)
|
||||
{
|
||||
int id;
|
||||
char *mode;
|
||||
|
||||
/* okay, not seed/singleton attempt another approach */
|
||||
id = mca_base_param_register_string("ns", "nds", NULL, NULL, NULL);
|
||||
mca_base_param_lookup_string(id, &mode);
|
||||
|
||||
if (NULL == mode || 0 != strcmp("bproc", mode)) { return NULL; }
|
||||
|
||||
*priority = 20;
|
||||
return &orte_sds_bproc_module;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
orte_sds_bproc_component_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
178
orte/mca/ess/bproc/ess_bproc_module.c
Обычный файл
178
orte/mca/ess/bproc/ess_bproc_module.c
Обычный файл
@ -0,0 +1,178 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include <sys/bproc.h>
|
||||
|
||||
#include "orte/orte_constants.h"
|
||||
#include "orte/util/sys_info.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "orte/mca/sds/sds.h"
|
||||
#include "orte/mca/sds/base/base.h"
|
||||
#include "orte/mca/sds/bproc/sds_bproc.h"
|
||||
#include "orte/mca/ns/ns.h"
|
||||
#include "orte/mca/ns/base/base.h"
|
||||
#include "orte/mca/errmgr/base/base.h"
|
||||
|
||||
orte_sds_base_module_t orte_sds_bproc_module = {
|
||||
orte_sds_base_basic_contact_universe,
|
||||
orte_sds_bproc_set_name,
|
||||
orte_sds_bproc_finalize,
|
||||
};
|
||||
|
||||
/**
|
||||
* Sets up the process name from the information put into the environment
|
||||
* by the bproc launcher and orte_ns_nds_bproc_put.
|
||||
* @retval ORTE_SUCCESS
|
||||
* @retval error
|
||||
*/
|
||||
int orte_sds_bproc_set_name(void)
|
||||
{
|
||||
int rc;
|
||||
int id;
|
||||
char* name_string = NULL;
|
||||
|
||||
id = mca_base_param_register_string("ns", "nds", "name", NULL, NULL);
|
||||
mca_base_param_lookup_string(id, &name_string);
|
||||
if(name_string != NULL) {
|
||||
if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_process_name(
|
||||
&(orte_process_info.my_name),
|
||||
name_string))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(name_string);
|
||||
return rc;
|
||||
}
|
||||
free(name_string);
|
||||
|
||||
} else {
|
||||
|
||||
orte_cellid_t cellid;
|
||||
orte_jobid_t jobid;
|
||||
orte_vpid_t vpid;
|
||||
orte_vpid_t vpid_start;
|
||||
char* cellid_string;
|
||||
char* jobid_string;
|
||||
char* vpid_string;
|
||||
int num_procs;
|
||||
char *bproc_rank_string;
|
||||
int bproc_rank;
|
||||
int stride;
|
||||
|
||||
id = mca_base_param_register_string("ns", "nds", "cellid", NULL, NULL);
|
||||
mca_base_param_lookup_string(id, &cellid_string);
|
||||
if (NULL == cellid_string) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_cellid(&cellid, cellid_string))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return(rc);
|
||||
}
|
||||
|
||||
id = mca_base_param_register_string("ns", "nds", "jobid", NULL, NULL);
|
||||
mca_base_param_lookup_string(id, &jobid_string);
|
||||
if (NULL == jobid_string) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_jobid(&jobid, jobid_string))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return(rc);
|
||||
}
|
||||
|
||||
/* BPROC_RANK is set by bproc when we do a parallel launch */
|
||||
bproc_rank_string = getenv("BPROC_RANK");
|
||||
if (NULL == bproc_rank_string) {
|
||||
opal_output(0, "orte_ns_nds_bproc_get: Error: Environment variable "
|
||||
"BPROC_RANK not found.\n");
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
bproc_rank = (int)strtol(bproc_rank_string, NULL, 10);
|
||||
|
||||
/* to compute our process name, we need to know two other things: the
|
||||
* stride (i.e., the size of the step between vpids in this launch
|
||||
* wave) and the starting vpid of this launch. Get those values here
|
||||
*/
|
||||
id = mca_base_param_register_int("pls", "bproc", "stride", NULL, -1);
|
||||
mca_base_param_lookup_int(id, &stride);
|
||||
if (stride < 0) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
id = mca_base_param_register_string("ns", "nds", "vpid_start", NULL, NULL);
|
||||
mca_base_param_lookup_string(id, &vpid_string);
|
||||
if (NULL == vpid_string) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
rc = orte_ns.convert_string_to_vpid(&vpid_start, vpid_string);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return(rc);
|
||||
}
|
||||
|
||||
/* compute our vpid */
|
||||
vpid = vpid_start + (bproc_rank * stride);
|
||||
|
||||
/* create our name */
|
||||
if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(
|
||||
&(orte_process_info.my_name),
|
||||
cellid,
|
||||
jobid,
|
||||
vpid))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
id = mca_base_param_register_int("ns", "nds", "num_procs", NULL, -1);
|
||||
mca_base_param_lookup_int(id, &num_procs);
|
||||
if (num_procs < 0) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
orte_process_info.num_procs = (size_t)num_procs;
|
||||
|
||||
id = mca_base_param_register_string("ns", "nds", "global_vpid_start", NULL, NULL);
|
||||
mca_base_param_lookup_string(id, &vpid_string);
|
||||
if (NULL == vpid_string) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
rc = orte_ns.convert_string_to_vpid(&orte_process_info.vpid_start, vpid_string);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return(rc);
|
||||
}
|
||||
|
||||
if(NULL != orte_system_info.nodename)
|
||||
free(orte_system_info.nodename);
|
||||
asprintf(&orte_system_info.nodename, "%d", bproc_currnode());
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
orte_sds_bproc_finalize(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
151
orte/mca/ess/bproc/ess_bproc_put.c
Обычный файл
151
orte/mca/ess/bproc/ess_bproc_put.c
Обычный файл
@ -0,0 +1,151 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "opal/util/opal_environ.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "orte/orte_constants.h"
|
||||
#include "orte/mca/sds/base/base.h"
|
||||
#include "orte/mca/ns/base/base.h"
|
||||
#include "orte/mca/ns/ns.h"
|
||||
#include "orte/mca/errmgr/base/base.h"
|
||||
|
||||
|
||||
/**
|
||||
* sets up the environment so that a process launched with the bproc launcher can
|
||||
* figure out its name
|
||||
* @param cell the cell that the process belongs to.
|
||||
* @param job the job the process belongs to
|
||||
* @param vpid_start the starting vpid for the current parallel launch
|
||||
* @param global_vpid_start the starting vpid for the job
|
||||
* @param num_procs the number of user processes in the job
|
||||
* @param env a pointer to the environment to setup
|
||||
* @retval ORTE_SUCCESS
|
||||
* @retval error
|
||||
*/
|
||||
int orte_ns_nds_bproc_put(orte_cellid_t cell, orte_jobid_t job,
|
||||
orte_vpid_t vpid_start, orte_vpid_t global_vpid_start,
|
||||
int num_procs, char ***env) {
|
||||
char* param;
|
||||
char* value;
|
||||
int rc;
|
||||
|
||||
/* set the mode to bproc */
|
||||
if(NULL == (param = mca_base_param_environ_variable("ns","nds",NULL))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
opal_setenv(param, "bproc", true, env);
|
||||
free(param);
|
||||
|
||||
/* not a seed */
|
||||
if(NULL == (param = mca_base_param_environ_variable("seed",NULL,NULL))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
opal_unsetenv(param, env);
|
||||
free(param);
|
||||
|
||||
/* since we want to pass the name as separate components, make sure
|
||||
* that the "name" environmental variable is cleared!
|
||||
*/
|
||||
if(NULL == (param = mca_base_param_environ_variable("ns","nds","name"))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
opal_unsetenv(param, env);
|
||||
free(param);
|
||||
|
||||
/* setup the name */
|
||||
if(ORTE_SUCCESS != (rc = orte_ns.convert_cellid_to_string(&value, cell))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
if(NULL == (param = mca_base_param_environ_variable("ns","nds","cellid"))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
opal_setenv(param, value, true, env);
|
||||
free(param);
|
||||
free(value);
|
||||
|
||||
if(ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&value, job))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
if(NULL == (param = mca_base_param_environ_variable("ns","nds","jobid"))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
opal_setenv(param, value, true, env);
|
||||
free(param);
|
||||
free(value);
|
||||
|
||||
rc = orte_ns.convert_vpid_to_string(&value, vpid_start);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return(rc);
|
||||
}
|
||||
if(NULL == (param = mca_base_param_environ_variable("ns","nds","vpid_start"))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
opal_setenv(param, value, true, env);
|
||||
free(param);
|
||||
free(value);
|
||||
|
||||
rc = orte_ns.convert_vpid_to_string(&value, global_vpid_start);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return(rc);
|
||||
}
|
||||
if(NULL == (param = mca_base_param_environ_variable("ns","nds","global_vpid_start"))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
opal_setenv(param, value, true, env);
|
||||
free(param);
|
||||
free(value);
|
||||
|
||||
asprintf(&value, "%d", num_procs);
|
||||
if(NULL == (param = mca_base_param_environ_variable("ns","nds","num_procs")))
|
||||
{
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
opal_setenv(param, value, true, env);
|
||||
free(param);
|
||||
free(value);
|
||||
|
||||
/* we have to set this environmental variable so bproc will give us our rank
|
||||
* after the launch */
|
||||
|
||||
putenv("BPROC_RANK=XXXXXXX");
|
||||
opal_setenv("BPROC_RANK", "XXXXXXX", true, env);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
0
orte/mca/odls/bproc/.ompi_ignore
Обычный файл
0
orte/mca/odls/bproc/.ompi_ignore
Обычный файл
1
orte/mca/odls/bproc/.ompi_unignore
Обычный файл
1
orte/mca/odls/bproc/.ompi_unignore
Обычный файл
@ -0,0 +1 @@
|
||||
rhc
|
51
orte/mca/odls/bproc/Makefile.am
Обычный файл
51
orte/mca/odls/bproc/Makefile.am
Обычный файл
@ -0,0 +1,51 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Include path from the build tree plus the BProc preprocessor flags
# that configure substituted.
AM_CPPFLAGS = -I$(top_ompi_builddir)/src/include $(odls_bproc_CPPFLAGS)

# Files that make up the component, whichever way it is built.
sources = \
    odls_bproc.h \
    odls_bproc.c \
    odls_bproc_component.c

# The output library is created in this directory.  It is named
# mca_<type>_<name>.la for DSO builds and libmca_<type>_<name>.la for
# static builds.

if OMPI_BUILD_odls_bproc_DSO
component_install = mca_odls_bproc.la
component_noinst =
else
component_install =
component_noinst = libmca_odls_bproc.la
endif

# DSO flavor: installed into $(libdir)/openmpi.
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_odls_bproc_la_SOURCES = $(sources)
mca_odls_bproc_la_LDFLAGS = -module -avoid-version $(odls_bproc_LDFLAGS)
mca_odls_bproc_la_LIBADD = \
    $(odls_bproc_LIBS) \
    $(top_ompi_builddir)/orte/libopen-rte.la \
    $(top_ompi_builddir)/opal/libopen-pal.la

# Static flavor: built but not installed.
noinst_LTLIBRARIES = $(component_noinst)
libmca_odls_bproc_la_SOURCES = $(sources)
libmca_odls_bproc_la_LDFLAGS = -module -avoid-version $(odls_bproc_LDFLAGS)
libmca_odls_bproc_la_LIBADD = $(odls_bproc_LIBS)
|
38
orte/mca/odls/bproc/configure.m4
Обычный файл
38
orte/mca/odls/bproc/configure.m4
Обычный файл
@ -0,0 +1,38 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_odls_bproc_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_odls_bproc_CONFIG],[
    # Both the new and the old BProc flavors count as success here;
    # only a missing BProc is recorded as failure.
    OMPI_CHECK_BPROC([odls_bproc],
                     [odls_bproc_good=1],
                     [odls_bproc_good=1],
                     [odls_bproc_good=0])

    # On success, copy the linker settings into the WRAPPER_EXTRA
    # variables and run the found action; otherwise run the not-found
    # action.
    AS_IF([test "$odls_bproc_good" = "1"],
          [odls_bproc_WRAPPER_EXTRA_LDFLAGS="$odls_bproc_LDFLAGS"
           odls_bproc_WRAPPER_EXTRA_LIBS="$odls_bproc_LIBS"
           $1],
          [$2])

    # Substitute the build flags so the Makefile can use them.
    AC_SUBST([odls_bproc_CPPFLAGS])
    AC_SUBST([odls_bproc_LDFLAGS])
    AC_SUBST([odls_bproc_LIBS])
])dnl
|
24
orte/mca/odls/bproc/configure.params
Обычный файл
24
orte/mca/odls/bproc/configure.params
Обычный файл
@ -0,0 +1,24 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Specific to this module
|
||||
|
||||
# NOTE(review): presumably the list of files configure must generate for
# this component -- confirm against the MCA build system.
PARAM_CONFIG_FILES="Makefile"
|
697
orte/mca/odls/bproc/odls_bproc.c
Обычный файл
697
orte/mca/odls/bproc/odls_bproc.c
Обычный файл
@ -0,0 +1,697 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file:
|
||||
* Part of the bproc launcher.
|
||||
* See odls_bproc.h for an overview of how it works.
|
||||
*/
|
||||
#include "orte_config.h"
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <fcntl.h>
|
||||
#include <pty.h>
|
||||
#include <dirent.h>
|
||||
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "opal/runtime/opal_progress.h"
|
||||
#include "opal/threads/condition.h"
|
||||
#include "opal/util/os_dirpath.h"
|
||||
#include "opal/util/os_path.h"
|
||||
#include "opal/util/output.h"
|
||||
|
||||
#include "orte/dss/dss.h"
|
||||
#include "orte/util/sys_info.h"
|
||||
#include "orte/orte_constants.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/gpr/gpr.h"
|
||||
#include "orte/mca/iof/iof.h"
|
||||
#include "orte/mca/iof/base/iof_base_setup.h"
|
||||
#include "orte/mca/ns/base/base.h"
|
||||
#include "orte/mca/oob/base/base.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/util/session_dir.h"
|
||||
#include "orte/util/univ_info.h"
|
||||
|
||||
#include "odls_bproc.h"
|
||||
|
||||
/**
|
||||
* Initialization of the bproc_orted module with all the needed function pointers
|
||||
*/
|
||||
orte_odls_base_module_t orte_odls_bproc_module = {
|
||||
orte_odls_bproc_subscribe_launch_data,
|
||||
orte_odls_bproc_get_add_procs_data,
|
||||
orte_odls_bproc_launch_local_procs,
|
||||
orte_odls_bproc_kill_local_procs,
|
||||
orte_odls_bproc_signal_local_procs
|
||||
};
|
||||
|
||||
static int odls_bproc_make_dir(char *directory);
|
||||
static char * odls_bproc_get_base_dir_name(int proc_rank, orte_jobid_t jobid,
|
||||
orte_std_cntr_t app_context);
|
||||
static void odls_bproc_delete_dir_tree(char * path);
|
||||
static int odls_bproc_remove_dir(void);
|
||||
static void odls_bproc_send_cb(int status, orte_process_name_t * peer,
|
||||
orte_buffer_t* buffer, int tag, void* cbdata);
|
||||
static int odls_bproc_setup_stdio(orte_process_name_t *proc_name,
|
||||
int proc_rank, orte_jobid_t jobid,
|
||||
orte_std_cntr_t app_context, bool connect_stdin);
|
||||
|
||||
|
||||
/**
 * Placeholder for the "get add-procs data" entry of the module table.
 * The bproc ODLS does not provide this service.
 *
 * @param data  unused
 * @param map   unused
 * @retval ORTE_ERR_NOT_IMPLEMENTED always
 */
int orte_odls_bproc_get_add_procs_data(orte_gpr_notify_data_t **data, orte_job_map_t *map)
{
    (void) data;
    (void) map;

    return ORTE_ERR_NOT_IMPLEMENTED;
}
|
||||
|
||||
|
||||
/**
 * (Re)create a directory.
 *
 * When something already exists at @p directory it is first removed,
 * together with everything underneath it, and the directory is then
 * created again with owner-only read/write/execute permission.
 *
 * @param directory path of the directory to create
 * @retval ORTE_SUCCESS on success
 * @retval error        whatever opal_os_dirpath_create() reports
 */
static int
odls_bproc_make_dir(char *directory)
{
    struct stat info;
    int already_there = (0 == stat(directory, &info));

    if (already_there) {
        /* stale contents from an earlier run - start from scratch */
        odls_bproc_delete_dir_tree(directory);
    }

    /* at the least, the owner must be able to do anything */
    return opal_os_dirpath_create(directory, S_IRWXU);
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a path of the form:
|
||||
* @code
|
||||
* /tmp/openmpi-bproc-<user>/<universe>/<jobid>-<app_context>/<proc_rank>/
|
||||
* @endcode
|
||||
* which is used to put links to the pty/pipes in
|
||||
* @param proc_rank the process's rank on the node
|
||||
* @param jobid the jobid the proc belongs to
|
||||
* @param app_context the application context number within the job
|
||||
* @retval path
|
||||
*/
|
||||
static char *
|
||||
odls_bproc_get_base_dir_name(int proc_rank, orte_jobid_t jobid,
|
||||
orte_std_cntr_t app_context)
|
||||
{
|
||||
char *path = NULL, *user = NULL, *job = NULL;
|
||||
int rc;
|
||||
|
||||
/* ensure that system info is set */
|
||||
orte_sys_info();
|
||||
|
||||
if (NULL == orte_universe_info.name) { /* error condition */
|
||||
ORTE_ERROR_LOG(ORTE_ERROR);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rc = orte_ns.convert_jobid_to_string(&job, jobid);
|
||||
if(ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* get the username set by the bproc pls. We need to get it from here
|
||||
* because on many bproc systems the method we use to get the username
|
||||
* from the system on the backend fails and we only get the uid. */
|
||||
rc = mca_base_param_register_string("pls", "bproc", "username", NULL,
|
||||
orte_system_info.user);
|
||||
mca_base_param_lookup_string(rc,&user);
|
||||
|
||||
if (0 > asprintf(&path, OPAL_PATH_SEP"tmp"OPAL_PATH_SEP"openmpi-bproc-%s"OPAL_PATH_SEP"%s"OPAL_PATH_SEP"%s-%d"OPAL_PATH_SEP"%d",
|
||||
user, orte_universe_info.name,
|
||||
job, (int) app_context, proc_rank)) {
|
||||
ORTE_ERROR_LOG(ORTE_ERROR);
|
||||
path = NULL;
|
||||
}
|
||||
if(0 < mca_odls_bproc_component.debug) {
|
||||
opal_output(0, "odls bproc io setup. Path: %s\n", path);
|
||||
}
|
||||
free(user);
|
||||
free(job);
|
||||
return path;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Recursively delete a directory tree.  This is best effort: failures
 * of the individual unlink()/rmdir() calls are ignored.
 *
 * Entries are examined with lstat() so that symbolic links are removed
 * themselves instead of being followed.  The stdio directories contain
 * links to /dev/null and to pty devices; following them would either
 * descend into a foreign directory or, for a dangling link, leave the
 * entry behind and make the final rmdir() fail.
 *
 * @param path the path to the base directory to delete
 */
static void
odls_bproc_delete_dir_tree(char * path)
{
    DIR *dp;
    struct dirent *ep;
    char *filenm;
    struct stat buf;

    dp = opendir(path);
    if (NULL == dp) {
        return;
    }

    while (NULL != (ep = readdir(dp))) {
        /* skip: . and .. */
        if (0 == strcmp(ep->d_name, ".") || 0 == strcmp(ep->d_name, "..")) {
            continue;
        }

        filenm = opal_os_path(false, path, ep->d_name, NULL);
        if (NULL == filenm) {
            /* could not build the name - nothing we can do for this entry */
            continue;
        }

        if (0 == lstat(filenm, &buf) && S_ISDIR(buf.st_mode)) {
            /* a real sub-directory: empty it, then remove it */
            odls_bproc_delete_dir_tree(filenm);
        } else {
            /* file, fifo, symlink or something we could not inspect */
            unlink(filenm);
        }
        free(filenm);
    }

    closedir(dp);
    rmdir(path);
}
|
||||
|
||||
|
||||
/**
|
||||
* Removes the bproc directory
|
||||
* @code /tmp/openmpi-bproc-<user>/ @endcode and all of its contents
|
||||
* @retval ORTE_SUCCESS
|
||||
* @retval error
|
||||
*/
|
||||
static int
|
||||
odls_bproc_remove_dir()
|
||||
{
|
||||
char *frontend = NULL, *user = NULL, *filename = NULL;
|
||||
int id;
|
||||
|
||||
/* get the username set by the bproc pls. We need to get it from here
|
||||
* because on many bproc systems the method we use to get the username
|
||||
* from the system on the backend fails and we only get the uid. */
|
||||
id = mca_base_param_register_string("pls", "bproc", "username", NULL,
|
||||
orte_system_info.user);
|
||||
mca_base_param_lookup_string(id,&user);
|
||||
asprintf(&filename, "openmpi-bproc-%s", user );
|
||||
if( NULL == filename ) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
frontend = opal_os_path(false, "tmp", filename, NULL );
|
||||
free(filename); /* Always free the filename */
|
||||
if (NULL == frontend) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
/* we do our best to clean up the directory tree, but we ignore errors*/
|
||||
odls_bproc_delete_dir_tree(frontend);
|
||||
free(frontend);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Callback function for when we tell mpirun we are ready
|
||||
* @param status
|
||||
* @param peer
|
||||
* @param buffer
|
||||
* @param tag
|
||||
* @param cbdata
|
||||
*/
|
||||
static void
|
||||
odls_bproc_send_cb(int status, orte_process_name_t * peer,
|
||||
orte_buffer_t* buffer, int tag, void* cbdata)
|
||||
{
|
||||
OBJ_RELEASE(buffer);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create Standard I/O symlinks in the filesystem for a given proc
|
||||
*
|
||||
* Create Standard I/O symlinks in the filesystem for a given proc.
|
||||
* The symlinks will be placed in:
|
||||
* @code
|
||||
* /tmp/openmpi-bproc-<user>/<universe>/<jobid>-<app_context>/<proc_rank>/
|
||||
* @endcode
|
||||
*
|
||||
* The symlinks will be to FIFOs for stdin and stderr. stdout will either
|
||||
* be to a FIFO or pty, depending on the configuration of Open MPI.
|
||||
*
|
||||
* @param proc_rank the process's rank on the node
|
||||
* @param jobid the jobid the proc belongs to
|
||||
* @param app_context the application context number within the job
|
||||
* @param connect_stdin if true, stdin will be connected, otherwise it will be
|
||||
* set to /dev/null
|
||||
*
|
||||
* @retval ORTE_SUCCESS
|
||||
* @retval error
|
||||
*/
|
||||
static int
|
||||
odls_bproc_setup_stdio(orte_process_name_t *proc_name, int proc_rank,
|
||||
orte_jobid_t jobid,
|
||||
orte_std_cntr_t app_context, bool connect_stdin)
|
||||
{
|
||||
char *path_prefix, *fd_link_path = NULL;
|
||||
int rc = ORTE_SUCCESS, fd;
|
||||
#if defined(HAVE_OPENPTY) && (OMPI_ENABLE_PTY_SUPPORT != 0)
|
||||
int amaster, aslave;
|
||||
char pty_name[256];
|
||||
struct termios term_attrs;
|
||||
#endif
|
||||
|
||||
path_prefix = odls_bproc_get_base_dir_name(proc_rank, jobid, (size_t)app_context);
|
||||
if (NULL == path_prefix) {
|
||||
rc = ORTE_ERROR;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* check for existence and access, or create it */
|
||||
if (ORTE_SUCCESS != (rc = odls_bproc_make_dir(path_prefix))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* setup the stdin FIFO. Always use a fifo for the same reason we
|
||||
always use a pipe in the iof_setup code -- don't want to flush
|
||||
onto the floor during close */
|
||||
fd_link_path = opal_os_path( false, path_prefix, "0", NULL );
|
||||
if (NULL == fd_link_path) {
|
||||
rc = ORTE_ERROR;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (connect_stdin) {
|
||||
if (0 != mkfifo(fd_link_path, S_IRWXU)) {
|
||||
perror("odls_bproc mkfifo failed");
|
||||
rc = ORTE_ERROR;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
fd = open(fd_link_path, O_RDWR);
|
||||
if (-1 == fd) {
|
||||
perror("odls_bproc open failed");
|
||||
rc = ORTE_ERROR;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
orte_iof.iof_publish(proc_name, ORTE_IOF_SINK,
|
||||
ORTE_IOF_STDIN, fd);
|
||||
} else {
|
||||
if(0 != symlink("/dev/null", fd_link_path)) {
|
||||
perror("odls_bproc could not create symlink");
|
||||
rc = ORTE_ERROR;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
free(fd_link_path);
|
||||
fd_link_path = NULL;
|
||||
|
||||
/* setup the stdout PTY / FIFO */
|
||||
fd_link_path = opal_os_path( false, path_prefix, "1", NULL );
|
||||
if (NULL == fd_link_path) {
|
||||
rc = ORTE_ERROR;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
#if defined(HAVE_OPENPTY) && (OMPI_ENABLE_PTY_SUPPORT != 0)
|
||||
if (0 != openpty(&amaster, &aslave, pty_name, NULL, NULL)) {
|
||||
opal_output(0, "odls_bproc: openpty failed, using pipes instead");
|
||||
goto stdout_fifo_setup;
|
||||
}
|
||||
|
||||
if (0 != symlink(pty_name, fd_link_path)) {
|
||||
rc = ORTE_ERROR;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (tcgetattr(aslave, &term_attrs) < 0) {
|
||||
rc = ORTE_ERROR;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
term_attrs.c_lflag &= ~ (ECHO | ECHOE | ECHOK |
|
||||
ECHOCTL | ECHOKE | ECHONL);
|
||||
term_attrs.c_iflag &= ~ (ICRNL | INLCR | ISTRIP | INPCK | IXON);
|
||||
term_attrs.c_oflag &= ~ (OCRNL | ONLCR);
|
||||
if (tcsetattr(aslave, TCSANOW, &term_attrs) == -1) {
|
||||
rc = ORTE_ERROR;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
orte_iof.iof_publish(proc_name, ORTE_IOF_SOURCE,
|
||||
ORTE_IOF_STDOUT, amaster);
|
||||
|
||||
goto stderr_fifo_setup;
|
||||
|
||||
stdout_fifo_setup:
|
||||
#endif
|
||||
|
||||
if (0 != mkfifo(fd_link_path, S_IRWXU)) {
|
||||
perror("odls_bproc mkfifo failed");
|
||||
rc = ORTE_ERROR;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
fd = open(fd_link_path, O_RDWR);
|
||||
if (-1 == fd) {
|
||||
perror("odls_bproc open failed");
|
||||
rc = ORTE_ERROR;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
orte_iof.iof_publish(proc_name, ORTE_IOF_SOURCE,
|
||||
ORTE_IOF_STDOUT, fd);
|
||||
|
||||
#if defined(HAVE_OPENPTY) && (OMPI_ENABLE_PTY_SUPPORT != 0)
|
||||
stderr_fifo_setup:
|
||||
#endif
|
||||
|
||||
free(fd_link_path);
|
||||
fd_link_path = NULL;
|
||||
|
||||
/* setup the stderr FIFO. Always a fifo */
|
||||
fd_link_path = opal_os_path( false, path_prefix, "2", NULL );
|
||||
if (NULL == fd_link_path) {
|
||||
rc = ORTE_ERROR;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (0 != mkfifo(fd_link_path, S_IRWXU)) {
|
||||
perror("odls_bproc mkfifo failed");
|
||||
rc = ORTE_ERROR;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
fd = open(fd_link_path, O_RDWR);
|
||||
if (-1 == fd) {
|
||||
perror("odls_bproc open failed");
|
||||
rc = ORTE_ERROR;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
orte_iof.iof_publish(proc_name, ORTE_IOF_SOURCE,
|
||||
ORTE_IOF_STDERR, fd);
|
||||
|
||||
cleanup:
|
||||
if (NULL != path_prefix) {
|
||||
free(path_prefix);
|
||||
}
|
||||
if (NULL != fd_link_path) {
|
||||
free(fd_link_path);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/* this entire function gets called within a GPR compound command,
|
||||
* so the subscription actually doesn't get done until the orted
|
||||
* executes the compound command
|
||||
*/
|
||||
int orte_odls_bproc_subscribe_launch_data(orte_jobid_t job, orte_gpr_notify_cb_fn_t cbfunc)
|
||||
{
|
||||
char *segment;
|
||||
orte_gpr_value_t *values[1];
|
||||
orte_gpr_subscription_t *subs, sub=ORTE_GPR_SUBSCRIPTION_EMPTY;
|
||||
orte_gpr_trigger_t *trigs, trig=ORTE_GPR_TRIGGER_EMPTY;
|
||||
char* keys[] = {
|
||||
ORTE_PROC_NAME_KEY,
|
||||
ORTE_PROC_APP_CONTEXT_KEY,
|
||||
ORTE_NODE_NAME_KEY,
|
||||
};
|
||||
int num_keys = 3;
|
||||
int i, rc;
|
||||
|
||||
/* get the job segment name */
|
||||
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, job))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* attach ourselves to the "standard" orted trigger */
|
||||
if (ORTE_SUCCESS !=
|
||||
(rc = orte_schema.get_std_trigger_name(&(trig.name),
|
||||
ORTED_LAUNCH_STAGE_GATE_TRIGGER, job))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(segment);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* ask for return of all data required for launching local processes */
|
||||
subs = ⊂
|
||||
sub.action = ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG;
|
||||
if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&(sub.name),
|
||||
ORTED_LAUNCH_STG_SUB,
|
||||
job))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(segment);
|
||||
free(trig.name);
|
||||
return rc;
|
||||
}
|
||||
sub.cnt = 1;
|
||||
sub.values = values;
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[0]), ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR,
|
||||
segment, num_keys, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(segment);
|
||||
free(sub.name);
|
||||
free(trig.name);
|
||||
return rc;
|
||||
}
|
||||
for (i=0; i < num_keys; i++) {
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[0]->keyvals[i]),
|
||||
keys[i], ORTE_UNDEF, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(segment);
|
||||
free(sub.name);
|
||||
free(trig.name);
|
||||
OBJ_RELEASE(values[0]);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
sub.cbfunc = cbfunc;
|
||||
|
||||
trigs = &trig;
|
||||
|
||||
/* do the subscription */
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.subscribe(1, &subs, 1, &trigs))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
free(segment);
|
||||
free(sub.name);
|
||||
free(trig.name);
|
||||
OBJ_RELEASE(values[0]);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Setup io for the current node, then tell orterun we are ready for the actual
|
||||
* processes.
|
||||
* @retval ORTE_SUCCESS
|
||||
* @retval error
|
||||
*/
|
||||
int
|
||||
orte_odls_bproc_launch_local_procs(orte_gpr_notify_data_t *data, char **base_environ)
|
||||
{
|
||||
odls_bproc_child_t *child;
|
||||
opal_list_item_t* item;
|
||||
orte_gpr_value_t *value, **values;
|
||||
orte_gpr_keyval_t *kval;
|
||||
char *node_name;
|
||||
int rc;
|
||||
orte_std_cntr_t i, j, kv, kv2, *sptr;
|
||||
int src = 0;
|
||||
orte_buffer_t *ack;
|
||||
bool connect_stdin;
|
||||
orte_jobid_t jobid;
|
||||
int cycle = 0;
|
||||
|
||||
/* first, retrieve the job number we are to launch from the
|
||||
* returned data - we can extract the jobid directly from the
|
||||
* subscription name we created
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&jobid, data->target))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* hack for bproc4, change process group so that we do not receive signals
|
||||
* from the parent/front-end process, as bproc4 does not currently allow the
|
||||
* process to intercept the signal
|
||||
*/
|
||||
setpgid(0,0);
|
||||
|
||||
/* loop through the returned data to find the global info and
|
||||
* the info for processes going onto this node
|
||||
*/
|
||||
values = (orte_gpr_value_t**)(data->values)->addr;
|
||||
for (j=0, i=0; i < data->cnt && j < (data->values)->size; j++) { /* loop through all returned values */
|
||||
if (NULL != values[j]) {
|
||||
i++;
|
||||
value = values[j];
|
||||
/* this must have come from one of the process containers, so it must
|
||||
* contain data for a proc structure - see if it belongs to this node
|
||||
*/
|
||||
for (kv=0; kv < value->cnt; kv++) {
|
||||
kval = value->keyvals[kv];
|
||||
if (strcmp(kval->key, ORTE_NODE_NAME_KEY) == 0) {
|
||||
/* Most C-compilers will bark if we try to directly compare the string in the
|
||||
* kval data area against a regular string, so we need to "get" the data
|
||||
* so we can access it */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&node_name, kval->value, ORTE_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* if this is our node...must also protect against a zero-length string */
|
||||
if (NULL != node_name && 0 == strcmp(node_name, orte_system_info.nodename)) {
|
||||
/* ...harvest the info into a new child structure */
|
||||
child = OBJ_NEW(odls_bproc_child_t);
|
||||
for (kv2 = 0; kv2 < value->cnt; kv2++) {
|
||||
kval = value->keyvals[kv2];
|
||||
if(strcmp(kval->key, ORTE_PROC_NAME_KEY) == 0) {
|
||||
/* copy the name into the child object */
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(child->name), kval->value->data, ORTE_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if(strcmp(kval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, kval->value, ORTE_STD_CNTR))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
child->app_idx = *sptr; /* save the index into the app_context objects */
|
||||
continue;
|
||||
}
|
||||
} /* kv2 */
|
||||
/* protect operation on the global list of children */
|
||||
OPAL_THREAD_LOCK(&mca_odls_bproc_component.mutex);
|
||||
opal_list_append(&mca_odls_bproc_component.children, &child->super);
|
||||
opal_condition_signal(&mca_odls_bproc_component.cond);
|
||||
OPAL_THREAD_UNLOCK(&mca_odls_bproc_component.mutex);
|
||||
|
||||
}
|
||||
}
|
||||
} /* for kv */
|
||||
} /* for j */
|
||||
}
|
||||
|
||||
/* set up the io files for our children */
|
||||
for(item = opal_list_get_first(&mca_odls_bproc_component.children);
|
||||
item != opal_list_get_end(&mca_odls_bproc_component.children);
|
||||
item = opal_list_get_next(item)) {
|
||||
child = (odls_bproc_child_t *) item;
|
||||
if(0 < mca_odls_bproc_component.debug) {
|
||||
opal_output(0, "orte_odls_bproc_launch: setting up io for "
|
||||
"[%lu,%lu,%lu] proc rank %lu\n",
|
||||
ORTE_NAME_ARGS((child->name)),
|
||||
child->name->vpid);
|
||||
}
|
||||
/* only setup to forward stdin if it is rank 0, otherwise connect
|
||||
* to /dev/null */
|
||||
if(0 == child->name->vpid) {
|
||||
connect_stdin = true;
|
||||
} else {
|
||||
connect_stdin = false;
|
||||
}
|
||||
|
||||
rc = odls_bproc_setup_stdio(child->name, cycle,
|
||||
jobid, child->app_idx,
|
||||
connect_stdin);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
cycle++;
|
||||
}
|
||||
|
||||
/* message to indicate that we are ready */
|
||||
ack = OBJ_NEW(orte_buffer_t);
|
||||
rc = orte_dss.pack(ack, &src, 1, ORTE_INT);
|
||||
if(ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
rc = mca_oob_send_packed_nb(ORTE_PROC_MY_HNP, ack, ORTE_RML_TAG_BPROC, 0,
|
||||
odls_bproc_send_cb, NULL);
|
||||
if (0 > rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
rc = ORTE_SUCCESS;
|
||||
|
||||
cleanup:
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Function to terminate a job. Since this component only runs on remote nodes
|
||||
* and doesn't actually launch any processes, this function is not needed
|
||||
* so is a noop.
|
||||
*/
|
||||
int orte_odls_bproc_kill_local_procs(orte_jobid_t job, bool set_state)
|
||||
{
|
||||
orte_iof.iof_flush();
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Function to signal a process. Since this component only runs on remote nodes
|
||||
* and doesn't actually launch any processes, this function is not needed
|
||||
* so is a noop.
|
||||
* @param proc the process's name
|
||||
* @param signal The signal to send
|
||||
* @retval ORTE_SUCCESS
|
||||
*/
|
||||
int orte_odls_bproc_signal_local_procs(const orte_process_name_t* proc, int32_t signal)
|
||||
{
|
||||
orte_iof.iof_flush();
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Finalizes the bproc module. Cleanup tmp directory/files
|
||||
* used for I/O forwarding.
|
||||
* @retval ORTE_SUCCESS
|
||||
*/
|
||||
int orte_odls_bproc_finalize(void)
|
||||
{
|
||||
orte_iof.iof_flush();
|
||||
odls_bproc_remove_dir();
|
||||
orte_session_dir_finalize(orte_process_info.my_name);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
112
orte/mca/odls/bproc/odls_bproc.h
Обычный файл
112
orte/mca/odls/bproc/odls_bproc.h
Обычный файл
@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file:
|
||||
* Part of the bproc launching system. This launching system is broken into 2
|
||||
* parts: one runs under the PLS on the head node to launch the orteds, and the
|
||||
* other serves as the orted's local launcher.
|
||||
*
|
||||
* The main job of this component is to setup ptys/pipes for IO forwarding.
|
||||
* See pls_bproc.h for an overview of how the entire bproc launching system works.
|
||||
*/
|
||||
#ifndef ORTE_ODLS_BPROC_H_
|
||||
#define ORTE_ODLS_BPROC_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <sys/bproc.h>
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/threads/condition.h"
|
||||
|
||||
#include "orte/mca/gpr/gpr_types.h"
|
||||
#include "orte/mca/rmaps/rmaps_types.h"
|
||||
|
||||
#include "orte/mca/odls/odls.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Module open / close
|
||||
*/
|
||||
int orte_odls_bproc_component_open(void);
|
||||
int orte_odls_bproc_component_close(void);
|
||||
int orte_odls_bproc_finalize(void);
|
||||
orte_odls_base_module_t* orte_odls_bproc_init(int *priority);
|
||||
|
||||
/*
|
||||
* Startup / Shutdown
|
||||
*/
|
||||
int orte_odls_bproc_finalize(void);
|
||||
|
||||
/*
|
||||
* Interface
|
||||
*/
|
||||
int orte_odls_bproc_subscribe_launch_data(orte_jobid_t job, orte_gpr_notify_cb_fn_t cbfunc);
|
||||
int orte_odls_bproc_get_add_procs_data(orte_gpr_notify_data_t **data, orte_job_map_t *map);
|
||||
int orte_odls_bproc_launch_local_procs(orte_gpr_notify_data_t *data, char **base_environ);
|
||||
int orte_odls_bproc_kill_local_procs(orte_jobid_t job, bool set_state);
|
||||
int orte_odls_bproc_signal_local_procs(const orte_process_name_t* proc_name, int32_t signal);
|
||||
|
||||
/**
|
||||
* ODLS bproc_orted component
|
||||
*/
|
||||
struct orte_odls_bproc_component_t {
|
||||
orte_odls_base_component_t super;
|
||||
/**< The base class */
|
||||
int debug;
|
||||
/**< If greater than 0 print debugging information */
|
||||
int priority;
|
||||
/**< The priority of this component. This will be returned if we determine
|
||||
* that bproc is available and running on this node, */
|
||||
opal_mutex_t lock;
|
||||
/**< Lock used to prevent some race conditions */
|
||||
opal_condition_t cond;
|
||||
/**< Condition used to wake up waiting threads */
|
||||
opal_list_t children;
|
||||
/**< list of children on this node */
|
||||
};
|
||||
/**
|
||||
* Convenience typedef
|
||||
*/
|
||||
typedef struct orte_odls_bproc_component_t orte_odls_bproc_component_t;
|
||||
|
||||
/*
|
||||
* List object to locally store the process names and pids of
|
||||
* our children. This can subsequently be used to order termination
|
||||
* or pass signals without looking the info up again.
|
||||
*/
|
||||
typedef struct odls_bproc_child_t {
|
||||
opal_list_item_t super; /* required to place this on a list */
|
||||
orte_process_name_t *name; /* the OpenRTE name of the proc */
|
||||
pid_t pid; /* local pid of the proc */
|
||||
orte_std_cntr_t app_idx; /* index of the app_context for this proc */
|
||||
bool alive; /* is this proc alive? */
|
||||
} odls_bproc_child_t;
|
||||
OBJ_CLASS_DECLARATION(odls_bproc_child_t);
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_odls_bproc_component_t mca_odls_bproc_component;
|
||||
extern orte_odls_base_module_t orte_odls_bproc_module;
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
#endif /* ORTE_ODLS_BPROC_H_ */
|
||||
|
134
orte/mca/odls/bproc/odls_bproc_component.c
Обычный файл
134
orte/mca/odls/bproc/odls_bproc_component.c
Обычный файл
@ -0,0 +1,134 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
/**
|
||||
* @file:
|
||||
* Takes care of the component stuff for the MCA.
|
||||
*/
|
||||
#include "orte_config.h"
|
||||
#include "orte/orte_constants.h"
|
||||
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
|
||||
#include "orte/mca/odls/odls.h"
|
||||
#include "odls_bproc.h"
|
||||
|
||||
/* instance the child list object */
|
||||
static void odls_bproc_child_constructor(odls_bproc_child_t *ptr)
|
||||
{
|
||||
ptr->name = NULL;
|
||||
ptr->app_idx = -1;
|
||||
ptr->alive = false;
|
||||
}
|
||||
static void odls_bproc_child_destructor(odls_bproc_child_t *ptr)
|
||||
{
|
||||
if (NULL != ptr->name) free(ptr->name);
|
||||
}
|
||||
OBJ_CLASS_INSTANCE(odls_bproc_child_t,
|
||||
opal_list_item_t,
|
||||
odls_bproc_child_constructor,
|
||||
odls_bproc_child_destructor);
|
||||
|
||||
/**
|
||||
* The bproc component data structure used to store all the relevent data
|
||||
* about this component.
|
||||
*/
|
||||
orte_odls_bproc_component_t mca_odls_bproc_component = {
|
||||
{
|
||||
/* First, the mca_component_t struct containing meta information
|
||||
about the component itself */
|
||||
{
|
||||
/* Indicate that we are a odls v1.3.0 component (which also
|
||||
implies a specific MCA version) */
|
||||
ORTE_ODLS_BASE_VERSION_1_3_0,
|
||||
/* Component name and version */
|
||||
"bproc",
|
||||
ORTE_MAJOR_VERSION,
|
||||
ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION,
|
||||
/* Component open and close functions */
|
||||
orte_odls_bproc_component_open,
|
||||
orte_odls_bproc_component_close
|
||||
},
|
||||
/* Next the MCA v1.0.0 component meta data */
|
||||
{
|
||||
/* Whether the component is checkpointable or not */
|
||||
false
|
||||
},
|
||||
/* Initialization / querying functions */
|
||||
orte_odls_bproc_init,
|
||||
orte_odls_bproc_finalize
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Opens the pls_bproc component, setting all the needed mca parameters and
|
||||
* finishes setting up the component struct.
|
||||
*/
|
||||
int orte_odls_bproc_component_open(void)
|
||||
{
|
||||
/* initialize globals */
|
||||
OBJ_CONSTRUCT(&mca_odls_bproc_component.lock, opal_mutex_t);
|
||||
OBJ_CONSTRUCT(&mca_odls_bproc_component.cond, opal_condition_t);
|
||||
OBJ_CONSTRUCT(&mca_odls_bproc_component.children, opal_list_t);
|
||||
|
||||
/* lookup parameters */
|
||||
mca_base_param_reg_int(&mca_odls_bproc_component.super.version,
|
||||
"priority", NULL, false, false, 100,
|
||||
&mca_odls_bproc_component.priority);
|
||||
mca_base_param_reg_int(&mca_odls_bproc_component.super.version,
|
||||
"debug", "If > 0 prints library debugging information",
|
||||
false, false, 0, &mca_odls_bproc_component.debug);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the module. We do not want to run unless we are not the seed,
|
||||
* bproc is running, and we are not on the master node.
|
||||
*/
|
||||
orte_odls_base_module_t *orte_odls_bproc_init(int *priority)
|
||||
{
|
||||
int ret;
|
||||
struct bproc_version_t version;
|
||||
|
||||
/* the base open/select logic protects us against operation when
|
||||
* we are NOT in a daemon, so we don't have to check that here
|
||||
*/
|
||||
|
||||
/* check to see if BProc is running here */
|
||||
ret = bproc_version(&version);
|
||||
if (ret != 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
*priority = mca_odls_bproc_component.priority;
|
||||
return &orte_odls_bproc_module;
|
||||
}
|
||||
|
||||
/**
|
||||
* Component close function.
|
||||
*/
|
||||
int orte_odls_bproc_component_close(void)
|
||||
{
|
||||
OBJ_DESTRUCT(&mca_odls_bproc_component.lock);
|
||||
OBJ_DESTRUCT(&mca_odls_bproc_component.cond);
|
||||
OBJ_DESTRUCT(&mca_odls_bproc_component.children);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
0
orte/mca/plm/bproc/.ompi_ignore
Обычный файл
0
orte/mca/plm/bproc/.ompi_ignore
Обычный файл
1
orte/mca/plm/bproc/.ompi_unignore
Обычный файл
1
orte/mca/plm/bproc/.ompi_unignore
Обычный файл
@ -0,0 +1 @@
|
||||
rhc
|
51
orte/mca/plm/bproc/Makefile.am
Обычный файл
51
orte/mca/plm/bproc/Makefile.am
Обычный файл
@ -0,0 +1,51 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
dist_pkgdata_DATA = help-plm-bproc.txt

# Hand the preprocessor flags substituted by configure for this
# component to the compiler; this is empty when configure set none.
AM_CPPFLAGS = $(plm_bproc_CPPFLAGS)

# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).

if OMPI_BUILD_plm_bproc_DSO
component_noinst =
component_install = mca_plm_bproc.la
else
component_noinst = libmca_plm_bproc.la
component_install =
endif

sources = \
        plm_bproc.h \
        plm_bproc.c \
        plm_bproc_state.c \
        plm_bproc_component.c

mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_plm_bproc_la_SOURCES = $(sources)
mca_plm_bproc_la_LIBADD = \
        $(plm_bproc_LIBS) \
        $(top_ompi_builddir)/orte/libopen-rte.la \
        $(top_ompi_builddir)/opal/libopen-pal.la
mca_plm_bproc_la_LDFLAGS = -module -avoid-version $(plm_bproc_LDFLAGS)

noinst_LTLIBRARIES = $(component_noinst)
libmca_plm_bproc_la_SOURCES = $(sources)
libmca_plm_bproc_la_LIBADD = $(plm_bproc_LIBS)
libmca_plm_bproc_la_LDFLAGS = -module -avoid-version $(plm_bproc_LDFLAGS)
|
41
orte/mca/plm/bproc/configure.m4
Обычный файл
41
orte/mca/plm/bproc/configure.m4
Обычный файл
@ -0,0 +1,41 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_plm_bproc_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_plm_bproc_CONFIG],[
    # plm_bproc_good records the outcome of the check:
    # 2 for new bproc, 1 for old bproc, 0 when bproc was not found
    OMPI_CHECK_BPROC([plm_bproc], [plm_bproc_good=2],
                     [plm_bproc_good=1], [plm_bproc_good=0])

    # old bproc needs to be flagged for the C code
    AS_IF([test "$plm_bproc_good" = "1"],
          [AC_DEFINE_UNQUOTED([MCA_plm_bproc_scyld], [1],
                              [Defined if we are using Scyld bproc or pre 3.2.0 LANL bproc])])

    # if the check worked, set the wrapper flags and evaluate
    # the succeed action, otherwise the fail action
    AS_IF([test "$plm_bproc_good" != "0"],
          [plm_bproc_WRAPPER_EXTRA_LDFLAGS="$plm_bproc_LDFLAGS"
           plm_bproc_WRAPPER_EXTRA_LIBS="$plm_bproc_LIBS"
           $1],
          [$2])

    # set build flags to use in makefile
    AC_SUBST([plm_bproc_CPPFLAGS])
    AC_SUBST([plm_bproc_LDFLAGS])
    AC_SUBST([plm_bproc_LIBS])
])dnl
|
24
orte/mca/plm/bproc/configure.params
Обычный файл
24
orte/mca/plm/bproc/configure.params
Обычный файл
@ -0,0 +1,24 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Specific to this module
|
||||
|
||||
PARAM_CONFIG_FILES="Makefile"
|
107
orte/mca/plm/bproc/help-plm-bproc.txt
Обычный файл
107
orte/mca/plm/bproc/help-plm-bproc.txt
Обычный файл
@ -0,0 +1,107 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the US/English help file for Open RTE's bproc launcher component.
|
||||
#
|
||||
[bproc-vexecmove-launch]
|
||||
The bproc PLS component was not able to launch %s on node %d and therefore
|
||||
cannot continue. Errno was set to %d.
|
||||
[bproc-vexecmove-fork]
|
||||
The bproc PLS component was not able to fork and therefore cannot continue.
|
||||
Errno was set to %d.
|
||||
[no-orted]
|
||||
The bproc PLS component was not able to find the executable "%s" in the
|
||||
current directory, your PATH, or in the directory where Open MPI was
|
||||
initially installed, and therefore cannot continue.
|
||||
|
||||
For reference, we looked for
|
||||
|
||||
%s
|
||||
|
||||
Your current PATH is:
|
||||
|
||||
%s
|
||||
|
||||
We also looked for orted in the following directory:
|
||||
|
||||
%s
|
||||
|
||||
You may need to set your PATH properly, or set the MCA parameter
|
||||
pls_bproc_orted to be the path to "orted".
|
||||
[daemon-launch-number]
|
||||
The bproc PLS component was not able to launch all the daemons on the remote
|
||||
nodes and therefore cannot continue.
|
||||
|
||||
We attempted to launch %d daemons but only %d were actually launched.
|
||||
|
||||
For reference, we tried to launch %s
|
||||
[daemon-launch-bad-pid]
|
||||
The bproc PLS component was not able to launch all the daemons on the remote
|
||||
nodes and therefore cannot continue.
|
||||
|
||||
On node %d the daemon pid was %d and errno was set to %d.
|
||||
|
||||
For reference, we tried to launch %s
|
||||
[daemon-died-no-signal]
|
||||
A daemon (pid %d) launched by the bproc PLS component on node %d died
|
||||
unexpectedly so we are aborting.
|
||||
|
||||
This may be because the daemon was unable to find all the needed shared
|
||||
libraries on the remote node. You may set your LD_LIBRARY_PATH to have the
|
||||
location of the shared libraries on the remote nodes and this will
|
||||
automatically be forwarded to the remote nodes.
|
||||
[daemon-died-signal]
|
||||
A daemon (pid %d) launched by the bproc PLS component on node %d died
|
||||
unexpectedly on signal %d so we are aborting.
|
||||
|
||||
This may be because the daemon was unable to find all the needed shared
|
||||
libraries on the remote node. You may set your LD_LIBRARY_PATH to have the
|
||||
location of the shared libraries on the remote nodes and this will
|
||||
automatically be forwarded to the remote nodes.
|
||||
[proc-launch-number]
|
||||
The bproc PLS component was not able to launch all the processes on the remote
|
||||
nodes and therefore cannot continue.
|
||||
|
||||
We attempted to launch %d processes but only %d were actually launched.
|
||||
|
||||
For reference, we tried to launch %s
|
||||
[proc-launch-bad-pid]
|
||||
The bproc PLS component was not able to launch all the processes on the remote
|
||||
nodes and therefore cannot continue.
|
||||
|
||||
On node %d the process pid was %d and errno was set to %d.
|
||||
|
||||
For reference, we tried to launch %s
|
||||
|
||||
[mismatched-slots]
|
||||
The current bproc support requires that the number of available
|
||||
slots on each node be the same. Note that this does -not- mean
|
||||
that the number of processes you want to launch must be the same.
|
||||
It only requires that you have access to the same number of process
|
||||
slots on each node.
|
||||
|
||||
This is not something inherent to Open MPI, but rather a reported
|
||||
characteristic of Bproc. We are in the process of confirming that
|
||||
this requirement remains in effect. If we find that it has been
|
||||
removed, then we will revise the system to support varying
|
||||
numbers of slots on the allocated nodes.
|
||||
|
||||
In the meantime, please revise your hostfile or other allocation so they
|
||||
report the same number of process slots on each node. If you want
|
||||
to force a particular mapping of numbers of processes to each node,
|
||||
please use any of the other Open MPI mechanisms for doing so.
|
1430
orte/mca/plm/bproc/plm_bproc.c
Обычный файл
1430
orte/mca/plm/bproc/plm_bproc.c
Обычный файл
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
150
orte/mca/plm/bproc/plm_bproc.h
Обычный файл
150
orte/mca/plm/bproc/plm_bproc.h
Обычный файл
@ -0,0 +1,150 @@
|
||||
/* -*- C -*-
|
||||
*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*
|
||||
*/
|
||||
/**
|
||||
* @file:
|
||||
* Header file for the bproc launcher. This launcher is actually split into 2
|
||||
* modules: pls_bproc & pls_bproc_orted. The general idea behind this launcher is:
|
||||
* -# pls_bproc is called by orterun. It figures out the process mapping and
|
||||
* launches orted's on the nodes
|
||||
* -# pls_bproc_orted is called by orted. This module initializes either a pty or
|
||||
* pipes, places symlinks to them in well-known points of the filesystem, and
|
||||
* sets up the io forwarding. It then sends an ack back to orterun.
|
||||
* -# pls_bproc waits for an ack to come back from the orteds, then does several
|
||||
* parallel launches of the application processes. The number of launches is
|
||||
* equal to the maximum number of processes on a node. For example, if there
|
||||
* were 2 processes assigned to node 1, and 1 process assigned to node 2, we
|
||||
* would do a parallel launch that launches one process on each node, then
|
||||
* another which launches another process on node 1.
|
||||
*/
|
||||
|
||||
#ifndef ORTE_PLS_BPROC_H_
|
||||
#define ORTE_PLS_BPROC_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/orte_constants.h"
|
||||
|
||||
#include <sys/bproc.h>
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
|
||||
#include "opal/threads/condition.h"
|
||||
|
||||
#include "orte/class/orte_pointer_array.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
|
||||
#include "orte/mca/pls/base/base.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Module open / close
|
||||
*/
|
||||
int orte_pls_bproc_component_open(void);
|
||||
int orte_pls_bproc_component_close(void);
|
||||
|
||||
/*
|
||||
* Startup / Shutdown
|
||||
*/
|
||||
orte_pls_base_module_t* orte_pls_bproc_init(int *priority);
|
||||
int orte_pls_bproc_finalize(void);
|
||||
|
||||
/*
|
||||
* Interface
|
||||
*/
|
||||
int orte_pls_bproc_launch(orte_jobid_t);
|
||||
int orte_pls_bproc_terminate_job(orte_jobid_t, struct timeval *timeout, opal_list_t*);
|
||||
int orte_pls_bproc_terminate_proc(const orte_process_name_t* proc_name);
|
||||
int orte_pls_bproc_terminate_orteds(orte_jobid_t jobid, struct timeval *timeout, opal_list_t*);
|
||||
int orte_pls_bproc_signal_job(orte_jobid_t, int32_t, opal_list_t*);
|
||||
int orte_pls_bproc_signal_proc(const orte_process_name_t* proc_name, int32_t);
|
||||
int orte_pls_bproc_cancel_operation(void);
|
||||
|
||||
/* Utility routine to get/set process pid */
|
||||
ORTE_DECLSPEC int orte_pls_bproc_set_proc_pid(const orte_process_name_t*, pid_t, int);
|
||||
ORTE_DECLSPEC int orte_pls_bproc_get_proc_pid(const orte_process_name_t*, pid_t*);
|
||||
/**
|
||||
* Utility routine to retrieve all process pids within a specified job.
|
||||
*/
|
||||
ORTE_DECLSPEC int orte_pls_bproc_get_proc_pids(orte_jobid_t jobid, pid_t** pids,
|
||||
orte_std_cntr_t* num_pids,
|
||||
opal_list_t *attrs);
|
||||
|
||||
/**
|
||||
* Utility routine to get/set daemon pid
|
||||
*/
|
||||
ORTE_DECLSPEC int orte_pls_bproc_set_node_pid(orte_cellid_t cellid, char* node_name, orte_jobid_t jobid, pid_t pid);
|
||||
ORTE_DECLSPEC int orte_pls_bproc_get_node_pids(orte_jobid_t jobid, pid_t** pids, orte_std_cntr_t* num_pids);
|
||||
|
||||
/* utility functions for abort communications */
|
||||
int orte_pls_bproc_comm_start(void);
|
||||
int orte_pls_bproc_comm_stop(void);
|
||||
void orte_pls_bproc_recv(int status, orte_process_name_t* sender,
|
||||
orte_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata);
|
||||
|
||||
/**
|
||||
* PLS bproc Component
|
||||
*/
|
||||
struct orte_pls_bproc_component_t {
|
||||
orte_pls_base_component_t super;
|
||||
/**< The base class */
|
||||
char * orted;
|
||||
/**< The orted executable. This can be an absolute path, or if not found
|
||||
* we will look for it in the user's path */
|
||||
int debug;
|
||||
/**< If greater than 0 print debugging information */
|
||||
int priority;
|
||||
/**< The priority of this component. This will be returned if we determine
|
||||
* that bproc is available and running on this node, */
|
||||
int terminate_sig;
|
||||
/**< The signal that gets sent to a process to kill it. */
|
||||
opal_mutex_t lock;
|
||||
/**< Lock used to prevent some race conditions */
|
||||
opal_condition_t condition;
|
||||
/**< Condition that is signaled when all the daemons have died */
|
||||
bool recv_issued;
|
||||
/**< Indicates that the comm recv for reporting abnormal proc termination
|
||||
* has been issued
|
||||
*/
|
||||
bool do_not_launch;
|
||||
/**< for test purposes, do everything but the actual launch */
|
||||
orte_std_cntr_t num_daemons;
|
||||
/**< track the number of daemons being launched so we can tell when
|
||||
* all have reported in */
|
||||
};
|
||||
/**
|
||||
* Convenience typedef
|
||||
*/
|
||||
typedef struct orte_pls_bproc_component_t orte_pls_bproc_component_t;
|
||||
|
||||
ORTE_DECLSPEC orte_pls_bproc_component_t mca_pls_bproc_component;
|
||||
ORTE_DECLSPEC orte_pls_base_module_t orte_pls_bproc_module;
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
#endif /* ORTE_PLS_BPROC_H_ */
|
||||
|
123
orte/mca/plm/bproc/plm_bproc_component.c
Обычный файл
123
orte/mca/plm/bproc/plm_bproc_component.c
Обычный файл
@ -0,0 +1,123 @@
|
||||
/* -*- C -*-
|
||||
*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
/**
|
||||
* @file:
|
||||
* Takes care of the component stuff for the MCA.
|
||||
*/
|
||||
#include "orte_config.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "pls_bproc.h"
|
||||
|
||||
/**
|
||||
* The bproc component data structure used to store all the relevent data about
|
||||
* this component.
|
||||
*/
|
||||
orte_pls_bproc_component_t mca_pls_bproc_component = {
|
||||
{
|
||||
{
|
||||
ORTE_PLS_BASE_VERSION_1_3_0,
|
||||
"bproc", /* MCA component name */
|
||||
ORTE_MAJOR_VERSION, /* MCA component major version */
|
||||
ORTE_MINOR_VERSION, /* MCA component minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA component release version */
|
||||
orte_pls_bproc_component_open, /* component open */
|
||||
orte_pls_bproc_component_close /* component close */
|
||||
},
|
||||
{
|
||||
false /* checkpoint / restart */
|
||||
},
|
||||
orte_pls_bproc_init /* component init */
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Opens the pls_bproc component, setting all the needed mca parameters and
|
||||
* finishes setting up the component struct.
|
||||
*/
|
||||
int orte_pls_bproc_component_open(void) {
|
||||
int rc;
|
||||
|
||||
/* init parameters */
|
||||
mca_base_component_t *c = &mca_pls_bproc_component.super.pls_version;
|
||||
mca_base_param_reg_int(c, "priority", NULL, false, false, 100,
|
||||
&mca_pls_bproc_component.priority);
|
||||
mca_base_param_reg_int(c, "debug",
|
||||
"If > 0 prints library debugging information",
|
||||
false, false, 0, &mca_pls_bproc_component.debug);
|
||||
mca_base_param_reg_int(c, "terminate_sig",
|
||||
"Signal sent to processes to terminate them", false,
|
||||
false, 9, &mca_pls_bproc_component.terminate_sig);
|
||||
mca_base_param_reg_string(c, "orted", "Path to where orted is installed",
|
||||
false, false, "orted", &mca_pls_bproc_component.orted);
|
||||
mca_base_param_reg_int(c, "nolaunch", NULL, false, false, (int)false,
|
||||
&rc);
|
||||
if ((int)false == rc) {
|
||||
mca_pls_bproc_component.do_not_launch = false;
|
||||
} else {
|
||||
mca_pls_bproc_component.do_not_launch = true;
|
||||
}
|
||||
|
||||
mca_pls_bproc_component.recv_issued = false;
|
||||
OBJ_CONSTRUCT(&mca_pls_bproc_component.lock, opal_mutex_t);
|
||||
OBJ_CONSTRUCT(&mca_pls_bproc_component.condition, opal_condition_t);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the pls_bproc component
|
||||
*/
|
||||
int orte_pls_bproc_component_close(void) {
|
||||
OBJ_DESTRUCT(&mca_pls_bproc_component.lock);
|
||||
OBJ_DESTRUCT(&mca_pls_bproc_component.condition);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the module. We do not want to run unless we are the seed, bproc
|
||||
* is running, and we are the master node.
|
||||
*/
|
||||
orte_pls_base_module_t* orte_pls_bproc_init(int *priority) {
|
||||
int ret;
|
||||
struct bproc_version_t version;
|
||||
|
||||
/* are we the seed */
|
||||
if(orte_process_info.seed == false)
|
||||
return NULL;
|
||||
|
||||
/* okay, we are in an HNP - now check to see if BProc is running here */
|
||||
if (!mca_pls_bproc_component.do_not_launch) {
|
||||
ret = bproc_version(&version);
|
||||
if (ret != 0) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* only launch from the master node */
|
||||
if (bproc_currnode() != BPROC_NODE_MASTER) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
*priority = mca_pls_bproc_component.priority;
|
||||
return &orte_pls_bproc_module;
|
||||
}
|
||||
|
402
orte/mca/plm/bproc/plm_bproc_state.c
Обычный файл
402
orte/mca/plm/bproc/plm_bproc_state.c
Обычный файл
@ -0,0 +1,402 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/orte_constants.h"
|
||||
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
|
||||
#include "orte/dss/dss.h"
|
||||
#include "orte/mca/ns/ns.h"
|
||||
#include "orte/mca/gpr/gpr.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/smr/smr.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/schema/schema.h"
|
||||
|
||||
#include "orte/mca/pls/bproc/pls_bproc.h"
|
||||
|
||||
/**
|
||||
* Set the process pid in the job segment and indicate the state
|
||||
* as being launched.
|
||||
*/
|
||||
|
||||
int orte_pls_bproc_set_proc_pid(const orte_process_name_t *name, pid_t pid, int nodenum)
|
||||
{
|
||||
orte_gpr_value_t *values[1];
|
||||
char *segment;
|
||||
char *nodename;
|
||||
int rc;
|
||||
|
||||
if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, name->jobid))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&values[0],
|
||||
ORTE_GPR_OVERWRITE,
|
||||
segment,
|
||||
2, 0))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
free(segment);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
free(segment);
|
||||
|
||||
if(ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&(values[0]->tokens), &(values[0]->num_tokens), (orte_process_name_t*)name))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(values[0]);
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[0]->keyvals[0]), ORTE_PROC_LOCAL_PID_KEY, ORTE_PID, &pid))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(values[0]);
|
||||
return rc;
|
||||
}
|
||||
|
||||
asprintf(&nodename, "%ld", (long)nodenum);
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[0]->keyvals[1]), ORTE_NODE_NAME_KEY, ORTE_STRING, nodename))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(values[0]);
|
||||
free(nodename);
|
||||
return rc;
|
||||
}
|
||||
free(nodename);
|
||||
|
||||
rc = orte_gpr.put(1, values);
|
||||
if(ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(values[0]);
|
||||
return rc;
|
||||
}
|
||||
|
||||
OBJ_RELEASE(values[0]);
|
||||
|
||||
/* set the process state to LAUNCHED */
|
||||
if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state((orte_process_name_t*)name, ORTE_PROC_STATE_LAUNCHED, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retreive a specified process pid from the registry.
|
||||
*/
|
||||
int orte_pls_bproc_get_proc_pid(const orte_process_name_t* name, pid_t* pid)
|
||||
{
|
||||
char *segment;
|
||||
char **tokens;
|
||||
orte_std_cntr_t num_tokens;
|
||||
char *keys[2];
|
||||
orte_gpr_value_t** values = NULL;
|
||||
orte_std_cntr_t i, num_values = 0;
|
||||
pid_t *pptr;
|
||||
int rc;
|
||||
|
||||
/* query the job segment on the registry */
|
||||
if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, name->jobid))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
if(ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&tokens, &num_tokens, (orte_process_name_t*)name))) {
|
||||
free(segment);
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
keys[0] = ORTE_PROC_LOCAL_PID_KEY;
|
||||
keys[1] = NULL;
|
||||
|
||||
rc = orte_gpr.get(
|
||||
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
|
||||
segment,
|
||||
tokens,
|
||||
keys,
|
||||
&num_values,
|
||||
&values
|
||||
);
|
||||
if(rc != ORTE_SUCCESS) {
|
||||
free(segment);
|
||||
return rc;
|
||||
}
|
||||
|
||||
if(0 == num_values) {
|
||||
rc = ORTE_ERR_NOT_FOUND;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
if(1 != num_values || values[0]->cnt != 1) {
|
||||
rc = ORTE_ERR_NOT_FOUND;
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, values[0]->keyvals[0]->value, ORTE_PID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
*pid = *pptr;
|
||||
|
||||
cleanup:
|
||||
if(NULL != values) {
|
||||
for(i=0; i<num_values; i++) {
|
||||
if(NULL != values[i]) {
|
||||
OBJ_RELEASE(values[i]);
|
||||
}
|
||||
}
|
||||
if (NULL != values) free(values);
|
||||
}
|
||||
free(segment);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve all process pids for the specified job.
|
||||
*/
|
||||
int orte_pls_bproc_get_proc_pids(orte_jobid_t jobid, pid_t **pids, orte_std_cntr_t* num_pids, opal_list_t *attrs)
|
||||
{
|
||||
char *segment;
|
||||
char *keys[2];
|
||||
orte_gpr_value_t** values = NULL;
|
||||
orte_std_cntr_t i, num_values = 0;
|
||||
pid_t *pptr;
|
||||
int rc;
|
||||
|
||||
/* query the job segment on the registry */
|
||||
if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
keys[0] = ORTE_PROC_PID_KEY;
|
||||
keys[1] = NULL;
|
||||
|
||||
rc = orte_gpr.get(
|
||||
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
|
||||
segment,
|
||||
NULL,
|
||||
keys,
|
||||
&num_values,
|
||||
&values
|
||||
);
|
||||
if(rc != ORTE_SUCCESS) {
|
||||
free(segment);
|
||||
return rc;
|
||||
}
|
||||
|
||||
if(0 == num_values) {
|
||||
*pids = NULL;
|
||||
} else {
|
||||
*pids = (pid_t*)malloc(sizeof(pid_t)*num_values);
|
||||
for(i=0; i<num_values; i++) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, values[i]->keyvals[0]->value, ORTE_PID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
(*pids)[i] = *pptr;
|
||||
}
|
||||
}
|
||||
*num_pids = num_values;
|
||||
|
||||
cleanup:
|
||||
if(NULL != values) {
|
||||
for(i=0; i<num_values; i++) {
|
||||
if(NULL != values[i]) {
|
||||
OBJ_RELEASE(values[i]);
|
||||
}
|
||||
}
|
||||
if (NULL != values) free(values);
|
||||
}
|
||||
free(segment);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Add a key-value to the node segment containing the process pid for
|
||||
* the daemons.
|
||||
*/
|
||||
|
||||
int orte_pls_bproc_set_node_pid(orte_cellid_t cellid, char* node_name, orte_jobid_t jobid, pid_t pid)
|
||||
{
|
||||
orte_gpr_value_t *values[1];
|
||||
char *jobid_string, *key;
|
||||
int rc;
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&values[0],
|
||||
ORTE_GPR_OVERWRITE,
|
||||
ORTE_NODE_SEGMENT,
|
||||
1, 0))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_schema.get_node_tokens(&(values[0]->tokens), &(values[0]->num_tokens), cellid, node_name))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(values[0]);
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, jobid))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(values[0]);
|
||||
return rc;
|
||||
}
|
||||
|
||||
asprintf(&key, "%s-%s", ORTE_PROC_PID_KEY, jobid_string);
|
||||
free(jobid_string);
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[0]->keyvals[0]), key, ORTE_PID, &pid))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(key);
|
||||
OBJ_RELEASE(values[0]);
|
||||
return rc;
|
||||
}
|
||||
free(key);
|
||||
|
||||
rc = orte_gpr.put(1, values);
|
||||
if(ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
OBJ_RELEASE(values[0]);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Retrieve all daemon pids for the specified job.
|
||||
*/
|
||||
int orte_pls_bproc_get_node_pids(orte_jobid_t jobid, pid_t **pids, orte_std_cntr_t* num_pids)
|
||||
{
|
||||
char *keys[2];
|
||||
orte_gpr_value_t** values = NULL;
|
||||
orte_std_cntr_t i, num_values = 0;
|
||||
int rc;
|
||||
char *jobid_string;
|
||||
pid_t *pptr;
|
||||
|
||||
if(ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, jobid)))
|
||||
goto cleanup;
|
||||
|
||||
asprintf(&keys[0], "%s-%s", ORTE_PROC_PID_KEY, jobid_string);
|
||||
free(jobid_string);
|
||||
keys[1] = NULL;
|
||||
|
||||
rc = orte_gpr.get(
|
||||
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
|
||||
ORTE_NODE_SEGMENT,
|
||||
NULL,
|
||||
keys,
|
||||
&num_values,
|
||||
&values
|
||||
);
|
||||
if(rc != ORTE_SUCCESS) {
|
||||
free(keys[0]);
|
||||
return rc;
|
||||
}
|
||||
|
||||
if(0 == num_values) {
|
||||
*pids = NULL;
|
||||
} else {
|
||||
*pids = (pid_t*)malloc(sizeof(pid_t)*num_values);
|
||||
for(i=0; i<num_values; i++) {
|
||||
if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pptr, values[i]->keyvals[0]->value, ORTE_PID))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
*(pids[i]) = *pptr;
|
||||
}
|
||||
}
|
||||
*num_pids = num_values;
|
||||
|
||||
cleanup:
|
||||
if(NULL != values) {
|
||||
for(i=0; i<num_values; i++)
|
||||
OBJ_RELEASE(values[i]);
|
||||
if (NULL != values) free(values);
|
||||
}
|
||||
free(keys[0]);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* FUNCTIONS FOR DEALING WITH ABNORMAL TERMINATION OF BPROC
|
||||
* APPLICATION PROCESSES
|
||||
*/
|
||||
int orte_pls_bproc_comm_start(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (mca_pls_bproc_component.recv_issued) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
|
||||
ORTE_RML_TAG_BPROC_ABORT,
|
||||
ORTE_RML_PERSISTENT,
|
||||
orte_pls_bproc_recv,
|
||||
NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
mca_pls_bproc_component.recv_issued = true;
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
int orte_pls_bproc_comm_stop(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (!mca_pls_bproc_component.recv_issued) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_BPROC_ABORT))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
mca_pls_bproc_component.recv_issued = false;
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* handle message from proxies
|
||||
* NOTE: The incoming buffer "buffer" is OBJ_RELEASED by the calling program.
|
||||
* DO NOT RELEASE THIS BUFFER IN THIS CODE
|
||||
*/
|
||||
|
||||
void orte_pls_bproc_recv(int status, orte_process_name_t* sender,
|
||||
orte_buffer_t* buffer, orte_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
int rc;
|
||||
|
||||
/* we don't care what was in the buffer - just set the state of the sender to ABORTED */
|
||||
if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state(sender, ORTE_PROC_STATE_ABORTED, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
}
|
||||
|
460
orte/mca/plm/bproc/smr_bproc.c
Обычный файл
460
orte/mca/plm/bproc/smr_bproc.c
Обычный файл
@ -0,0 +1,460 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <pwd.h>
|
||||
#include <grp.h>
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "orte/orte_constants.h"
|
||||
#include "orte/orte_types.h"
|
||||
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/mca/ns/ns.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/gpr/base/base.h"
|
||||
#include "orte/mca/schema/schema_types.h"
|
||||
|
||||
#include "orte/mca/smr/base/smr_private.h"
|
||||
#include "orte/mca/smr/bproc/smr_bproc.h"
|
||||
|
||||
/*
 * Bit-set helpers.  bit_set is the unsigned type declared further down in
 * this file; the BIT_NODE_* constants are bit positions within it.
 *
 * BIT_MASK shifts a value that is already of type bit_set: shifting a plain
 * (signed) 1 into the top bit is undefined behaviour, and num_bits() probes
 * every bit position up to the top one.
 */
#define BIT_MASK(bit)           ((bit_set)((bit_set)1 << (bit)))
#define EMPTY_SET               ((bit_set)0)

#define BIT_NODE_NAME           0
#define BIT_NODE_STATE          1
#define BIT_NODE_BPROC_STATUS   2
#define BIT_NODE_BPROC_MODE     3
#define BIT_NODE_BPROC_USER     4
#define BIT_NODE_BPROC_GROUP    5

/* every BIT_NODE_* bit set */
#define BIT_SET_ALL ( BIT_MASK(BIT_NODE_NAME)           \
                    | BIT_MASK(BIT_NODE_STATE)          \
                    | BIT_MASK(BIT_NODE_BPROC_STATUS)   \
                    | BIT_MASK(BIT_NODE_BPROC_MODE)     \
                    | BIT_MASK(BIT_NODE_BPROC_USER)     \
                    | BIT_MASK(BIT_NODE_BPROC_GROUP))
|
||||
|
||||
/* define some local variables/types */
|
||||
typedef unsigned int bit_set;
|
||||
static opal_list_t active_node_list;
|
||||
static bool initialized=false;
|
||||
|
||||
/* Turn on bit number "bit" in *set; all other bits are left as they are. */
static inline void set_bit(bit_set *set, int bit)
{
    *set = *set | BIT_MASK(bit);
}
|
||||
|
||||
/* Return 1 when bit number "bit" is on in "set", 0 otherwise. */
static inline int is_set(bit_set set, int bit)
{
    const bit_set mask = BIT_MASK(bit);

    return mask == (set & mask);
}
|
||||
|
||||
/*
 * Return the number of bits that are on in "set".
 *
 * Each pass clears the lowest bit that is still on, so the loop runs once
 * per set bit.  This avoids building a mask for every bit position (the top
 * position overflowed a signed shift) and does not assume 8-bit bytes.
 */
static inline int num_bits(bit_set set)
{
    int cnt = 0;

    while (EMPTY_SET != set) {
        set &= set - 1;
        cnt++;
    }

    return cnt;
}
|
||||
|
||||
/* Return 1 when no bit is on in "set", 0 otherwise. */
static inline int empty_set(bit_set set)
{
    return EMPTY_SET == set;
}
|
||||
|
||||
|
||||
/**
|
||||
* Query the bproc node status
|
||||
*/
|
||||
|
||||
static int orte_smr_bproc_node_state(char *status)
|
||||
{
|
||||
if (strcmp(status, "up") == 0)
|
||||
return ORTE_NODE_STATE_UP;
|
||||
if (strcmp(status, "down") == 0)
|
||||
return ORTE_NODE_STATE_DOWN;
|
||||
if (strcmp(status, "boot") == 0)
|
||||
return ORTE_NODE_STATE_REBOOT;
|
||||
return ORTE_NODE_STATE_UNKNOWN;
|
||||
}
|
||||
|
||||
static bit_set find_changes(struct bproc_node_info_t *old, struct bproc_node_info_t *new)
|
||||
{
|
||||
bit_set changes = EMPTY_SET;
|
||||
|
||||
if (orte_smr_bproc_node_state(old->status)
|
||||
!= orte_smr_bproc_node_state(new->status))
|
||||
set_bit(&changes, BIT_NODE_STATE);
|
||||
|
||||
if (strcmp(old->status, new->status) != 0)
|
||||
set_bit(&changes, BIT_NODE_BPROC_STATUS);
|
||||
|
||||
if (old->mode != new->mode)
|
||||
set_bit(&changes, BIT_NODE_BPROC_MODE);
|
||||
|
||||
if (old->group != new->group)
|
||||
set_bit(&changes, BIT_NODE_BPROC_GROUP);
|
||||
|
||||
if (old->user != new->user)
|
||||
set_bit(&changes, BIT_NODE_BPROC_USER);
|
||||
|
||||
if (old->node != new->node)
|
||||
set_bit(&changes, BIT_NODE_NAME);
|
||||
|
||||
return changes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a BProc update notice
|
||||
*/
|
||||
|
||||
static void update_registry(bit_set changes, struct bproc_node_info_t *ni)
|
||||
{
|
||||
int idx;
|
||||
int ret;
|
||||
int cnt;
|
||||
orte_node_state_t state;
|
||||
char *node_name;
|
||||
char *user;
|
||||
char *group;
|
||||
struct passwd *pwd;
|
||||
struct group *grp;
|
||||
orte_gpr_value_t *value;
|
||||
int rc;
|
||||
orte_smr_node_state_tracker_t *node;
|
||||
opal_list_item_t *item;
|
||||
|
||||
cnt = num_bits(changes);
|
||||
|
||||
/*
|
||||
* Check if there's anything to do
|
||||
*/
|
||||
if (cnt == 0)
|
||||
return;
|
||||
|
||||
/* check and update the general cluster status segment - this segment has entries
|
||||
* for every node in the cluster, not just the ones we want to monitor
|
||||
*/
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_AND,
|
||||
ORTE_BPROC_NODE_SEGMENT, cnt, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
|
||||
idx = 0;
|
||||
|
||||
if (is_set(changes, BIT_NODE_STATE)) {
|
||||
state = orte_smr_bproc_node_state(ni->status);
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[idx]), ORTE_NODE_STATE_KEY, ORTE_NODE_STATE, &state))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(value);
|
||||
return;
|
||||
}
|
||||
idx++;
|
||||
}
|
||||
|
||||
if (is_set(changes, BIT_NODE_BPROC_STATUS)) {
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[idx]), ORTE_SMR_BPROC_NODE_STATUS, ORTE_STRING, ni->status))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(value);
|
||||
return;
|
||||
}
|
||||
idx++;
|
||||
}
|
||||
|
||||
if (is_set(changes, BIT_NODE_BPROC_MODE)) {
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[idx]), ORTE_SMR_BPROC_NODE_MODE, ORTE_UINT32, &(ni->mode)))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(value);
|
||||
return;
|
||||
}
|
||||
idx++;
|
||||
}
|
||||
|
||||
if (is_set(changes, BIT_NODE_BPROC_USER)) {
|
||||
if ((pwd = getpwuid(ni->user)))
|
||||
user = strdup(pwd->pw_name);
|
||||
else
|
||||
asprintf(&user, "%d\n", ni->user);
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[idx]), ORTE_SMR_BPROC_NODE_USER, ORTE_STRING, user))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(value);
|
||||
free(user);
|
||||
return;
|
||||
}
|
||||
free(user);
|
||||
idx++;
|
||||
}
|
||||
|
||||
if (is_set(changes, BIT_NODE_BPROC_GROUP)) {
|
||||
if ((grp = getgrgid(ni->group)))
|
||||
group = strdup(grp->gr_name);
|
||||
else
|
||||
asprintf(&group, "%d\n", ni->group);
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[idx]), ORTE_SMR_BPROC_NODE_GROUP, ORTE_STRING, group))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(value);
|
||||
free(group);
|
||||
return;
|
||||
}
|
||||
free(group);
|
||||
idx++;
|
||||
}
|
||||
|
||||
asprintf(&node_name, "%d", ni->node);
|
||||
|
||||
if (is_set(changes, BIT_NODE_NAME)) {
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[idx]), ORTE_NODE_NAME_KEY, ORTE_STRING, node_name))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(value);
|
||||
free(node_name);
|
||||
return;
|
||||
}
|
||||
idx++;
|
||||
}
|
||||
|
||||
if (idx != cnt) {
|
||||
opal_output(0, "smr_bproc: internal error %d != %d\n", idx, cnt);
|
||||
free(node_name);
|
||||
OBJ_RELEASE(value);
|
||||
opal_event_del(&mca_smr_bproc_component.notify_event);
|
||||
return;
|
||||
}
|
||||
|
||||
ret = orte_schema.get_node_tokens(&(value->tokens), &(value->num_tokens),
|
||||
ORTE_PROC_MY_NAME->cellid, node_name);
|
||||
|
||||
if (ret != ORTE_SUCCESS) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_RELEASE(value);
|
||||
free(node_name);
|
||||
opal_event_del(&mca_smr_bproc_component.notify_event);
|
||||
return;
|
||||
}
|
||||
|
||||
if (mca_smr_bproc_component.debug)
|
||||
opal_output(0, "updating node %d to segment %s\n", ni->node, value->segment);
|
||||
|
||||
if ((ret = orte_gpr.put(1, &value)) != ORTE_SUCCESS) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
opal_event_del(&mca_smr_bproc_component.notify_event);
|
||||
}
|
||||
OBJ_RELEASE(value);
|
||||
|
||||
/* now let's see if this is one of the nodes we are monitoring and
|
||||
* update it IFF it the state changed to specified conditions. This
|
||||
* action will trigger a callback to the right place to decide what
|
||||
* to do about it
|
||||
*/
|
||||
if (mca_smr_bproc_component.monitoring &&
|
||||
is_set(changes, BIT_NODE_STATE)) {
|
||||
/* see if this is a node we are monitoring */
|
||||
for (item = opal_list_get_first(&active_node_list);
|
||||
item != opal_list_get_end(&active_node_list);
|
||||
item = opal_list_get_next(item)) {
|
||||
node = (orte_smr_node_state_tracker_t*)item;
|
||||
if (0 == strcmp(node->nodename, node_name)) {
|
||||
/* This is a node we are monitoring. If this is a state we care about,
|
||||
* and the state has changed (so we only do this once) - trip the alert monitor
|
||||
*/
|
||||
if (state != node->state &&
|
||||
(state == ORTE_NODE_STATE_DOWN || state == ORTE_NODE_STATE_REBOOT)) {
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_AND,
|
||||
ORTE_BPROC_NODE_SEGMENT, 1, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return;
|
||||
}
|
||||
value->tokens[0] = strdup(ORTE_BPROC_NODE_GLOBALS);
|
||||
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]),
|
||||
ORTE_BPROC_NODE_ALERT_CNTR,
|
||||
ORTE_UNDEF, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(value);
|
||||
return;
|
||||
}
|
||||
if ((rc = orte_gpr.increment_value(value)) != ORTE_SUCCESS) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
opal_event_del(&mca_smr_bproc_component.notify_event);
|
||||
}
|
||||
OBJ_RELEASE(value);
|
||||
}
|
||||
/* update our local records */
|
||||
node->state = state;
|
||||
/* cleanup and return - no need to keep searching */
|
||||
free(node_name);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* if this isn't someone we are monitoring, or it doesn't meet specified conditions,
|
||||
* then just cleanup and leave
|
||||
*/
|
||||
free(node_name);
|
||||
}
|
||||
|
||||
|
||||
static int do_update(struct bproc_node_set_t *ns)
|
||||
{
|
||||
int i;
|
||||
int changed = 0;
|
||||
bit_set changes;
|
||||
struct bproc_node_info_t *ni;
|
||||
|
||||
/* we assume the number of nodes does not change */
|
||||
for (i = 0; i < ns->size; i++) {
|
||||
ni = &ns->node[i];
|
||||
|
||||
if (mca_smr_bproc_component.node_set.size > 0
|
||||
&& mca_smr_bproc_component.node_set.size == ns->size)
|
||||
changes = find_changes(&mca_smr_bproc_component.node_set.node[i], ni);
|
||||
else
|
||||
changes = BIT_SET_ALL;
|
||||
|
||||
if (!empty_set(changes)) {
|
||||
update_registry(changes, ni);
|
||||
changed = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (changed) {
|
||||
if (mca_smr_bproc_component.node_set.size != 0)
|
||||
bproc_nodeset_free(&mca_smr_bproc_component.node_set);
|
||||
mca_smr_bproc_component.node_set = *ns;
|
||||
}
|
||||
|
||||
return changed;
|
||||
}
|
||||
|
||||
static void orte_smr_bproc_notify_handler(int fd, short flags, void *user)
|
||||
{
|
||||
struct bproc_node_set_t ns = BPROC_EMPTY_NODESET;
|
||||
|
||||
if (bproc_nodelist_(&ns, fd) < 0) {
|
||||
/* bproc_nodelist_ error */
|
||||
opal_event_del(&mca_smr_bproc_component.notify_event);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!do_update(&ns))
|
||||
bproc_nodeset_free(&ns);
|
||||
}
|
||||
|
||||
/**
|
||||
* Register a callback to receive BProc update notifications
|
||||
*/
|
||||
static int orte_smr_bproc_module_init(void)
|
||||
{
|
||||
if (mca_smr_bproc_component.debug)
|
||||
opal_output(0, "init smr_bproc_module\n");
|
||||
|
||||
mca_smr_bproc_component.node_set.size = 0;
|
||||
|
||||
/* construct the monitored node list so we can track who is being monitored */
|
||||
OBJ_CONSTRUCT(&active_node_list, opal_list_t);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup to begin monitoring a job
|
||||
*/
|
||||
int orte_smr_bproc_begin_monitoring(orte_job_map_t *map, orte_gpr_trigger_cb_fn_t cbfunc, void *user_tag)
|
||||
{
|
||||
struct bproc_node_set_t ns = BPROC_EMPTY_NODESET;
|
||||
opal_list_item_t *item;
|
||||
orte_mapped_node_t *node;
|
||||
orte_smr_node_state_tracker_t *newnode;
|
||||
|
||||
/* if our internal structures haven't been initialized, then
|
||||
* set them up
|
||||
*/
|
||||
if (!initialized) {
|
||||
orte_smr_bproc_module_init();
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
/* setup the local monitoring list */
|
||||
for (item = opal_list_get_first(&map->nodes);
|
||||
item != opal_list_get_end(&map->nodes);
|
||||
item = opal_list_get_next(item)) {
|
||||
node = (orte_mapped_node_t*)item;
|
||||
|
||||
newnode = OBJ_NEW(orte_smr_node_state_tracker_t);
|
||||
newnode->cell = node->cell;
|
||||
newnode->nodename = strdup(node->nodename);
|
||||
opal_list_append(&active_node_list, &newnode->super);
|
||||
}
|
||||
|
||||
/* define the alert monitor to call the cbfunc if we trigger the alert */
|
||||
orte_smr.define_alert_monitor(map->job, ORTE_BPROC_NODE_ALERT_TRIG,
|
||||
ORTE_BPROC_NODE_ALERT_CNTR,
|
||||
0, 1, true, cbfunc, user_tag);
|
||||
|
||||
/*
|
||||
* Set initial node status for all nodes in the local cell. We will
|
||||
* receive reports from them all, but we will only provide alerts
|
||||
* on those we are actively monitoring
|
||||
*/
|
||||
|
||||
if (bproc_nodelist(&ns) < 0)
|
||||
return ORTE_ERROR;
|
||||
|
||||
if (!do_update(&ns))
|
||||
bproc_nodeset_free(&ns);
|
||||
|
||||
/*
|
||||
* Now register notify event
|
||||
*/
|
||||
|
||||
mca_smr_bproc_component.notify_fd = bproc_notifier();
|
||||
if (mca_smr_bproc_component.notify_fd < 0)
|
||||
return ORTE_ERROR;
|
||||
|
||||
memset(&mca_smr_bproc_component.notify_event, 0, sizeof(opal_event_t));
|
||||
|
||||
opal_event_set(
|
||||
&mca_smr_bproc_component.notify_event,
|
||||
mca_smr_bproc_component.notify_fd,
|
||||
OPAL_EV_READ|OPAL_EV_PERSIST,
|
||||
orte_smr_bproc_notify_handler,
|
||||
0);
|
||||
|
||||
opal_event_add(&mca_smr_bproc_component.notify_event, 0);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
/**
|
||||
* Cleanup
|
||||
*/
|
||||
|
||||
int orte_smr_bproc_finalize(void)
|
||||
{
|
||||
opal_event_del(&mca_smr_bproc_component.notify_event);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
134
orte/mca/plm/bproc/smr_bproc_component.c
Обычный файл
134
orte/mca/plm/bproc/smr_bproc_component.c
Обычный файл
@ -0,0 +1,134 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/orte_constants.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "opal/util/output.h"
|
||||
|
||||
#include "orte/mca/smr/base/smr_private.h"
|
||||
#include "orte/mca/smr/bproc/smr_bproc.h"
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
|
||||
static int orte_smr_bproc_open(void);
|
||||
static int orte_smr_bproc_close(void);
|
||||
static orte_smr_base_module_t* orte_smr_bproc_init(int *priority);
|
||||
|
||||
orte_smr_bproc_component_t mca_smr_bproc_component = {
|
||||
{
|
||||
/* First, the mca_base_module_t struct containing meta
|
||||
information about the module itself */
|
||||
{
|
||||
/* Indicate that we are a bproc smr v1.3.0 module (which also
|
||||
implies a specific MCA version) */
|
||||
|
||||
ORTE_SMR_BASE_VERSION_1_3_0,
|
||||
|
||||
"bproc", /* MCA module name */
|
||||
ORTE_MAJOR_VERSION, /* MCA module major version */
|
||||
ORTE_MINOR_VERSION, /* MCA module minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA module release version */
|
||||
orte_smr_bproc_open, /* component open */
|
||||
orte_smr_bproc_close /* component close */
|
||||
},
|
||||
|
||||
/* Next the MCA v1.0.0 module meta data */
|
||||
|
||||
{
|
||||
/* Whether the module is checkpointable or not */
|
||||
|
||||
false
|
||||
},
|
||||
|
||||
orte_smr_bproc_init
|
||||
}
|
||||
};
|
||||
|
||||
orte_smr_base_module_t orte_smr_bproc_module = {
|
||||
orte_smr_base_get_proc_state,
|
||||
orte_smr_base_set_proc_state,
|
||||
orte_smr_base_get_node_state,
|
||||
orte_smr_base_set_node_state,
|
||||
orte_smr_base_get_job_state,
|
||||
orte_smr_base_set_job_state,
|
||||
orte_smr_bproc_begin_monitoring,
|
||||
orte_smr_base_init_job_stage_gates,
|
||||
orte_smr_base_init_orted_stage_gates,
|
||||
orte_smr_base_define_alert_monitor,
|
||||
orte_smr_base_job_stage_gate_subscribe,
|
||||
orte_smr_bproc_finalize
|
||||
};
|
||||
|
||||
/**
 * Utility function to register an integer MCA parameter under
 * smr/bproc and read back its current value.
 *
 * @param param_name     Parameter name within the smr/bproc namespace.
 * @param default_value  Value registered as the default.
 * @return The value stored by the lookup; `default_value` if the lookup
 *         leaves the variable untouched.
 */
static int orte_smr_bproc_param_register_int(
    const char* param_name,
    int default_value)
{
    int id = mca_base_param_register_int("smr","bproc",param_name,NULL,default_value);
    int param_value = default_value;

    /* the lookup writes through the pointer, so it needs the address */
    mca_base_param_lookup_int(id,&param_value);
    return param_value;
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
static int orte_smr_bproc_open(void)
|
||||
{
|
||||
mca_smr_bproc_component.debug =
|
||||
orte_smr_bproc_param_register_int("debug", 0);
|
||||
mca_smr_bproc_component.priority =
|
||||
orte_smr_bproc_param_register_int("priority", 1);
|
||||
mca_smr_bproc_component.monitoring = false;
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
static orte_smr_base_module_t* orte_smr_bproc_init(int *priority)
|
||||
{
|
||||
if (!orte_process_info.seed) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
*priority = mca_smr_bproc_component.priority;
|
||||
return &orte_smr_bproc_module;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
static int orte_smr_bproc_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
0
orte/mca/ras/bjs/.ompi_ignore
Обычный файл
0
orte/mca/ras/bjs/.ompi_ignore
Обычный файл
1
orte/mca/ras/bjs/.ompi_unignore
Обычный файл
1
orte/mca/ras/bjs/.ompi_unignore
Обычный файл
@ -0,0 +1 @@
|
||||
rhc
|
54
orte/mca/ras/bjs/Makefile.am
Обычный файл
54
orte/mca/ras/bjs/Makefile.am
Обычный файл
@ -0,0 +1,54 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Use the top-level Makefile.options
|
||||
|
||||
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if OMPI_BUILD_ras_bjs_DSO
component_noinst =
component_install = mca_ras_bjs.la
else
component_noinst = libmca_ras_bjs.la
component_install =
endif

AM_CPPFLAGS = $(ras_bjs_CPPFLAGS)

# Source list shared by the DSO and the static build. It is deliberately
# not named <something>_SOURCES: automake reads such a name as the sources
# of a target called <something>, and no such target exists here.
sources = \
    ras_bjs.c \
    ras_bjs.h \
    ras_bjs_component.c

# DSO build
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_ras_bjs_la_SOURCES = $(sources)
mca_ras_bjs_la_LIBADD = \
    $(ras_bjs_LIBS) \
    $(top_ompi_builddir)/orte/libopen-rte.la \
    $(top_ompi_builddir)/opal/libopen-pal.la
mca_ras_bjs_la_LDFLAGS = -module -avoid-version $(ras_bjs_LDFLAGS)

# static build
noinst_LTLIBRARIES = $(component_noinst)
libmca_ras_bjs_la_SOURCES = $(sources)
libmca_ras_bjs_la_LIBADD = $(ras_bjs_LIBS)
libmca_ras_bjs_la_LDFLAGS = -module -avoid-version $(ras_bjs_LDFLAGS)
|
38
orte/mca/ras/bjs/configure.m4
Обычный файл
38
orte/mca/ras/bjs/configure.m4
Обычный файл
@ -0,0 +1,38 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_ras_bjs_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_ras_bjs_CONFIG],[
    # Either flavor of BProc reported by the check is good enough here.
    OMPI_CHECK_BPROC([ras_bjs],
                     [ras_bjs_good=1],
                     [ras_bjs_good=1],
                     [ras_bjs_good=0])

    # When the check succeeded set the wrapper flags and then run the
    # action-if-found. Otherwise run the action-if-not-found.
    AS_IF([test "$ras_bjs_good" = "1"],
          [ras_bjs_WRAPPER_EXTRA_LDFLAGS="$ras_bjs_LDFLAGS"
           ras_bjs_WRAPPER_EXTRA_LIBS="$ras_bjs_LIBS"
           $1],
          [$2])

    # Build flags substituted into the Makefile.
    AC_SUBST([ras_bjs_CPPFLAGS])
    AC_SUBST([ras_bjs_LDFLAGS])
    AC_SUBST([ras_bjs_LIBS])
])dnl
|
24
orte/mca/ras/bjs/configure.params
Обычный файл
24
orte/mca/ras/bjs/configure.params
Обычный файл
@ -0,0 +1,24 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Specific to this module
|
||||
|
||||
PARAM_CONFIG_FILES="Makefile"
|
291
orte/mca/ras/bjs/ras_bjs.c
Обычный файл
291
orte/mca/ras/bjs/ras_bjs.c
Обычный файл
@ -0,0 +1,291 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
#include "orte_config.h"
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <sys/bproc.h>
|
||||
|
||||
#include "orte/orte_constants.h"
|
||||
#include "orte/orte_types.h"
|
||||
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/output.h"
|
||||
|
||||
#include "orte/dss/dss.h"
|
||||
#include "orte/mca/rmgr/rmgr.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "orte/mca/ras/base/ras_private.h"
|
||||
#include "ras_bjs.h"
|
||||
|
||||
|
||||
/**
|
||||
* Query the bproc node status
|
||||
*/
|
||||
|
||||
static int orte_ras_bjs_node_state(int node)
|
||||
{
|
||||
#if defined BPROC_API_VERSION && BPROC_API_VERSION >= 4
|
||||
char nodestatus[BPROC_STATE_LEN + 1];
|
||||
|
||||
bproc_nodestatus(node, nodestatus, sizeof(nodestatus));
|
||||
if (strcmp(nodestatus, "up") == 0)
|
||||
return ORTE_NODE_STATE_UP;
|
||||
if (strcmp(nodestatus, "down") == 0)
|
||||
return ORTE_NODE_STATE_DOWN;
|
||||
if (strcmp(nodestatus, "boot") == 0)
|
||||
return ORTE_NODE_STATE_REBOOT;
|
||||
return ORTE_NODE_STATE_UNKNOWN;
|
||||
#else
|
||||
switch(bproc_nodestatus(node)) {
|
||||
case bproc_node_up:
|
||||
return ORTE_NODE_STATE_UP;
|
||||
case bproc_node_down:
|
||||
return ORTE_NODE_STATE_DOWN;
|
||||
case bproc_node_boot:
|
||||
return ORTE_NODE_STATE_REBOOT;
|
||||
default:
|
||||
return ORTE_NODE_STATE_UNKNOWN;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/**
 * Parse the NODELIST to determine the number of process
 * slots/processors available on the node.
 *
 * The NODELIST environment variable is split on ',' once and the result is
 * cached in a function-local static. The slot count of a node is the number
 * of entries equal to `node_name`.
 *
 * @param node_name  Node name to look for; must not be NULL.
 * @return Number of matching entries, 0 when NODELIST is unset or has no
 *         match.
 */
static size_t orte_ras_bjs_node_slots(char* node_name)
{
    static char** nodelist = NULL;
    char** ptr;
    size_t count = 0;

    if(NULL == nodelist) {
        char* env = getenv("NODELIST");

        /* do not hand a NULL string to the splitter when NODELIST is unset */
        if(NULL != env) {
            nodelist = opal_argv_split(env, ',');
        }
    }

    for(ptr = nodelist; NULL != ptr && NULL != *ptr; ptr++) {
        if(strcmp(*ptr, node_name) == 0)
            count++;
    }
    return count;
}
|
||||
|
||||
/**
|
||||
* Resolve the node name to node number.
|
||||
*/
|
||||
|
||||
static int orte_ras_bjs_node_resolve(char* node_name, int* node_num)
|
||||
{
|
||||
/* for now we expect this to be the node number */
|
||||
if(NULL == node_name || sscanf(node_name, "%d", node_num) != 1)
|
||||
return ORTE_ERROR;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Discover the available resources.
|
||||
* - validate any nodes specified via hostfile/commandline
|
||||
* - check for additional nodes that have already been allocated
|
||||
*/
|
||||
|
||||
static int orte_ras_bjs_discover(
|
||||
opal_list_t* nodelist,
|
||||
orte_app_context_t** context,
|
||||
size_t num_context)
|
||||
{
|
||||
char* nodes;
|
||||
char* ptr;
|
||||
opal_list_item_t* item;
|
||||
opal_list_t new_nodes;
|
||||
int rc;
|
||||
|
||||
/* query the nodelist from the registry */
|
||||
if(ORTE_SUCCESS != (rc = orte_ras_base_node_query(nodelist))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* validate that any user supplied nodes actually exist, etc. */
|
||||
item = opal_list_get_first(nodelist);
|
||||
while(item != opal_list_get_end(nodelist)) {
|
||||
opal_list_item_t* next = opal_list_get_next(item);
|
||||
int node_num;
|
||||
|
||||
orte_ras_node_t* node = (orte_ras_node_t*)item;
|
||||
if(ORTE_SUCCESS != orte_ras_bjs_node_resolve(node->node_name, &node_num)) {
|
||||
opal_list_remove_item(nodelist,item);
|
||||
OBJ_DESTRUCT(item);
|
||||
item = next;
|
||||
continue;
|
||||
}
|
||||
|
||||
if(orte_ras_bjs_node_state(node_num) != ORTE_NODE_STATE_UP) {
|
||||
opal_list_remove_item(nodelist,item);
|
||||
OBJ_DESTRUCT(item);
|
||||
item = next;
|
||||
continue;
|
||||
}
|
||||
|
||||
if(bproc_access(node_num, BPROC_X_OK) != 0) {
|
||||
opal_list_remove_item(nodelist,item);
|
||||
OBJ_DESTRUCT(item);
|
||||
item = next;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* try and determine the number of available slots */
|
||||
if(node->node_slots == 0) {
|
||||
node->node_slots = orte_ras_bjs_node_slots(node->node_name);
|
||||
}
|
||||
item = next;
|
||||
}
|
||||
|
||||
/* parse the node list and check node status/access */
|
||||
nodes = getenv("NODES");
|
||||
if (NULL == nodes) {
|
||||
return ORTE_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
OBJ_CONSTRUCT(&new_nodes, opal_list_t);
|
||||
while(NULL != (ptr = strsep(&nodes,","))) {
|
||||
orte_ras_node_t *node;
|
||||
orte_node_state_t node_state;
|
||||
int node_num;
|
||||
|
||||
/* is this node already in the list */
|
||||
for(item = opal_list_get_first(nodelist);
|
||||
item != opal_list_get_end(nodelist);
|
||||
item = opal_list_get_next(item)) {
|
||||
node = (orte_ras_node_t*)item;
|
||||
if(strcmp(node->node_name, ptr) == 0)
|
||||
break;
|
||||
}
|
||||
if(item != opal_list_get_end(nodelist))
|
||||
continue;
|
||||
if(sscanf(ptr, "%d", &node_num) != 1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if(ORTE_NODE_STATE_UP != (node_state = orte_ras_bjs_node_state(node_num))) {
|
||||
opal_output(0, "error: a specified node (%d) is not up.\n", node_num);
|
||||
rc = ORTE_ERROR;
|
||||
goto cleanup;
|
||||
}
|
||||
if(bproc_access(node_num, BPROC_X_OK) != 0) {
|
||||
opal_output(0, "error: a specified node (%d) is not accessible.\n", node_num);
|
||||
rc = ORTE_ERROR;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* create a new node entry */
|
||||
node = OBJ_NEW(orte_ras_node_t);
|
||||
node->node_name = strdup(ptr);
|
||||
node->node_state = node_state;
|
||||
node->node_slots = orte_ras_bjs_node_slots(node->node_name);
|
||||
opal_list_append(&new_nodes, &node->super);
|
||||
}
|
||||
|
||||
/* add any newly discovered nodes to the registry */
|
||||
if(opal_list_get_size(&new_nodes)) {
|
||||
rc = orte_ras_base_node_insert(&new_nodes);
|
||||
if(ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
}
|
||||
|
||||
/* append them to the nodelist */
|
||||
while(NULL != (item = opal_list_remove_first(&new_nodes)))
|
||||
opal_list_append(nodelist, item);
|
||||
|
||||
cleanup:
|
||||
OBJ_DESTRUCT(&new_nodes);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Discover available (pre-allocated) nodes. Allocate the
|
||||
* requested number of nodes/process slots to the job.
|
||||
*
|
||||
*/
|
||||
|
||||
static int orte_ras_bjs_allocate(orte_jobid_t jobid, opal_list_t *attributes)
|
||||
{
|
||||
opal_list_t nodes;
|
||||
opal_list_item_t* item;
|
||||
int rc;
|
||||
orte_app_context_t **context = NULL;
|
||||
orte_std_cntr_t i, num_context = 0;
|
||||
|
||||
OBJ_CONSTRUCT(&nodes, opal_list_t);
|
||||
|
||||
rc = orte_rmgr.get_app_context(jobid, &context, &num_context);
|
||||
if(ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if(ORTE_SUCCESS != (rc = orte_ras_bjs_discover(&nodes, context, num_context))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
rc = orte_ras_base_allocate_nodes(jobid, &nodes);
|
||||
if(ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
cleanup:
|
||||
while(NULL != (item = opal_list_remove_first(&nodes))) {
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
for(i=0; i<num_context; i++) {
|
||||
OBJ_RELEASE(context[i]);
|
||||
}
|
||||
if (NULL != context) {
|
||||
free(context);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Deallocation does no work in this component; always ORTE_SUCCESS. */
static int orte_ras_bjs_deallocate(orte_jobid_t jobid)
{
    (void)jobid;    /* not used */

    return ORTE_SUCCESS;
}
|
||||
|
||||
|
||||
static int orte_ras_bjs_finalize(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
orte_ras_base_module_t orte_ras_bjs_module = {
|
||||
orte_ras_bjs_allocate,
|
||||
orte_ras_base_node_insert,
|
||||
orte_ras_base_node_query,
|
||||
orte_ras_base_node_query_alloc,
|
||||
orte_ras_base_node_lookup,
|
||||
orte_ras_bjs_deallocate,
|
||||
orte_ras_bjs_finalize
|
||||
};
|
||||
|
50
orte/mca/ras/bjs/ras_bjs.h
Обычный файл
50
orte/mca/ras/bjs/ras_bjs.h
Обычный файл
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*
|
||||
 * Resource Allocation (BJS on BProc)
|
||||
*/
|
||||
#ifndef ORTE_RAS_BJS_H
|
||||
#define ORTE_RAS_BJS_H
|
||||
|
||||
#include "orte/mca/ras/ras.h"
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* RAS Component
|
||||
*/
|
||||
struct orte_ras_bjs_component_t {
|
||||
orte_ras_base_component_t super;
|
||||
int debug;
|
||||
int priority;
|
||||
char *schedule_policy;
|
||||
};
|
||||
typedef struct orte_ras_bjs_component_t orte_ras_bjs_component_t;
|
||||
|
||||
ORTE_DECLSPEC extern orte_ras_bjs_component_t mca_ras_bjs_component;
|
||||
ORTE_DECLSPEC extern orte_ras_base_module_t orte_ras_bjs_module;
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
131
orte/mca/ras/bjs/ras_bjs_component.c
Обычный файл
131
orte/mca/ras/bjs/ras_bjs_component.c
Обычный файл
@ -0,0 +1,131 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/orte_constants.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "ras_bjs.h"
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
|
||||
static int orte_ras_bjs_open(void);
|
||||
static int orte_ras_bjs_close(void);
|
||||
static orte_ras_base_module_t* orte_ras_bjs_init(int* priority);
|
||||
|
||||
|
||||
orte_ras_bjs_component_t mca_ras_bjs_component = {
|
||||
{
|
||||
/* First, the mca_base_component_t struct containing meta
|
||||
information about the component itself */
|
||||
|
||||
{
|
||||
/* Indicate that we are a ras v1.3.0 component (which also
|
||||
implies a specific MCA version) */
|
||||
|
||||
ORTE_RAS_BASE_VERSION_1_3_0,
|
||||
|
||||
"bjs", /* MCA component name */
|
||||
ORTE_MAJOR_VERSION, /* MCA component major version */
|
||||
ORTE_MINOR_VERSION, /* MCA component minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA component release version */
|
||||
orte_ras_bjs_open, /* component open */
|
||||
orte_ras_bjs_close /* component close */
|
||||
},
|
||||
|
||||
/* Next the MCA v1.0.0 component meta data */
|
||||
{
|
||||
/* Whether the component is checkpointable or not */
|
||||
false
|
||||
},
|
||||
|
||||
orte_ras_bjs_init
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
 * Convenience functions to look up MCA parameter values.
|
||||
*/
|
||||
|
||||
/*
 * Register an integer MCA parameter under ras/bjs and read back its
 * current value. Returns `default_value` if the lookup leaves the
 * variable untouched.
 */
static int orte_ras_bjs_param_register_int(
    const char* param_name,
    int default_value)
{
    int id = mca_base_param_register_int("ras","bjs",param_name,NULL,default_value);
    int param_value = default_value;

    /* the lookup writes through the pointer, so it needs the address */
    mca_base_param_lookup_int(id,&param_value);
    return param_value;
}
|
||||
|
||||
|
||||
/*
 * Register a string MCA parameter and read back its current value.
 *
 * `a`, `b` and `c` are handed to mca_base_param_register_string unchanged
 * as its first three arguments. Returns whatever the lookup stores, or
 * NULL if the lookup stores nothing.
 */
static char* orte_ras_bjs_param_register_string(
    const char * a, const char *b, const char *c,
    const char* default_value)
{
    /* start from NULL so a lookup that stores nothing cannot hand back an
     * indeterminate pointer */
    char *param_value = NULL;
    int id = mca_base_param_register_string(a, b, c, NULL, default_value);

    /* the lookup writes through the pointer, so it needs the address */
    mca_base_param_lookup_string(id, &param_value);
    return param_value;
}
|
||||
|
||||
|
||||
/**
|
||||
* component open/close/init function
|
||||
*/
|
||||
static int orte_ras_bjs_open(void)
|
||||
{
|
||||
mca_ras_bjs_component.debug = orte_ras_bjs_param_register_int("debug",1);
|
||||
mca_ras_bjs_component.priority = orte_ras_bjs_param_register_int("priority",75);
|
||||
/* JMS To be changed post-beta to LAM's C/N command line notation */
|
||||
mca_ras_bjs_component.schedule_policy =
|
||||
orte_ras_bjs_param_register_string("ras", "base", "schedule_policy", "slot");
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static orte_ras_base_module_t *orte_ras_bjs_init(int* priority)
|
||||
{
|
||||
/* if we are not an HNP, then we must not be selected */
|
||||
if (!orte_process_info.seed) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if 0
|
||||
if(getenv("NODES") == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
*priority = mca_ras_bjs_component.priority;
|
||||
return &orte_ras_bjs_module;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close all subsystems.
|
||||
*/
|
||||
|
||||
static int orte_ras_bjs_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
0
orte/mca/ras/lsf_bproc/.ompi_ignore
Обычный файл
0
orte/mca/ras/lsf_bproc/.ompi_ignore
Обычный файл
1
orte/mca/ras/lsf_bproc/.ompi_unignore
Обычный файл
1
orte/mca/ras/lsf_bproc/.ompi_unignore
Обычный файл
@ -0,0 +1 @@
|
||||
rhc
|
54
orte/mca/ras/lsf_bproc/Makefile.am
Обычный файл
54
orte/mca/ras/lsf_bproc/Makefile.am
Обычный файл
@ -0,0 +1,54 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Use the top-level Makefile.options
|
||||
|
||||
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
# Exactly one of component_install / component_noinst is non-empty,
# depending on whether this component is built as a DSO.
if OMPI_BUILD_ras_lsf_bproc_DSO
|
||||
component_noinst =
|
||||
component_install = mca_ras_lsf_bproc.la
|
||||
else
|
||||
component_noinst = libmca_ras_lsf_bproc.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
# Preprocessor flags come from the ras_lsf_bproc_CPPFLAGS substitution.
AM_CPPFLAGS= $(ras_lsf_bproc_CPPFLAGS)
|
||||
|
||||
# Source list shared by the DSO and the static library below.
proxy_SOURCES = \
|
||||
ras_lsf_bproc.c \
|
||||
ras_lsf_bproc.h \
|
||||
ras_lsf_bproc_component.c
|
||||
|
||||
# DSO flavour: installed into $(libdir)/openmpi.
mcacomponentdir = $(libdir)/openmpi
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_ras_lsf_bproc_la_SOURCES = $(proxy_SOURCES)
|
||||
mca_ras_lsf_bproc_la_LIBADD = \
|
||||
$(ras_lsf_bproc_LIBS) \
|
||||
$(top_ompi_builddir)/orte/libopen-rte.la \
|
||||
$(top_ompi_builddir)/opal/libopen-pal.la
|
||||
mca_ras_lsf_bproc_la_LDFLAGS = -module -avoid-version $(ras_lsf_bproc_LDFLAGS)
|
||||
|
||||
# Static flavour: built but not installed (noinst).
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_ras_lsf_bproc_la_SOURCES = $(proxy_SOURCES)
|
||||
libmca_ras_lsf_bproc_la_LIBADD = $(ras_lsf_bproc_LIBS)
|
||||
libmca_ras_lsf_bproc_la_LDFLAGS = -module -avoid-version $(ras_lsf_bproc_LDFLAGS)
|
38
orte/mca/ras/lsf_bproc/configure.m4
Обычный файл
38
orte/mca/ras/lsf_bproc/configure.m4
Обычный файл
@ -0,0 +1,38 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_ras_lsf_bproc_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_ras_lsf_bproc_CONFIG],[
|
||||
# Probe for BProc. Both the new-bproc and the old-bproc outcome count
# as success here; only the not-found action clears the flag.
OMPI_CHECK_BPROC([ras_lsf_bproc], [ras_lsf_bproc_good=1],
|
||||
[ras_lsf_bproc_good=1], [ras_lsf_bproc_good=0])
|
||||
|
||||
# If the check succeeded, copy the detected link flags and libraries
# into the wrapper-compiler variables.
|
||||
# Then run the caller-supplied found or not-found action.
|
||||
AS_IF([test "$ras_lsf_bproc_good" = "1"],
|
||||
[ras_lsf_bproc_WRAPPER_EXTRA_LDFLAGS="$ras_lsf_bproc_LDFLAGS"
|
||||
ras_lsf_bproc_WRAPPER_EXTRA_LIBS="$ras_lsf_bproc_LIBS"
|
||||
$1],
|
||||
[$2])
|
||||
|
||||
# Export the build flags so the Makefile can use them.
|
||||
AC_SUBST([ras_lsf_bproc_CPPFLAGS])
|
||||
AC_SUBST([ras_lsf_bproc_LDFLAGS])
|
||||
AC_SUBST([ras_lsf_bproc_LIBS])
|
||||
])dnl
|
24
orte/mca/ras/lsf_bproc/configure.params
Обычный файл
24
orte/mca/ras/lsf_bproc/configure.params
Обычный файл
@ -0,0 +1,24 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Specific to this module
|
||||
|
||||
PARAM_CONFIG_FILES="Makefile"
|
55
orte/mca/ras/lsf_bproc/ras_lsf_bproc.c
Обычный файл
55
orte/mca/ras/lsf_bproc/ras_lsf_bproc.c
Обычный файл
@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
#include "orte_config.h"
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "orte/orte_constants.h"
|
||||
|
||||
#include "orte/mca/ras/base/ras_private.h"
|
||||
#include "ras_lsf_bproc.h"
|
||||
|
||||
|
||||
/*
 * Allocation entry of the lsf_bproc module.  Stub: both arguments are
 * ignored and success is reported unconditionally.
 */
static int orte_ras_lsf_bproc_allocate(orte_jobid_t jobid, opal_list_t *attributes)
{
    (void) jobid;
    (void) attributes;

    return ORTE_SUCCESS;
}
|
||||
|
||||
/*
 * Deallocation entry of the lsf_bproc module.  Stub: the job id is
 * ignored and success is reported unconditionally.
 */
static int orte_ras_lsf_bproc_deallocate(orte_jobid_t jobid)
{
    (void) jobid;

    return ORTE_SUCCESS;
}
|
||||
|
||||
|
||||
static int orte_ras_lsf_bproc_finalize(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
orte_ras_base_module_t orte_ras_lsf_bproc_module = {
|
||||
orte_ras_lsf_bproc_allocate,
|
||||
orte_ras_base_node_insert,
|
||||
orte_ras_base_node_query,
|
||||
orte_ras_base_node_query_alloc,
|
||||
orte_ras_base_node_lookup,
|
||||
orte_ras_lsf_bproc_deallocate,
|
||||
orte_ras_lsf_bproc_finalize
|
||||
};
|
||||
|
49
orte/mca/ras/lsf_bproc/ras_lsf_bproc.h
Обычный файл
49
orte/mca/ras/lsf_bproc/ras_lsf_bproc.h
Обычный файл
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*
|
||||
* Resource Allocation (LSF over BPROC)
|
||||
*/
|
||||
#ifndef ORTE_RAS_LSF_BPROC_H
|
||||
#define ORTE_RAS_LSF_BPROC_H
|
||||
|
||||
#include "orte/mca/ras/ras.h"
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* RAS Component
|
||||
*/
|
||||
struct orte_ras_lsf_bproc_component_t {
|
||||
orte_ras_base_component_t super;
|
||||
int debug;
|
||||
int priority;
|
||||
};
|
||||
typedef struct orte_ras_lsf_bproc_component_t orte_ras_lsf_bproc_component_t;
|
||||
|
||||
ORTE_DECLSPEC extern orte_ras_lsf_bproc_component_t mca_ras_lsf_bproc_component;
|
||||
ORTE_DECLSPEC extern orte_ras_base_module_t orte_ras_lsf_bproc_module;
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
111
orte/mca/ras/lsf_bproc/ras_lsf_bproc_component.c
Обычный файл
111
orte/mca/ras/lsf_bproc/ras_lsf_bproc_component.c
Обычный файл
@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/orte_constants.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "ras_lsf_bproc.h"
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
|
||||
static int orte_ras_lsf_bproc_open(void);
|
||||
static int orte_ras_lsf_bproc_close(void);
|
||||
static orte_ras_base_module_t* orte_ras_lsf_bproc_init(int* priority);
|
||||
|
||||
|
||||
orte_ras_lsf_bproc_component_t mca_ras_lsf_bproc_component = {
|
||||
{
|
||||
/* First, the mca_base_component_t struct containing meta
|
||||
information about the component itself */
|
||||
|
||||
{
|
||||
/* Indicate that we are a ras v1.3.0 component (which also
|
||||
implies a specific MCA version) */
|
||||
|
||||
ORTE_RAS_BASE_VERSION_1_3_0,
|
||||
|
||||
"lsf_bproc", /* MCA component name */
|
||||
ORTE_MAJOR_VERSION, /* MCA component major version */
|
||||
ORTE_MINOR_VERSION, /* MCA component minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA component release version */
|
||||
orte_ras_lsf_bproc_open, /* component open */
|
||||
orte_ras_lsf_bproc_close /* component close */
|
||||
},
|
||||
|
||||
/* Next the MCA v1.0.0 component meta data */
|
||||
{
|
||||
/* Whether the component is checkpointable or not */
|
||||
false
|
||||
},
|
||||
|
||||
orte_ras_lsf_bproc_init
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Convenience functions to register and look up MCA parameters.
|
||||
*/
|
||||
/**
 * Register an integer MCA parameter under ("ras", "lsf_bproc",
 * param_name) and return the value found by the subsequent lookup.
 *
 * @param param_name     Name of the parameter to register.
 * @param default_value  Value passed as the registration default; it is
 *                       also what this function returns when the lookup
 *                       does not store a value.
 * @return The looked-up parameter value, or default_value.
 */
static int orte_ras_lsf_bproc_param_register_int(
    const char* param_name,
    int default_value)
{
    /* Pre-load the default so the result is well defined even if the
       lookup below leaves it untouched. */
    int param_value = default_value;
    int id = mca_base_param_register_int("ras", "lsf_bproc", param_name, NULL, default_value);

    /* The address-of operator had been corrupted into a pilcrow
       character by HTML entity decoding. */
    mca_base_param_lookup_int(id, &param_value);
    return param_value;
}
|
||||
|
||||
|
||||
/**
|
||||
* component open/close/init function
|
||||
*/
|
||||
static int orte_ras_lsf_bproc_open(void)
|
||||
{
|
||||
mca_ras_lsf_bproc_component.debug = orte_ras_lsf_bproc_param_register_int("debug",1);
|
||||
mca_ras_lsf_bproc_component.priority = orte_ras_lsf_bproc_param_register_int("priority",-1);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static orte_ras_base_module_t *orte_ras_lsf_bproc_init(int* priority)
|
||||
{
|
||||
/* if we are not an HNP, then we must not be selected */
|
||||
if (!orte_process_info.seed) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
*priority = mca_ras_lsf_bproc_component.priority;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close all subsystems.
|
||||
*/
|
||||
|
||||
static int orte_ras_lsf_bproc_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user