Added xcpu component in pls and soh.
This commit was SVN r9491.
Этот коммит содержится в:
родитель
c2b6e86766
Коммит
46f84b1e8e
63
config/ompi_check_xcpu.m4
Обычный файл
63
config/ompi_check_xcpu.m4
Обычный файл
@ -0,0 +1,63 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# OMPI_CHECK_XCPU(prefix, [action-if-found], [action-if-not-found])
# --------------------------------------------------------
# Probe for the xcpu library, honouring --with-xcpu=DIR / --without-xcpu.
# When libxcpu is usable, the include/library paths (if a directory was
# given) and -lxcpu are appended to <prefix>_CPPFLAGS, <prefix>_LDFLAGS and
# <prefix>_LIBS and action-if-found is run; otherwise action-if-not-found is
# run.  Configure aborts if xcpu was requested explicitly but not found.
AC_DEFUN([OMPI_CHECK_XCPU],[
    AC_ARG_WITH([xcpu],
        [AC_HELP_STRING([--with-xcpu],
                        [Path to xcpu installation])])

    AS_IF([test ! -z "$with_xcpu" -a "$with_xcpu" = "no"],
          [$3],
          [ompi_check_xcpu_save_CPPFLAGS="$CPPFLAGS"
           ompi_check_xcpu_save_LDFLAGS="$LDFLAGS"
           ompi_check_xcpu_save_LIBS="$LIBS"

           AS_IF([test ! -z "$with_xcpu" -a "$with_xcpu" != "yes"],
                 [CPPFLAGS="$CPPFLAGS -I$with_xcpu/include"
                  LDFLAGS="$LDFLAGS -L$with_xcpu/lib"])

           # The header check is informational only: the library is probed
           # whether or not sys/xcpu.h is present.
           AC_CHECK_HEADERS([sys/xcpu.h])
           AC_CHECK_LIB([xcpu],
                        [check_for_xcpu],
                        [ompi_check_xcpu_works="yes"],
                        [ompi_check_xcpu_works="no"])

           CPPFLAGS="$ompi_check_xcpu_save_CPPFLAGS"
           LDFLAGS="$ompi_check_xcpu_save_LDFLAGS"
           LIBS="$ompi_check_xcpu_save_LIBS"

           AS_IF([test "$ompi_check_xcpu_works" != "no"],
                 [AS_IF([test ! -z "$with_xcpu" -a "$with_xcpu" != "yes"],
                        [$1_CPPFLAGS="$$1_CPPFLAGS -I$with_xcpu/include"
                         $1_LDFLAGS="$$1_LDFLAGS -L$with_xcpu/lib"])
                  $1_LIBS="$$1_LIBS -lxcpu"
                  AS_IF([test "$ompi_check_xcpu_works" = "yes"], [$2], [$3])],
                 [AS_IF([test ! -z "$with_xcpu"],
                        [AC_MSG_ERROR([xcpu support requested but not found. Perhaps
you need to specify the location of the xcpu libraries.])])
                  $3])
          ])
])
|
52
orte/mca/pls/xcpu/Makefile.am
Обычный файл
52
orte/mca/pls/xcpu/Makefile.am
Обычный файл
@ -0,0 +1,52 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Disabled: no help file is installed for this component (the name below
# still refers to the bproc component).
#dist_pkgdata_DATA = help-pls-bproc.txt

AM_CPPFLAGS = -I$(top_ompi_builddir)/src/include $(pls_xcpu_CPPFLAGS) -DORTE_BINDIR="\"$(bindir)\""

# Files that make up the component, whichever way it is built.
sources = \
    pls_xcpu.h \
    pls_xcpu.c \
    pls_xcpu_component.c

# The component is produced either as mca_<type>_<name>.la (DSO build) or as
# libmca_<type>_<name>.la (static build); exactly one of the two lists below
# is non-empty.
if OMPI_BUILD_pls_xcpu_DSO
component_install = mca_pls_xcpu.la
component_noinst =
else
component_install =
component_noinst = libmca_pls_xcpu.la
endif

# DSO flavour, installed into the MCA component directory.
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_pls_xcpu_la_SOURCES = $(sources)
mca_pls_xcpu_la_LDFLAGS = -module -avoid-version $(pls_xcpu_LDFLAGS)
mca_pls_xcpu_la_LIBADD = \
    $(pls_xcpu_LIBS) \
    $(top_ompi_builddir)/orte/liborte.la \
    $(top_ompi_builddir)/opal/libopal.la

# Static flavour, not installed on its own.
noinst_LTLIBRARIES = $(component_noinst)
libmca_pls_xcpu_la_SOURCES = $(sources)
libmca_pls_xcpu_la_LDFLAGS = -module -avoid-version $(pls_xcpu_LDFLAGS)
libmca_pls_xcpu_la_LIBADD = $(pls_xcpu_LIBS)
|
37
orte/mca/pls/xcpu/configure.m4
Обычный файл
37
orte/mca/pls/xcpu/configure.m4
Обычный файл
@ -0,0 +1,37 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_pls_xcpu_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
# Decide whether the pls xcpu component can be built and export the flags
# its Makefile.am uses.
AC_DEFUN([MCA_pls_xcpu_CONFIG],[
    # Make the build flags available to the Makefile.
    AC_SUBST([pls_xcpu_CPPFLAGS])
    AC_SUBST([pls_xcpu_LDFLAGS])
    AC_SUBST([pls_xcpu_LIBS])

    # The actions handed to the xcpu check record the outcome in
    # pls_xcpu_good: 1 means xcpu is present and working.
    OMPI_CHECK_XCPU([pls_xcpu], [pls_xcpu_good=1], [pls_xcpu_good=0])

    # Evaluate succeed / fail.
    AS_IF([test "$pls_xcpu_good" = "1"],
          [pls_xcpu_WRAPPER_EXTRA_LDFLAGS="$pls_xcpu_LDFLAGS"
           pls_xcpu_WRAPPER_EXTRA_LIBS="$pls_xcpu_LIBS"
           $1],
          [$2])
])dnl
|
24
orte/mca/pls/xcpu/configure.params
Обычный файл
24
orte/mca/pls/xcpu/configure.params
Обычный файл
@ -0,0 +1,24 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Specific to this module
|
||||
|
||||
|
||||
# NOTE(review): presumably names a source file whose presence identifies this
# component directory to the configure machinery -- confirm.
PARAM_INIT_FILE=pls_xcpu.c
|
||||
# NOTE(review): presumably the files configure generates for this component -- confirm.
PARAM_CONFIG_FILES="Makefile"
|
182
orte/mca/pls/xcpu/pls_xcpu.c
Обычный файл
182
orte/mca/pls/xcpu/pls_xcpu.c
Обычный файл
@ -0,0 +1,182 @@
|
||||
/* -*- C -*-
|
||||
*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
|
||||
/* @file:
|
||||
* xcpu launcher: launches jobs on compute nodes.
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#if HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif /* HAVE_SYS_TYPES_H */
|
||||
#ifdef HAVE_SYS_STAT_H
|
||||
#include <sys/stat.h>
|
||||
#endif /* HAVE_SYS_STAT_H */
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif /* HAVE_UNISTD_H */
|
||||
#include <errno.h>
|
||||
#include <signal.h>
|
||||
#ifdef HAVE_FCNTL_H
|
||||
#include <fcntl.h>
|
||||
#endif /* HAVE_FCNTL_H */
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif /* HAVE_STRING_H */
|
||||
|
||||
#include "opal/event/event.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/opal_environ.h"
|
||||
#include "opal/util/path.h"
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "orte/dss/dss.h"
|
||||
#include "orte/util/sys_info.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/gpr/base/base.h"
|
||||
#include "orte/mca/iof/iof.h"
|
||||
#include "orte/mca/ns/base/base.h"
|
||||
#include "orte/mca/sds/base/base.h"
|
||||
#include "orte/mca/oob/base/base.h"
|
||||
#include "orte/mca/ras/base/base.h"
|
||||
#include "orte/mca/rmgr/base/base.h"
|
||||
#include "orte/mca/rmaps/base/base.h"
|
||||
#include "orte/mca/rmaps/base/rmaps_base_map.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/soh/base/base.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
#include "orte/runtime/runtime.h"
|
||||
|
||||
#include "pls_xcpu.h"
|
||||
|
||||
/**
|
||||
* Initialization of the xcpu module with all the needed function pointers
|
||||
*/
|
||||
orte_pls_base_module_t orte_pls_xcpu_module = {
|
||||
orte_pls_xcpu_launch,
|
||||
orte_pls_xcpu_terminate_job,
|
||||
orte_pls_xcpu_terminate_proc,
|
||||
orte_pls_xcpu_finalize
|
||||
};
|
||||
int lrx(int argc, char **argv);
|
||||
/**
 * Count the entries of a NULL-terminated argument vector.
 *
 * @param argv NULL-terminated array of strings; may itself be NULL.
 * @return the number of entries before the terminating NULL, or 0 when
 *         argv is NULL.
 */
int get_argc(char **argv){
    int count = 0;

    /* A missing vector is treated as an empty one instead of being
     * dereferenced. */
    if (NULL == argv) {
        return 0;
    }
    while (NULL != argv[count]) {
        ++count;
    }
    return count;
}
|
||||
|
||||
/**
 * Release every node of a tid_stack list.
 *
 * @param s head of the list; NULL is accepted and means "nothing to free".
 */
void free_stack(tid_stack *s){
    tid_stack *node = s;

    while (NULL != node) {
        /* Remember the successor before the current node goes away. */
        tid_stack *following = node->next;
        free(node);
        node = following;
    }
}
|
||||
|
||||
/* This is the main function that will launch jobs on remote compute modes
|
||||
* @param jobid the jobid of the job to launch
|
||||
* @retval ORTE_SUCCESS or error
|
||||
*/
|
||||
int orte_pls_xcpu_launch(orte_jobid_t jobid){
|
||||
opal_list_t mapping;
|
||||
char **new_argv;
|
||||
int new_argc, nprocs=0;
|
||||
int rc, i=0;
|
||||
tid_stack *t_stack, *temp_stack;
|
||||
opal_list_item_t *item, *temp;
|
||||
orte_rmaps_base_map_t* map;
|
||||
/* first get the list of nodes on which we are going to launch job */
|
||||
/* OBJ_CONSTRUCT construct/initialize objects that are not dynamically allocated.
|
||||
* see file opal/class/opal_object.h for detils
|
||||
*/
|
||||
/*fprintf(stdout, "\nxcpu launch called, job id: %d\n", jobid);*/
|
||||
OBJ_CONSTRUCT(&mapping, opal_list_t);
|
||||
/* 1. get map from registry*/
|
||||
if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_map(jobid, &mapping))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* 2. use the map to launch jobs*/
|
||||
map=(orte_rmaps_base_map_t*)opal_list_get_first(&mapping);
|
||||
new_argc=get_argc(map->app->argv)+3;
|
||||
new_argv=(char**)malloc(new_argc*sizeof(char*));
|
||||
new_argv[0]=(char*)malloc(1);/*it could be anything ... doesn't matter*/
|
||||
for(i=2; i<new_argc; i++){
|
||||
new_argv[i]=map->app->argv[i-2];
|
||||
/*fprintf(stdout, "new_argv[%d]:%s\n", i, new_argv[i]);*/
|
||||
}
|
||||
new_argv[i]=NULL;
|
||||
/*printf("new_argv[%d] is nulled\n", i);*/
|
||||
t_stack=NULL;
|
||||
for(item = opal_list_get_first(&mapping);
|
||||
item != opal_list_get_end(&mapping);
|
||||
item = opal_list_get_next(item)) {
|
||||
map = (orte_rmaps_base_map_t*) item;
|
||||
/* now here.. do we want to pass all node-names and binary as
|
||||
* arguments to xcpu_launch or do we want to launch then one
|
||||
* by one, by providing only one node-name and binary at a time?
|
||||
*/
|
||||
for(temp = opal_list_get_first(&map->nodes);
|
||||
temp != opal_list_get_end(&map->nodes);
|
||||
temp = opal_list_get_next(temp)){
|
||||
|
||||
new_argv[1]=((orte_rmaps_base_node_t*)temp)->node->node_name;
|
||||
/*above should contain node name where process is to be launched*/
|
||||
/*fprintf(stdout, "node name: %s\n", new_argv[1]);*/
|
||||
nprocs=((orte_rmaps_base_node_t*)temp)->node_procs.opal_list_length;
|
||||
/*fprintf(stdout, "list length: %d\n", nprocs);*/
|
||||
for (i = 0; i<nprocs; ++i) {
|
||||
temp_stack=(tid_stack*)malloc(sizeof(tid_stack));
|
||||
temp_stack->next=t_stack;
|
||||
t_stack=temp_stack;
|
||||
t_stack->tid=lrx(new_argc, new_argv);
|
||||
}
|
||||
}
|
||||
}
|
||||
/* wait for all thrads that have launched processes on remote nodes
|
||||
* */
|
||||
temp_stack=t_stack;
|
||||
while(t_stack){
|
||||
pthread_join(t_stack->tid, NULL);
|
||||
t_stack=t_stack->next;
|
||||
}
|
||||
orte_soh.begin_monitoring_job(jobid);
|
||||
|
||||
free_stack(temp_stack);
|
||||
free(new_argv[0]);
|
||||
/*free(new_argv[1]);*/
|
||||
free(new_argv);
|
||||
OBJ_DESTRUCT(&mapping);
|
||||
/*fprintf(stdout, "launch finished\n");*/
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Terminate a job.  Not implemented yet: nothing is done and success is
 * reported unconditionally.
 *
 * @param jobid job to terminate (currently ignored)
 * @retval ORTE_SUCCESS always
 */
int orte_pls_xcpu_terminate_job(orte_jobid_t jobid){
    (void)jobid;
    return ORTE_SUCCESS;
}
|
||||
int orte_pls_xcpu_terminate_proc(const orte_process_name_t* proc_name){
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
int orte_pls_xcpu_finalize(void){
|
||||
return ORTE_SUCCESS;
|
||||
}
|
107
orte/mca/pls/xcpu/pls_xcpu.h
Обычный файл
107
orte/mca/pls/xcpu/pls_xcpu.h
Обычный файл
@ -0,0 +1,107 @@
|
||||
/* -*- C -*-
|
||||
*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*
|
||||
*/
|
||||
/**
|
||||
* @file:
|
||||
* Header file for the xcpu launcher. This will use xcpu to launch jobs on
|
||||
* the list of nodes that it will get from RAS (resource allocation
|
||||
* system; slurm??).
|
||||
* -# pls_xcpu is called by orterun. It reads the ompi registry and launches
|
||||
* the binary on the nodes specified in the registry.
|
||||
*/
|
||||
|
||||
/* Include guard: the macro defined must be the one tested, otherwise a
 * second inclusion would redefine everything in this header. */
#ifndef ORTE_PLS_XCPU_H_
#define ORTE_PLS_XCPU_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/class/orte_pointer_array.h"
|
||||
#include "orte/orte_constants.h"
|
||||
#include "orte/mca/pls/base/base.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "opal/threads/condition.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Module open / close -- defined in component file
|
||||
*/
|
||||
int orte_pls_xcpu_component_open(void); /*probably do nothing*/
|
||||
int orte_pls_xcpu_component_close(void); /*probably do nothing*/
|
||||
|
||||
/*
|
||||
* Startup / Shutdown
|
||||
*/
|
||||
orte_pls_base_module_t* orte_pls_xcpu_init(int *priority); /* in component file */
|
||||
/* int orte_pls_xcpu_finalize(void); */ /* should be with interface */
|
||||
|
||||
/*
|
||||
* Interface
|
||||
*/
|
||||
int orte_pls_xcpu_launch(orte_jobid_t);
|
||||
int orte_pls_xcpu_terminate_job(orte_jobid_t);
|
||||
int orte_pls_xcpu_terminate_proc(const orte_process_name_t* proc_name);
|
||||
int orte_pls_xcpu_finalize(void);
|
||||
|
||||
|
||||
/**
|
||||
* (P)rocess (L)aunch (S)ubsystem xcpu Component
|
||||
*/
|
||||
struct orte_pls_xcpu_component_t {
|
||||
orte_pls_base_component_t super;/*base_class this is needed others below this are not*/
|
||||
|
||||
/* most of the memebrs below are going to get removed from this structure
|
||||
* and so are their registrations from open() function
|
||||
*/
|
||||
bool done_launching; /* Is true if we are done launching the user's app. */
|
||||
int debug; /* If greater than 0 print debugging information */
|
||||
int num_procs; /* The number of processes that are running */
|
||||
int priority; /* The priority of this component. This will be returned if
|
||||
* we determine that xcpu is available and running on this node,
|
||||
*/
|
||||
int terminate_sig; /* The signal that gets sent to a process to kill it. */
|
||||
size_t num_daemons; /* The number of daemons that are currently running. */
|
||||
opal_mutex_t lock; /* Lock used to prevent some race conditions */
|
||||
opal_condition_t condition; /* Condition that is signaled when all the daemons have died */
|
||||
orte_pointer_array_t * daemon_names;
|
||||
/* Array of the process names of all the daemons. This is used to send
|
||||
* the daemons a termonation signal when all the user processes are done */
|
||||
orte_cellid_t cellid;
|
||||
};
|
||||
/**
|
||||
* Convenience typedef
|
||||
*/
|
||||
typedef struct orte_pls_xcpu_component_t orte_pls_xcpu_component_t;
|
||||
|
||||
/**
 * Node of a singly linked stack.  pls_xcpu.c stores in "tid" the value
 * returned by each lrx() call and later passes it to pthread_join().
 *
 * NOTE(review): tid is an int while pthread_join() takes a pthread_t --
 * confirm the two are compatible on the supported platforms.
 */
typedef struct tid_stack {
    int tid;
    struct tid_stack *next;
} tid_stack;
|
||||
|
||||
ORTE_DECLSPEC extern orte_pls_xcpu_component_t mca_pls_xcpu_component;
|
||||
ORTE_DECLSPEC extern orte_pls_base_module_t orte_pls_xcpu_module; /* this is defined in pls_xcpu.c file */
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
#endif /* ORTE_PLS_XCPU_H_ */
|
||||
|
113
orte/mca/pls/xcpu/pls_xcpu_component.c
Обычный файл
113
orte/mca/pls/xcpu/pls_xcpu_component.c
Обычный файл
@ -0,0 +1,113 @@
|
||||
/* -*- C -*-
|
||||
*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
/**
|
||||
* @file:
|
||||
* Takes care of the component stuff for the MCA.
|
||||
*/
|
||||
#include "orte_config.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "pls_xcpu.h"
|
||||
|
||||
/**
|
||||
* The xcpu component data structure that stores all the relevent data about
|
||||
* this component.
|
||||
*/
|
||||
orte_pls_xcpu_component_t mca_pls_xcpu_component = {
|
||||
{ /* version, data and init members of only first
|
||||
* structure (called super) being initialized
|
||||
*/
|
||||
{
|
||||
ORTE_PLS_BASE_VERSION_1_0_0,
|
||||
"xcpu", /* MCA component name */
|
||||
ORTE_MAJOR_VERSION, /* MCA component major version */
|
||||
ORTE_MINOR_VERSION, /* MCA component minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA component release version */
|
||||
orte_pls_xcpu_component_open, /* component open */
|
||||
orte_pls_xcpu_component_close /* component close */
|
||||
},
|
||||
{
|
||||
false /* checkpoint / restart */
|
||||
},
|
||||
orte_pls_xcpu_init /* component init */
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Opens the pls_xcpu component, setting all the needed mca parameters and
|
||||
* finishes setting up the component struct.
|
||||
*/
|
||||
int orte_pls_xcpu_component_open(void) {
|
||||
int rc;
|
||||
/* init parameters */
|
||||
/*read trunk/opal/mca/base/mca_base_param.h for reg_int details*/
|
||||
mca_base_component_t *c = &mca_pls_xcpu_component.super.pls_version;
|
||||
mca_base_param_reg_int(c, "priority", NULL, false, false,0,
|
||||
&mca_pls_xcpu_component.priority);
|
||||
mca_base_param_reg_int(c, "debug",
|
||||
"If > 0 prints library debugging information",
|
||||
false, false, 0, &mca_pls_xcpu_component.debug);
|
||||
mca_base_param_reg_int(c, "terminate_sig",
|
||||
"Signal sent to processes to terminate them", false,
|
||||
false, 9, &mca_pls_xcpu_component.terminate_sig);
|
||||
mca_pls_xcpu_component.num_procs = 0;
|
||||
mca_pls_xcpu_component.num_daemons = 0;
|
||||
mca_pls_xcpu_component.done_launching = false;
|
||||
OBJ_CONSTRUCT(&mca_pls_xcpu_component.lock, opal_mutex_t);
|
||||
OBJ_CONSTRUCT(&mca_pls_xcpu_component.condition, opal_condition_t);
|
||||
/* init the list to hold the daemon names */
|
||||
rc = orte_pointer_array_init(&mca_pls_xcpu_component.daemon_names, 8, 200000, 8);
|
||||
if(ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the pls_xcpu component
|
||||
*/
|
||||
int orte_pls_xcpu_component_close(void) {
|
||||
OBJ_DESTRUCT(&mca_pls_xcpu_component.lock);
|
||||
OBJ_DESTRUCT(&mca_pls_xcpu_component.condition);
|
||||
OBJ_RELEASE(mca_pls_xcpu_component.daemon_names);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the module. We do not want to run unless, xcpu
|
||||
* is running and we are on the control node.
|
||||
*/
|
||||
/* What I thnk is that this function will be called some where from (R)esource (M)ana(G)e(R)
|
||||
* and then it will return orte_pls_xcpu_module that contains function pointers for launch,
|
||||
* finalize etc. and then resource manager can call these functions
|
||||
*/
|
||||
orte_pls_base_module_t* orte_pls_xcpu_init(int *priority) {
|
||||
/* check if xcpu component should be loaded or not
|
||||
* if not, then return NULL here
|
||||
*/
|
||||
/*return NULL; *//*for time being*/
|
||||
*priority = mca_pls_xcpu_component.priority;
|
||||
return &orte_pls_xcpu_module; /* this is defined in pls_xcpu.c and will contains
|
||||
* function pointers for launch, terminate_job
|
||||
* terminate_proc and finalize
|
||||
*/
|
||||
}
|
||||
|
57
orte/mca/soh/xcpu/Makefile.am
Обычный файл
57
orte/mca/soh/xcpu/Makefile.am
Обычный файл
@ -0,0 +1,57 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
|
||||
|
||||
# Headers that are installed when header installation is requested.
headers = \
    soh_xcpu.h

# Files that make up the component, whichever way it is built.  The variable
# is deliberately not named <something>_SOURCES, a form Automake reserves for
# the sources of a program or library called <something>.
sources = \
    soh_xcpu.c \
    soh_xcpu.h \
    soh_xcpu_component.c

# The component is produced either as mca_soh_xcpu.la (DSO build) or as
# libmca_soh_xcpu.la (static build); exactly one list below is non-empty.
if OMPI_BUILD_soh_xcpu_DSO
component_noinst =
component_install = mca_soh_xcpu.la
else
component_noinst = libmca_soh_xcpu.la
component_install =
endif

# Conditionally install the header files

if WANT_INSTALL_HEADERS
ortedir = $(includedir)/openmpi/orte/mca/soh/xcpu
orte_HEADERS = $(headers)
else
ortedir = $(includedir)
endif

# DSO flavour, installed into the MCA component directory.
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_soh_xcpu_la_SOURCES = $(sources)
mca_soh_xcpu_la_LIBADD = \
    $(top_ompi_builddir)/orte/liborte.la \
    $(top_ompi_builddir)/opal/libopal.la
mca_soh_xcpu_la_LDFLAGS = -module -avoid-version

# Static flavour, not installed on its own.
noinst_LTLIBRARIES = $(component_noinst)
libmca_soh_xcpu_la_SOURCES = $(sources)
libmca_soh_xcpu_la_LIBADD =
libmca_soh_xcpu_la_LDFLAGS = -module -avoid-version
|
37
orte/mca/soh/xcpu/configure.m4
Обычный файл
37
orte/mca/soh/xcpu/configure.m4
Обычный файл
@ -0,0 +1,37 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_soh_xcpu_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
# Decide whether the soh xcpu component can be built and export the flags
# found by the xcpu check.
AC_DEFUN([MCA_soh_xcpu_CONFIG],[
    # Make the build flags available to the Makefile.
    AC_SUBST([soh_xcpu_CPPFLAGS])
    AC_SUBST([soh_xcpu_LDFLAGS])
    AC_SUBST([soh_xcpu_LIBS])

    # The actions handed to the xcpu check record the outcome in
    # soh_xcpu_good: 1 means xcpu is present and working.
    OMPI_CHECK_XCPU([soh_xcpu], [soh_xcpu_good=1], [soh_xcpu_good=0])

    # Evaluate succeed / fail.
    AS_IF([test "$soh_xcpu_good" = "1"],
          [soh_xcpu_WRAPPER_EXTRA_LDFLAGS="$soh_xcpu_LDFLAGS"
           soh_xcpu_WRAPPER_EXTRA_LIBS="$soh_xcpu_LIBS"
           $1],
          [$2])
])dnl
|
23
orte/mca/soh/xcpu/configure.params
Обычный файл
23
orte/mca/soh/xcpu/configure.params
Обычный файл
@ -0,0 +1,23 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Specific to this module
|
||||
|
||||
# NOTE(review): presumably names a source file whose presence identifies this
# component directory to the configure machinery -- confirm.
PARAM_INIT_FILE=soh_xcpu.c
|
||||
# NOTE(review): presumably the files configure generates for this component -- confirm.
PARAM_CONFIG_FILES="Makefile"
|
238
orte/mca/soh/xcpu/soh_xcpu.c
Обычный файл
238
orte/mca/soh/xcpu/soh_xcpu.c
Обычный файл
@ -0,0 +1,238 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <stdlib.h>
#include <pwd.h>
#include <grp.h>
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "orte/orte_constants.h"
|
||||
#include "orte/orte_types.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/mca/ns/ns.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/gpr/base/base.h"
|
||||
#include "orte/mca/soh/base/base.h"
|
||||
#include "orte/mca/soh/xcpu/soh_xcpu.h"
|
||||
#include "orte/mca/rmaps/base/base.h"
|
||||
#include "orte/mca/rmaps/base/rmaps_base_map.h"
|
||||
#include "opal/util/output.h"
|
||||
|
||||
static int orte_soh_xcpu_get_proc_soh(orte_proc_state_t *, int *, orte_process_name_t *);
|
||||
static int orte_soh_xcpu_set_proc_soh(orte_process_name_t *, orte_proc_state_t, int);
|
||||
static int orte_soh_xcpu_begin_monitoring_job(orte_jobid_t);
|
||||
static int orte_soh_xcpu_finalize(void);
|
||||
|
||||
/**
 * Record "terminated" state for a job in the registry.
 *
 * One registry value is built on the job's segment with three keyvals --
 * process state (ORTE_PROC_STATE_TERMINATED), process exit code (0) and job
 * state (ORTE_JOB_STATE_TERMINATED) -- and stored with orte_gpr.put().
 *
 * NOTE(review): the tokens that select the registry entry are derived from
 * orte_process_info.my_name; proc_name is not used.  Confirm that updating
 * the caller's own entry is what is intended.
 *
 * @param jobid     job whose segment is updated
 * @param proc_name currently unused
 * @return ORTE_SUCCESS, or the error code of the first call that failed
 */
static int update_registry(orte_jobid_t jobid, char *proc_name){
    orte_gpr_value_t *value = NULL;
    char *segment = NULL;
    orte_proc_state_t state = ORTE_PROC_STATE_TERMINATED;
    orte_job_state_t jstate = ORTE_JOB_STATE_TERMINATED;
    int exit_code = 0;
    int rc;

    (void)proc_name;

    if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    rc = orte_gpr.create_value(&value, ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_AND,
                               segment, 3, 0);
    /* NOTE(review): assumes create_value() keeps its own copy of the
     * segment name, so the string can be released here -- confirm. */
    free(segment);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* Without valid tokens the value must not be stored. */
    if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&(value->tokens), &(value->num_tokens),
                                                          orte_process_info.my_name))) {
        ORTE_ERROR_LOG(rc);
        goto release;
    }

    if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), ORTE_PROC_STATE_KEY,
                                                     ORTE_PROC_STATE, &state))) {
        ORTE_ERROR_LOG(rc);
        goto release;
    }

    /* The data argument is a pointer to the value to store, so the exit
     * code is passed by address rather than as a literal 0 (a null pointer). */
    if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), ORTE_PROC_EXIT_CODE_KEY,
                                                     ORTE_INT, &exit_code))) {
        ORTE_ERROR_LOG(rc);
        goto release;
    }

    if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[2]), ORTE_JOB_STATE_KEY,
                                                     ORTE_JOB_STATE, &jstate))) {
        ORTE_ERROR_LOG(rc);
        goto release;
    }

    if (ORTE_SUCCESS != (rc = orte_gpr.put(1, &value))) {
        ORTE_ERROR_LOG(rc);
    }

release:
    OBJ_RELEASE(value);
    return rc;
}
|
||||
|
||||
/*
|
||||
static int do_update(){
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
static void orte_soh_xcpu_notify_handler(int fd, short flags, void *user)
|
||||
{
|
||||
|
||||
}
|
||||
*/
|
||||
|
||||
/**
|
||||
* Register a callback to receive xcpu update notifications
|
||||
*/
|
||||
int orte_soh_xcpu_module_init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_ns.get_cellid(&mca_soh_xcpu_component.cellid, orte_process_info.my_name))) {
|
||||
fprintf(stderr, "orte_soh_xcpu_module_init error\n");
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
} /*
|
||||
* Set initial node status
|
||||
*/
|
||||
/* if(!do_update()){
|
||||
fprintf(stderr, "do_update error\n");
|
||||
}
|
||||
*/
|
||||
/*
|
||||
* Now regiser notify event
|
||||
*/
|
||||
|
||||
/*` mca_soh_xcpu_component.notify_fd = 0;*/ /*bproc_notifier();*/
|
||||
/*
|
||||
memset(&mca_soh_xcpu_component.notify_event, 0, sizeof(opal_event_t));
|
||||
|
||||
opal_event_set(
|
||||
&mca_soh_xcpu_component.notify_event,
|
||||
mca_soh_xcpu_component.notify_fd,
|
||||
OPAL_EV_READ|OPAL_EV_PERSIST,
|
||||
orte_soh_xcpu_notify_handler,
|
||||
0);
|
||||
|
||||
opal_event_add(&mca_soh_xcpu_component.notify_event, 0);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
*/
|
||||
orte_soh_base_module_t orte_soh_xcpu_module = {
|
||||
orte_soh_xcpu_get_proc_soh,
|
||||
orte_soh_xcpu_set_proc_soh,
|
||||
orte_soh_base_get_node_soh_not_available,
|
||||
orte_soh_base_set_node_soh_not_available,
|
||||
orte_soh_base_get_job_soh,
|
||||
orte_soh_base_set_job_soh,
|
||||
orte_soh_xcpu_begin_monitoring_job,
|
||||
orte_soh_xcpu_finalize
|
||||
};
|
||||
|
||||
/**
 * Query the state of health of a process (xcpu stub).
 *
 * At present this only writes a trace line to stdout and reports success.
 * Neither *state nor *status is written, and the base implementation
 * (orte_soh_base_get_proc_soh) is not called.  The original code carried an
 * unreachable call to the base implementation after the return; it has been
 * removed so the function reads the way it actually behaves.
 *
 * NOTE(review): callers receive ORTE_SUCCESS with untouched output
 * arguments -- confirm whether this should delegate to
 * orte_soh_base_get_proc_soh() instead.
 *
 * @param state   unused
 * @param status  unused
 * @param proc    unused
 * @return always ORTE_SUCCESS
 */
static int orte_soh_xcpu_get_proc_soh(orte_proc_state_t *state, int *status, orte_process_name_t *proc)
{
    /* the arguments are intentionally ignored while this is a stub */
    (void) state;
    (void) status;
    (void) proc;

    fprintf(stdout, "soh_xcpu: get_proc_soh\n");
    return ORTE_SUCCESS;
}
|
||||
|
||||
/**
 * Record the state of health of a process (xcpu stub).
 *
 * At present this only writes a trace line to stdout and reports success;
 * nothing is stored and the base implementation
 * (orte_soh_base_set_proc_soh) is not called.  The original code carried an
 * unreachable call to the base implementation after the return; it has been
 * removed so the function reads the way it actually behaves.
 *
 * NOTE(review): confirm whether this should delegate to
 * orte_soh_base_set_proc_soh() instead of silently dropping the update.
 *
 * @param proc    unused
 * @param state   unused
 * @param status  unused
 * @return always ORTE_SUCCESS
 */
static int orte_soh_xcpu_set_proc_soh(orte_process_name_t *proc, orte_proc_state_t state, int status)
{
    /* the arguments are intentionally ignored while this is a stub */
    (void) proc;
    (void) state;
    (void) status;

    fprintf(stdout, "soh_xcpu: set_proc_soh\n");
    return ORTE_SUCCESS;
}
|
||||
|
||||
/* begin monitoring right now only trying to update registry so
|
||||
* that mpirun can exit normally
|
||||
* pls_xcpu is waiting for all threads to finish before calling this function
|
||||
*/
|
||||
static int orte_soh_xcpu_begin_monitoring_job(orte_jobid_t jobid){
|
||||
int rc, nprocs, i;
|
||||
opal_list_item_t *item, *temp;
|
||||
orte_rmaps_base_map_t* map;
|
||||
opal_list_t mapping;
|
||||
|
||||
OBJ_CONSTRUCT(&mapping, opal_list_t);
|
||||
/* 1. get map from registry*/
|
||||
if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_map(jobid, &mapping))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
fprintf(stdout, "soh_xcpu: begin monitoring\n");
|
||||
if (ORTE_SUCCESS != (rc = orte_ns.get_cellid(&mca_soh_xcpu_component.cellid, orte_process_info.my_name))) {
|
||||
fprintf(stderr, "soh_xcpu: get_cell_id error\n");
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}else
|
||||
for(item = opal_list_get_first(&mapping);
|
||||
item != opal_list_get_end(&mapping);
|
||||
item = opal_list_get_next(item)) {
|
||||
map = (orte_rmaps_base_map_t*) item;
|
||||
|
||||
for(temp = opal_list_get_first(&map->nodes);
|
||||
temp != opal_list_get_end(&map->nodes);
|
||||
temp = opal_list_get_next(temp)){
|
||||
|
||||
nprocs=((orte_rmaps_base_node_t*)temp)->node_procs.opal_list_length;
|
||||
|
||||
for (i = 0; i<nprocs; ++i) {
|
||||
/*fprintf(stdout, "%s\n", ((orte_rmaps_base_node_t*)temp)->node->node_name);*/
|
||||
update_registry(jobid, ((orte_rmaps_base_node_t*)temp)->node->node_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleanup
|
||||
*/
|
||||
|
||||
static int orte_soh_xcpu_finalize(void)
|
||||
{
|
||||
fprintf(stdout, "soh_xcpu: finalize\n");
|
||||
/* opal_event_del(&mca_soh_xcpu_component.notify_event);*/
|
||||
return ORTE_SUCCESS;
|
||||
}
|
66
orte/mca/soh/xcpu/soh_xcpu.h
Обычный файл
66
orte/mca/soh/xcpu/soh_xcpu.h
Обычный файл
@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*/
|
||||
#ifndef ORTE_SOH_XCPU_H
|
||||
#define ORTE_SOH_XCPU_H
|
||||
|
||||
#include "orte/mca/soh/soh.h"
|
||||
#include "opal/event/event.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* xcpu node registry keys
|
||||
*/
|
||||
#define ORTE_SOH_XCPU_NODE_STATUS "orte-node-xcpu-status"
|
||||
#define ORTE_SOH_XCPU_NODE_MODE "orte-node-xcpu-mode"
|
||||
#define ORTE_SOH_XCPU_NODE_USER "orte-node-xcpu-user"
|
||||
#define ORTE_SOH_XCPU_NODE_GROUP "orte-node-xcpu-group"
|
||||
|
||||
|
||||
/**
|
||||
* Module init/fini
|
||||
*/
|
||||
int orte_soh_xcpu_module_init(void);
|
||||
int orte_soh_xcpu_module_finalize(void);
|
||||
|
||||
struct orte_soh_xcpu_component_t {
|
||||
orte_soh_base_component_t super;
|
||||
/* not sure which of the following variabels are
|
||||
* needed
|
||||
* */
|
||||
int debug;
|
||||
int priority;
|
||||
opal_event_t notify_event;
|
||||
int notify_fd;
|
||||
orte_cellid_t cellid;
|
||||
/*struct xcpu_node_set_t node_set;*/
|
||||
};
|
||||
typedef struct orte_soh_xcpu_component_t orte_soh_xcpu_component_t;
|
||||
|
||||
OMPI_COMP_EXPORT extern orte_soh_base_module_t orte_soh_xcpu_module;
|
||||
OMPI_COMP_EXPORT extern orte_soh_xcpu_component_t mca_soh_xcpu_component;
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
#endif
|
104
orte/mca/soh/xcpu/soh_xcpu_component.c
Обычный файл
104
orte/mca/soh/xcpu/soh_xcpu_component.c
Обычный файл
@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/orte_constants.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "orte/mca/soh/xcpu/soh_xcpu.h"
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
|
||||
static int orte_soh_xcpu_open(void);
|
||||
static int orte_soh_xcpu_close(void);
|
||||
static orte_soh_base_module_t* orte_soh_xcpu_init(int*);
|
||||
|
||||
orte_soh_xcpu_component_t mca_soh_xcpu_component = {
|
||||
{
|
||||
/* First, the mca_base_module_t struct containing meta
|
||||
information about the module itself */
|
||||
{
|
||||
/* Indicate that we are a xcpu soh v1.0.0 module (which also
|
||||
implies a specific MCA version) */
|
||||
|
||||
ORTE_SOH_BASE_VERSION_1_0_0,
|
||||
|
||||
"xcpu", /* MCA module name */
|
||||
ORTE_MAJOR_VERSION, /* MCA module major version */
|
||||
ORTE_MINOR_VERSION, /* MCA module minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA module release version */
|
||||
orte_soh_xcpu_open, /* component open */
|
||||
orte_soh_xcpu_close /* component close */
|
||||
},
|
||||
|
||||
/* Next the MCA v1.0.0 module meta data */
|
||||
|
||||
{
|
||||
/* Whether the module is checkpointable or not */
|
||||
|
||||
false
|
||||
},
|
||||
|
||||
orte_soh_xcpu_init
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Utility function to register an integer MCA parameter for this component
 * and read back its current value.
 *
 * The parameter is registered under the "soh" framework and "xcpu"
 * component with the given name and default, then looked up again.
 *
 * @param param_name     parameter name, e.g. "debug" or "priority"
 * @param default_value  value registered as the default; it is also what is
 *                       returned if the lookup does not overwrite it
 * @return the value produced by mca_base_param_lookup_int(), or
 *         default_value when the lookup leaves it untouched
 */
static int orte_soh_xcpu_param_register_int(
    const char* param_name,
    int default_value)
{
    int id = mca_base_param_register_int("soh", "xcpu", param_name, NULL, default_value);
    int param_value = default_value;

    /* pass the address of param_value (the original text had "&param_value"
     * mangled into a pilcrow character) */
    mca_base_param_lookup_int(id, &param_value);
    return param_value;
}
|
||||
|
||||
static int orte_soh_xcpu_open(void)
|
||||
{
|
||||
mca_soh_xcpu_component.debug =
|
||||
orte_soh_xcpu_param_register_int("debug", 0);
|
||||
mca_soh_xcpu_component.priority =
|
||||
orte_soh_xcpu_param_register_int("priority", 1);
|
||||
/*fprintf(stdout, "soh_xcpu: open\n");*/
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static orte_soh_base_module_t* orte_soh_xcpu_init(int *priority)
|
||||
{
|
||||
if (!orte_process_info.seed){
|
||||
fprintf(stderr, "soh_xcpu: no seed found\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
*priority = mca_soh_xcpu_component.priority;
|
||||
orte_soh_xcpu_module_init();/*do we need this???*/
|
||||
return &orte_soh_xcpu_module;
|
||||
}
|
||||
|
||||
static int orte_soh_xcpu_close(void)
|
||||
{
|
||||
fprintf(stdout, "soh_xcpu: close\n");
|
||||
return ORTE_SUCCESS;
|
||||
}
|
Загрузка…
x
Ссылка в новой задаче
Block a user