Create a new state-of-health monitoring framework and move the bproc monitoring code to it. Put in an .ompi_ignore file (in src/mca/soh/bproc) to prevent it from being compiled for now. A lot more definition needs to be done here - design document to follow when I can get to it.
This commit was SVN r3821.
Этот коммит содержится в:
родитель
43b565f110
Коммит
8d94778f1e
31
src/mca/soh/Makefile.am
Обычный файл
31
src/mca/soh/Makefile.am
Обычный файл
@ -0,0 +1,31 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
include $(top_srcdir)/config/Makefile.options
|
||||
|
||||
SUBDIRS = base $(MCA_soh_STATIC_SUBDIRS)
|
||||
DIST_SUBDIRS = base $(MCA_soh_ALL_SUBDIRS)
|
||||
|
||||
# Source code files
|
||||
|
||||
headers = soh.h
|
||||
|
||||
# Conditionally install the header files
|
||||
|
||||
if WANT_INSTALL_HEADERS
|
||||
ompidir = $(includedir)/openmpi/mca/soh
|
||||
ompi_HEADERS = $(headers)
|
||||
else
|
||||
ompidir = $(includedir)
|
||||
endif
|
45
src/mca/soh/base/Makefile.am
Обычный файл
45
src/mca/soh/base/Makefile.am
Обычный файл
@ -0,0 +1,45 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
include $(top_srcdir)/config/Makefile.options
|
||||
|
||||
# Convenience library for the soh framework base.  The names below were
# copied from the ns framework; they now match the soh_base_*.c sources
# that live in this directory.
noinst_LTLIBRARIES = libmca_soh_base.la

# For VPATH builds, have to specify where static-components.h will be found

AM_CPPFLAGS = -I$(top_builddir)/src

# Source code files

headers = \
        base.h

# Library
#
# NOTE(review): soh_base_local_fns.c is listed by analogy with the ns
# framework, but it is not among the files added together with this
# Makefile.am.  It presumably has to provide
# mca_soh_base_update_cell_soh_not_available(), which soh_base_open.c
# references - confirm and add the file before enabling the build.

libmca_soh_base_la_SOURCES = \
        $(headers) \
        soh_base_close.c \
        soh_base_select.c \
        soh_base_open.c \
        soh_base_local_fns.c

# Conditionally install the header files

if WANT_INSTALL_HEADERS
ompidir = $(includedir)/openmpi/mca/soh/base
ompi_HEADERS = $(headers)
else
ompidir = $(includedir)
endif
|
61
src/mca/soh/base/base.h
Обычный файл
61
src/mca/soh/base/base.h
Обычный файл
@ -0,0 +1,61 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/** @file:
|
||||
*/
|
||||
|
||||
#ifndef MCA_SOH_BASE_H
|
||||
#define MCA_SOH_BASE_H
|
||||
|
||||
/*
|
||||
* includes
|
||||
*/
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "class/ompi_list.h"
|
||||
#include "mca/mca.h"
|
||||
|
||||
#include "mca/soh/soh.h"
|
||||
|
||||
|
||||
/*
|
||||
* Global functions for MCA overall collective open and close
|
||||
*/
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
OMPI_DECLSPEC int mca_soh_base_open(void);
|
||||
OMPI_DECLSPEC int mca_soh_base_select(bool *allow_multi_user_threads,
|
||||
bool *have_hidden_threads);
|
||||
OMPI_DECLSPEC int mca_soh_base_close(void);
|
||||
|
||||
|
||||
/*
|
||||
* globals that might be needed
|
||||
*/
|
||||
|
||||
OMPI_DECLSPEC extern int mca_soh_base_output;
|
||||
OMPI_DECLSPEC extern mca_soh_base_module_t ompi_soh_monitor; /* holds selected module's function pointers */
|
||||
OMPI_DECLSPEC extern bool mca_soh_base_selected;
|
||||
OMPI_DECLSPEC extern ompi_list_t mca_soh_base_components_available;
|
||||
OMPI_DECLSPEC extern mca_soh_base_component_t mca_soh_base_selected_component;
|
||||
|
||||
/*
|
||||
* external API functions will be documented in the mca/soh/soh.h file
|
||||
*/
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
#endif
|
40
src/mca/soh/base/soh_base_close.c
Обычный файл
40
src/mca/soh/base/soh_base_close.c
Обычный файл
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "mca/mca.h"
|
||||
#include "mca/base/base.h"
|
||||
#include "mca/soh/base/base.h"
|
||||
|
||||
|
||||
int mca_soh_base_close(void)
|
||||
{
|
||||
/* If we have a selected component and module, then finalize it */
|
||||
|
||||
if (mca_soh_base_selected) {
|
||||
mca_soh_base_selected_component.soh_finalize();
|
||||
}
|
||||
|
||||
/* Close all remaining available components (may be one if this is a
|
||||
OMPI RTE program, or [possibly] multiple if this is ompi_info) */
|
||||
|
||||
mca_base_components_close(mca_soh_base_output,
|
||||
&mca_soh_base_components_available, NULL);
|
||||
|
||||
/* All done */
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
78
src/mca/soh/base/soh_base_open.c
Обычный файл
78
src/mca/soh/base/soh_base_open.c
Обычный файл
@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "mca/mca.h"
|
||||
#include "mca/base/base.h"
|
||||
#include "mca/base/mca_base_param.h"
|
||||
#include "util/output.h"
|
||||
#include "util/proc_info.h"
|
||||
#include "mca/oob/base/base.h"
|
||||
|
||||
#include "mca/soh/base/base.h"
|
||||
|
||||
|
||||
/*
|
||||
* The following file was created by configure. It contains extern
|
||||
* statements and the definition of an array of pointers to each
|
||||
* component's public mca_base_component_t struct.
|
||||
*/
|
||||
|
||||
#include "mca/soh/base/static-components.h"
|
||||
|
||||
/*
|
||||
* globals
|
||||
*/
|
||||
|
||||
/*
|
||||
* Global variables
|
||||
*/
|
||||
int mca_soh_base_output = -1;
|
||||
mca_soh_base_module_t ompi_soh_monitor = {
|
||||
mca_soh_base_update_cell_soh_not_available;
|
||||
};
|
||||
bool mca_soh_base_selected = false;
|
||||
ompi_list_t mca_soh_base_components_available;
|
||||
mca_soh_base_component_t mca_soh_base_selected_component;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Function for finding and opening either all MCA components, or the one
|
||||
* that was specifically requested via a MCA parameter.
|
||||
*/
|
||||
int mca_soh_base_open(void)
|
||||
{
|
||||
/* Open up all available components */
|
||||
|
||||
if (OMPI_SUCCESS !=
|
||||
mca_base_components_open("soh", 0, mca_soh_base_static_components,
|
||||
&mca_soh_base_components_available)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* setup output for debug messages */
|
||||
if (!ompi_output_init) { /* can't open output */
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
mca_soh_base_output = ompi_output_open(NULL);
|
||||
|
||||
/* All done */
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
101
src/mca/soh/base/soh_base_select.c
Обычный файл
101
src/mca/soh/base/soh_base_select.c
Обычный файл
@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "mca/mca.h"
|
||||
#include "mca/base/base.h"
|
||||
|
||||
#include "mca/soh/base/base.h"
|
||||
|
||||
|
||||
/**
|
||||
* Function for selecting one component from all those that are
|
||||
* available.
|
||||
*/
|
||||
int mca_soh_base_select(bool *allow_multi_user_threads,
|
||||
bool *have_hidden_threads)
|
||||
{
|
||||
ompi_list_item_t *item;
|
||||
mca_base_component_list_item_t *cli;
|
||||
mca_soh_base_component_t *component, *best_component = NULL;
|
||||
mca_soh_base_module_t *module, *best_module = NULL;
|
||||
bool multi, hidden;
|
||||
int priority, best_priority = -1;
|
||||
|
||||
/* Iterate through all the available components */
|
||||
|
||||
for (item = ompi_list_get_first(&mca_soh_base_components_available);
|
||||
item != ompi_list_get_end(&mca_soh_base_components_available);
|
||||
item = ompi_list_get_next(item)) {
|
||||
cli = (mca_base_component_list_item_t *) item;
|
||||
component = (mca_soh_base_component_t *) cli->cli_component;
|
||||
|
||||
/* Call the component's init function and see if it wants to be
|
||||
selected */
|
||||
|
||||
module = component->soh_init(&multi, &hidden, &priority);
|
||||
|
||||
/* If we got a non-NULL module back, then the component wants to
|
||||
be selected. So save its multi/hidden values and save the
|
||||
module with the highest priority */
|
||||
|
||||
if (NULL != module) {
|
||||
/* If this is the best one, save it */
|
||||
|
||||
if (priority > best_priority) {
|
||||
|
||||
/* If there was a previous best one, finalize */
|
||||
|
||||
if (NULL != best_component) {
|
||||
best_component->soh_finalize();
|
||||
}
|
||||
|
||||
/* Save the new best one */
|
||||
|
||||
best_module = module;
|
||||
best_component = component;
|
||||
*allow_multi_user_threads = multi;
|
||||
*have_hidden_threads = hidden;
|
||||
|
||||
/* update the best priority */
|
||||
best_priority = priority;
|
||||
}
|
||||
|
||||
/* If it's not the best one, finalize it */
|
||||
|
||||
else {
|
||||
component->soh_finalize();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If we didn't find one to select, barf */
|
||||
|
||||
if (NULL == best_component) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* We have happiness -- save the component and module for later
|
||||
usage */
|
||||
|
||||
ompi_soh_monitor = *best_module;
|
||||
mca_soh_base_selected_component = *best_component;
|
||||
mca_soh_base_selected = true;
|
||||
|
||||
/* all done */
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
0
src/mca/soh/bproc/.ompi_ignore
Обычный файл
0
src/mca/soh/bproc/.ompi_ignore
Обычный файл
230
src/mca/soh/bproc/svc_bproc_soh.c
Обычный файл
230
src/mca/soh/bproc/svc_bproc_soh.c
Обычный файл
@ -0,0 +1,230 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <sys/poll.h>
|
||||
#include <sys/bproc.h>
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "mca/oob/oob.h"
|
||||
#include "mca/oob/base/base.h"
|
||||
#include "mca/ns/base/base.h"
|
||||
#include "runtime/runtime.h"
|
||||
|
||||
#include "svc_bproc_soh.h"
|
||||
|
||||
|
||||
mca_svc_base_module_t mca_svc_bproc_soh_module = {
|
||||
mca_svc_bproc_soh_module_init,
|
||||
mca_svc_bproc_soh_module_fini
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Add a BProc node to the virtual machine SOH segment
|
||||
*/
|
||||
int
|
||||
mca_svc_bproc_soh_add_node(mca_ns_base_cellid_t cellid, int node)
|
||||
{
|
||||
ompi_rte_vm_status_t *vmdata;
|
||||
int err;
|
||||
|
||||
vmdata = (ompi_rte_vm_status_t*)malloc(sizeof(ompi_rte_vm_status_t));
|
||||
vmdata->cell = cellid;
|
||||
asprintf(&(vmdata->nodename), "%d", node);
|
||||
err = bproc_getnodeattr(ni->node, "cpus", &cpus, sizeof(cpus));
|
||||
if (err != 0)
|
||||
cpus = 1;
|
||||
vmdata->cpus = (uint16_t)cpus;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a BProc update notice
|
||||
*/
|
||||
|
||||
int
|
||||
mca_svc_bproc_soh_status_changed(struct bproc_node_info_t *old, struct bproc_node_info_t *new)
|
||||
{
|
||||
if (old->node != new->node)
|
||||
return 0;
|
||||
if (strcmp(old->status, new->status))
|
||||
return 1;
|
||||
if (old->user != new->user)
|
||||
return 1;
|
||||
if (old->group != new->group)
|
||||
return 1;
|
||||
if (old->mode != new->mode)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
mca_svc_bproc_soh_update_node_info(mca_ns_base_cellid_t cellid, struct bproc_node_info_t *ni)
|
||||
{
|
||||
int err;
|
||||
int cpus;
|
||||
char *node;
|
||||
ompi_rte_vm_status__t *vmdata;
|
||||
|
||||
asprintf(&node, "%d", ni->node);
|
||||
vmdata = ompi_rte_get_vm_status(cellid, node);
|
||||
if (vmdata == NULL) { /* this node isn't present yet - add it */
|
||||
mca_svc_bproc_soh_add_node(cellid, ni->node);
|
||||
|
||||
return;
|
||||
|
||||
/* in long-term, we will store the soh data in key-value pairs. for now,
|
||||
* we store it simply as values so we can get it working - I will update
|
||||
* this later to the final form.
|
||||
*/
|
||||
vmdata->user = ni->user;
|
||||
vmdata->group = ni->group;
|
||||
vmdata->mode = ni->mode;
|
||||
if (NULL != vmdata->status) {
|
||||
free(vmdata->status);
|
||||
}
|
||||
vmdata->status = strdup(ni->status);
|
||||
/*
|
||||
ompi_vm_status_data_add_int(vmdata, "user", ni->user);
|
||||
ompi_vm_status_data_add_int(vmdata, "group", ni->group);
|
||||
ompi_vm_status_data_add_int(vmdata, "mode", ni->mode);
|
||||
ompi_vm_status_data_add_string(vmdata, "status", ni->status);
|
||||
*/
|
||||
|
||||
/* probably should optimize this so it only happens once */
|
||||
/* ompi_vm_status_data_add_int(vmdata, "#cpus", cpus); */
|
||||
|
||||
/* registry_put(segment, cell, node, vmdata); */
|
||||
|
||||
free(node);
|
||||
ompit_vm_status_data_finish(vmdata);
|
||||
}
|
||||
|
||||
/*
 * Compare a freshly read BProc node set (new) with the previously saved
 * one (*old), refresh every node that is new or whose status changed, and
 * then replace the saved set with a copy of the new one.
 *
 * NOTE(review): this function does not look compilable as written:
 *   - i and ni are used but never declared here;
 *   - old is a pointer to a pointer, yet old->size is read without the
 *     extra dereference that the rest of the body uses ((*old)->size);
 *   - status_changed() is handed (*old)->node[i] by value, while the
 *     mca_svc_bproc_soh_status_changed() defined in this file takes two
 *     pointers;
 *   - update_node_info(segment, cell, ni) matches neither the name nor
 *     the (cellid, ni) parameter list of
 *     mca_svc_bproc_soh_update_node_info() defined in this file.
 * NOTE(review): (*old)->node[i] is indexed with the size of the new set;
 * the comment below states the assumption this relies on.
 */
void
|
||||
mca_svc_bproc_soh_check_node_info(char *segment, char *cell,
|
||||
struct bproc_node_set_t **old,
|
||||
struct bproc_node_set_t *new)
|
||||
{
|
||||
/* we assume the number of nodes does not change */
|
||||
for (i = 0; i < new->size; i++) {
|
||||
ni = &new->node[i];
|
||||
if (!old->size || status_changed((*old)->node[i], ni))
|
||||
update_node_info(segment, cell, ni);
|
||||
}
|
||||
|
||||
if ((*old)->size)
|
||||
bproc_nodeset_free(*old);
|
||||
bproc_nodeset_init(*old, new->size);
|
||||
memcpy((*old)->node, new->node, sizeof(*new->node) * new->size);
|
||||
}
|
||||
|
||||
#if OMPI_HAVE_POSIX_THREADS
/*
 * Body of the BProc monitoring thread.
 *
 * Blocks in poll() on the module's notification descriptor.  Each time
 * poll() returns without error, the node list is read with
 * bproc_nodelist_() and handed to mca_svc_bproc_soh_check_node_info().
 * The loop ends when poll() or bproc_nodelist_() reports an error; the
 * thread is otherwise expected to be stopped through cancellation, which
 * is enabled (asynchronous) on entry.
 *
 * Fixes relative to the previous version: `res` was never declared, and
 * the node set was passed by value although
 * mca_svc_bproc_soh_check_node_info() takes a pointer to it.
 *
 * @param thread  thread descriptor; t_arg carries the module pointer
 * @return PTHREAD_CANCELED
 */
static void *
mca_svc_bproc_soh_status_thread(ompi_thread_t *thread)
{
    struct pollfd pfd;
    struct bproc_node_set_t ns = BPROC_EMPTY_NODESET;
    mca_svc_bproc_soh_module_t *module = (mca_svc_bproc_soh_module_t *)thread->t_arg;
    int res;

    /* This thread enter in a cancel enabled state */
    pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, NULL );
    pthread_setcanceltype( PTHREAD_CANCEL_ASYNCHRONOUS, NULL );

    for (;;) {
        pfd.fd = module->notify_fd;
        pfd.events = POLLIN;
        res = poll(&pfd, 1, -1);
        if (res < 0) {
            /* poll error */
            break;
        }
        if (bproc_nodelist_(&ns, module->notify_fd) < 0) {
            /* bproc_nodelist_ error */
            break;
        }

        mca_svc_bproc_soh_check_node_info(module->segment, module->cell, &module->node_info, &ns);

        bproc_nodeset_free(&ns);
    }

    return PTHREAD_CANCELED;
}
#endif /* OMPI_HAVE_POSIX_THREADS */
|
||||
|
||||
|
||||
/**
|
||||
* Register a callback to receive BProc update notifications
|
||||
*/
|
||||
|
||||
/*
 * Intended flow, as far as the body shows: build the "<segment>-bproc"
 * segment name, read the current BProc node list, record every node,
 * obtain a BProc notification descriptor, and start the monitoring
 * thread.  Returns OMPI_ERROR when the node list or the notification
 * descriptor cannot be obtained, otherwise the result of
 * ompi_thread_start().
 *
 * NOTE(review): this function is unfinished and does not look compilable:
 *   - module is declared without an initializer (the assignment exists
 *     only as a comment) and is dereferenced right away;
 *   - "module->cell = ;" has no right-hand side, only a comment;
 *   - update_node_info(&module->node_info[i]) matches neither the name
 *     nor the (cellid, ni) parameter list of
 *     mca_svc_bproc_soh_update_node_info() defined in this file;
 *   - t_run is set to mca_bproc_status_thread, while the thread function
 *     defined in this file is mca_svc_bproc_soh_status_thread;
 *   - ompi_thread_start() is called even when ompi_using_thread() is
 *     false or POSIX threads are compiled out, i.e. when the thread
 *     fields were never filled in;
 *   - jobid_string is assigned but never used; node_list, node_num and
 *     segment are never used.
 */
int mca_svc_bproc_soh_module_init(mca_svc_base_module_t* base)
|
||||
{
|
||||
int i;
|
||||
int num_nodes;
|
||||
bproc_node_set_t node_list;
|
||||
int node_num;
|
||||
char *segment, *jobid_string;
|
||||
mca_svc_bproc_soh_module_t *module /* = somthing */;
|
||||
|
||||
jobid_string = ompi_name_server.get_jobid_string(ompi_rte_get_self());
|
||||
asprintf(&module->segment, "%s-bproc", OMPI_RTE_VM_STATUS_SEGMENT);
|
||||
module->cell = /* get cell somehow */;
|
||||
|
||||
num_nodes = bproc_nodelist(&module->node_info);
|
||||
if (num_nodes < 0)
|
||||
return OMPI_ERROR;
|
||||
|
||||
for (i = 0; i < module->node_info->size; i++) {
|
||||
update_node_info(&module->node_info[i]);
|
||||
}
|
||||
|
||||
module->notify_fd = bproc_notifier();
|
||||
if (module->notify_fd < 0)
|
||||
return OMPI_ERROR;
|
||||
|
||||
if (ompi_using_thread()) {
|
||||
#if OMPI_HAVE_POSIX_THREADS
|
||||
module->thread.t_handle = 0;
|
||||
module->thread.t_run = (ompi_thread_fn_t)mca_bproc_status_thread;
|
||||
module->thread.t_arg = (void *)module;
|
||||
#endif /* OMPI_HAVE_POSIX_THREADS */
|
||||
}
|
||||
|
||||
return ompi_thread_start(&module->thread);
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleanup
|
||||
*/
|
||||
|
||||
int mca_svc_bproc_soh_module_fini(mca_svc_base_module_t* base)
|
||||
{
|
||||
mca_svc_bproc_soh_module_t *module /* = somthing */;
|
||||
|
||||
#if OMPI_HAVE_POSIX_THREADS
|
||||
if (module->thread.t_handle != 0) {
|
||||
void *thread_return;
|
||||
pthread_cancel(ptl->thread.t_handle);
|
||||
ompi_thread_join(&(module->thread), &thread_return);
|
||||
}
|
||||
#endif /* OMPI_HAVE_POSIX_THREADS */
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
87
src/mca/soh/soh.h
Обычный файл
87
src/mca/soh/soh.h
Обычный файл
@ -0,0 +1,87 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/** @file:
|
||||
*
|
||||
* The Open MPI State-of-Health Monitoring Subsystem
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MCA_SOH_H
|
||||
#define MCA_SOH_H
|
||||
|
||||
/*
|
||||
* includes
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "mca/mca.h"
|
||||
#include "mca/ns/base/base.h"
|
||||
|
||||
/*
|
||||
* Component functions - all MUST be provided!
|
||||
*/
|
||||
|
||||
/* Update the state-of-health of a cell
|
||||
*/
|
||||
typedef int (*mca_soh_base_module_update_cell_soh_fn_t)(mca_ns_base_cellid_t cellid);
|
||||
|
||||
|
||||
/*
|
||||
* Ver 1.0.0
|
||||
*/
|
||||
struct mca_soh_base_module_1_0_0_t {
|
||||
mca_soh_base_module_update_cell_soh_fn_t update_cell_soh;
|
||||
};
|
||||
|
||||
typedef struct mca_soh_base_module_1_0_0_t mca_soh_base_module_1_0_0_t;
|
||||
typedef mca_soh_base_module_1_0_0_t mca_soh_base_module_t;
|
||||
|
||||
/*
|
||||
* SOH Component
|
||||
*/
|
||||
|
||||
typedef mca_soh_base_module_t* (*mca_soh_base_component_init_fn_t)(
|
||||
bool *allow_multi_user_threads,
|
||||
bool *have_hidden_threads,
|
||||
int *priority);
|
||||
|
||||
typedef int (*mca_soh_base_component_finalize_fn_t)(void);
|
||||
|
||||
/*
|
||||
* the standard component data structure
|
||||
*/
|
||||
|
||||
struct mca_soh_base_component_1_0_0_t {
|
||||
mca_base_component_t soh_version;
|
||||
mca_base_component_data_1_0_0_t soh_data;
|
||||
|
||||
mca_soh_base_component_init_fn_t soh_init;
|
||||
mca_soh_base_component_finalize_fn_t soh_finalize;
|
||||
};
|
||||
typedef struct mca_soh_base_component_1_0_0_t mca_soh_base_component_1_0_0_t;
|
||||
typedef mca_soh_base_component_1_0_0_t mca_soh_base_component_t;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Macro for use in components that are of type soh v1.0.0
|
||||
*/
|
||||
#define MCA_SOH_BASE_VERSION_1_0_0 \
|
||||
/* soh v1.0 is chained to MCA v1.0 */ \
|
||||
MCA_BASE_VERSION_1_0_0, \
|
||||
/* soh v1.0 */ \
|
||||
"soh", 1, 0, 0
|
||||
|
||||
#endif
|
@ -1,94 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <sys/bproc.h>
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "include/constants.h"
|
||||
#include "mca/oob/oob.h"
|
||||
#include "mca/oob/base/base.h"
|
||||
#include "svc_bproc_soh.h"
|
||||
|
||||
|
||||
mca_svc_base_module_t mca_svc_bproc_soh_module = {
|
||||
mca_svc_bproc_soh_module_init,
|
||||
mca_svc_bproc_soh_module_fini
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Process a BProc update notice
|
||||
*/
|
||||
|
||||
static void mca_svc_bproc_soh_cbfunc()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Register a callback to receive BProc update notifications
|
||||
*/
|
||||
|
||||
int mca_svc_bproc_soh_module_init(mca_svc_base_module_t* module)
|
||||
{
|
||||
bool registration_successful=true; /* added strictly to allow compilation
|
||||
- should be removed by Greg/Nathan */
|
||||
|
||||
bproc_node_info_t node_info;
|
||||
int node_num;
|
||||
char *segment, *jobid_string;
|
||||
|
||||
jobid_string = ompi_name_server.get_jobid_string(ompi_rte_get_self());
|
||||
asprintf(&segment, "%s-bproc", OMPI_RTE_VM_STATUS_SEGMENT);
|
||||
|
||||
/* Greg/Nathan - we need to initialize a registry segment that
|
||||
* has info from each node on the BProc cluster. From what I read
|
||||
* in the BProc documentation, we want each process to call this
|
||||
* function and add that info to our segment. Please feel free
|
||||
* to correct this info if incorrect...
|
||||
*/
|
||||
node_num = bproc_currnode();
|
||||
|
||||
|
||||
/* Greg/Nathan - this is where you need to add code so that
|
||||
* BProc will call you back whenever there is a change
|
||||
* or info that you want to get. I have named the callback
|
||||
* function "mca_svc_bproc_soh_cbfunc".
|
||||
*/
|
||||
if (registration_successful) {
|
||||
return OMPI_SUCCESS;
|
||||
} else {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Cleanup
|
||||
*/
|
||||
|
||||
int mca_svc_bproc_soh_module_fini(mca_svc_base_module_t* module)
|
||||
{
|
||||
/* Greg/Nathan - all you need to do here is de-register the
|
||||
* callback from BProc.
|
||||
*/
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user