1
1

Add two new frameworks for sensing and predicting faults. This is just the bare-bones plumbing for now - will instantiate soon.

No ess modules reference these frameworks yet, so they are completely inactive.

This commit was SVN r21847.
Этот коммит содержится в:
Ralph Castain 2009-08-20 04:27:16 +00:00
родитель 4d8afc8fb0
Коммит c3c642aa0d
29 изменённых файлов: 1575 добавлений и 0 удалений

34
orte/mca/fddp/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,34 @@
#
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# main library setup
noinst_LTLIBRARIES = libmca_fddp.la
libmca_fddp_la_SOURCES =
# header setup
nobase_orte_HEADERS =
# local files
headers = fddp.h
libmca_fddp_la_SOURCES += $(headers)
# Conditionally install the header files
if WANT_INSTALL_HEADERS
nobase_orte_HEADERS += $(headers)
ortedir = $(includedir)/openmpi/orte/mca/fddp
else
ortedir = $(includedir)
endif
include base/Makefile.am
distclean-local:
rm -f base/static-components.h

17
orte/mca/fddp/base/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,17 @@
#
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
headers += \
base/base.h
libmca_fddp_la_SOURCES += \
base/fddp_base_close.c \
base/fddp_base_select.c \
base/fddp_base_open.c

55
orte/mca/fddp/base/base.h Обычный файл
Просмотреть файл

@ -0,0 +1,55 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*/
#ifndef MCA_FDDP_BASE_H
#define MCA_FDDP_BASE_H
/*
* includes
*/
#include "orte_config.h"
#include "opal/class/opal_list.h"
#include "opal/mca/mca.h"
#include "orte/mca/fddp/fddp.h"
/*
* Global functions for MCA overall collective open and close
*/
BEGIN_C_DECLS
/*
* function definitions
*/
/* Framework lifecycle entry points: open the components, pick one module,
 * and shut the framework down again. Each returns an int status code.
 */
ORTE_DECLSPEC int orte_fddp_base_open(void);
ORTE_DECLSPEC int orte_fddp_base_select(void);
ORTE_DECLSPEC int orte_fddp_base_close(void);
/*
* globals that might be needed
*/
/* handle of the output stream used for this framework's diagnostics */
ORTE_DECLSPEC extern int orte_fddp_base_output;
/* NOTE(review): no definition of this flag is visible in the base sources
 * shown with this header - confirm one exists before referencing it
 */
ORTE_DECLSPEC extern bool mca_fddp_base_selected;
/* list of the fddp components that were opened */
ORTE_DECLSPEC extern opal_list_t mca_fddp_base_components_available;
/* storage for the selected component's descriptor */
ORTE_DECLSPEC extern orte_fddp_base_component_t mca_fddp_base_selected_component;
#if !ORTE_DISABLE_FULL_SUPPORT
/* no base functions to protect at this time */
#endif /* ORTE_DISABLE_FULL_SUPPORT */
END_C_DECLS
#endif

38
orte/mca/fddp/base/fddp_base_close.c Обычный файл
Просмотреть файл

@ -0,0 +1,38 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <stdio.h>
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "orte/mca/fddp/base/base.h"
int orte_fddp_base_close(void)
{
/* If we have a selected component and module, then finalize it */
if (NULL != orte_fddp.finalize) {
orte_fddp.finalize();
}
/* Close all remaining available components (may be one if this is a
OpenRTE program, or [possibly] multiple if this is ompi_info) */
mca_base_components_close(orte_fddp_base_output,
&mca_fddp_base_components_available, NULL);
/* All done */
return ORTE_SUCCESS;
}

66
orte/mca/fddp/base/fddp_base_open.c Обычный файл
Просмотреть файл

@ -0,0 +1,66 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "opal/mca/mca.h"
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include "orte/mca/fddp/base/base.h"
/*
* The following file was created by configure. It contains extern
* statements and the definition of an array of pointers to each
* component's public mca_base_component_t struct.
*/
#include "orte/mca/fddp/base/static-components.h"
/*
* Global variables
*/
int orte_fddp_base_output = -1;
orte_fddp_base_module_t orte_fddp;
opal_list_t mca_fddp_base_components_available;
orte_fddp_base_component_t mca_fddp_base_selected_component;
/**
* Function for finding and opening either all MCA components, or the one
* that was specifically requested via a MCA parameter.
*/
/**
 * Open the fddp framework: create its output stream and open all
 * available fddp components.
 *
 * @return ORTE_SUCCESS when the components were opened, ORTE_ERROR otherwise.
 */
int orte_fddp_base_open(void)
{
    int rc;

    /* The stream is always opened; its verbosity is set by the MCA
       open system */
    orte_fddp_base_output = opal_output_open(NULL);

    /* Open every component and collect them on the available list */
    rc = mca_base_components_open("fddp", orte_fddp_base_output,
                                  mca_fddp_base_static_components,
                                  &mca_fddp_base_components_available, true);

    return (ORTE_SUCCESS == rc) ? ORTE_SUCCESS : ORTE_ERROR;
}

78
orte/mca/fddp/base/fddp_base_select.c Обычный файл
Просмотреть файл

@ -0,0 +1,78 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "orte/mca/fddp/base/base.h"
/**
* Function for selecting one component from all those that are
* available.
*/
int orte_fddp_base_select(void)
{
int ret, exit_status = ORTE_SUCCESS;
orte_fddp_base_component_t *best_component = NULL;
orte_fddp_base_module_t *best_module = NULL;
char *include_list = NULL;
/*
* Register the framework MCA param and look up include list
*/
mca_base_param_reg_string_name("fddp", NULL,
"Which fddp component to use (empty = none)",
false, false,
NULL, &include_list);
/* If we do not have any components to select this is ok. Just use the default
* "no-op" component and move on.
*/
if( 0 >= opal_list_get_size(&mca_fddp_base_components_available) || NULL == include_list) {
/* Close all components since none will be used */
mca_base_components_close(0, /* Pass 0 to keep this from closing the output handle */
&mca_fddp_base_components_available,
NULL);
goto cleanup;
}
/*
* Select the best component
*/
if( ORTE_SUCCESS != mca_base_select("fddp", orte_fddp_base_output,
&mca_fddp_base_components_available,
(mca_base_module_t **) &best_module,
(mca_base_component_t **) &best_component) ) {
/* It is okay if no component was selected - we just leave
* the orte_fddp module as the default
*/
exit_status = ORTE_SUCCESS;
goto cleanup;
}
if (NULL != orte_fddp.init) {
/* if an init function is provided, use it */
if (ORTE_SUCCESS != (ret = orte_fddp.init()) ) {
exit_status = ret;
goto cleanup;
}
}
/* Save the winner */
orte_fddp = *best_module;
cleanup:
return exit_status;
}

81
orte/mca/fddp/fddp.h Обычный файл
Просмотреть файл

@ -0,0 +1,81 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* @file:
*
*/
#ifndef MCA_FDDP_H
#define MCA_FDDP_H
/*
* includes
*/
#include "orte_config.h"
#include "opal/mca/mca.h"
#include "orte/mca/sensor/sensor_types.h"
BEGIN_C_DECLS
/*
* Component functions - all MUST be provided!
*/
/* initialize the selected module; returns an int status */
typedef int (*orte_fddp_base_module_init_fn_t)(void);
/* finalize the selected module; returns an int status */
typedef int (*orte_fddp_base_module_finalize_fn_t)(void);
/* process sensor data: receives a sensor data object, a bin count and a
 * uint8_t output pointer, and returns an int status.
 * NOTE(review): presumably failure_likelihood receives one value per bin -
 * no working implementation is visible here, so confirm the contract
 */
typedef int (*orte_fddp_base_module_process_fn_t)(orte_sensor_data_t *data,
int num_bins, uint8_t *failure_likelihood);
/*
* Ver 1.0
*/
/* Table of the function pointers that make up an fddp module */
struct orte_fddp_base_module_1_0_0_t {
orte_fddp_base_module_init_fn_t init;
orte_fddp_base_module_finalize_fn_t finalize;
orte_fddp_base_module_process_fn_t process;
};
typedef struct orte_fddp_base_module_1_0_0_t orte_fddp_base_module_1_0_0_t;
/* unversioned alias used by the rest of the code */
typedef orte_fddp_base_module_1_0_0_t orte_fddp_base_module_t;
/*
* the standard component data structure: the MCA base component
* descriptor followed by the MCA component metadata
*/
struct orte_fddp_base_component_1_0_0_t {
mca_base_component_t base_version;
mca_base_component_data_t base_data;
};
typedef struct orte_fddp_base_component_1_0_0_t orte_fddp_base_component_1_0_0_t;
/* unversioned alias used by the rest of the code */
typedef orte_fddp_base_component_1_0_0_t orte_fddp_base_component_t;
/*
* Macro for use in components that are of type fddp v1.0.0.
* It expands to the MCA v2.0.0 version fields followed by the framework
* name "fddp" and its version numbers 1, 0, 0, and is meant to be placed
* at the start of a component's base_version initializer.
*/
#define ORTE_FDDP_BASE_VERSION_1_0_0 \
/* fddp v1.0 is chained to MCA v2.0 */ \
MCA_BASE_VERSION_2_0_0, \
/* fddp v1.0 */ \
"fddp", 1, 0, 0
/* Global structure for accessing fddp functions
*/
ORTE_DECLSPEC extern orte_fddp_base_module_t orte_fddp; /* holds selected module's function pointers */
END_C_DECLS
#endif /* MCA_FDDP_H */

35
orte/mca/fddp/trend/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,35 @@
#
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources = \
fddp_trend.c \
fddp_trend.h \
fddp_trend_component.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_fddp_trend_DSO
component_noinst =
component_install = mca_fddp_trend.la
else
component_noinst = libmca_fddp_trend.la
component_install =
endif
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_fddp_trend_la_SOURCES = $(sources)
mca_fddp_trend_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_fddp_trend_la_SOURCES =$(sources)
libmca_fddp_trend_la_LDFLAGS = -module -avoid-version

14
orte/mca/fddp/trend/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,14 @@
# -*- shell-script -*-
#
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module
PARAM_CONFIG_FILES="Makefile"

69
orte/mca/fddp/trend/fddp_trend.c Обычный файл
Просмотреть файл

@ -0,0 +1,69 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include <stdio.h>
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/argv.h"
#include "opal/class/opal_pointer_array.h"
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/sensor/sensor_types.h"
#include "fddp_trend.h"
/* file-local implementations of the fddp module interface */
static int init(void);
static int finalize(void);
static int process(orte_sensor_data_t *data, int num_bins, uint8_t *failure_likelihood);
/* Module table for the "trend" component; the entries are given
 * positionally in the order init, finalize, process.
 */
orte_fddp_base_module_t orte_fddp_trend_module = {
init,
finalize,
process
};
/**
 * Module init hook for the trend component.
 * There is nothing to set up yet, so it simply reports success.
 */
static int init(void)
{
    const int status = ORTE_SUCCESS;

    return status;
}
/**
 * Module finalize hook for the trend component.
 * There is nothing to tear down yet, so it simply reports success.
 */
static int finalize(void)
{
    const int status = ORTE_SUCCESS;

    return status;
}
/**
 * Trend-based failure prediction (not implemented yet).
 *
 * None of the arguments is read or written at this point; the function
 * only returns ORTE_SUCCESS. The intended algorithm is:
 *   1. using the sliding window, compute the trend of the data;
 *   2. for each point in future time, compute the predicted value of
 *      the sensor reading;
 *   3. scale it by the provided scaling factors.
 * In this model the failure likelihood is just the trended value of the
 * data itself, scaled appropriately.
 */
static int process(orte_sensor_data_t *data, int num_bins, uint8_t *failure_likelihood)
{
    /* placeholder: arguments are intentionally unused for now */
    (void) data;
    (void) num_bins;
    (void) failure_likelihood;

    return ORTE_SUCCESS;
}

35
orte/mca/fddp/trend/fddp_trend.h Обычный файл
Просмотреть файл

@ -0,0 +1,35 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
*/
#ifndef ORTE_FDDP_TREND_H
#define ORTE_FDDP_TREND_H
#include "orte_config.h"
#include "orte/mca/fddp/fddp.h"
BEGIN_C_DECLS
/* Component descriptor for the "trend" fddp component: the generic fddp
 * component plus the component's own configuration.
 */
struct orte_fddp_trend_component_t {
orte_fddp_base_component_t super;
/* value of the "window_size" MCA parameter (set in the component open function) */
int window_size;
};
typedef struct orte_fddp_trend_component_t orte_fddp_trend_component_t;
/* the component instance and its module table */
ORTE_MODULE_DECLSPEC extern orte_fddp_trend_component_t mca_fddp_trend_component;
extern orte_fddp_base_module_t orte_fddp_trend_module;
END_C_DECLS
#endif

92
orte/mca/fddp/trend/fddp_trend_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,92 @@
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/class/opal_pointer_array.h"
#include "orte/util/proc_info.h"
#include "orte/util/show_help.h"
#include "fddp_trend.h"
/*
* Local functions
*/
static int orte_fddp_trend_open(void);
static int orte_fddp_trend_close(void);
static int orte_fddp_trend_query(mca_base_module_t **module, int *priority);
/* Public descriptor of the "trend" component. Only the embedded base
 * component is initialized here; window_size is filled in by
 * orte_fddp_trend_open().
 */
orte_fddp_trend_component_t mca_fddp_trend_component = {
{
{
ORTE_FDDP_BASE_VERSION_1_0_0,
"trend", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
orte_fddp_trend_open, /* component open */
orte_fddp_trend_close, /* component close */
orte_fddp_trend_query /* component query */
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
}
};
/**
* component open/close/init function
*/
/**
 * Component open function: registers the component's MCA parameters.
 *
 * Registers the integer parameter "window_size" with a default of 80 and
 * stores its value in mca_fddp_trend_component.window_size.
 *
 * @return ORTE_SUCCESS always.
 */
static int orte_fddp_trend_open(void)
{
    mca_base_component_t *c = &mca_fddp_trend_component.super.base_version;

    /* lookup parameters; the help text must quote the same default that is
     * actually registered (80) */
    mca_base_param_reg_int(c, "window_size",
                           "Size of sliding window to smooth data for trend [default: 80]",
                           false, false, 80, &mca_fddp_trend_component.window_size);

    return ORTE_SUCCESS;
}
/**
 * Component query function: hands back the trend module.
 *
 * @param module   receives a pointer to orte_fddp_trend_module
 * @param priority receives 0, so the component is selected only if specified
 * @return ORTE_SUCCESS always.
 */
static int orte_fddp_trend_query(mca_base_module_t **module, int *priority)
{
    /* select only if specified */
    *priority = 0;
    *module = (mca_base_module_t *) &orte_fddp_trend_module;

    return ORTE_SUCCESS;
}
/**
* Close all subsystems.
*/
/* Component close function: nothing to release, so it reports success. */
static int orte_fddp_trend_close(void)
{
    const int status = ORTE_SUCCESS;

    return status;
}

35
orte/mca/sensor/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,35 @@
#
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# main library setup
noinst_LTLIBRARIES = libmca_sensor.la
libmca_sensor_la_SOURCES =
# header setup
nobase_orte_HEADERS =
# local files
headers = sensor.h \
sensor_types.h
libmca_sensor_la_SOURCES += $(headers)
# Conditionally install the header files
if WANT_INSTALL_HEADERS
nobase_orte_HEADERS += $(headers)
ortedir = $(includedir)/openmpi/orte/mca/sensor
else
ortedir = $(includedir)
endif
include base/Makefile.am
distclean-local:
rm -f base/static-components.h

27
orte/mca/sensor/base/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,27 @@
#
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
headers += \
base/base.h
libmca_sensor_la_SOURCES += \
base/sensor_base_open.c
if !ORTE_DISABLE_FULL_SUPPORT
headers += \
base/sensor_private.h
libmca_sensor_la_SOURCES += \
base/sensor_base_close.c \
base/sensor_base_select.c \
base/sensor_base_scale.c
endif

62
orte/mca/sensor/base/base.h Обычный файл
Просмотреть файл

@ -0,0 +1,62 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*/
#ifndef MCA_SENSOR_BASE_H
#define MCA_SENSOR_BASE_H
/*
* includes
*/
#include "orte_config.h"
#include "opal/class/opal_list.h"
#include "opal/mca/mca.h"
#include "orte/mca/sensor/sensor.h"
/*
* Global functions for MCA overall collective open and close
*/
BEGIN_C_DECLS
/*
* function definitions
*/
/* Framework lifecycle entry points; each returns an int status code. */
ORTE_DECLSPEC int orte_sensor_base_open(void);
ORTE_DECLSPEC int orte_sensor_base_select(void);
ORTE_DECLSPEC int orte_sensor_base_close(void);
/*
* globals that might be needed
*/
/* handle of the output stream used for this framework's diagnostics */
ORTE_DECLSPEC extern int orte_sensor_base_output;
/* list of the sensor components that were opened */
ORTE_DECLSPEC extern opal_list_t mca_sensor_base_components_available;
/* list of orte_sensor_base_selected_pair_t items, one per active sensor */
ORTE_DECLSPEC extern opal_list_t orte_sensor_base_selected_modules;
/* object definition: a list item that ties a selected sensor component
 * to the module it returned when queried
 */
typedef struct {
opal_list_item_t super;
orte_sensor_base_component_t *component;
orte_sensor_base_module_t *module;
} orte_sensor_base_selected_pair_t;
OBJ_CLASS_DECLARATION(orte_sensor_base_selected_pair_t);
#if !ORTE_DISABLE_FULL_SUPPORT
/* no base functions to protect at this time */
#endif /* ORTE_DISABLE_FULL_SUPPORT */
END_C_DECLS
#endif

42
orte/mca/sensor/base/sensor_base_close.c Обычный файл
Просмотреть файл

@ -0,0 +1,42 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <stdio.h>
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "orte/mca/sensor/base/base.h"
/**
 * Shut down the sensor framework.
 *
 * Every entry on the list of selected modules is removed and released; the
 * release runs the pair's destructor, which calls the module's finalize
 * function, and then frees the item. The list itself is destructed to
 * balance the OBJ_CONSTRUCT done in orte_sensor_base_open(). Finally all
 * remaining components are closed.
 *
 * @return ORTE_SUCCESS always.
 */
int orte_sensor_base_close(void)
{
    opal_list_item_t *item;

    /* The pairs were created with OBJ_NEW, so they must be dropped with
     * OBJ_RELEASE: OBJ_DESTRUCT would run the destructor but never free
     * the memory, and would leave dead items on the list.
     */
    while (NULL != (item = opal_list_remove_first(&orte_sensor_base_selected_modules))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&orte_sensor_base_selected_modules);

    /* Close all remaining available components */
    mca_base_components_close(orte_sensor_base_output,
                              &mca_sensor_base_components_available, NULL);

    /* All done */
    return ORTE_SUCCESS;
}

125
orte/mca/sensor/base/sensor_base_open.c Обычный файл
Просмотреть файл

@ -0,0 +1,125 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "opal/mca/mca.h"
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include "orte/mca/sensor/base/base.h"
/*
* The following file was created by configure. It contains extern
* statements and the definition of an array of pointers to each
* component's public mca_base_component_t struct.
*/
#include "orte/mca/sensor/base/static-components.h"
/* object definition */
/* Constructor for orte_sensor_base_selected_pair_t: a fresh pair refers to
 * no component and no module.
 */
static void construct(orte_sensor_base_selected_pair_t *obj)
{
    obj->module = NULL;
    obj->component = NULL;
}
/* Destructor for orte_sensor_base_selected_pair_t: gives the module a
 * chance to finalize. The constructor leaves module set to NULL, so a pair
 * that never had a module attached must not be dereferenced.
 */
static void destruct(orte_sensor_base_selected_pair_t *obj)
{
    if (NULL != obj->module && NULL != obj->module->finalize) {
        obj->module->finalize();
    }
}
/* class instance for the selected-pair list item, wiring in the
 * constructor and destructor defined above
 */
OBJ_CLASS_INSTANCE(orte_sensor_base_selected_pair_t,
opal_list_item_t,
construct, destruct);
/* base functions */
static void start(void);
static void stop(void);
/*
* Global variables
*/
/* output stream handle; -1 until orte_sensor_base_open() assigns it */
int orte_sensor_base_output = -1;
/* framework-level API: start/stop fan out to every selected module */
orte_sensor_base_API_module_t orte_sensor = {
start,
stop
};
/* components opened by orte_sensor_base_open() */
opal_list_t mca_sensor_base_components_available;
/* orte_sensor_base_selected_pair_t items for the active sensors */
opal_list_t orte_sensor_base_selected_modules;
/**
* Function for finding and opening either all MCA components, or the one
* that was specifically requested via a MCA parameter.
*/
/**
 * Open the sensor framework: create its output stream, construct the
 * list that will hold the selected modules, and open all available
 * sensor components.
 *
 * @return ORTE_SUCCESS when the components were opened, ORTE_ERROR otherwise.
 */
int orte_sensor_base_open(void)
{
    int rc;

    /* The stream is always opened; its verbosity is set by the MCA
       open system */
    orte_sensor_base_output = opal_output_open(NULL);

    /* the list of selected modules starts out empty */
    OBJ_CONSTRUCT(&orte_sensor_base_selected_modules, opal_list_t);

    /* Open every component and collect them on the available list */
    rc = mca_base_components_open("sensor", orte_sensor_base_output,
                                  mca_sensor_base_static_components,
                                  &mca_sensor_base_components_available, true);

    return (ORTE_SUCCESS == rc) ? ORTE_SUCCESS : ORTE_ERROR;
}
/* Framework-level start: walk the list of selected modules and call the
 * start function of every module that provides one.
 */
static void start(void)
{
    opal_list_item_t *cur = opal_list_get_first(&orte_sensor_base_selected_modules);

    while (cur != opal_list_get_end(&orte_sensor_base_selected_modules)) {
        orte_sensor_base_selected_pair_t *entry = (orte_sensor_base_selected_pair_t *) cur;

        if (NULL != entry->module->start) {
            entry->module->start();
        }
        cur = opal_list_get_next(cur);
    }
}
/* Framework-level stop: walk the list of selected modules and call the
 * stop function of every module that provides one.
 */
static void stop(void)
{
    opal_list_item_t *cur = opal_list_get_first(&orte_sensor_base_selected_modules);

    while (cur != opal_list_get_end(&orte_sensor_base_selected_modules)) {
        orte_sensor_base_selected_pair_t *entry = (orte_sensor_base_selected_pair_t *) cur;

        if (NULL != entry->module->stop) {
            entry->module->stop();
        }
        cur = opal_list_get_next(cur);
    }
}

47
orte/mca/sensor/base/sensor_base_scale.c Обычный файл
Просмотреть файл

@ -0,0 +1,47 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "opal/mca/mca.h"
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include "orte/mca/sensor/base/sensor_private.h"
/**
 * Scale raw sensor readings into bytes and store them in a sensor data
 * object.
 *
 * Each reading is mapped linearly so that target->min becomes 0 and
 * target->max becomes UINT8_MAX; results outside that interval (and NaN)
 * are clamped instead of being converted out of range. Any buffer already
 * held in target->data.bytes is freed and replaced, and target->data.size
 * is set to num_values. On error the target is left unmodified.
 *
 * NOTE(review): assumes target->min and target->max are floating-point
 * members - confirm against the sensor data type definition.
 *
 * @param target     sensor data object that receives the scaled bytes
 * @param num_values number of entries in data (0 yields an empty buffer)
 * @param data       raw readings to convert
 * @return ORTE_SUCCESS on success, ORTE_ERR_BAD_PARAM for invalid arguments
 *         or a zero-width min/max range, ORTE_ERR_OUT_OF_RESOURCE if the
 *         buffer cannot be allocated.
 */
int orte_sensor_scale_data(orte_sensor_data_t *target, int num_values, float *data)
{
    int i;
    float range, scaled;
    uint8_t *scaled_bytes = NULL;

    if (NULL == target || num_values < 0 ||
        (num_values > 0 && NULL == data)) {
        return ORTE_ERR_BAD_PARAM;
    }

    /* a zero-width range would make the scaling divide by zero */
    range = target->max - target->min;
    if (0.0f == range) {
        return ORTE_ERR_BAD_PARAM;
    }

    if (num_values > 0) {
        /* build the new buffer first so a failed allocation cannot
         * destroy the data the target already holds */
        scaled_bytes = (uint8_t*)malloc((size_t)num_values * sizeof(uint8_t));
        if (NULL == scaled_bytes) {
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        /* convert the data */
        for (i = 0; i < num_values; i++) {
            scaled = UINT8_MAX * (data[i] - target->min) / range;
            if (!(scaled > 0.0f)) {
                /* below the range, or NaN */
                scaled_bytes[i] = 0;
            } else if (scaled >= (float)UINT8_MAX) {
                scaled_bytes[i] = UINT8_MAX;
            } else {
                scaled_bytes[i] = (uint8_t)scaled;
            }
        }
    }

    /* clear out pre-existing data and install the new buffer */
    if (NULL != target->data.bytes) {
        free(target->data.bytes);
    }
    target->data.bytes = scaled_bytes;
    target->data.size = num_values;

    return ORTE_SUCCESS;
}

123
orte/mca/sensor/base/sensor_base_select.c Обычный файл
Просмотреть файл

@ -0,0 +1,123 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "orte/mca/sensor/base/base.h"
/**
* Function for weeding out sensor components that don't want to run.
*
* Call the init function on all available components to find out if
* they want to run. Select all components that don't fail. Failing
* components will be closed and unloaded. The selected modules will
* be returned to the caller in a opal_list_t.
*/
/**
 * Select the sensor modules named in the "sensor" MCA parameter.
 *
 * The parameter holds a comma-delimited list of component names. Each
 * available component whose name is on the list is queried for a module;
 * the module's init function (if any) is called, and modules that
 * initialize successfully are appended to
 * orte_sensor_base_selected_modules. When the parameter is unset or
 * contains no names, nothing is selected.
 *
 * @return ORTE_SUCCESS, or ORTE_ERR_OUT_OF_RESOURCE if a list entry cannot
 *         be allocated.
 */
int orte_sensor_base_select(void)
{
    mca_base_component_list_item_t *cli = NULL;
    mca_base_component_t *component = NULL;
    mca_base_module_t *module = NULL;
    orte_sensor_base_module_t *nmodule;
    opal_list_item_t *item;
    int i, priority, ret;
    int exit_status = ORTE_SUCCESS;
    char *include_list = NULL;
    char **imodules = NULL;
    orte_sensor_base_selected_pair_t *pair;

    /*
     * Register the framework MCA param and look up include list
     */
    mca_base_param_reg_string_name("sensor", NULL,
                                   "Comma-delimited list of sensor components to use (empty = none)",
                                   false, false,
                                   NULL, &include_list);

    /* if the list is empty, then we have nothing to do */
    if (NULL == include_list) {
        return ORTE_SUCCESS;
    }

    /* separate the names of the sensors to be used; the raw string is no
     * longer needed once it has been split */
    imodules = opal_argv_split(include_list, ',');
    free(include_list);
    include_list = NULL;

    /* the split yields no array when the string held no names */
    if (NULL == imodules) {
        return ORTE_SUCCESS;
    }

    /* Query all available components and ask if they have a module */
    for (item = opal_list_get_first(&mca_sensor_base_components_available);
         opal_list_get_end(&mca_sensor_base_components_available) != item;
         item = opal_list_get_next(item)) {
        cli = (mca_base_component_list_item_t *) item;
        component = (mca_base_component_t *) cli->cli_component;

        /* If this component was not specified, skip it */
        for (i = 0; NULL != imodules[i]; ++i) {
            if (0 == strcmp(imodules[i], component->mca_component_name)) {
                break;
            }
        }
        if (NULL == imodules[i]) {
            continue;
        }

        /* If there's no query function, skip it */
        if (NULL == component->mca_query_component) {
            opal_output_verbose(5, orte_sensor_base_output,
                                "mca:sensor:select: Skipping component [%s]. It does not implement a query function",
                                component->mca_component_name );
            continue;
        }

        /* Query the component; reset module first so a result from a
         * previous iteration can never be mistaken for this one's */
        opal_output_verbose(5, orte_sensor_base_output,
                            "mca:sensor:select: Querying component [%s]",
                            component->mca_component_name);
        module = NULL;
        ret = component->mca_query_component(&module, &priority);

        /* If no module was returned, then skip component */
        if (ORTE_SUCCESS != ret || NULL == module) {
            opal_output_verbose(5, orte_sensor_base_output,
                                "mca:sensor:select: Skipping component [%s]. Query failed to return a module",
                                component->mca_component_name );
            continue;
        }

        /* If we got a module, initialize it */
        nmodule = (orte_sensor_base_module_t*) module;
        if (NULL != nmodule->init) {
            /* If the module doesn't want to be used, skip it */
            if (ORTE_SUCCESS != (ret = nmodule->init()) ) {
                if (NULL != nmodule->finalize) {
                    nmodule->finalize();
                }
                continue;
            }
        }

        opal_output_verbose(5, orte_sensor_base_output,
                            "mca:sensor:select: Adding component [%s] to active list",
                            component->mca_component_name );

        /* Make an item for the list */
        pair = OBJ_NEW(orte_sensor_base_selected_pair_t);
        if (NULL == pair) {
            /* undo the init we just did and give up */
            if (NULL != nmodule->finalize) {
                nmodule->finalize();
            }
            exit_status = ORTE_ERR_OUT_OF_RESOURCE;
            break;
        }
        pair->component = (orte_sensor_base_component_t*) component;
        pair->module = nmodule;

        /* Add it to the list of operational sensors */
        opal_list_append(&orte_sensor_base_selected_modules, &(pair->super));
    }

    opal_argv_free(imodules);

    return exit_status;
}

40
orte/mca/sensor/base/sensor_private.h Обычный файл
Просмотреть файл

@ -0,0 +1,40 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*/
#ifndef MCA_SENSOR_PRIVATE_H
#define MCA_SENSOR_PRIVATE_H
/*
* includes
*/
#include "orte_config.h"
#include "opal/dss/dss_types.h"
#include "orte/mca/sensor/sensor_types.h"
/*
* Global functions for MCA overall collective open and close
*/
BEGIN_C_DECLS
#if !ORTE_DISABLE_FULL_SUPPORT
/*
* function definitions
*/
/* Convert num_values float readings from data into uint8_t values scaled
 * between target->min and target->max, and store them in target->data.
 * Returns an int status code.
 */
ORTE_DECLSPEC int orte_sensor_scale_data(orte_sensor_data_t *target, int num_values, float *data);
#endif /* ORTE_DISABLE_FULL_SUPPORT */
END_C_DECLS
#endif

37
orte/mca/sensor/pru/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,37 @@
#
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
dist_pkgdata_DATA = help-orte-sensor-pru.txt
sources = \
sensor_pru.c \
sensor_pru.h \
sensor_pru_component.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_sensor_pru_DSO
component_noinst =
component_install = mca_sensor_pru.la
else
component_noinst = libmca_sensor_pru.la
component_install =
endif
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_sensor_pru_la_SOURCES = $(sources)
mca_sensor_pru_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_sensor_pru_la_SOURCES =$(sources)
libmca_sensor_pru_la_LDFLAGS = -module -avoid-version

14
orte/mca/sensor/pru/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,14 @@
# -*- shell-script -*-
#
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module
PARAM_CONFIG_FILES="Makefile"

Просмотреть файл

@ -0,0 +1,52 @@
# -*- text -*-
#
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English general help file for the resilient mapper.
#
[orte-rmaps-resilient:alloc-error]
There are not enough slots available in the system to satisfy the %d slots
that were requested by the application:
%s
Either request fewer slots for your application, or make more slots available
for use.
[orte-rmaps-resilient:multi-apps-and-zero-np]
RMAPS found multiple applications to be launched, with
at least one that failed to specify the number of processes to execute.
When specifying multiple applications, you must specify how many processes
of each to launch via the -np argument.
[orte-rmaps-resilient:per-node-and-too-many-procs]
There are not enough nodes in your allocation to satisfy your request to launch
%d processes on a per-node basis - only %d nodes were available.
Either request fewer processes, or obtain a larger allocation.
[orte-rmaps-resilient:n-per-node-and-too-many-procs]
There are not enough nodes in your allocation to satisfy your request to launch
%d processes on a %d per-node basis - only %d nodes with a total of %d slots were available.
Either request fewer processes, or obtain a larger allocation.
[orte-rmaps-resilient:n-per-node-and-not-enough-slots]
There are not enough slots on the nodes in your allocation to satisfy your request to launch on a %d process-per-node basis - only %d slots/node were available.
Either request fewer processes/node, or obtain a larger allocation.
[orte-rmaps-resilient:no-np-and-user-map]
You have specified a rank-to-node/slot mapping, but failed to provide
the number of processes to be executed. For some reason, this information
could not be obtained from the mapping you provided, so we cannot continue
with executing the specified application.
#
[orte-rmaps-resilient:file-not-found]
The specified file that describes the fault groups for this system:
FILE: %s
was not found. Please verify the file name and location.

56
orte/mca/sensor/pru/sensor_pru.c Обычный файл
Просмотреть файл

@ -0,0 +1,56 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include <stdio.h>
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/argv.h"
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
#include "sensor_pru.h"
/* declare the functions */
static int start(void);
static int stop(void);
/* instantiate the module */
orte_sensor_base_module_t orte_sensor_pru_module = {
start,
stop
};
/*
* Start monitoring of local processes
*/
static int start(void)
{
return ORTE_SUCCESS;
}
static int stop(void)
{
return ORTE_SUCCESS;
}

36
orte/mca/sensor/pru/sensor_pru.h Обычный файл
Просмотреть файл

@ -0,0 +1,36 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Process Resource Utilization sensor
*/
#ifndef ORTE_SENSOR_PRU_H
#define ORTE_SENSOR_PRU_H
#include "orte_config.h"
#include "orte/mca/sensor/sensor.h"
BEGIN_C_DECLS
/* Component descriptor for the "pru" sensor component: the generic sensor
 * component plus the component's own configuration.
 */
struct orte_sensor_pru_component_t {
orte_sensor_base_component_t super;
/* value of the "sample_rate" MCA parameter (set in the component open function) */
int sample_rate;
};
typedef struct orte_sensor_pru_component_t orte_sensor_pru_component_t;
/* the component instance and its module table */
ORTE_MODULE_DECLSPEC extern orte_sensor_pru_component_t mca_sensor_pru_component;
extern orte_sensor_base_module_t orte_sensor_pru_module;
END_C_DECLS
#endif

91
orte/mca/sensor/pru/sensor_pru_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,91 @@
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/class/opal_pointer_array.h"
#include "orte/util/proc_info.h"
#include "orte/util/show_help.h"
#include "sensor_pru.h"
/*
* Local functions: the open/close/query entry points that are stored in the
* component structure below.
*/
static int orte_sensor_pru_open(void);
static int orte_sensor_pru_close(void);
static int orte_sensor_pru_query(mca_base_module_t **module, int *priority);
/*
* The PRU sensor component instance.
*
* Only the embedded base component ("super") is initialized here: first its
* base_version (framework/MCA version macro, component name, version numbers
* and the open/close/query entry points), then its base_data metadata.
* The sample_rate member has no initializer in this definition; its address
* is passed to mca_base_param_reg_int() in orte_sensor_pru_open().
*/
orte_sensor_pru_component_t mca_sensor_pru_component = {
{
{
ORTE_SENSOR_BASE_VERSION_1_0_0,
"pru", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
orte_sensor_pru_open, /* component open */
orte_sensor_pru_close, /* component close */
orte_sensor_pru_query /* component query */
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
}
};
/**
 * Component open: register the MCA parameters of the PRU sensor.
 *
 * The only parameter is "sample_rate" (default 10); it is registered with
 * mca_sensor_pru_component.sample_rate as its destination.
 *
 * @return ORTE_SUCCESS always; the result of the registration call is not
 *         examined.
 */
static int orte_sensor_pru_open(void)
{
    mca_base_component_t *component;

    component = &mca_sensor_pru_component.super.base_version;

    /* lookup parameters */
    mca_base_param_reg_int(component, "sample_rate",
                           "Sample rate in seconds (default=10)",
                           false, false, 10,
                           &mca_sensor_pru_component.sample_rate);

    return ORTE_SUCCESS;
}
/**
 * Component query: hand back the PRU sensor module and its priority.
 *
 * @param module   [out] set to the address of orte_sensor_pru_module
 * @param priority [out] set to 0, so the component is selected only if
 *                 specified
 *
 * @return ORTE_SUCCESS always.
 */
static int orte_sensor_pru_query(mca_base_module_t **module, int *priority)
{
    *module = (mca_base_module_t *) &orte_sensor_pru_module;
    *priority = 0;  /* select only if specified */

    return ORTE_SUCCESS;
}
/**
 * Component close.
 *
 * The component currently has nothing to release, so this only reports
 * success.
 *
 * @return ORTE_SUCCESS always.
 */
static int orte_sensor_pru_close(void)
{
    int rc = ORTE_SUCCESS;

    return rc;
}

96
orte/mca/sensor/sensor.h Обычный файл
Просмотреть файл

@ -0,0 +1,96 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* @file:
*
*/
#ifndef MCA_SENSOR_H
#define MCA_SENSOR_H
/*
* includes
*/
#include "orte_config.h"
#include "opal/mca/mca.h"
BEGIN_C_DECLS
/*
* Module function types - all MUST be provided!
*
* None of these functions takes arguments. Only init returns a status (int);
* finalize, start and stop return nothing.
*
* NOTE(review): the pru component added alongside this header defines its
* start()/stop() as returning int - confirm which return type is intended.
*/
/* initialize the selected module */
typedef int (*orte_sensor_base_module_init_fn_t)(void);
/* finalize the selected module */
typedef void (*orte_sensor_base_module_finalize_fn_t)(void);
/* start collecting data */
typedef void (*orte_sensor_base_module_start_fn_t)(void);
/* stop collecting data */
typedef void (*orte_sensor_base_module_stop_fn_t)(void);
/* API module */
/*
* Ver 1.0
*
* Function table behind the global "orte_sensor" structure declared at the
* end of this header. It carries only the start/stop entry points; init and
* finalize are not part of it.
*/
struct orte_sensor_base_API_module_1_0_0_t {
/* start collecting data */
orte_sensor_base_module_start_fn_t start;
/* stop collecting data */
orte_sensor_base_module_stop_fn_t stop;
};
typedef struct orte_sensor_base_API_module_1_0_0_t orte_sensor_base_API_module_1_0_0_t;
typedef orte_sensor_base_API_module_1_0_0_t orte_sensor_base_API_module_t;
/*
* Component modules Ver 1.0
*
* Function table supplied by each sensor component. The member order is
* init, finalize, start, stop; a positional initializer must list the
* functions in exactly that order.
*/
struct orte_sensor_base_module_1_0_0_t {
/* initialize the selected module */
orte_sensor_base_module_init_fn_t init;
/* finalize the selected module */
orte_sensor_base_module_finalize_fn_t finalize;
/* start collecting data */
orte_sensor_base_module_start_fn_t start;
/* stop collecting data */
orte_sensor_base_module_stop_fn_t stop;
};
typedef struct orte_sensor_base_module_1_0_0_t orte_sensor_base_module_1_0_0_t;
typedef orte_sensor_base_module_1_0_0_t orte_sensor_base_module_t;
/*
* the standard component data structure
*
* Sensor components embed this structure; it holds only the generic MCA
* component description and the MCA component metadata.
*/
struct orte_sensor_base_component_1_0_0_t {
/* generic MCA component description */
mca_base_component_t base_version;
/* generic MCA component metadata */
mca_base_component_data_t base_data;
};
typedef struct orte_sensor_base_component_1_0_0_t orte_sensor_base_component_1_0_0_t;
typedef orte_sensor_base_component_1_0_0_t orte_sensor_base_component_t;
/*
* Macro for use in components that are of type sensor v1.0.0
*
* Expands to a comma-separated initializer list: the MCA v2.0.0 version
* macro followed by the framework name "sensor" and the framework version
* numbers 1, 0, 0. It is meant to be the first entry of a component's
* base_version initializer.
*/
#define ORTE_SENSOR_BASE_VERSION_1_0_0 \
/* sensor v1.0 is chained to MCA v2.0 */ \
MCA_BASE_VERSION_2_0_0, \
/* sensor v1.0 */ \
"sensor", 1, 0, 0
/* Global structure for accessing sensor functions.
* It is only declared here; the definition is not in this header.
*/
ORTE_DECLSPEC extern orte_sensor_base_API_module_t orte_sensor; /* holds API function pointers */
END_C_DECLS
#endif /* MCA_SENSOR_H */

54
orte/mca/sensor/sensor_types.h Обычный файл
Просмотреть файл

@ -0,0 +1,54 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*/
#ifndef ORTE_MCA_SENSOR_TYPES_H
#define ORTE_MCA_SENSOR_TYPES_H
#include "orte_config.h"
#include "orte/constants.h"
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif /* HAVE_SYS_TIME_H */
#include "opal/dss/dss_types.h"
/*
* General SENSOR types - instanced in runtime/orte_globals.c
*/
BEGIN_C_DECLS
/*
* Scaling-law identifiers. ORTE_SENSOR_SCALE_LINEAR is the default value
* given to orte_sensor_data_t.scaling_law by that class's constructor.
* NOTE(review): how each law maps raw readings onto the min/max/gain fields
* is not defined anywhere in this header - document once implemented.
*/
enum {
ORTE_SENSOR_SCALE_LINEAR,
ORTE_SENSOR_SCALE_LOG,
ORTE_SENSOR_SCALE_SIGMOID
};
/*
* Structure for passing data from sensors
*
* This is an OPAL object class (see the class declaration below the
* struct); its constructor and destructor are instanced in
* runtime/orte_globals.c.
*/
typedef struct {
/* OPAL object base */
opal_object_t super;
/* sensor name string - presumably identifies the reporting sensor;
* the class destructor releases it with free() */
char *sensor;
/* presumably the time the sample was taken - TODO confirm */
struct timeval timestamp;
/* one of the ORTE_SENSOR_SCALE_* values; constructor default is
* ORTE_SENSOR_SCALE_LINEAR */
int scaling_law;
/* constructor default 0.0; presumably the lower bound of the scale */
float min;
/* constructor default 100.0; presumably the upper bound of the scale */
float max;
/* constructor default 1.0 */
float gain;
/* raw payload; data.bytes is released with free() by the class
* destructor */
opal_byte_object_t data;
} orte_sensor_data_t;
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_sensor_data_t);
END_C_DECLS
#endif

Просмотреть файл

@ -35,6 +35,7 @@
#include "opal/dss/dss.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/sensor/sensor_types.h"
#include "orte/util/proc_info.h"
#include "orte/util/name_fns.h"
@ -969,4 +970,27 @@ OBJ_CLASS_INSTANCE(orte_regex_node_t,
orte_regex_node_construct,
orte_regex_node_destruct);
/*
 * Constructor for orte_sensor_data_t.
 *
 * Puts every field into a defined state: no sensor name, a zeroed
 * timestamp, linear scaling over the range [0, 100] with unit gain, and an
 * empty data payload.
 *
 * @param ptr object being constructed
 */
static void orte_sensor_data_construct(orte_sensor_data_t *ptr)
{
    ptr->sensor = NULL;
    /* zero the timestamp so that no field is left indeterminate */
    ptr->timestamp.tv_sec = 0;
    ptr->timestamp.tv_usec = 0;
    ptr->scaling_law = ORTE_SENSOR_SCALE_LINEAR;
    ptr->min = 0.0f;
    ptr->max = 100.0f;
    ptr->gain = 1.0f;
    ptr->data.size = 0;
    ptr->data.bytes = NULL;
}
/*
 * Destructor for orte_sensor_data_t.
 *
 * Releases the two heap buffers the object may hold: the sensor name
 * string and the payload bytes. The pointers themselves are left as they
 * are.
 *
 * @param ptr object being destroyed
 */
static void orte_sensor_data_destruct(orte_sensor_data_t *ptr)
{
    /* sensor name */
    if (ptr->sensor != NULL) {
        free(ptr->sensor);
    }

    /* payload bytes */
    if (ptr->data.bytes != NULL) {
        free(ptr->data.bytes);
    }
}
/* Class instance for orte_sensor_data_t: derives from opal_object_t and
* uses the constructor/destructor pair defined above. */
OBJ_CLASS_INSTANCE(orte_sensor_data_t,
opal_object_t,
orte_sensor_data_construct,
orte_sensor_data_destruct);
#endif