b9893aacc5
1. file activity - can monitor file size, access and modification times. If these fail to change over a specified number of sampling iterations (rate is an mca param), then the errmgr is notified. 2. memory usage - checks amount of memory used by a process. Limit and sampling rate can be set. This support must be enabled by configuring --enable-sensors. ompi_info and orte-info have been updated to include the new framework. Also includes some initial steps toward restoring the recovery capability. Most notably, the ODLS API has been extended to include a "restart_proc" entry for restarting a local process, and organizes the various ERRMGR framework globals into a single struct as we do in the other ORTE frameworks. Fix an oversight in the ERRMGR framework where a pointer array was constructed, but not initialized. Implementation continues. This commit was SVN r23043.
98 строки
2.3 KiB
C
98 строки
2.3 KiB
C
/*
 * Copyright (c) 2009      Cisco Systems, Inc.  All rights reserved.
 *
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 * @file:
 *
 */
|
|
|
|
#ifndef MCA_SENSOR_H
|
|
#define MCA_SENSOR_H
|
|
|
|
/*
|
|
* includes
|
|
*/
|
|
|
|
#include "orte_config.h"
|
|
#include "orte/types.h"
|
|
|
|
#include "opal/mca/mca.h"
|
|
|
|
BEGIN_C_DECLS
|
|
|
|
/*
 * Component functions - all MUST be provided!
 */
|
|
|
|
/**
 * Initialize the selected module.
 *
 * Takes no arguments.
 *
 * @return an int status code.  NOTE(review): the meaning of individual
 *         values is not visible in this header -- presumably an ORTE
 *         error constant; confirm against the implementing components.
 */
typedef int (*orte_sensor_base_module_init_fn_t)(void);
|
|
|
|
/**
 * Finalize the selected module.
 *
 * Takes no arguments and returns nothing, so a module has no way to
 * report a failure through this entry point.
 */
typedef void (*orte_sensor_base_module_finalize_fn_t)(void);
|
|
|
|
/* start collecting data */
|
|
typedef void (*orte_sensor_base_module_start_fn_t)(orte_jobid_t jobid);
|
|
|
|
/* stop collecting data */
|
|
typedef void (*orte_sensor_base_module_stop_fn_t)(orte_jobid_t jobid);
|
|
|
|
/* API module */
|
|
/*
|
|
* Ver 1.0
|
|
*/
|
|
struct orte_sensor_base_API_module_1_0_0_t {
|
|
orte_sensor_base_module_start_fn_t start;
|
|
orte_sensor_base_module_stop_fn_t stop;
|
|
};
|
|
|
|
typedef struct orte_sensor_base_API_module_1_0_0_t orte_sensor_base_API_module_1_0_0_t;
|
|
typedef orte_sensor_base_API_module_1_0_0_t orte_sensor_base_API_module_t;
|
|
|
|
|
|
/*
|
|
* Component modules Ver 1.0
|
|
*/
|
|
struct orte_sensor_base_module_1_0_0_t {
|
|
orte_sensor_base_module_init_fn_t init;
|
|
orte_sensor_base_module_finalize_fn_t finalize;
|
|
orte_sensor_base_module_start_fn_t start;
|
|
orte_sensor_base_module_stop_fn_t stop;
|
|
};
|
|
|
|
typedef struct orte_sensor_base_module_1_0_0_t orte_sensor_base_module_1_0_0_t;
|
|
typedef orte_sensor_base_module_1_0_0_t orte_sensor_base_module_t;
|
|
|
|
/*
|
|
* the standard component data structure
|
|
*/
|
|
struct orte_sensor_base_component_1_0_0_t {
|
|
mca_base_component_t base_version;
|
|
mca_base_component_data_t base_data;
|
|
};
|
|
typedef struct orte_sensor_base_component_1_0_0_t orte_sensor_base_component_1_0_0_t;
|
|
typedef orte_sensor_base_component_1_0_0_t orte_sensor_base_component_t;
|
|
|
|
|
|
|
|
/*
 * Macro for use in components that are of type sensor v1.0.0
 *
 * Expands to a comma-separated initializer fragment: the MCA v2.0.0
 * version fields followed by the framework name "sensor" and the
 * framework version 1, 0, 0.  Every line except the last must end in a
 * backslash with nothing after it, otherwise the definition is cut
 * short and the remaining tokens land at file scope.
 */
#define ORTE_SENSOR_BASE_VERSION_1_0_0 \
    /* sensor v1.0 is chained to MCA v2.0 */ \
    MCA_BASE_VERSION_2_0_0, \
    /* sensor v1.0 */ \
    "sensor", 1, 0, 0
|
|
|
|
/* Global structure for accessing sensor functions
|
|
*/
|
|
ORTE_DECLSPEC extern orte_sensor_base_API_module_t orte_sensor; /* holds API function pointers */
|
|
|
|
END_C_DECLS
|
|
|
|
#endif /* MCA_SENSOR_H */
|