1
1

239 строки
8.5 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*
* The Open MPI State-of-Health Monitoring Subsystem
*
*/
#ifndef ORTE_SMR_H
#define ORTE_SMR_H
/*
* includes
*/
#include "orte_config.h"
#include "orte/orte_constants.h"
#include "orte/orte_types.h"
#include "opal/mca/mca.h"
#include "orte/mca/gpr/gpr_types.h"
#include "orte/mca/ns/ns_types.h"
#include "orte/mca/smr/smr_types.h"
#include "orte/mca/rmaps/rmaps_types.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/*
* Module functions - all MUST be provided!
*/
/**
 * Query a process state.
 *
 * @param state  Pointer to an orte_proc_state_t; presumably receives the
 *               queried state (output) - TODO confirm against a module.
 * @param status Pointer to an int; presumably receives the status value
 *               associated with the process (output) - TODO confirm.
 * @param proc   Name of the process being queried.
 *
 * @return int result code (presumably an ORTE_* constant - TODO confirm).
 */
typedef int (*orte_smr_base_module_get_proc_state_fn_t)(
    orte_proc_state_t *state,
    int *status,
    orte_process_name_t *proc);
/**
 * Set a process state.
 *
 * @param proc   Name of the process whose state is being set.
 * @param state  The process state to record.
 * @param status Integer status passed along with the state
 *               (NOTE(review): its exact meaning is not visible here).
 *
 * @return int result code (presumably an ORTE_* constant - TODO confirm).
 */
typedef int (*orte_smr_base_module_set_proc_state_fn_t)(
    orte_process_name_t *proc,
    orte_proc_state_t state,
    int status);
/**
 * Query a node state.
 *
 * @param state    Pointer to an orte_node_state_t; presumably receives the
 *                 queried state (output) - TODO confirm.
 * @param cell     Cell id used together with nodename to identify the node.
 * @param nodename Name of the node being queried.
 *
 * @return int result code (presumably an ORTE_* constant - TODO confirm).
 */
typedef int (*orte_smr_base_module_get_node_state_fn_t)(
    orte_node_state_t *state,
    orte_cellid_t cell,
    char *nodename);
/**
 * Set a node state.
 *
 * @param cell     Cell id used together with nodename to identify the node.
 * @param nodename Name of the node whose state is being set.
 * @param state    The node state to record.
 *
 * @return int result code (presumably an ORTE_* constant - TODO confirm).
 */
typedef int (*orte_smr_base_module_set_node_state_fn_t)(
    orte_cellid_t cell,
    char *nodename,
    orte_node_state_t state);
/**
 * Query a job state.
 *
 * @param state Pointer to an orte_job_state_t; presumably receives the
 *              queried state (output) - TODO confirm.
 * @param jobid Id of the job being queried.
 *
 * @return int result code (presumably an ORTE_* constant - TODO confirm).
 */
typedef int (*orte_smr_base_module_get_job_state_fn_t)(
    orte_job_state_t *state,
    orte_jobid_t jobid);
/**
 * Set a job state.
 *
 * @param jobid Id of the job whose state is being set.
 * @param state The job state to record.
 *
 * @return int result code (presumably an ORTE_* constant - TODO confirm).
 */
typedef int (*orte_smr_base_module_set_job_state_fn_t)(
    orte_jobid_t jobid,
    orte_job_state_t state);
/**
 * Define the job-specific standard stage gates.
 *
 * Creates all of the ORTE-standard stage gates.
 *
 * @param job      Id of the job the stage gates are defined for.
 * @param cbfunc   Registry trigger callback (NOTE(review): presumably invoked
 *                 when a stage gate fires - confirm).
 * @param user_tag Opaque pointer; presumably handed back to cbfunc - confirm.
 *
 * @return int result code (presumably an ORTE_* constant - TODO confirm).
 */
typedef int (*orte_smr_base_module_job_stage_gate_init_fn_t)(
    orte_jobid_t job,
    orte_gpr_trigger_cb_fn_t cbfunc,
    void *user_tag);
/**
 * Define the orted standard stage gates.
 *
 * Creates all of the orted-standard stage gates.
 *
 * @param job        Job id the stage gates are defined for.
 * @param num_orteds Counter value; presumably the number of orteds taking
 *                   part - TODO confirm.
 * @param cbfunc     Registry trigger callback (NOTE(review): presumably
 *                   invoked when a stage gate fires - confirm).
 * @param user_tag   Opaque pointer; presumably handed back to cbfunc -
 *                   confirm.
 *
 * @return int result code (presumably an ORTE_* constant - TODO confirm).
 */
typedef int (*orte_smr_base_module_orted_stage_gate_init_fn_t)(
    orte_jobid_t job,
    orte_std_cntr_t num_orteds,
    orte_gpr_trigger_cb_fn_t cbfunc,
    void *user_tag);
/**
 * Define an "alert" monitor.
 *
 * Establishes an appropriate trigger to notify the specified callback
 * function when an event takes place. In this case, the event is defined by
 * the monitored location achieving the specified value - e.g., a location
 * could be monitored for a value being set to 1, indicating that a process
 * has aborted.
 *
 * @param job          The job that is to be monitored.
 * @param trigger_name The name of the trigger to be defined.
 * @param counter_key  A string defining the key name of the counter on the
 *                     registry.
 * @param counter      The initial value to which the counter should be set.
 *                     Passed by value as an orte_std_cntr_t (not a pointer).
 * @param alert_value  The value of the counter that should cause the alert
 *                     to be sent. Passed by value as an orte_std_cntr_t
 *                     (not a pointer).
 * @param one_shot     Whether or not the trigger should be a one-shot.
 * @param cbfunc       A registry callback function to be called when the
 *                     alert fires.
 * @param user_tag     Whatever data the user would like to have passed back
 *                     to them when the alert is received.
 *
 * @return int result code (presumably an ORTE_* constant - TODO confirm).
 *
 * NOTE: alerts are intended solely for purposes of alerting the caller when
 * an event happens. Thus, they do not convey any information beyond the fact
 * that they fired.
 */
typedef int (*orte_smr_base_module_define_alert_monitor_fn_t)(
    orte_jobid_t job,
    char *trigger_name,
    char *counter_key,
    orte_std_cntr_t counter,
    orte_std_cntr_t alert_value,
    bool one_shot,
    orte_gpr_trigger_cb_fn_t cbfunc,
    void *user_tag);
/**
 * Initiate monitoring of a job.
 *
 * Notifies the smr that it should initiate monitoring of a job. It is called
 * by a PLS component at an appropriate point in the launch procedure.
 * Calling it allows smr components (e.g., the BProc component that monitors
 * daemons via the BProc-provided centralized alerting system) to make the
 * necessary connections for monitoring the job.
 *
 * @param map      Pointer to a job map. NOTE(review): earlier documentation
 *                 spoke of a "jobid", but the signature takes a map;
 *                 presumably the map identifies the job - confirm.
 * @param cbfunc   Registry trigger callback (NOTE(review): when it is invoked
 *                 is not visible here).
 * @param user_tag Opaque pointer; presumably handed back to cbfunc - confirm.
 *
 * @return int result code (presumably an ORTE_* constant - TODO confirm).
 */
typedef int (*orte_smr_base_module_begin_monitoring_fn_t)(
    orte_job_map_t *map,
    orte_gpr_trigger_cb_fn_t cbfunc,
    void *user_tag);
/**
 * Subscribe to a job stage gate.
 *
 * @param job           Id of the job whose stage gate is subscribed to.
 * @param cbfunc        Registry notify callback for the subscription.
 * @param cbdata        Opaque pointer; presumably handed back to cbfunc -
 *                      confirm.
 * @param cb_conditions Process-state value; presumably selects the
 *                      condition(s) under which cbfunc is called - TODO
 *                      confirm.
 *
 * @return int result code (presumably an ORTE_* constant - TODO confirm).
 */
typedef int (*orte_smr_base_module_job_stage_gate_subscribe_fn_t)(
    orte_jobid_t job,
    orte_gpr_notify_cb_fn_t cbfunc,
    void *cbdata,
    orte_proc_state_t cb_conditions);
/**
 * Shut down the module nicely.
 *
 * Takes no arguments.
 *
 * @return int result code (presumably an ORTE_* constant - TODO confirm).
 */
typedef int (*orte_smr_base_module_finalize_fn_t)(
    void);
/*
 * Module interface, version 1.3.0 - the prototypes needed by the MCA.
 *
 * A table of function pointers, one per smr module operation.
 *
 * NOTE(review): member order is kept exactly as it was; code elsewhere may
 * initialize this struct positionally, so do not reorder.
 */
typedef struct orte_smr_base_module_1_3_0_t {
    /* process, node and job state accessors */
    orte_smr_base_module_get_proc_state_fn_t            get_proc_state;
    orte_smr_base_module_set_proc_state_fn_t            set_proc_state;
    orte_smr_base_module_get_node_state_fn_t            get_node_state;
    orte_smr_base_module_set_node_state_fn_t            set_node_state;
    orte_smr_base_module_get_job_state_fn_t             get_job_state;
    orte_smr_base_module_set_job_state_fn_t             set_job_state;

    /* initiate monitoring of a job */
    orte_smr_base_module_begin_monitoring_fn_t          begin_monitoring;

    /* TRIGGER INIT FUNCTIONS */
    orte_smr_base_module_job_stage_gate_init_fn_t       init_job_stage_gates;
    orte_smr_base_module_orted_stage_gate_init_fn_t     init_orted_stage_gates;
    orte_smr_base_module_define_alert_monitor_fn_t      define_alert_monitor;
    orte_smr_base_module_job_stage_gate_subscribe_fn_t  job_stage_gate_subscribe;

    /* shut the module down */
    orte_smr_base_module_finalize_fn_t                  finalize;
} orte_smr_base_module_1_3_0_t;

/* Unversioned name for the v1.3.0 module interface. */
typedef orte_smr_base_module_1_3_0_t orte_smr_base_module_t;
/**
 * SOH component: initialization function.
 *
 * @param priority Pointer to an int; presumably receives the component's
 *                 selection priority (output) - TODO confirm.
 *
 * @return Pointer to an orte_smr_base_module_t (NOTE(review): whether NULL
 *         may be returned is not visible here).
 */
typedef orte_smr_base_module_t *(*orte_smr_base_component_init_fn_t)(
    int *priority);
/**
 * SOH component: finalization function.
 *
 * Takes no arguments.
 *
 * @return int result code (presumably an ORTE_* constant - TODO confirm).
 */
typedef int (*orte_smr_base_component_finalize_fn_t)(
    void);
/*
 * The standard component data structure, version 1.3.0.
 *
 * NOTE(review): member order is kept exactly as it was; code elsewhere may
 * initialize this struct positionally, so do not reorder.
 */
typedef struct orte_smr_base_component_1_3_0_t {
    /* MCA base component descriptor */
    mca_base_component_t                   smr_version;
    /* MCA base component data, v1.0.0 layout */
    mca_base_component_data_1_0_0_t        smr_data;
    /* component init: returns an smr module and takes a priority pointer */
    orte_smr_base_component_init_fn_t      smr_init;
    /* component finalize */
    orte_smr_base_component_finalize_fn_t  smr_finalize;
} orte_smr_base_component_1_3_0_t;

/* Unversioned name for the v1.3.0 component structure. */
typedef orte_smr_base_component_1_3_0_t orte_smr_base_component_t;
/*
 * Macro for use in components that are of type smr v1.3.0.
 *
 * Expands to MCA_BASE_VERSION_1_0_0 followed by the framework name "smr"
 * and the version numbers 1, 3, 0.
 */
#define ORTE_SMR_BASE_VERSION_1_3_0 \
    MCA_BASE_VERSION_1_0_0, /* smr v1.3 is chained to MCA v1.0 */ \
    "smr", 1, 3, 0          /* smr v1.3 */
/* Holds the selected module's function pointers. */
ORTE_DECLSPEC extern orte_smr_base_module_t orte_smr;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* ORTE_SMR_H */