1
1
openmpi/orte/mca/errmgr/hnp/errmgr_hnp.h
Josh Hursey fabd5cc153 Simplification of the ErrMgr framework by removing the 'stack'/composite functionality.
The composite functionality was becoming difficult to maintain, so we removed it for now which simplifies the framework design considerably.

Since the 'crmig' and 'autor' components were -very- similar to the 'hnp' component, this commit also merges them together. By moving the 'crmig' and 'autor' to a separate file under the 'hnp' component we are able to isolate the C/R logic to a large extent, thus being only minimally hooked into the previous 'hnp' component.

So other than some name changes, the functionality is all still in place. I will update the C/R documentation later this morning.

This commit was SVN r23628.
2010-08-19 13:09:20 +00:00

134 строки
5.1 KiB
C

/*
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
*/
#ifndef MCA_ERRMGR_HNP_EXPORT_H
#define MCA_ERRMGR_HNP_EXPORT_H
#include "orte_config.h"
#include "orte/mca/errmgr/errmgr.h"
BEGIN_C_DECLS
/*
* Local Component structures
*/
struct orte_errmgr_hnp_component_t {
orte_errmgr_base_component_t super; /** Base Errmgr component */
bool ignore_current_update;
bool term_in_progress;
#if OPAL_ENABLE_FT_CR
/* State of the Recovery */
bool crmig_in_progress;
bool autor_in_progress;
/* CRMig Options */
bool crmig_enabled;
bool crmig_timing_enabled;
/* AutoR Options */
bool autor_enabled;
bool autor_timing_enabled;
int autor_recovery_delay;
bool autor_skip_oldnode;
#endif
};
typedef struct orte_errmgr_hnp_component_t orte_errmgr_hnp_component_t;
OPAL_MODULE_DECLSPEC extern orte_errmgr_hnp_component_t mca_errmgr_hnp_component;
int orte_errmgr_hnp_component_query(mca_base_module_t **module, int *priority);
void orte_errmgr_hnp_update_proc(orte_job_t *jdata,
orte_process_name_t *proc,
orte_proc_state_t state,
pid_t pid,
orte_exit_code_t exit_code);
void orte_errmgr_hnp_record_dead_daemon(orte_job_t *jdat,
orte_vpid_t vpid,
orte_proc_state_t state,
orte_exit_code_t exit_code);
/***************************
* Module functions: Global
***************************/
int orte_errmgr_hnp_global_module_init(void);
int orte_errmgr_hnp_global_module_finalize(void);
int orte_errmgr_hnp_global_update_state(orte_jobid_t job,
orte_job_state_t jobstate,
orte_process_name_t *proc_name,
orte_proc_state_t state,
pid_t pid,
orte_exit_code_t exit_code);
int orte_errmgr_hnp_global_predicted_fault(opal_list_t *proc_list,
opal_list_t *node_list,
opal_list_t *suggested_map);
int orte_errmgr_hnp_global_suggest_map_targets(orte_proc_t *proc,
orte_node_t *oldnode,
opal_list_t *node_list);
int orte_errmgr_hnp_global_ft_event(int state);
/* HNP Versions */
int orte_errmgr_hnp_base_global_init(void);
int orte_errmgr_hnp_base_global_finalize(void);
int orte_errmgr_hnp_base_global_update_state(orte_jobid_t job,
orte_job_state_t jobstate,
orte_process_name_t *proc,
orte_proc_state_t state,
pid_t pid,
orte_exit_code_t exit_code);
int orte_errmgr_hnp_base_global_ft_event(int state);
#if OPAL_ENABLE_FT_CR
/* CRMig Versions */
int orte_errmgr_hnp_crmig_global_module_init(void);
int orte_errmgr_hnp_crmig_global_module_finalize(void);
int orte_errmgr_hnp_crmig_global_update_state(orte_jobid_t job,
orte_job_state_t jobstate,
orte_process_name_t *proc_name,
orte_proc_state_t state,
pid_t pid,
orte_exit_code_t exit_code);
int orte_errmgr_hnp_crmig_global_predicted_fault(opal_list_t *proc_list,
opal_list_t *node_list,
opal_list_t *suggested_map);
int orte_errmgr_hnp_crmig_global_suggest_map_targets(orte_proc_t *proc,
orte_node_t *oldnode,
opal_list_t *node_list);
int orte_errmgr_hnp_crmig_global_ft_event(int state);
/* AutoR Versions */
int orte_errmgr_hnp_autor_global_module_init(void);
int orte_errmgr_hnp_autor_global_module_finalize(void);
int orte_errmgr_hnp_autor_global_update_state(orte_jobid_t job,
orte_job_state_t jobstate,
orte_process_name_t *proc_name,
orte_proc_state_t state,
pid_t pid,
orte_exit_code_t exit_code);
int orte_errmgr_hnp_autor_global_suggest_map_targets(orte_proc_t *proc,
orte_node_t *oldnode,
opal_list_t *node_list);
int orte_errmgr_hnp_autor_global_ft_event(int state);
#endif
END_C_DECLS
#endif /* MCA_ERRMGR_HNP_EXPORT_H */