/*
 * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
/**
 * @file
 *
 * Snapshot Coordination (SNAPC) Interface
 *
 * Terminology:
 * ------------
 *  Global Snapshot Coordinator:
 *    - HNP(s) coordination function.
 *  Local Snapshot Coordinator
 *    - VHNP(s) [e.g., orted] coordination function
 *  Application Snapshot Coordinator
 *    - Application level coordination function
 *  Local Snapshot
 *    - Snapshot generated by a single process in the parallel job
 *  Local Snapshot Reference
 *    - A generic reference to the physical Local Snapshot
 *  Global Snapshot
 *    - Snapshot generated for the entire parallel job
 *  Global Snapshot Reference
 *    - A generic reference to the physical Global Snapshot
 *
 * General Description:
 * ---------------------
 * This framework is tasked with:
 * - Initiating the checkpoint in the system
 * - Physically moving the local snapshot files to a location
 *   Initially this location is the node on which the Head Node Process (HNP)
 *   is running, but later this will be a replicated checkpoint server or
 *   the like.
 * - Generating a 'global snapshot handle' that the user can use to restart
 *   the parallel job.
 *
 * Each component will have 3 tiers of behavior that must behave in concert:
 *  - Global Snapshot Coordinator
 *    These are the HNP's tasks. Mostly distributing the notification of the
 *    checkpoint, and then compiling the physical and virtual nature of the
 *    global snapshot handle.
 *  - Local Snapshot Coordinator
 *    These are the VHNP's (or orted's, if available) tasks.
This will involve * working with the Global Snapshot Coordinator to route the physical * and virtual 'local snapshot's from the application to the desired * location. This process must also notify the Global Snapshot Coordinator * when it's set of processes have completed the checkpoint. * - Application Snapshot Coordinator * This is the application level coordinator. This is very light, just * a subscription to be triggered when it needs to checkpoint, and then, * once finished with the checkpoint, notify the Local Snapshot Coordinator * that it is complete. * If there is no orted (so no bootproxy), then the application assumes the * responsibility of the Local Snapshot Coordinator as well. * */ #ifndef MCA_SNAPC_H #define MCA_SNAPC_H #include "orte_config.h" #include "orte/constants.h" #include "orte/types.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" #include "opal/mca/crs/crs.h" #include "opal/mca/crs/base/base.h" #include "opal/class/opal_object.h" #include "opal/class/opal_pointer_array.h" #include "opal/util/output.h" #include "orte/mca/sstore/sstore.h" BEGIN_C_DECLS /** * States that a process can be in while checkpointing */ /* Reached an error */ #define ORTE_SNAPC_CKPT_STATE_ERROR 0 /* Doing no checkpoint -- Quiet state */ #define ORTE_SNAPC_CKPT_STATE_NONE 1 /* There has been a request for a checkpoint from one of the applications */ #define ORTE_SNAPC_CKPT_STATE_REQUEST 2 /* There is a Pending checkpoint for this process */ #define ORTE_SNAPC_CKPT_STATE_PENDING 3 /* Running the checkpoint */ #define ORTE_SNAPC_CKPT_STATE_RUNNING 4 /* INC Prep Finished */ #define ORTE_SNAPC_CKPT_STATE_INC_PREPED 5 /* All Processes have been stopped */ #define ORTE_SNAPC_CKPT_STATE_STOPPED 6 /* Finished the checkpoint locally */ #define ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL 7 /* Migrating */ #define ORTE_SNAPC_CKPT_STATE_MIGRATING 8 /* Finished establishing the checkpoint */ #define ORTE_SNAPC_CKPT_STATE_ESTABLISHED 9 /* Processes continuing or have been 
recovered (finished post-INC) */ #define ORTE_SNAPC_CKPT_STATE_RECOVERED 10 /* Unable to checkpoint this job */ #define ORTE_SNAPC_CKPT_STATE_NO_CKPT 11 /* Unable to restart this job */ #define ORTE_SNAPC_CKPT_STATE_NO_RESTART 12 #define ORTE_SNAPC_CKPT_MAX 13 /** * Sufficiently high shift value to avoid colliding the process * checkpointing states above with the ORTE process states */ #define ORTE_SNAPC_CKPT_SHIFT 131072 /* Uniquely encode the SNAPC state */ #define ORTE_SNAPC_CKPT_NOTIFY(state) (ORTE_SNAPC_CKPT_SHIFT + state) /* Decode the SNAPC state */ #define ORTE_SNAPC_CKPT_STATE(state) (state - ORTE_SNAPC_CKPT_SHIFT) /* Check whether a state is a SNAPC state or not. */ #define CHECK_ORTE_SNAPC_CKPT_STATE(state) (state >= ORTE_SNAPC_CKPT_SHIFT) /** * Definition of a orte local snapshot. * Similar to the opal_crs_base_snapshot_t except that it * contains process contact information. */ struct orte_snapc_base_local_snapshot_1_0_0_t { /** List super object */ opal_list_item_t super; /** ORTE Process name */ orte_process_name_t process_name; /** State of the checkpoint */ int state; /** Stable Storage Handle (must equal the global version) */ orte_sstore_base_handle_t ss_handle; }; typedef struct orte_snapc_base_local_snapshot_1_0_0_t orte_snapc_base_local_snapshot_1_0_0_t; typedef struct orte_snapc_base_local_snapshot_1_0_0_t orte_snapc_base_local_snapshot_t; ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_snapc_base_local_snapshot_t); /** * Definition of the global snapshot. * Each component is assumed to have extened this definition * in the same way they extern the orte_snapc_base_compoinent_t below. 
*/ struct orte_snapc_base_global_snapshot_1_0_0_t { /** This is an object, so must have super */ opal_list_item_t super; /** A list of orte_snapc_base_snapshot_t's */ opal_list_t local_snapshots; /** Checkpoint Options */ opal_crs_base_ckpt_options_t *options; /** Stable Storage Handle */ orte_sstore_base_handle_t ss_handle; }; typedef struct orte_snapc_base_global_snapshot_1_0_0_t orte_snapc_base_global_snapshot_1_0_0_t; typedef struct orte_snapc_base_global_snapshot_1_0_0_t orte_snapc_base_global_snapshot_t; ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_snapc_base_global_snapshot_t); struct orte_snapc_base_quiesce_1_0_0_t { /** Parent is an object type */ opal_object_t super; /** Current epoch */ int epoch; /** Requested CRS */ char * crs_name; /** Handle for reference */ char * handle; /** snapshot list */ orte_snapc_base_global_snapshot_t *snapshot; /** Stable Storage Handle */ orte_sstore_base_handle_t ss_handle; /** Stable Storage Snapshot list */ orte_sstore_base_global_snapshot_info_t *ss_snapshot; /** Target Directory */ char * target_dir; /** Command Line */ char * cmdline; /** State of operation if checkpointing */ opal_crs_state_type_t cr_state; /** Checkpointing? */ bool checkpointing; /** Restarting? */ bool restarting; /** Migrating? 
*/ bool migrating; /** List of migrating processes */ int num_migrating; opal_pointer_array_t migrating_procs; }; typedef struct orte_snapc_base_quiesce_1_0_0_t orte_snapc_base_quiesce_1_0_0_t; typedef struct orte_snapc_base_quiesce_1_0_0_t orte_snapc_base_quiesce_t; ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_snapc_base_quiesce_t); /** * Application request for a global checkpoint related operation */ typedef enum { ORTE_SNAPC_OP_NONE = 0, ORTE_SNAPC_OP_INIT, ORTE_SNAPC_OP_FIN, ORTE_SNAPC_OP_FIN_ACK, ORTE_SNAPC_OP_CHECKPOINT, ORTE_SNAPC_OP_RESTART, ORTE_SNAPC_OP_MIGRATE, ORTE_SNAPC_OP_QUIESCE_START, ORTE_SNAPC_OP_QUIESCE_CHECKPOINT, ORTE_SNAPC_OP_QUIESCE_END } orte_snapc_base_request_op_event_t; struct orte_snapc_base_request_op_1_0_0_t { /** Parent is an object type */ opal_object_t super; /** Event to request */ orte_snapc_base_request_op_event_t event; /** Is this request still active */ bool is_active; /** Leader of the operation */ int leader; /** Sequence Number */ int seq_num; /** Global Handle */ char * global_handle; /** Stable Storage Handle */ orte_sstore_base_handle_t ss_handle; /** Migrating vpid list of participants */ int mig_num; int *mig_vpids; /** Migrating hostname preference list */ char (*mig_host_pref)[OPAL_MAX_PROCESSOR_NAME]; /** Migrating vpid preference list */ int *mig_vpid_pref; /** Info key */ int *mig_off_node; }; typedef struct orte_snapc_base_request_op_1_0_0_t orte_snapc_base_request_op_1_0_0_t; typedef struct orte_snapc_base_request_op_1_0_0_t orte_snapc_base_request_op_t; ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_snapc_base_request_op_t); /** * Module initialization function. * Returns ORTE_SUCCESS */ typedef int (*orte_snapc_base_module_init_fn_t) (bool seed, bool app); /** * Module finalization function. 
* Returns ORTE_SUCCESS */ typedef int (*orte_snapc_base_module_finalize_fn_t) (void); /** * Setup the necessary structures for this job * Returns ORTE_SUCCESS */ typedef int (*orte_snapc_base_setup_job_fn_t) (orte_jobid_t jobid); /** * Setup the necessary structures for this job * Returns ORTE_SUCCESS */ typedef int (*orte_snapc_base_release_job_fn_t) (orte_jobid_t jobid); /** * Handle fault tolerance updates * * @param[in] state Fault tolerance state update * * @retval ORTE_SUCCESS The operation completed successfully * @retval ORTE_ERROR An unspecifed error occurred */ typedef int (*orte_snapc_base_ft_event_fn_t)(int state); /** * Start a checkpoint originating from an internal source. * * This really only makes sense to call from an application, but in the future * we may allow the checkpoint operation to use this function from the local * coordinator. * * @param[out] epoch Epoch number to associate with this checkpoint operation * Returns ORTE_SUCCESS */ typedef int (*orte_snapc_base_start_checkpoint_fn_t) (orte_snapc_base_quiesce_t *datum); /** * Signal end of checkpoint epoch originating from an internal source. * * @param[in] epoch Epoch number to associate with this checkpoint operation * Returns ORTE_SUCCESS */ typedef int (*orte_snapc_base_end_checkpoint_fn_t) (orte_snapc_base_quiesce_t *datum); /** * Request a checkpoint related operation to take place */ typedef int (*orte_snapc_base_request_op_fn_t) (orte_snapc_base_request_op_t *datum); /** * Structure for SNAPC components. 
*/ struct orte_snapc_base_component_2_0_0_t { /** MCA base component */ mca_base_component_t base_version; /** MCA base data */ mca_base_component_data_t base_data; /** Verbosity Level */ int verbose; /** Output Handle for opal_output */ int output_handle; /** Default Priority */ int priority; }; typedef struct orte_snapc_base_component_2_0_0_t orte_snapc_base_component_2_0_0_t; typedef struct orte_snapc_base_component_2_0_0_t orte_snapc_base_component_t; /** * Structure for SNAPC modules */ struct orte_snapc_base_module_1_0_0_t { /** Initialization Function */ orte_snapc_base_module_init_fn_t snapc_init; /** Finalization Function */ orte_snapc_base_module_finalize_fn_t snapc_finalize; /** Setup structures for a job */ orte_snapc_base_setup_job_fn_t setup_job; /** Release job */ orte_snapc_base_release_job_fn_t release_job; /** Handle any FT Notifications */ orte_snapc_base_ft_event_fn_t ft_event; /** Handle internal request for checkpoint */ orte_snapc_base_start_checkpoint_fn_t start_ckpt; orte_snapc_base_end_checkpoint_fn_t end_ckpt; /** Handle a checkpoint related request */ orte_snapc_base_request_op_fn_t request_op; }; typedef struct orte_snapc_base_module_1_0_0_t orte_snapc_base_module_1_0_0_t; typedef struct orte_snapc_base_module_1_0_0_t orte_snapc_base_module_t; ORTE_DECLSPEC extern orte_snapc_base_module_t orte_snapc; ORTE_DECLSPEC extern orte_snapc_base_component_t orte_snapc_base_selected_component; /** * Macro for use in components that are of type SNAPC */ #define ORTE_SNAPC_BASE_VERSION_2_0_0 \ MCA_BASE_VERSION_2_0_0, \ "snapc", 2, 0, 0 END_C_DECLS #endif /* ORTE_SNAPC_H */