2007-03-16 23:11:45 +00:00
|
|
|
/*
|
2010-01-25 20:28:38 +00:00
|
|
|
* Copyright (c) 2004-2010 The Trustees of Indiana University.
|
2007-03-16 23:11:45 +00:00
|
|
|
* All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
|
|
|
* All rights reserved.
|
2015-06-23 20:59:57 -07:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
2007-03-16 23:11:45 +00:00
|
|
|
* University of Stuttgart. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2007-06-05 03:03:59 +00:00
|
|
|
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
2007-10-17 13:47:36 +00:00
|
|
|
* reserved.
|
|
|
|
* Copyright (c) 2007 Evergrid, Inc. All rights reserved.
|
|
|
|
*
|
2007-03-16 23:11:45 +00:00
|
|
|
* $COPYRIGHT$
|
2015-06-23 20:59:57 -07:00
|
|
|
*
|
2007-03-16 23:11:45 +00:00
|
|
|
* Additional copyrights may follow
|
2015-06-23 20:59:57 -07:00
|
|
|
*
|
2007-03-16 23:11:45 +00:00
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "opal_config.h"
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
2009-07-16 18:27:33 +00:00
|
|
|
#ifdef HAVE_UNISTD_H
|
2007-03-16 23:11:45 +00:00
|
|
|
#include <unistd.h>
|
2007-04-01 16:08:27 +00:00
|
|
|
#endif /* HAVE_UNISTD_H */
|
2009-05-07 16:14:59 +00:00
|
|
|
#include <string.h>
|
2009-08-05 16:21:51 +00:00
|
|
|
#ifdef HAVE_DLFCN_H
|
|
|
|
#include <dlfcn.h>
|
|
|
|
#endif
|
2007-03-16 23:11:45 +00:00
|
|
|
|
2007-06-05 03:03:59 +00:00
|
|
|
#include "opal/util/opal_environ.h"
|
2007-03-16 23:11:45 +00:00
|
|
|
#include "opal/util/output.h"
|
|
|
|
#include "opal/util/show_help.h"
|
|
|
|
#include "opal/util/argv.h"
|
|
|
|
#include "opal/util/opal_environ.h"
|
|
|
|
|
|
|
|
#include "opal/constants.h"
|
2013-03-27 21:09:41 +00:00
|
|
|
#include "opal/mca/base/mca_base_var.h"
|
2007-03-16 23:11:45 +00:00
|
|
|
|
|
|
|
#include "opal/mca/crs/crs.h"
|
|
|
|
#include "opal/mca/crs/base/base.h"
|
|
|
|
#include "opal/runtime/opal_cr.h"
|
|
|
|
|
|
|
|
#include "crs_self.h"
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Self module
|
|
|
|
*/
|
|
|
|
static opal_crs_base_module_t loc_module = {
|
|
|
|
/** Initialization Function */
|
|
|
|
opal_crs_self_module_init,
|
|
|
|
/** Finalization Function */
|
|
|
|
opal_crs_self_module_finalize,
|
|
|
|
|
|
|
|
/** Checkpoint interface */
|
|
|
|
opal_crs_self_checkpoint,
|
|
|
|
|
|
|
|
/** Restart Command Access */
|
|
|
|
opal_crs_self_restart,
|
|
|
|
|
|
|
|
/** Disable checkpoints */
|
|
|
|
opal_crs_self_disable_checkpoint,
|
|
|
|
/** Enable checkpoints */
|
2007-10-17 13:47:36 +00:00
|
|
|
opal_crs_self_enable_checkpoint,
|
|
|
|
|
|
|
|
/** Prelaunch */
|
2008-07-08 20:04:39 +00:00
|
|
|
opal_crs_self_prelaunch,
|
2008-02-19 22:15:52 +00:00
|
|
|
|
|
|
|
/** Register Thread */
|
2008-07-08 20:04:39 +00:00
|
|
|
opal_crs_self_reg_thread
|
2007-03-16 23:11:45 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Snapshot structure
|
|
|
|
*/
|
|
|
|
OBJ_CLASS_DECLARATION(opal_crs_self_snapshot_t);
|
|
|
|
|
|
|
|
struct opal_crs_self_snapshot_t {
|
|
|
|
/** Base CRS snapshot type */
|
|
|
|
opal_crs_base_snapshot_t super;
|
|
|
|
/** Command Line used to restart the app */
|
|
|
|
char * cmd_line;
|
|
|
|
};
|
|
|
|
typedef struct opal_crs_self_snapshot_t opal_crs_self_snapshot_t;
|
|
|
|
|
2007-04-01 16:08:27 +00:00
|
|
|
static void opal_crs_self_construct(opal_crs_self_snapshot_t *obj);
|
|
|
|
static void opal_crs_self_destruct( opal_crs_self_snapshot_t *obj);
|
2007-03-16 23:11:45 +00:00
|
|
|
|
|
|
|
OBJ_CLASS_INSTANCE(opal_crs_self_snapshot_t,
|
|
|
|
opal_crs_base_snapshot_t,
|
|
|
|
opal_crs_self_construct,
|
|
|
|
opal_crs_self_destruct);
|
|
|
|
|
|
|
|
|
2009-08-05 22:07:37 +00:00
|
|
|
typedef void (*opal_crs_self_dlsym_dummy_fn_t)(void);
|
|
|
|
|
2007-03-16 23:11:45 +00:00
|
|
|
/************************************
|
|
|
|
* Locally Global vars & functions :)
|
|
|
|
************************************/
|
2009-08-05 22:07:37 +00:00
|
|
|
static int crs_self_find_function(char *prefix, char *suffix,
|
|
|
|
opal_crs_self_dlsym_dummy_fn_t *fn_ptr);
|
2007-03-16 23:11:45 +00:00
|
|
|
|
|
|
|
static int self_update_snapshot_metadata(opal_crs_self_snapshot_t *snapshot);
|
|
|
|
|
|
|
|
static int opal_crs_self_restart_cmd(opal_crs_self_snapshot_t *snapshot, char **cmd);
|
|
|
|
static int self_cold_start(opal_crs_self_snapshot_t *snapshot);
|
|
|
|
|
2007-04-01 16:08:27 +00:00
|
|
|
void opal_crs_self_construct(opal_crs_self_snapshot_t *snapshot)
|
|
|
|
{
|
2007-03-16 23:11:45 +00:00
|
|
|
snapshot->cmd_line = NULL;
|
|
|
|
}
|
|
|
|
|
2007-04-01 16:08:27 +00:00
|
|
|
void opal_crs_self_destruct( opal_crs_self_snapshot_t *snapshot)
|
|
|
|
{
|
2007-03-16 23:11:45 +00:00
|
|
|
if(NULL != snapshot->cmd_line)
|
|
|
|
free(snapshot->cmd_line);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int opal_crs_self_extract_callbacks(void);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* MCA Functions
|
|
|
|
*/
|
2008-05-06 18:08:45 +00:00
|
|
|
int opal_crs_self_component_query(mca_base_module_t **module, int *priority)
|
2007-03-16 23:11:45 +00:00
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
opal_output_verbose(10, mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: component_query()");
|
|
|
|
|
2009-08-05 16:21:51 +00:00
|
|
|
/*
|
|
|
|
* If this is a tool, then return a module with the lowest priority.
|
|
|
|
* This allows 'mpirun' to select the 'none' component since it has
|
|
|
|
* a priority higher than 0.
|
|
|
|
* But also allows 'opal-restart' to select this component if needed
|
|
|
|
* since it only ever requests that a specific component be opened
|
|
|
|
* that is defined in the snapshot metadata file.
|
|
|
|
*/
|
|
|
|
if( opal_cr_is_tool ) {
|
|
|
|
*priority = 0;
|
|
|
|
*module = (mca_base_module_t *)&loc_module;
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2007-03-16 23:11:45 +00:00
|
|
|
/*
|
|
|
|
* Extract the user level callbacks if they exist
|
|
|
|
*/
|
|
|
|
ret = opal_crs_self_extract_callbacks();
|
|
|
|
|
2015-06-23 20:59:57 -07:00
|
|
|
if( OPAL_SUCCESS != ret ||
|
2007-03-16 23:11:45 +00:00
|
|
|
!mca_crs_self_component.can_checkpoint ) {
|
|
|
|
*priority = -1;
|
2008-05-06 18:08:45 +00:00
|
|
|
*module = NULL;
|
|
|
|
return OPAL_ERROR;
|
2007-03-16 23:11:45 +00:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
*priority = mca_crs_self_component.super.priority;
|
2008-05-06 18:08:45 +00:00
|
|
|
*module = (mca_base_module_t *)&loc_module;
|
|
|
|
return OPAL_SUCCESS;
|
2007-03-16 23:11:45 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-04-01 16:08:27 +00:00
|
|
|
static int opal_crs_self_extract_callbacks(void)
|
|
|
|
{
|
2009-08-05 22:07:37 +00:00
|
|
|
opal_crs_self_dlsym_dummy_fn_t loc_fn;
|
2007-03-16 23:11:45 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Find the function names
|
|
|
|
*/
|
2009-08-05 22:07:37 +00:00
|
|
|
crs_self_find_function(mca_crs_self_component.prefix,
|
|
|
|
SUFFIX_CHECKPOINT,
|
|
|
|
&loc_fn);
|
|
|
|
mca_crs_self_component.ucb_checkpoint_fn = (opal_crs_self_checkpoint_callback_fn_t)loc_fn;
|
2007-03-16 23:11:45 +00:00
|
|
|
|
2009-08-05 22:07:37 +00:00
|
|
|
crs_self_find_function(mca_crs_self_component.prefix,
|
|
|
|
SUFFIX_CONTINUE,
|
|
|
|
&loc_fn);
|
|
|
|
mca_crs_self_component.ucb_continue_fn = (opal_crs_self_continue_callback_fn_t)loc_fn;
|
2007-03-16 23:11:45 +00:00
|
|
|
|
2009-08-05 22:07:37 +00:00
|
|
|
crs_self_find_function(mca_crs_self_component.prefix,
|
|
|
|
SUFFIX_RESTART,
|
|
|
|
&loc_fn);
|
|
|
|
mca_crs_self_component.ucb_restart_fn = (opal_crs_self_restart_callback_fn_t)loc_fn;
|
2007-03-16 23:11:45 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Sanity check
|
|
|
|
*/
|
|
|
|
mca_crs_self_component.can_checkpoint = true;
|
|
|
|
|
|
|
|
if(NULL == mca_crs_self_component.ucb_checkpoint_fn) {
|
|
|
|
mca_crs_self_component.can_checkpoint = false;
|
|
|
|
}
|
|
|
|
if(NULL == mca_crs_self_component.ucb_continue_fn) {
|
|
|
|
}
|
|
|
|
if(NULL == mca_crs_self_component.ucb_restart_fn) {
|
|
|
|
}
|
|
|
|
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
int opal_crs_self_module_init(void)
|
|
|
|
{
|
|
|
|
bool callback_matched = true;
|
|
|
|
|
|
|
|
opal_output_verbose(10, mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: module_init()");
|
|
|
|
|
|
|
|
if( opal_cr_is_tool ) {
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2007-03-16 23:11:45 +00:00
|
|
|
/*
|
|
|
|
* Sanity check
|
|
|
|
*/
|
|
|
|
if(NULL == mca_crs_self_component.ucb_checkpoint_fn) {
|
|
|
|
callback_matched = false;
|
|
|
|
mca_crs_self_component.can_checkpoint = false;
|
|
|
|
}
|
|
|
|
if(NULL == mca_crs_self_component.ucb_continue_fn) {
|
|
|
|
callback_matched = false;
|
|
|
|
}
|
|
|
|
if(NULL == mca_crs_self_component.ucb_restart_fn) {
|
|
|
|
callback_matched = false;
|
|
|
|
}
|
|
|
|
if( !callback_matched ) {
|
|
|
|
if( 1 <= mca_crs_self_component.super.verbose ) {
|
|
|
|
opal_show_help("help-opal-crs-self.txt", "self:no_callback", false,
|
|
|
|
"checkpoint", mca_crs_self_component.prefix, SUFFIX_CHECKPOINT,
|
|
|
|
"continue ", mca_crs_self_component.prefix, SUFFIX_CONTINUE,
|
|
|
|
"restart ", mca_crs_self_component.prefix, SUFFIX_RESTART,
|
|
|
|
PREFIX_DEFAULT);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the user requested that we do_restart, then call their callback
|
|
|
|
*/
|
|
|
|
if(mca_crs_self_component.do_restart) {
|
|
|
|
opal_output_verbose(10, mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: module_init: Call their restart function");
|
2015-06-23 20:59:57 -07:00
|
|
|
if( NULL != mca_crs_self_component.ucb_restart_fn)
|
2007-03-16 23:11:45 +00:00
|
|
|
mca_crs_self_component.ucb_restart_fn();
|
|
|
|
}
|
|
|
|
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
int opal_crs_self_module_finalize(void)
|
|
|
|
{
|
|
|
|
opal_output_verbose(10, mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: module_finalize()");
|
|
|
|
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-09-22 18:26:12 +00:00
|
|
|
int opal_crs_self_checkpoint(pid_t pid,
|
|
|
|
opal_crs_base_snapshot_t *base_snapshot,
|
|
|
|
opal_crs_base_ckpt_options_t *options,
|
|
|
|
opal_crs_state_type_t *state)
|
2007-03-16 23:11:45 +00:00
|
|
|
{
|
|
|
|
opal_crs_self_snapshot_t *snapshot = OBJ_NEW(opal_crs_self_snapshot_t);
|
|
|
|
int ret, exit_status = OPAL_SUCCESS;
|
|
|
|
char * restart_cmd = NULL;
|
|
|
|
|
2009-08-05 16:21:51 +00:00
|
|
|
/*
|
|
|
|
* This function should never be called by a tool
|
|
|
|
*/
|
|
|
|
if( opal_cr_is_tool ) {
|
|
|
|
return OPAL_ERR_NOT_SUPPORTED;
|
|
|
|
}
|
|
|
|
|
2009-09-22 18:26:12 +00:00
|
|
|
if( options->stop ) {
|
|
|
|
opal_output(0,
|
|
|
|
"crs:self: checkpoint(): Error: SIGSTOP Not currently supported!");
|
|
|
|
}
|
|
|
|
|
2007-03-16 23:11:45 +00:00
|
|
|
/*
|
|
|
|
* Setup for snapshot directory creation
|
|
|
|
*/
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-10 20:51:11 +00:00
|
|
|
snapshot->super = *base_snapshot;
|
|
|
|
#if 0
|
|
|
|
snapshot->super.snapshot_directory = strdup(base_snapshot->snapshot_directory);
|
|
|
|
snapshot->super.metadata_filename = strdup(base_snapshot->metadata_filename);
|
|
|
|
#endif
|
2007-03-16 23:11:45 +00:00
|
|
|
|
|
|
|
opal_output_verbose(10, mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: checkpoint(%d, ---)", pid);
|
|
|
|
|
|
|
|
if(!mca_crs_self_component.can_checkpoint) {
|
|
|
|
opal_show_help("help-opal-crs-self.txt", "self:ckpt_disabled", false);
|
|
|
|
exit_status = OPAL_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2008-05-08 18:47:47 +00:00
|
|
|
* Update the snapshot metadata
|
2007-03-16 23:11:45 +00:00
|
|
|
*/
|
2008-05-06 18:08:45 +00:00
|
|
|
snapshot->super.component_name = strdup(mca_crs_self_component.super.base_version.mca_component_name);
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-10 20:51:11 +00:00
|
|
|
if( NULL == snapshot->super.metadata ) {
|
|
|
|
if (NULL == (snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "a")) ) {
|
|
|
|
opal_output(mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: checkpoint(): Error: Unable to open the file (%s)",
|
|
|
|
snapshot->super.metadata_filename);
|
|
|
|
exit_status = OPAL_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2007-03-16 23:11:45 +00:00
|
|
|
}
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-10 20:51:11 +00:00
|
|
|
fprintf(snapshot->super.metadata, "%s%s\n", CRS_METADATA_COMP, snapshot->super.component_name);
|
2007-03-16 23:11:45 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Call the user callback function
|
|
|
|
*/
|
|
|
|
if(NULL != mca_crs_self_component.ucb_checkpoint_fn) {
|
|
|
|
mca_crs_self_component.ucb_checkpoint_fn(&restart_cmd);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Save the restart command
|
|
|
|
*/
|
|
|
|
if( NULL == restart_cmd) {
|
|
|
|
*state = OPAL_CRS_ERROR;
|
|
|
|
opal_show_help("help-opal-crs-self.txt", "self:no-restart-cmd",
|
|
|
|
true);
|
|
|
|
exit_status = OPAL_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
snapshot->cmd_line = strdup(restart_cmd);
|
|
|
|
|
|
|
|
opal_output_verbose(10, mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: checkpoint: Restart Command (%s)", snapshot->cmd_line);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The best we can do is update the metadata file with the
|
|
|
|
* application argv and argc we started with.
|
|
|
|
*/
|
|
|
|
if( OPAL_SUCCESS != (ret = self_update_snapshot_metadata(snapshot)) ) {
|
|
|
|
*state = OPAL_CRS_ERROR;
|
|
|
|
opal_output(mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: checkpoint(): Error: Unable to update metadata for snapshot (%s).",
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-10 20:51:11 +00:00
|
|
|
snapshot->super.metadata_filename);
|
2007-03-16 23:11:45 +00:00
|
|
|
exit_status = ret;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
*state = OPAL_CRS_CONTINUE;
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2007-03-16 23:11:45 +00:00
|
|
|
/*
|
|
|
|
* Call their continue routine for completeness
|
|
|
|
*/
|
|
|
|
if(NULL != mca_crs_self_component.ucb_continue_fn) {
|
|
|
|
mca_crs_self_component.ucb_continue_fn();
|
|
|
|
}
|
|
|
|
|
|
|
|
base_snapshot = &(snapshot->super);
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
if( NULL != restart_cmd) {
|
|
|
|
free(restart_cmd);
|
|
|
|
restart_cmd = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return exit_status;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2015-06-23 20:59:57 -07:00
|
|
|
* Notice that the user restart callback is not called here, but always from
|
2007-03-16 23:11:45 +00:00
|
|
|
* opal_init for the self module.
|
|
|
|
*/
|
|
|
|
int opal_crs_self_restart(opal_crs_base_snapshot_t *base_snapshot, bool spawn_child, pid_t *child_pid)
|
|
|
|
{
|
|
|
|
opal_crs_self_snapshot_t *snapshot = OBJ_NEW(opal_crs_self_snapshot_t);
|
|
|
|
char **cr_argv = NULL;
|
|
|
|
char * cr_cmd = NULL;
|
|
|
|
int ret;
|
|
|
|
int exit_status = OPAL_SUCCESS;
|
|
|
|
int status;
|
|
|
|
|
|
|
|
snapshot->super = *base_snapshot;
|
|
|
|
|
|
|
|
opal_output_verbose(10, mca_crs_self_component.super.output_handle,
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-10 20:51:11 +00:00
|
|
|
"crs:self: restart(%d)", spawn_child);
|
2007-03-16 23:11:45 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If we need to reconstruct the snapshot
|
|
|
|
*/
|
|
|
|
if(snapshot->super.cold_start) {
|
|
|
|
if( OPAL_SUCCESS != (ret = self_cold_start(snapshot)) ){
|
|
|
|
exit_status = ret;
|
|
|
|
opal_output(mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:blcr: blcr_restart: Unable to reconstruct the snapshot.");
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-08-05 16:21:51 +00:00
|
|
|
/*
|
|
|
|
* JJH: Check to make sure the application exists?
|
|
|
|
*/
|
|
|
|
|
2007-03-16 23:11:45 +00:00
|
|
|
/*
|
|
|
|
* Get the restart command
|
|
|
|
*/
|
|
|
|
if ( OPAL_SUCCESS != (ret = opal_crs_self_restart_cmd(snapshot, &cr_cmd)) ) {
|
|
|
|
exit_status = ret;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
if ( NULL == (cr_argv = opal_argv_split(cr_cmd, ' ')) ) {
|
|
|
|
exit_status = OPAL_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (!spawn_child) {
|
|
|
|
opal_output_verbose(10, mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: self_restart: SELF: exec :(%s, %s):",
|
|
|
|
strdup(cr_argv[0]),
|
|
|
|
opal_argv_join(cr_argv, ' '));
|
|
|
|
|
|
|
|
status = execvp(strdup(cr_argv[0]), cr_argv);
|
|
|
|
|
|
|
|
if(status < 0) {
|
|
|
|
opal_output(mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: self_restart: SELF: Child failed to execute :(%d):", status);
|
|
|
|
}
|
|
|
|
opal_output(mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: self_restart: SELF: execvp returned %d", status);
|
|
|
|
exit_status = status;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
*child_pid = fork();
|
|
|
|
if( *child_pid == 0) {
|
|
|
|
/* Child Process */
|
|
|
|
opal_output_verbose(10, mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: self_restart: CHILD: exec :(%s, %s):",
|
|
|
|
strdup(cr_argv[0]),
|
|
|
|
opal_argv_join(cr_argv, ' '));
|
|
|
|
|
|
|
|
status = execvp(strdup(cr_argv[0]), cr_argv);
|
|
|
|
|
|
|
|
if(status < 0) {
|
|
|
|
opal_output(mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: self_restart: CHILD: Child failed to execute :(%d):", status);
|
|
|
|
}
|
|
|
|
opal_output(mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: self_restart: CHILD: execvp returned %d", status);
|
|
|
|
exit_status = status;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
else if(*child_pid > 0) {
|
|
|
|
/* Parent is done once it is started. */
|
|
|
|
;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
opal_output(mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: self_restart: CHILD: fork failed :(%d):", *child_pid);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
if( NULL != cr_cmd)
|
|
|
|
free(cr_cmd);
|
2015-06-23 20:59:57 -07:00
|
|
|
if( NULL != cr_argv)
|
2007-03-16 23:11:45 +00:00
|
|
|
opal_argv_free(cr_argv);
|
|
|
|
|
|
|
|
return exit_status;
|
|
|
|
}
|
|
|
|
|
|
|
|
int opal_crs_self_disable_checkpoint(void)
|
|
|
|
{
|
2009-08-05 16:21:51 +00:00
|
|
|
/*
|
|
|
|
* This function should never be called by a tool
|
|
|
|
*/
|
|
|
|
if( opal_cr_is_tool ) {
|
|
|
|
return OPAL_ERR_NOT_SUPPORTED;
|
|
|
|
}
|
|
|
|
|
2007-03-16 23:11:45 +00:00
|
|
|
opal_output_verbose(10, mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: disable_checkpoint()");
|
|
|
|
|
|
|
|
mca_crs_self_component.can_checkpoint = false;
|
|
|
|
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
int opal_crs_self_enable_checkpoint(void)
|
|
|
|
{
|
2009-08-05 16:21:51 +00:00
|
|
|
/*
|
|
|
|
* This function should never be called by a tool
|
|
|
|
*/
|
|
|
|
if( opal_cr_is_tool ) {
|
|
|
|
return OPAL_ERR_NOT_SUPPORTED;
|
|
|
|
}
|
|
|
|
|
2007-03-16 23:11:45 +00:00
|
|
|
opal_output_verbose(10, mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: enable_checkpoint()");
|
|
|
|
|
|
|
|
mca_crs_self_component.can_checkpoint = true;
|
|
|
|
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2008-07-08 20:04:39 +00:00
|
|
|
int opal_crs_self_prelaunch(int32_t rank,
|
|
|
|
char *base_snapshot_dir,
|
|
|
|
char **app,
|
|
|
|
char **cwd,
|
|
|
|
char ***argv,
|
|
|
|
char ***env)
|
|
|
|
{
|
|
|
|
char * tmp_env_var = NULL;
|
|
|
|
|
2009-08-05 16:21:51 +00:00
|
|
|
/*
|
|
|
|
* This function should never be called by a tool
|
|
|
|
*/
|
|
|
|
if( opal_cr_is_tool ) {
|
|
|
|
return OPAL_ERR_NOT_SUPPORTED;
|
|
|
|
}
|
|
|
|
|
2013-03-27 21:09:41 +00:00
|
|
|
(void) mca_base_var_env_name("opal_cr_is_tool", &tmp_env_var);
|
2008-07-08 20:04:39 +00:00
|
|
|
opal_setenv(tmp_env_var,
|
|
|
|
"0", true, env);
|
|
|
|
free(tmp_env_var);
|
|
|
|
tmp_env_var = NULL;
|
|
|
|
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
int opal_crs_self_reg_thread(void)
|
|
|
|
{
|
2009-08-05 16:21:51 +00:00
|
|
|
/*
|
|
|
|
* This function should never be called by a tool
|
|
|
|
*/
|
|
|
|
if( opal_cr_is_tool ) {
|
|
|
|
return OPAL_ERR_NOT_SUPPORTED;
|
|
|
|
}
|
|
|
|
|
2008-07-08 20:04:39 +00:00
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2007-03-16 23:11:45 +00:00
|
|
|
/******************
|
|
|
|
* Local functions
|
|
|
|
******************/
|
2009-08-05 22:07:37 +00:00
|
|
|
static int crs_self_find_function(char *prefix, char *suffix,
|
|
|
|
opal_crs_self_dlsym_dummy_fn_t *fn_ptr) {
|
2007-03-16 23:11:45 +00:00
|
|
|
char *func_to_find = NULL;
|
|
|
|
|
|
|
|
if( NULL == prefix || 0 >= strlen(prefix) ) {
|
|
|
|
opal_output(mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: crs_self_find_function: Error: prefix is NULL or empty string!");
|
2009-08-05 22:07:37 +00:00
|
|
|
*fn_ptr = NULL;
|
|
|
|
return OPAL_ERROR;
|
2007-03-16 23:11:45 +00:00
|
|
|
}
|
|
|
|
if( NULL == suffix || 0 >= strlen(suffix) ) {
|
|
|
|
opal_output(mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: crs_self_find_function: Error: suffix is NULL or empty string!");
|
2009-08-05 22:07:37 +00:00
|
|
|
*fn_ptr = NULL;
|
|
|
|
return OPAL_ERROR;
|
2007-03-16 23:11:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
opal_output_verbose(10, mca_crs_self_component.super.output_handle,
|
2015-06-23 20:59:57 -07:00
|
|
|
"crs:self: crs_self_find_function(--, %s, %s)",
|
2007-03-16 23:11:45 +00:00
|
|
|
prefix, suffix);
|
|
|
|
|
|
|
|
asprintf(&func_to_find, "%s_%s", prefix, suffix);
|
|
|
|
|
2009-08-05 22:07:37 +00:00
|
|
|
/* The RTLD_DEFAULT is a special handle that searches the default libraries
|
|
|
|
* including the current application for the indicated symbol. This allows
|
|
|
|
* us to not have to dlopen/dlclose the executable. A bit of short hand
|
|
|
|
* really.
|
|
|
|
*/
|
|
|
|
*((void**) fn_ptr) = dlsym(RTLD_DEFAULT, func_to_find);
|
|
|
|
if( NULL == fn_ptr) {
|
2007-03-16 23:11:45 +00:00
|
|
|
opal_output_verbose(12, mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: crs_self_find_function: WARNING: Function \"%s\" not found",
|
|
|
|
func_to_find);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
opal_output_verbose(10, mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: crs_self_find_function: Found function \"%s\"",
|
|
|
|
func_to_find);
|
|
|
|
}
|
|
|
|
|
2009-08-05 22:07:37 +00:00
|
|
|
if( NULL == func_to_find) {
|
2007-03-16 23:11:45 +00:00
|
|
|
free(func_to_find);
|
2009-08-05 22:07:37 +00:00
|
|
|
}
|
2007-03-16 23:11:45 +00:00
|
|
|
|
2009-08-05 22:07:37 +00:00
|
|
|
return OPAL_SUCCESS;
|
2007-03-16 23:11:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Self is a special case. The 'fname' here is the command line that the user
|
|
|
|
* wishes to execute. This function takes this command line and adds
|
|
|
|
* -mca crs_self_do_restart 1
|
|
|
|
* Which will trigger the restart callback once the program has been run.
|
|
|
|
*
|
|
|
|
* For example, The user starts their program with:
|
|
|
|
* $ my_prog arg1 arg2
|
|
|
|
*
|
|
|
|
* They checkpoint it:
|
|
|
|
* $ opal_checkpoint -mca crs self 1234
|
|
|
|
*
|
|
|
|
* They restart it:
|
|
|
|
* $ opal_restart -mca crs self my_prog arg1 arg2
|
|
|
|
*
|
|
|
|
* fname is then:
|
|
|
|
* fname = "my_prog arg1 arg2"
|
|
|
|
*
|
|
|
|
* This funciton translates that to the command:
|
|
|
|
* cmd = "my_prog arg1 arg2 -mca crs self -mca crs_self_do_restart 1"
|
|
|
|
*
|
2015-06-23 20:59:57 -07:00
|
|
|
* Which will cause the program "my_prog" to call their restart function
|
2007-03-16 23:11:45 +00:00
|
|
|
* upon opal_init time.
|
|
|
|
*
|
|
|
|
* Note: The user could bypass the opal_restart routine safely by simply calling
|
|
|
|
* $ my_prog arg1 arg2 -mca crs self -mca crs_self_do_restart 1
|
2015-06-23 20:59:57 -07:00
|
|
|
* However, for consistency sake, we should not encourage this as it won't work for
|
2007-03-16 23:11:45 +00:00
|
|
|
* all of the other checkpointers.
|
|
|
|
*/
|
|
|
|
static int opal_crs_self_restart_cmd(opal_crs_self_snapshot_t *snapshot, char **cmd)
|
|
|
|
{
|
2008-04-23 00:17:12 +00:00
|
|
|
char * tmp_env_var = NULL;
|
|
|
|
|
2007-03-16 23:11:45 +00:00
|
|
|
opal_output_verbose(10, mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: restart_cmd(%s, ---)", snapshot->cmd_line);
|
|
|
|
|
2013-03-27 21:09:41 +00:00
|
|
|
(void) mca_base_var_env_name("crs", &tmp_env_var);
|
2008-04-23 00:17:12 +00:00
|
|
|
opal_setenv(tmp_env_var,
|
2015-06-23 20:59:57 -07:00
|
|
|
"self",
|
2007-03-16 23:11:45 +00:00
|
|
|
true, &environ);
|
2008-04-23 00:17:12 +00:00
|
|
|
free(tmp_env_var);
|
|
|
|
tmp_env_var = NULL;
|
|
|
|
|
2013-03-27 21:09:41 +00:00
|
|
|
(void) mca_base_var_env_name("crs_self_do_restart", &tmp_env_var);
|
2008-04-23 00:17:12 +00:00
|
|
|
opal_setenv(tmp_env_var,
|
2015-06-23 20:59:57 -07:00
|
|
|
"1",
|
2007-03-16 23:11:45 +00:00
|
|
|
true, &environ);
|
2008-04-23 00:17:12 +00:00
|
|
|
free(tmp_env_var);
|
|
|
|
tmp_env_var = NULL;
|
|
|
|
|
2013-03-27 21:09:41 +00:00
|
|
|
(void) mca_base_var_env_name("crs_self_prefix", &tmp_env_var);
|
2008-04-23 00:17:12 +00:00
|
|
|
opal_setenv(tmp_env_var,
|
2015-06-23 20:59:57 -07:00
|
|
|
mca_crs_self_component.prefix,
|
2007-03-16 23:11:45 +00:00
|
|
|
true, &environ);
|
2008-04-23 00:17:12 +00:00
|
|
|
free(tmp_env_var);
|
|
|
|
tmp_env_var = NULL;
|
2007-03-16 23:11:45 +00:00
|
|
|
|
|
|
|
/* Instead of adding it to the command line, we should use the environment
|
2015-06-23 20:59:57 -07:00
|
|
|
* to pass the values. This allow sthe OPAL application to be braindead
|
2007-03-16 23:11:45 +00:00
|
|
|
* WRT MCA parameters
|
|
|
|
* add_args = strdup("-mca crs self -mca crs_self_do_restart 1");
|
|
|
|
*/
|
|
|
|
|
|
|
|
asprintf(cmd, "%s", snapshot->cmd_line);
|
|
|
|
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int self_cold_start(opal_crs_self_snapshot_t *snapshot) {
|
2008-05-08 18:47:47 +00:00
|
|
|
int ret, exit_status = OPAL_SUCCESS;
|
|
|
|
char **tmp_argv = NULL;
|
2007-03-16 23:11:45 +00:00
|
|
|
char * component_name = NULL;
|
|
|
|
int prev_pid;
|
|
|
|
|
|
|
|
opal_output_verbose(10, mca_crs_self_component.super.output_handle,
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-10 20:51:11 +00:00
|
|
|
"crs:self: cold_start()");
|
2007-03-16 23:11:45 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Find the snapshot directory, read the metadata file
|
|
|
|
*/
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-10 20:51:11 +00:00
|
|
|
if( NULL == snapshot->super.metadata ) {
|
|
|
|
if (NULL == (snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "a")) ) {
|
|
|
|
opal_output(mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: checkpoint(): Error: Unable to open the file (%s)",
|
|
|
|
snapshot->super.metadata_filename);
|
|
|
|
exit_status = OPAL_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if( OPAL_SUCCESS != (ret = opal_crs_base_extract_expected_component(snapshot->super.metadata,
|
2008-05-08 18:47:47 +00:00
|
|
|
&component_name, &prev_pid) ) ) {
|
2010-01-25 20:28:38 +00:00
|
|
|
opal_output(mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: self_cold_start: Error: Failed to extract the metadata from the local snapshot (%s). Returned %d.",
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-10 20:51:11 +00:00
|
|
|
snapshot->super.metadata_filename, ret);
|
2008-05-08 18:47:47 +00:00
|
|
|
exit_status = ret;
|
2007-03-16 23:11:45 +00:00
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
snapshot->super.component_name = strdup(component_name);
|
|
|
|
|
|
|
|
/* Compare the strings to make sure this is our snapshot before going further */
|
2015-06-23 20:59:57 -07:00
|
|
|
if ( 0 != strncmp(mca_crs_self_component.super.base_version.mca_component_name,
|
2007-03-16 23:11:45 +00:00
|
|
|
component_name, strlen(component_name)) ) {
|
|
|
|
exit_status = OPAL_ERROR;
|
|
|
|
opal_output(mca_crs_self_component.super.output_handle,
|
2015-06-23 20:59:57 -07:00
|
|
|
"crs:self: self_cold_start: Error: This snapshot (%s) is not intended for us (%s)\n",
|
2008-05-06 18:08:45 +00:00
|
|
|
component_name, mca_crs_self_component.super.base_version.mca_component_name);
|
2007-03-16 23:11:45 +00:00
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Restart command
|
|
|
|
* JJH: Command lines limited to 256 chars.
|
|
|
|
*/
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-10 20:51:11 +00:00
|
|
|
opal_crs_base_metadata_read_token(snapshot->super.metadata, CRS_METADATA_CONTEXT, &tmp_argv);
|
2009-12-09 18:41:56 +00:00
|
|
|
if( NULL == tmp_argv ) {
|
|
|
|
opal_output(mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: self_cold_start: Error: Failed to read the %s token from the local checkpoint in %s",
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-10 20:51:11 +00:00
|
|
|
CRS_METADATA_CONTEXT, snapshot->super.snapshot_directory);
|
2009-12-09 18:41:56 +00:00
|
|
|
exit_status = OPAL_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2008-05-08 18:47:47 +00:00
|
|
|
asprintf(&snapshot->cmd_line, "%s", tmp_argv[0]);
|
2007-03-16 23:11:45 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Reset the cold_start flag
|
|
|
|
*/
|
|
|
|
snapshot->super.cold_start = false;
|
|
|
|
|
|
|
|
cleanup:
|
2008-05-08 18:47:47 +00:00
|
|
|
if(NULL != tmp_argv) {
|
|
|
|
opal_argv_free(tmp_argv);
|
|
|
|
tmp_argv = NULL;
|
|
|
|
}
|
2007-03-16 23:11:45 +00:00
|
|
|
|
|
|
|
return exit_status;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
static int self_update_snapshot_metadata(opal_crs_self_snapshot_t *snapshot) {
|
|
|
|
int exit_status = OPAL_SUCCESS;
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2008-05-08 18:47:47 +00:00
|
|
|
if(NULL == snapshot->cmd_line) {
|
|
|
|
opal_show_help("help-opal-crs-self.txt", "self:no-restart-cmd",
|
|
|
|
true);
|
|
|
|
exit_status = OPAL_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2007-03-16 23:11:45 +00:00
|
|
|
opal_output_verbose(10, mca_crs_self_component.super.output_handle,
|
|
|
|
"crs:self: update_snapshot_metadata(%s)",
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-10 20:51:11 +00:00
|
|
|
snapshot->super.metadata_filename);
|
2015-06-23 20:59:57 -07:00
|
|
|
|
2007-03-16 23:11:45 +00:00
|
|
|
/*
|
2008-05-08 18:47:47 +00:00
|
|
|
* Append to the metadata file the command line to restart with
|
|
|
|
* - How user wants us to restart
|
2007-03-16 23:11:45 +00:00
|
|
|
*/
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-10 20:51:11 +00:00
|
|
|
fprintf(snapshot->super.metadata, "%s%s\n", CRS_METADATA_CONTEXT, snapshot->cmd_line);
|
2007-03-16 23:11:45 +00:00
|
|
|
|
|
|
|
cleanup:
|
|
|
|
return exit_status;
|
|
|
|
}
|