2007-03-17 02:11:45 +03:00
|
|
|
/*
|
2010-03-13 02:57:50 +03:00
|
|
|
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
2007-03-17 02:11:45 +03:00
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2007-06-05 07:03:59 +04:00
|
|
|
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
|
|
|
* reserved.
|
2011-12-07 18:58:08 +04:00
|
|
|
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
Per RFC, bring in the following changes:
* Remove paffinity, maffinity, and carto frameworks -- they've been
wholly replaced by hwloc.
* Move ompi_mpi_init() affinity-setting/checking code down to ORTE.
* Update sm, smcuda, wv, and openib components to no longer use carto.
Instead, use hwloc data. There are still optimizations possible in
the sm/smcuda BTLs (i.e., making multiple mpools). Also, the old
carto-based code found out how many NUMA nodes were ''available''
-- not how many were used ''in this job''. The new hwloc-using
code computes the same value -- it was not updated to calculate how
many NUMA nodes are used ''by this job.''
* Note that I cannot compile the smcuda and wv BTLs -- I ''think''
they're right, but they need to be verified by their owners.
* The openib component now does a bunch of stuff to figure out where
"near" OpenFabrics devices are. '''THIS IS A CHANGE IN DEFAULT
BEHAVIOR!!''' and still needs to be verified by OpenFabrics vendors
(I do not have a NUMA machine with an OpenFabrics device that is a
non-uniform distance from multiple different NUMA nodes).
* Completely rewrite the OMPI_Affinity_str() routine from the
"affinity" mpiext extension. This extension now understands
hyperthreads; the output format of it has changed a bit to reflect
this new information.
* Bunches of minor changes around the code base to update names/types
from maffinity/paffinity-based names to hwloc-based names.
* Add some helper functions into the hwloc base, mainly having to do
with the fact that we have the hwloc data reporting ''all''
topology information, but sometimes you really only want the
(online | available) data.
This commit was SVN r26391.
2012-05-07 18:52:54 +04:00
|
|
|
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
|
2007-03-17 02:11:45 +03:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
/** @file
|
|
|
|
*
|
|
|
|
* OPAL Layer Checkpoint/Restart Runtime functions
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "opal_config.h"
|
|
|
|
|
2009-03-13 05:10:32 +03:00
|
|
|
#ifdef HAVE_STRING_H
|
|
|
|
#include <string.h>
|
|
|
|
#endif
|
2007-03-17 02:11:45 +03:00
|
|
|
#include <errno.h>
|
|
|
|
#ifdef HAVE_UNISTD_H
|
|
|
|
#include <unistd.h>
|
|
|
|
#endif /* HAVE_UNISTD_H */
|
|
|
|
#ifdef HAVE_FCNTL_H
|
|
|
|
#include <fcntl.h>
|
|
|
|
#endif /* HAVE_FCNTL_H */
|
|
|
|
#ifdef HAVE_SYS_TYPES_H
|
|
|
|
#include <sys/types.h>
|
|
|
|
#endif /* HAVE_SYS_TYPES_H */
|
|
|
|
#ifdef HAVE_SYS_STAT_H
|
|
|
|
#include <sys/stat.h> /* for mkfifo */
|
|
|
|
#endif /* HAVE_SYS_STAT_H */
|
|
|
|
#ifdef HAVE_SIGNAL_H
|
|
|
|
#include <signal.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "opal/class/opal_object.h"
|
2007-06-05 07:03:59 +04:00
|
|
|
#include "opal/util/opal_environ.h"
|
2011-12-07 18:58:08 +04:00
|
|
|
#include "opal/util/show_help.h"
|
2007-03-17 02:11:45 +03:00
|
|
|
#include "opal/util/output.h"
|
|
|
|
#include "opal/util/malloc.h"
|
|
|
|
#include "opal/util/keyval_parse.h"
|
|
|
|
#include "opal/util/opal_environ.h"
|
|
|
|
#include "opal/util/argv.h"
|
|
|
|
#include "opal/memoryhooks/memory.h"
|
|
|
|
|
|
|
|
#include "opal/mca/base/base.h"
|
|
|
|
#include "opal/runtime/opal_cr.h"
|
|
|
|
#include "opal/runtime/opal.h"
|
|
|
|
#include "opal/constants.h"
|
|
|
|
|
2010-09-22 05:11:40 +04:00
|
|
|
#include "opal/mca/if/base/base.h"
|
2007-03-17 02:11:45 +03:00
|
|
|
#include "opal/mca/memcpy/base/base.h"
|
|
|
|
#include "opal/mca/memory/base/base.h"
|
|
|
|
#include "opal/mca/timer/base/base.h"
|
|
|
|
|
|
|
|
#include "opal/threads/mutex.h"
|
|
|
|
#include "opal/threads/threads.h"
|
|
|
|
#include "opal/mca/crs/base/base.h"
|
|
|
|
|
|
|
|
/******************
|
|
|
|
* Global Var Decls
|
|
|
|
******************/
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
#if OPAL_ENABLE_CRDEBUG == 1
|
|
|
|
static opal_thread_t **opal_cr_debug_free_threads = NULL;
|
|
|
|
static int opal_cr_debug_num_free_threads = 0;
|
|
|
|
static int opal_cr_debug_threads_already_waiting = false;
|
|
|
|
|
|
|
|
int MPIR_debug_with_checkpoint = 0;
|
|
|
|
static volatile int MPIR_checkpoint_debug_gate = 0;
|
|
|
|
|
|
|
|
int opal_cr_debug_signal = 0;
|
|
|
|
#endif
|
|
|
|
|
2007-10-09 00:53:02 +04:00
|
|
|
bool opal_cr_stall_check = false;
|
|
|
|
bool opal_cr_currently_stalled = false;
|
2007-03-17 02:11:45 +03:00
|
|
|
int opal_cr_output;
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
int opal_cr_initalized = 0;
|
2007-03-17 02:11:45 +03:00
|
|
|
|
2008-10-16 19:09:00 +04:00
|
|
|
static double opal_cr_get_time(void);
|
|
|
|
static void display_indv_timer_core(double diff, char *str);
|
|
|
|
static double timer_start[OPAL_CR_TIMER_MAX];
|
|
|
|
bool opal_cr_timing_barrier_enabled = false;
|
|
|
|
bool opal_cr_timing_enabled = false;
|
|
|
|
int opal_cr_timing_my_rank = 0;
|
|
|
|
int opal_cr_timing_target_rank = 0;
|
|
|
|
|
2007-03-17 02:11:45 +03:00
|
|
|
/******************
|
|
|
|
* Local Functions & Var Decls
|
|
|
|
******************/
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
static int extract_env_vars(int prev_pid, char * file_name);
|
2008-05-08 22:47:47 +04:00
|
|
|
|
2008-04-23 04:17:12 +04:00
|
|
|
static void opal_cr_sigpipe_debug_signal_handler (int signo);
|
2007-03-17 02:11:45 +03:00
|
|
|
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
static opal_cr_user_inc_callback_fn_t cur_user_coord_callback[OMPI_CR_INC_MAX] = {NULL};
|
2007-03-17 02:11:45 +03:00
|
|
|
static opal_cr_coord_callback_fn_t cur_coord_callback = NULL;
|
2007-10-09 00:53:02 +04:00
|
|
|
static opal_cr_notify_callback_fn_t cur_notify_callback = NULL;
|
2007-03-17 02:11:45 +03:00
|
|
|
|
2009-05-09 00:51:13 +04:00
|
|
|
static int core_prev_pid = 0;
|
|
|
|
|
2007-03-17 02:11:45 +03:00
|
|
|
/******************
|
|
|
|
* Interface Functions & Vars
|
|
|
|
******************/
|
|
|
|
char * opal_cr_pipe_dir = NULL;
|
2007-10-09 00:53:02 +04:00
|
|
|
int opal_cr_entry_point_signal = 0;
|
2007-03-17 02:11:45 +03:00
|
|
|
bool opal_cr_is_enabled = true;
|
|
|
|
bool opal_cr_is_tool = false;
|
2008-05-08 22:47:47 +04:00
|
|
|
|
2007-10-09 00:53:02 +04:00
|
|
|
/* Current checkpoint state */
|
2008-05-08 22:47:47 +04:00
|
|
|
int opal_cr_checkpointing_state = OPAL_CR_STATUS_NONE;
|
|
|
|
|
2007-10-09 00:53:02 +04:00
|
|
|
/* Current checkpoint request channel state */
|
2008-05-08 22:47:47 +04:00
|
|
|
int opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE;
|
2007-10-09 00:53:02 +04:00
|
|
|
|
2008-04-23 04:17:12 +04:00
|
|
|
static bool opal_cr_debug_sigpipe = false;
|
|
|
|
|
2008-02-20 01:15:52 +03:00
|
|
|
#if OPAL_ENABLE_FT_THREAD == 1
|
|
|
|
/*****************
|
|
|
|
* Threading Functions and Variables
|
|
|
|
*****************/
|
|
|
|
static void* opal_cr_thread_fn(opal_object_t *obj);
|
2008-02-21 01:33:42 +03:00
|
|
|
bool opal_cr_thread_is_done = false;
|
2008-02-20 01:15:52 +03:00
|
|
|
bool opal_cr_thread_is_active = false;
|
|
|
|
bool opal_cr_thread_in_library = false;
|
|
|
|
bool opal_cr_thread_use_if_avail = true;
|
|
|
|
int32_t opal_cr_thread_num_in_library = 0;
|
|
|
|
int opal_cr_thread_sleep_check = 0;
|
|
|
|
int opal_cr_thread_sleep_wait = 0;
|
|
|
|
opal_thread_t opal_cr_thread;
|
|
|
|
opal_mutex_t opal_cr_thread_lock;
|
|
|
|
#if 0
|
|
|
|
#define OPAL_CR_LOCK() opal_cr_thread_in_library = true; opal_mutex_lock(&opal_cr_thread_lock);
|
|
|
|
#define OPAL_CR_UNLOCK() opal_cr_thread_in_library = false; opal_mutex_unlock(&opal_cr_thread_lock);
|
|
|
|
#define OPAL_CR_THREAD_LOCK() opal_mutex_lock(&opal_cr_thread_lock);
|
|
|
|
#define OPAL_CR_THREAD_UNLOCK() opal_mutex_unlock(&opal_cr_thread_lock);
|
|
|
|
#else
|
|
|
|
/* This technique will potentially starve the thread, but that is OK since
|
|
|
|
* it is only there as support for when the process is not in the MPI library
|
|
|
|
*/
|
|
|
|
static const uint32_t ThreadFlag = 0x1;
|
|
|
|
static const uint32_t ProcInc = 0x2;
|
|
|
|
|
|
|
|
#define OPAL_CR_LOCK() \
|
|
|
|
{ \
|
|
|
|
opal_cr_thread_in_library = true; \
|
|
|
|
OPAL_THREAD_ADD32(&opal_cr_thread_num_in_library, ProcInc); \
|
|
|
|
while( (opal_cr_thread_num_in_library & ThreadFlag ) != 0 ) { \
|
|
|
|
sched_yield(); \
|
|
|
|
} \
|
|
|
|
}
|
|
|
|
#define OPAL_CR_UNLOCK() \
|
|
|
|
{ \
|
|
|
|
OPAL_THREAD_ADD32(&opal_cr_thread_num_in_library, -ProcInc); \
|
|
|
|
if( opal_cr_thread_num_in_library <= 0 ) { \
|
|
|
|
opal_cr_thread_in_library = false; \
|
|
|
|
} \
|
|
|
|
}
|
|
|
|
#define OPAL_CR_THREAD_LOCK() \
|
|
|
|
{ \
|
|
|
|
while(!OPAL_ATOMIC_CMPSET_32(&opal_cr_thread_num_in_library, 0, ThreadFlag)) { \
|
2008-10-08 18:19:37 +04:00
|
|
|
if( !opal_cr_thread_is_active && opal_cr_thread_is_done) { \
|
|
|
|
break; \
|
|
|
|
} \
|
2008-02-20 01:15:52 +03:00
|
|
|
sched_yield(); \
|
2009-05-05 20:45:49 +04:00
|
|
|
usleep(opal_cr_thread_sleep_check); \
|
2008-02-20 01:15:52 +03:00
|
|
|
} \
|
|
|
|
}
|
|
|
|
#define OPAL_CR_THREAD_UNLOCK() \
|
|
|
|
{ \
|
|
|
|
OPAL_THREAD_ADD32(&opal_cr_thread_num_in_library, -ThreadFlag); \
|
|
|
|
}
|
|
|
|
#endif
|
2007-03-17 02:11:45 +03:00
|
|
|
|
2008-02-20 01:15:52 +03:00
|
|
|
#endif /* OPAL_ENABLE_FT_THREAD == 1 */
|
2007-03-17 02:11:45 +03:00
|
|
|
|
2007-04-01 20:16:54 +04:00
|
|
|
int opal_cr_set_enabled(bool en)
|
|
|
|
{
|
2007-03-17 02:11:45 +03:00
|
|
|
opal_cr_is_enabled = en;
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
int opal_cr_init(void )
|
|
|
|
{
|
|
|
|
int ret, exit_status = OPAL_SUCCESS;
|
|
|
|
opal_cr_coord_callback_fn_t prev_coord_func;
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
int val, t;
|
2007-03-17 02:11:45 +03:00
|
|
|
|
2007-05-25 01:54:58 +04:00
|
|
|
if( ++opal_cr_initalized != 1 ) {
|
|
|
|
if( opal_cr_initalized < 1 ) {
|
2008-05-13 17:10:16 +04:00
|
|
|
exit_status = OPAL_ERROR;
|
|
|
|
goto cleanup;
|
2007-05-25 01:54:58 +04:00
|
|
|
}
|
2008-05-13 17:10:16 +04:00
|
|
|
exit_status = OPAL_SUCCESS;
|
|
|
|
goto cleanup;
|
2007-05-25 01:54:58 +04:00
|
|
|
}
|
|
|
|
|
2007-03-17 02:11:45 +03:00
|
|
|
/*
|
|
|
|
* Some startup MCA parameters
|
|
|
|
*/
|
|
|
|
ret = mca_base_param_reg_int_name("opal_cr", "verbose",
|
2007-10-09 00:53:02 +04:00
|
|
|
"Verbose output level for the runtime OPAL Checkpoint/Restart functionality",
|
|
|
|
false, false,
|
|
|
|
0,
|
|
|
|
&val);
|
2007-03-17 02:11:45 +03:00
|
|
|
if(0 != val) {
|
|
|
|
opal_cr_output = opal_output_open(NULL);
|
|
|
|
} else {
|
|
|
|
opal_cr_output = -1;
|
|
|
|
}
|
|
|
|
opal_output_set_verbosity(opal_cr_output, val);
|
|
|
|
|
2007-03-20 00:18:03 +03:00
|
|
|
opal_output_verbose(10, opal_cr_output,
|
|
|
|
"opal_cr: init: Verbose Level: %d",
|
|
|
|
val);
|
|
|
|
|
2007-03-17 02:11:45 +03:00
|
|
|
mca_base_param_reg_int_name("ft", "cr_enabled",
|
|
|
|
"Enable fault tolerance for this program",
|
|
|
|
false, false,
|
|
|
|
0, &val);
|
2008-02-20 01:15:52 +03:00
|
|
|
opal_cr_set_enabled(OPAL_INT_TO_BOOL(val));
|
2007-03-17 02:11:45 +03:00
|
|
|
|
2007-03-20 00:18:03 +03:00
|
|
|
opal_output_verbose(10, opal_cr_output,
|
|
|
|
"opal_cr: init: FT Enabled: %d",
|
|
|
|
val);
|
|
|
|
|
2008-10-16 19:09:00 +04:00
|
|
|
mca_base_param_reg_int_name("opal_cr", "enable_timer",
|
|
|
|
"Enable Checkpoint timer (Default: Disabled)",
|
|
|
|
false, false,
|
|
|
|
0, &val);
|
|
|
|
opal_cr_timing_enabled = OPAL_INT_TO_BOOL(val);
|
|
|
|
|
|
|
|
mca_base_param_reg_int_name("opal_cr", "enable_timer_barrier",
|
|
|
|
"Enable Checkpoint timer Barrier (Default: Disabled)",
|
|
|
|
false, false,
|
|
|
|
0, &val);
|
|
|
|
if( opal_cr_timing_enabled ) {
|
|
|
|
opal_cr_timing_barrier_enabled = OPAL_INT_TO_BOOL(val);
|
|
|
|
} else {
|
|
|
|
opal_cr_timing_barrier_enabled = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
mca_base_param_reg_int_name("opal_cr", "timer_target_rank",
|
|
|
|
"Target Rank for the timer (Default: 0)",
|
|
|
|
false, false,
|
|
|
|
0, &val);
|
|
|
|
opal_cr_timing_target_rank = val;
|
|
|
|
|
2008-02-20 01:15:52 +03:00
|
|
|
#if OPAL_ENABLE_FT_THREAD == 1
|
|
|
|
mca_base_param_reg_int_name("opal_cr", "use_thread",
|
2008-10-08 18:19:37 +04:00
|
|
|
"Use an async thread to checkpoint this program (Default: Disabled)",
|
2008-02-20 01:15:52 +03:00
|
|
|
false, false,
|
2008-10-08 18:19:37 +04:00
|
|
|
0, &val);
|
2008-02-20 01:15:52 +03:00
|
|
|
opal_cr_thread_use_if_avail = OPAL_INT_TO_BOOL(val);
|
|
|
|
|
|
|
|
opal_output_verbose(10, opal_cr_output,
|
|
|
|
"opal_cr: init: FT Use thread: %d",
|
|
|
|
val);
|
|
|
|
|
|
|
|
mca_base_param_reg_int_name("opal_cr", "thread_sleep_check",
|
|
|
|
"Time to sleep between checking for a checkpoint (Default: 0)",
|
|
|
|
false, false,
|
|
|
|
0, &val);
|
|
|
|
opal_cr_thread_sleep_check = val;
|
|
|
|
|
|
|
|
mca_base_param_reg_int_name("opal_cr", "thread_sleep_wait",
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
"Time to sleep waiting for process to exit MPI library (Default: 1000)",
|
2008-02-20 01:15:52 +03:00
|
|
|
false, false,
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
1000, &val);
|
2008-02-20 01:15:52 +03:00
|
|
|
opal_cr_thread_sleep_wait = val;
|
|
|
|
|
|
|
|
opal_output_verbose(10, opal_cr_output,
|
|
|
|
"opal_cr: init: FT thread sleep: check = %d, wait = %d",
|
|
|
|
opal_cr_thread_sleep_check, opal_cr_thread_sleep_wait);
|
|
|
|
#endif
|
|
|
|
|
2007-03-17 02:11:45 +03:00
|
|
|
mca_base_param_reg_int_name("opal_cr", "is_tool",
|
|
|
|
"Is this a tool program, meaning does it require a fully operational OPAL or just enough to exec.",
|
|
|
|
false, false,
|
2007-10-17 17:47:36 +04:00
|
|
|
0,
|
2007-03-17 02:11:45 +03:00
|
|
|
&val);
|
2008-02-20 01:15:52 +03:00
|
|
|
opal_cr_is_tool = OPAL_INT_TO_BOOL(val);
|
2007-03-17 02:11:45 +03:00
|
|
|
|
2007-03-20 00:18:03 +03:00
|
|
|
opal_output_verbose(10, opal_cr_output,
|
|
|
|
"opal_cr: init: Is a tool program: %d",
|
|
|
|
val);
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
#if OPAL_ENABLE_CRDEBUG == 1
|
|
|
|
mca_base_param_reg_int_name("opal_cr", "enable_crdebug",
|
|
|
|
"Enable checkpoint/restart debugging",
|
|
|
|
false, false,
|
|
|
|
0,
|
|
|
|
&val);
|
|
|
|
MPIR_debug_with_checkpoint = OPAL_INT_TO_BOOL(val);
|
|
|
|
|
|
|
|
opal_output_verbose(10, opal_cr_output,
|
|
|
|
"opal_cr: init: C/R Debugging Enabled [%s]\n",
|
|
|
|
(MPIR_debug_with_checkpoint ? "True": "False"));
|
|
|
|
#endif
|
|
|
|
|
2007-04-01 20:16:54 +04:00
|
|
|
#ifndef __WINDOWS__
|
2007-03-17 02:11:45 +03:00
|
|
|
mca_base_param_reg_int_name("opal_cr", "signal",
|
2007-10-09 00:53:02 +04:00
|
|
|
"Checkpoint/Restart signal used to initialize an OPAL Only checkpoint of a program",
|
2007-03-17 02:11:45 +03:00
|
|
|
false, false,
|
|
|
|
SIGUSR1,
|
2007-10-09 00:53:02 +04:00
|
|
|
&opal_cr_entry_point_signal);
|
2007-04-01 20:16:54 +04:00
|
|
|
|
2007-03-20 00:18:03 +03:00
|
|
|
opal_output_verbose(10, opal_cr_output,
|
|
|
|
"opal_cr: init: Checkpoint Signal: %d",
|
2007-10-09 00:53:02 +04:00
|
|
|
opal_cr_entry_point_signal);
|
2007-03-20 00:18:03 +03:00
|
|
|
|
2008-04-23 04:17:12 +04:00
|
|
|
mca_base_param_reg_int_name("opal_cr", "debug_sigpipe",
|
|
|
|
"Activate a signal handler for debugging SIGPIPE Errors that can happen on restart. (Default: Disabled)",
|
|
|
|
false, false,
|
|
|
|
0, &val);
|
|
|
|
opal_cr_debug_sigpipe = OPAL_INT_TO_BOOL(val);
|
|
|
|
|
|
|
|
opal_output_verbose(10, opal_cr_output,
|
|
|
|
"opal_cr: init: Debug SIGPIPE: %d (%s)",
|
|
|
|
val, (opal_cr_debug_sigpipe ? "True" : "False"));
|
|
|
|
|
2008-09-22 23:03:41 +04:00
|
|
|
#if OPAL_ENABLE_FT_THREAD == 1
|
|
|
|
/* If we have a thread, then attach the SIGPIPE signal handler there since
|
|
|
|
* it is most likely to be the one that needs it.
|
|
|
|
*/
|
2008-09-22 20:49:56 +04:00
|
|
|
if( opal_cr_debug_sigpipe && !opal_cr_thread_use_if_avail ) {
|
2008-04-23 04:17:12 +04:00
|
|
|
if( SIG_ERR == signal(SIGPIPE, opal_cr_sigpipe_debug_signal_handler) ) {
|
|
|
|
;
|
|
|
|
}
|
|
|
|
}
|
2008-09-22 23:03:41 +04:00
|
|
|
#else
|
|
|
|
if( opal_cr_debug_sigpipe ) {
|
|
|
|
if( SIG_ERR == signal(SIGPIPE, opal_cr_sigpipe_debug_signal_handler) ) {
|
|
|
|
;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
2008-04-23 04:17:12 +04:00
|
|
|
|
2008-05-29 19:05:28 +04:00
|
|
|
#else
|
2008-12-11 00:13:27 +03:00
|
|
|
opal_cr_is_tool = true; /* no support for CR on Windows yet */
|
2008-05-29 19:05:28 +04:00
|
|
|
#endif /* __WINDOWS__ */
|
|
|
|
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
#if OPAL_ENABLE_CRDEBUG == 1
|
|
|
|
opal_cr_debug_num_free_threads = 3;
|
|
|
|
opal_cr_debug_free_threads = (opal_thread_t **)malloc(sizeof(opal_thread_t *) * opal_cr_debug_num_free_threads );
|
|
|
|
for(t = 0; t < opal_cr_debug_num_free_threads; ++t ) {
|
|
|
|
opal_cr_debug_free_threads[t] = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
mca_base_param_reg_int_name("opal_cr", "crdebug_signal",
|
|
|
|
"Checkpoint/Restart signal used to hold threads when debugging",
|
|
|
|
false, false,
|
|
|
|
SIGTSTP,
|
|
|
|
&opal_cr_debug_signal);
|
|
|
|
|
|
|
|
opal_output_verbose(10, opal_cr_output,
|
|
|
|
"opal_cr: init: Checkpoint Signal (Debug): %d",
|
|
|
|
opal_cr_debug_signal);
|
|
|
|
if( SIG_ERR == signal(opal_cr_debug_signal, MPIR_checkpoint_debugger_signal_handler) ) {
|
|
|
|
opal_output(opal_cr_output,
|
|
|
|
"opal_cr: init: Failed to register C/R debug signal (%d)",
|
|
|
|
opal_cr_debug_signal);
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
/* Silence a compiler warning */
|
|
|
|
t = 0;
|
|
|
|
#endif
|
|
|
|
|
2007-03-17 02:11:45 +03:00
|
|
|
mca_base_param_reg_string_name("opal_cr", "tmp_dir",
|
|
|
|
"Temporary directory to place rendezvous files for a checkpoint",
|
|
|
|
false, false,
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
opal_tmp_directory(),
|
2007-03-17 02:11:45 +03:00
|
|
|
&opal_cr_pipe_dir);
|
|
|
|
|
2007-03-20 00:18:03 +03:00
|
|
|
opal_output_verbose(10, opal_cr_output,
|
|
|
|
"opal_cr: init: Temp Directory: %s",
|
|
|
|
opal_cr_pipe_dir);
|
|
|
|
|
2007-03-17 02:11:45 +03:00
|
|
|
if( !opal_cr_is_tool ) {
|
|
|
|
/* Register the OPAL interlevel coordination callback */
|
|
|
|
opal_cr_reg_coord_callback(opal_cr_coord, &prev_coord_func);
|
|
|
|
|
|
|
|
opal_cr_stall_check = false;
|
2007-10-09 00:53:02 +04:00
|
|
|
opal_cr_currently_stalled = false;
|
2007-03-17 02:11:45 +03:00
|
|
|
|
|
|
|
} /* End opal_cr_is_tool = true */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If fault tolerance was not compiled in then
|
|
|
|
* we need to make sure that the listener thread is active to tell
|
|
|
|
* the tools that this is not a checkpointable job.
|
|
|
|
* We don't need the CRS framework to be initalized.
|
|
|
|
*/
|
2010-03-13 02:57:50 +03:00
|
|
|
#if OPAL_ENABLE_FT_CR == 1
|
2007-03-17 02:11:45 +03:00
|
|
|
/*
|
|
|
|
* Open the checkpoint / restart service components
|
|
|
|
*/
|
|
|
|
if (OPAL_SUCCESS != (ret = opal_crs_base_open())) {
|
2011-12-07 18:58:08 +04:00
|
|
|
opal_show_help( "help-opal-runtime.txt",
|
|
|
|
"opal_cr_init:no-crs", true,
|
|
|
|
"opal_crs_base_open", ret );
|
2008-05-13 17:10:16 +04:00
|
|
|
exit_status = ret;
|
2007-03-17 02:11:45 +03:00
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (OPAL_SUCCESS != (ret = opal_crs_base_select())) {
|
2011-12-07 18:58:08 +04:00
|
|
|
opal_show_help( "help-opal-runtime.txt",
|
|
|
|
"opal_cr_init:no-crs", true,
|
|
|
|
"opal_crs_base_select", ret );
|
2008-05-13 17:10:16 +04:00
|
|
|
exit_status = ret;
|
2007-03-17 02:11:45 +03:00
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2008-02-20 01:15:52 +03:00
|
|
|
#if OPAL_ENABLE_FT_THREAD == 1
|
|
|
|
if( !opal_cr_is_tool && opal_cr_thread_use_if_avail) {
|
2008-03-08 00:10:14 +03:00
|
|
|
opal_output_verbose(10, opal_cr_output,
|
|
|
|
"opal_cr: init: starting the thread\n");
|
|
|
|
|
2011-03-01 21:47:31 +03:00
|
|
|
/* JJH: We really do need this line below since it enables
|
|
|
|
* actual locks for threads. However currently the
|
|
|
|
* upper layers will deadlock if it is enabled.
|
|
|
|
* So hack around the problem for now, while working
|
|
|
|
* on a complete solution. See ticket #2741 for more
|
|
|
|
* details.
|
|
|
|
* opal_set_using_threads(true);
|
|
|
|
*/
|
|
|
|
|
2008-02-20 01:15:52 +03:00
|
|
|
/*
|
|
|
|
* Start the thread
|
|
|
|
*/
|
|
|
|
OBJ_CONSTRUCT(&opal_cr_thread, opal_thread_t);
|
|
|
|
OBJ_CONSTRUCT(&opal_cr_thread_lock, opal_mutex_t);
|
|
|
|
|
2008-02-21 01:33:42 +03:00
|
|
|
opal_cr_thread_is_done = false;
|
2008-02-20 01:15:52 +03:00
|
|
|
opal_cr_thread_is_active = false;
|
|
|
|
opal_cr_thread_in_library = false;
|
2008-02-21 01:33:42 +03:00
|
|
|
opal_cr_thread_num_in_library = 0;
|
2008-02-20 01:15:52 +03:00
|
|
|
|
|
|
|
opal_cr_thread.t_run = opal_cr_thread_fn;
|
|
|
|
opal_cr_thread.t_arg = NULL;
|
|
|
|
opal_thread_start(&opal_cr_thread);
|
|
|
|
|
|
|
|
} /* End opal_cr_is_tool = true */
|
2008-03-08 00:10:14 +03:00
|
|
|
else {
|
|
|
|
opal_output_verbose(10, opal_cr_output,
|
|
|
|
"opal_cr: init: *Not* Using C/R thread\n");
|
|
|
|
}
|
2008-02-20 01:15:52 +03:00
|
|
|
#endif /* OPAL_ENABLE_FT_THREAD == 1 */
|
|
|
|
|
2007-03-17 02:11:45 +03:00
|
|
|
cleanup:
|
|
|
|
return exit_status;
|
|
|
|
}
|
|
|
|
|
|
|
|
int opal_cr_finalize(void)
|
|
|
|
{
|
2008-05-08 22:47:47 +04:00
|
|
|
int exit_status = OPAL_SUCCESS;
|
2007-03-17 02:11:45 +03:00
|
|
|
|
2007-05-25 01:54:58 +04:00
|
|
|
if( --opal_cr_initalized != 0 ) {
|
|
|
|
if( opal_cr_initalized < 0 ) {
|
|
|
|
return OPAL_ERROR;
|
|
|
|
}
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2007-03-17 02:11:45 +03:00
|
|
|
if( !opal_cr_is_tool ) {
|
2008-02-20 01:15:52 +03:00
|
|
|
#if OPAL_ENABLE_FT_THREAD == 1
|
|
|
|
if( opal_cr_thread_use_if_avail ) {
|
|
|
|
void *data;
|
|
|
|
/*
|
|
|
|
* Stop the thread
|
|
|
|
*/
|
2008-02-21 01:33:42 +03:00
|
|
|
opal_cr_thread_is_done = true;
|
2008-02-21 00:17:08 +03:00
|
|
|
opal_cr_thread_is_active = false;
|
|
|
|
opal_cr_thread_in_library = true;
|
|
|
|
|
2008-02-20 01:15:52 +03:00
|
|
|
opal_thread_join(&opal_cr_thread, &data);
|
|
|
|
OBJ_DESTRUCT(&opal_cr_thread);
|
|
|
|
OBJ_DESTRUCT(&opal_cr_thread_lock);
|
|
|
|
}
|
|
|
|
#endif /* OPAL_ENABLE_FT_THREAD == 1 */
|
|
|
|
|
2007-03-17 02:11:45 +03:00
|
|
|
/* Nothing to do for just process notifications */
|
2008-05-08 22:47:47 +04:00
|
|
|
opal_cr_checkpointing_state = OPAL_CR_STATUS_TERM;
|
|
|
|
opal_cr_checkpoint_request = OPAL_CR_STATUS_TERM;
|
2007-03-17 02:11:45 +03:00
|
|
|
}
|
|
|
|
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
#if OPAL_ENABLE_CRDEBUG == 1
|
|
|
|
if( NULL != opal_cr_debug_free_threads ) {
|
|
|
|
free( opal_cr_debug_free_threads );
|
|
|
|
opal_cr_debug_free_threads = NULL;
|
|
|
|
}
|
|
|
|
opal_cr_debug_num_free_threads = 0;
|
|
|
|
#endif
|
|
|
|
|
2009-02-13 01:54:43 +03:00
|
|
|
if (NULL != opal_cr_pipe_dir) {
|
|
|
|
free(opal_cr_pipe_dir);
|
|
|
|
opal_cr_pipe_dir = NULL;
|
|
|
|
}
|
|
|
|
|
2010-03-13 02:57:50 +03:00
|
|
|
#if OPAL_ENABLE_FT_CR == 1
|
2007-03-17 02:11:45 +03:00
|
|
|
/*
|
|
|
|
* Close the checkpoint / restart service components
|
|
|
|
*/
|
|
|
|
opal_crs_base_close();
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return exit_status;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check if a checkpoint request needs to be operated upon
|
|
|
|
*/
|
2007-04-01 20:16:54 +04:00
|
|
|
void opal_cr_test_if_checkpoint_ready(void)
|
|
|
|
{
|
2007-03-17 02:11:45 +03:00
|
|
|
int ret;
|
|
|
|
|
2007-10-09 00:53:02 +04:00
|
|
|
if( opal_cr_currently_stalled) {
|
2007-03-17 02:11:45 +03:00
|
|
|
opal_output_verbose(20, opal_cr_output,
|
2007-10-09 00:53:02 +04:00
|
|
|
"opal_cr:opal_test_if_ready: JUMPING to Post Stall stage");
|
2007-03-17 02:11:45 +03:00
|
|
|
goto STAGE_1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2007-10-09 00:53:02 +04:00
|
|
|
* If there is no checkpoint request to act on
|
|
|
|
* then just return
|
2007-03-17 02:11:45 +03:00
|
|
|
*/
|
2007-10-09 00:53:02 +04:00
|
|
|
if(OPAL_CR_STATUS_REQUESTED != opal_cr_checkpoint_request ) {
|
2007-03-17 02:11:45 +03:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2007-10-09 00:53:02 +04:00
|
|
|
* If we are currently checkpointing:
|
|
|
|
* - If a request is pending then cancel it
|
|
|
|
* - o.w., skip it.
|
2007-03-17 02:11:45 +03:00
|
|
|
*/
|
2008-05-08 22:47:47 +04:00
|
|
|
if(OPAL_CR_STATUS_RUNNING == opal_cr_checkpointing_state ) {
|
2007-10-09 00:53:02 +04:00
|
|
|
if( OPAL_SUCCESS != (ret = cur_notify_callback(OPAL_CHECKPOINT_CMD_IN_PROGRESS) ) ) {
|
|
|
|
opal_output(opal_cr_output,
|
|
|
|
"Error: opal_cr: test_if_checkpoint_ready: Respond [In Progress] Failed. (%d)",
|
|
|
|
ret);
|
|
|
|
}
|
|
|
|
opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE;
|
2007-03-17 02:11:45 +03:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If no CRS module is loaded return an error
|
|
|
|
*/
|
|
|
|
if (NULL == opal_crs.crs_checkpoint ) {
|
2007-10-09 00:53:02 +04:00
|
|
|
if( OPAL_SUCCESS != (ret = cur_notify_callback(OPAL_CHECKPOINT_CMD_NULL) ) ) {
|
2007-03-17 02:11:45 +03:00
|
|
|
opal_output(opal_cr_output,
|
|
|
|
"Error: opal_cr: test_if_checkpoint_ready: Respond [Not Able/NULL] Failed. (%d)",
|
|
|
|
ret);
|
|
|
|
}
|
|
|
|
opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Start the checkpoint
|
|
|
|
*/
|
2008-05-08 22:47:47 +04:00
|
|
|
opal_cr_checkpointing_state = OPAL_CR_STATUS_RUNNING;
|
|
|
|
opal_cr_checkpoint_request = OPAL_CR_STATUS_NONE;
|
2007-03-17 02:11:45 +03:00
|
|
|
|
|
|
|
STAGE_1:
|
2007-10-09 00:53:02 +04:00
|
|
|
if( OPAL_SUCCESS != (ret = cur_notify_callback(OPAL_CHECKPOINT_CMD_START) ) ) {
|
2007-03-17 02:11:45 +03:00
|
|
|
opal_output(opal_cr_output,
|
|
|
|
"Error: opal_cr: test_if_checkpoint_ready: Respond [Start Ckpt] Failed. (%d)",
|
|
|
|
ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*******************************
|
|
|
|
* Notification Routines
|
|
|
|
*******************************/
|
2009-05-09 00:51:13 +04:00
|
|
|
int opal_cr_inc_core_prep(void)
|
2007-03-17 02:11:45 +03:00
|
|
|
{
|
2009-05-09 00:51:13 +04:00
|
|
|
int ret;
|
2007-03-17 02:11:45 +03:00
|
|
|
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
/*
|
|
|
|
* Call User Level INC
|
|
|
|
*/
|
|
|
|
if(OPAL_SUCCESS != (ret = trigger_user_inc_callback(OMPI_CR_INC_PRE_CRS_PRE_MPI,
|
|
|
|
OMPI_CR_INC_STATE_PREPARE)) ) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2007-03-17 02:11:45 +03:00
|
|
|
/*
|
|
|
|
* Use the registered coordination routine
|
|
|
|
*/
|
|
|
|
if(OPAL_SUCCESS != (ret = cur_coord_callback(OPAL_CRS_CHECKPOINT)) ) {
|
|
|
|
if ( OPAL_EXISTS != ret ) {
|
|
|
|
opal_output(opal_cr_output,
|
2007-10-09 00:53:02 +04:00
|
|
|
"opal_cr: inc_core: Error: cur_coord_callback(%d) failed! %d\n",
|
2007-03-17 02:11:45 +03:00
|
|
|
OPAL_CRS_CHECKPOINT, ret);
|
|
|
|
}
|
2009-05-09 00:51:13 +04:00
|
|
|
return ret;
|
2007-03-17 02:11:45 +03:00
|
|
|
}
|
2009-05-09 00:51:13 +04:00
|
|
|
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
/*
|
|
|
|
* Call User Level INC
|
|
|
|
*/
|
|
|
|
if(OPAL_SUCCESS != (ret = trigger_user_inc_callback(OMPI_CR_INC_PRE_CRS_POST_MPI,
|
|
|
|
OMPI_CR_INC_STATE_PREPARE)) ) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2009-05-09 00:51:13 +04:00
|
|
|
core_prev_pid = getpid();
|
|
|
|
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2009-09-22 22:26:12 +04:00
|
|
|
int opal_cr_inc_core_ckpt(pid_t pid,
|
|
|
|
opal_crs_base_snapshot_t *snapshot,
|
|
|
|
opal_crs_base_ckpt_options_t *options,
|
|
|
|
int *state)
|
2009-05-09 00:51:13 +04:00
|
|
|
{
|
|
|
|
int ret, exit_status = OPAL_SUCCESS;
|
|
|
|
|
2008-10-16 19:09:00 +04:00
|
|
|
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CORE0);
|
2009-09-22 22:26:12 +04:00
|
|
|
if(OPAL_SUCCESS != (ret = opal_crs.crs_checkpoint(pid,
|
|
|
|
snapshot,
|
|
|
|
options,
|
|
|
|
(opal_crs_state_type_t *)state))) {
|
2007-03-17 02:11:45 +03:00
|
|
|
opal_output(opal_cr_output,
|
2007-10-09 00:53:02 +04:00
|
|
|
"opal_cr: inc_core: Error: The checkpoint failed. %d\n", ret);
|
2007-03-17 02:11:45 +03:00
|
|
|
exit_status = ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(*state == OPAL_CRS_CONTINUE) {
|
2008-10-16 19:09:00 +04:00
|
|
|
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CORE1);
|
|
|
|
|
2009-09-22 22:26:12 +04:00
|
|
|
if(options->term) {
|
2007-03-17 02:11:45 +03:00
|
|
|
*state = OPAL_CRS_TERM;
|
2008-05-08 22:47:47 +04:00
|
|
|
opal_cr_checkpointing_state = OPAL_CR_STATUS_TERM;
|
|
|
|
} else {
|
|
|
|
opal_cr_checkpointing_state = OPAL_CR_STATUS_CONTINUE;
|
|
|
|
}
|
2007-03-17 02:11:45 +03:00
|
|
|
}
|
|
|
|
else {
|
2009-09-22 22:26:12 +04:00
|
|
|
options->term = false;
|
2007-03-17 02:11:45 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If restarting read environment stuff that opal-restart left us.
|
|
|
|
*/
|
|
|
|
if(*state == OPAL_CRS_RESTART) {
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
opal_cr_refresh_environ(core_prev_pid);
|
2008-05-08 22:47:47 +04:00
|
|
|
opal_cr_checkpointing_state = OPAL_CR_STATUS_RESTART_PRE;
|
2007-03-17 02:11:45 +03:00
|
|
|
}
|
|
|
|
|
2009-05-09 00:51:13 +04:00
|
|
|
return exit_status;
|
|
|
|
}
|
|
|
|
|
|
|
|
int opal_cr_inc_core_recover(int state)
|
|
|
|
{
|
|
|
|
int ret;
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
opal_cr_user_inc_callback_state_t cb_state;
|
2009-05-09 00:51:13 +04:00
|
|
|
|
|
|
|
if( opal_cr_checkpointing_state != OPAL_CR_STATUS_TERM &&
|
|
|
|
opal_cr_checkpointing_state != OPAL_CR_STATUS_CONTINUE &&
|
|
|
|
opal_cr_checkpointing_state != OPAL_CR_STATUS_RESTART_PRE &&
|
|
|
|
opal_cr_checkpointing_state != OPAL_CR_STATUS_RESTART_POST ) {
|
|
|
|
|
|
|
|
if(state == OPAL_CRS_CONTINUE) {
|
|
|
|
OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CORE1);
|
|
|
|
opal_cr_checkpointing_state = OPAL_CR_STATUS_CONTINUE;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* If restarting read environment stuff that opal-restart left us.
|
|
|
|
*/
|
|
|
|
else if(state == OPAL_CRS_RESTART) {
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
opal_cr_refresh_environ(core_prev_pid);
|
2009-05-09 00:51:13 +04:00
|
|
|
opal_cr_checkpointing_state = OPAL_CR_STATUS_RESTART_PRE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
/*
|
|
|
|
* Call User Level INC
|
|
|
|
*/
|
|
|
|
if( OPAL_CRS_CONTINUE == state ) {
|
|
|
|
cb_state = OMPI_CR_INC_STATE_CONTINUE;
|
|
|
|
}
|
|
|
|
else if( OPAL_CRS_RESTART == state ) {
|
|
|
|
cb_state = OMPI_CR_INC_STATE_RESTART;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
cb_state = OMPI_CR_INC_STATE_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(OPAL_SUCCESS != (ret = trigger_user_inc_callback(OMPI_CR_INC_POST_CRS_PRE_MPI,
|
|
|
|
cb_state)) ) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2007-03-17 02:11:45 +03:00
|
|
|
/*
|
|
|
|
* Use the registered coordination routine
|
|
|
|
*/
|
2009-05-09 00:51:13 +04:00
|
|
|
if(OPAL_SUCCESS != (ret = cur_coord_callback(state)) ) {
|
2007-03-17 02:11:45 +03:00
|
|
|
if ( OPAL_EXISTS != ret ) {
|
|
|
|
opal_output(opal_cr_output,
|
2007-10-09 00:53:02 +04:00
|
|
|
"opal_cr: inc_core: Error: cur_coord_callback(%d) failed! %d\n",
|
2009-05-09 00:51:13 +04:00
|
|
|
state, ret);
|
2007-03-17 02:11:45 +03:00
|
|
|
}
|
2009-05-09 00:51:13 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
if(OPAL_SUCCESS != (ret = trigger_user_inc_callback(OMPI_CR_INC_POST_CRS_POST_MPI,
|
|
|
|
cb_state)) ) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
#if OPAL_ENABLE_CRDEBUG == 1
|
|
|
|
opal_cr_debug_clear_current_ckpt_thread();
|
|
|
|
#endif
|
|
|
|
|
2009-05-09 00:51:13 +04:00
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2009-09-22 22:26:12 +04:00
|
|
|
int opal_cr_inc_core(pid_t pid,
|
|
|
|
opal_crs_base_snapshot_t *snapshot,
|
|
|
|
opal_crs_base_ckpt_options_t *options,
|
|
|
|
int *state)
|
2009-05-09 00:51:13 +04:00
|
|
|
{
|
|
|
|
int ret, exit_status = OPAL_SUCCESS;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* INC: Prepare stack using the registered coordination routine
|
|
|
|
*/
|
|
|
|
if(OPAL_SUCCESS != (ret = opal_cr_inc_core_prep() ) ) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* INC: Take the checkpoint
|
|
|
|
*/
|
2009-09-22 22:26:12 +04:00
|
|
|
if(OPAL_SUCCESS != (ret = opal_cr_inc_core_ckpt(pid, snapshot, options, state) ) ) {
|
2007-03-17 02:11:45 +03:00
|
|
|
exit_status = ret;
|
2009-05-09 00:51:13 +04:00
|
|
|
/* Don't return here since we want to restart the OPAL level stuff */
|
2007-03-17 02:11:45 +03:00
|
|
|
}
|
2009-05-09 00:51:13 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* INC: Recover stack using the registered coordination routine
|
|
|
|
*/
|
|
|
|
if(OPAL_SUCCESS != (ret = opal_cr_inc_core_recover(*state) ) ) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2007-03-17 02:11:45 +03:00
|
|
|
return exit_status;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*******************************
|
|
|
|
* Coordination Routines
|
|
|
|
*******************************/
|
|
|
|
/**
|
|
|
|
* Current Coordination callback routines
|
|
|
|
*/
|
|
|
|
int opal_cr_coord(int state)
|
|
|
|
{
|
|
|
|
if(OPAL_CRS_CHECKPOINT == state) {
|
|
|
|
/* Do Checkpoint Phase work */
|
|
|
|
}
|
|
|
|
else if (OPAL_CRS_CONTINUE == state ) {
|
|
|
|
/* Do Continue Phase work */
|
|
|
|
}
|
|
|
|
else if (OPAL_CRS_RESTART == state ) {
|
|
|
|
/* Do Restart Phase work */
|
2007-04-14 06:06:05 +04:00
|
|
|
|
2011-01-26 01:43:47 +03:00
|
|
|
/*
|
|
|
|
* Re-initialize the event engine
|
|
|
|
* Otherwise it may/will use stale file descriptors which will disrupt
|
|
|
|
* the intended users of the soon-to-be newly assigned file descriptors.
|
|
|
|
*/
|
|
|
|
opal_event_reinit(opal_event_base);
|
|
|
|
|
2007-04-14 06:06:05 +04:00
|
|
|
/*
|
|
|
|
* Flush if() functionality, since it caches system specific info.
|
|
|
|
*/
|
2010-09-22 05:11:40 +04:00
|
|
|
opal_if_base_close();
|
2007-04-14 06:06:05 +04:00
|
|
|
/* Since opal_ifinit() is not exposed, the necessary
|
|
|
|
* functions will call it when needed. Just make sure we
|
|
|
|
* finalized this code so we don't get old socket addrs.
|
|
|
|
*/
|
2010-03-13 02:57:50 +03:00
|
|
|
opal_output_reopen_all();
|
2007-03-17 02:11:45 +03:00
|
|
|
}
|
|
|
|
else if (OPAL_CRS_TERM == state ) {
|
|
|
|
/* Do Continue Phase work in prep to terminate the application */
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* We must have been in an error state from the checkpoint
|
|
|
|
* recreate everything, as in the Continue Phase
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Here we are returning to either:
|
|
|
|
* - [orte | ompi]_notify()
|
|
|
|
*/
|
2008-05-08 22:47:47 +04:00
|
|
|
opal_cr_checkpointing_state = OPAL_CR_STATUS_RESTART_POST;
|
|
|
|
|
2007-03-17 02:11:45 +03:00
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2007-10-09 00:53:02 +04:00
|
|
|
int opal_cr_reg_notify_callback(opal_cr_notify_callback_fn_t new_func,
|
|
|
|
opal_cr_notify_callback_fn_t *prev_func)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Preserve the previous callback
|
|
|
|
*/
|
|
|
|
if( NULL != cur_notify_callback) {
|
|
|
|
*prev_func = cur_notify_callback;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
*prev_func = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Update the callbacks
|
|
|
|
*/
|
|
|
|
cur_notify_callback = new_func;
|
|
|
|
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
int opal_cr_user_inc_register_callback(opal_cr_user_inc_callback_event_t event,
|
|
|
|
opal_cr_user_inc_callback_fn_t function,
|
|
|
|
opal_cr_user_inc_callback_fn_t *prev_function)
|
|
|
|
{
|
|
|
|
if( event < 0 || event >= OMPI_CR_INC_MAX ) {
|
|
|
|
return OPAL_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
if( NULL != cur_user_coord_callback[event] ) {
|
|
|
|
*prev_function = cur_user_coord_callback[event];
|
|
|
|
} else {
|
|
|
|
*prev_function = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
cur_user_coord_callback[event] = function;
|
|
|
|
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
int trigger_user_inc_callback(opal_cr_user_inc_callback_event_t event,
|
|
|
|
opal_cr_user_inc_callback_state_t state)
|
|
|
|
{
|
|
|
|
if( NULL == cur_user_coord_callback[event] ) {
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
if( event < 0 || event >= OMPI_CR_INC_MAX ) {
|
|
|
|
return OPAL_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ((cur_user_coord_callback[event])(event, state));
|
|
|
|
}
|
|
|
|
|
2007-04-01 20:16:54 +04:00
|
|
|
int opal_cr_reg_coord_callback(opal_cr_coord_callback_fn_t new_func,
|
|
|
|
opal_cr_coord_callback_fn_t *prev_func)
|
2007-03-17 02:11:45 +03:00
|
|
|
{
|
|
|
|
/*
|
|
|
|
* Preserve the previous callback
|
|
|
|
*/
|
|
|
|
if( NULL != cur_coord_callback) {
|
|
|
|
*prev_func = cur_coord_callback;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
*prev_func = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Update the callbacks
|
|
|
|
*/
|
|
|
|
cur_coord_callback = new_func;
|
|
|
|
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
int opal_cr_refresh_environ(int prev_pid) {
|
|
|
|
int val;
|
|
|
|
char *file_name = NULL;
|
|
|
|
struct stat file_status;
|
|
|
|
|
|
|
|
if( 0 >= prev_pid ) {
|
|
|
|
prev_pid = getpid();
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Make sure the file exists. If it doesn't then this means 2 things:
|
|
|
|
* 1) We have already executed this function, and
|
|
|
|
* 2) The file has been deleted on the previous round.
|
|
|
|
*/
|
|
|
|
asprintf(&file_name, "%s/%s-%d", opal_tmp_directory(), OPAL_CR_BASE_ENV_NAME, prev_pid);
|
|
|
|
if(0 != stat(file_name, &file_status) ){
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
#if OPAL_ENABLE_CRDEBUG == 1
|
|
|
|
opal_unsetenv(mca_base_param_env_var("opal_cr_enable_crdebug"), &environ);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
extract_env_vars(prev_pid, file_name);
|
|
|
|
|
|
|
|
#if OPAL_ENABLE_CRDEBUG == 1
|
|
|
|
mca_base_param_reg_int_name("opal_cr", "enable_crdebug",
|
|
|
|
"Enable checkpoint/restart debugging",
|
|
|
|
false, false,
|
|
|
|
0,
|
|
|
|
&val);
|
|
|
|
MPIR_debug_with_checkpoint = OPAL_INT_TO_BOOL(val);
|
|
|
|
|
|
|
|
opal_output_verbose(10, opal_cr_output,
|
|
|
|
"opal_cr: init: C/R Debugging Enabled [%s] (refresh)\n",
|
|
|
|
(MPIR_debug_with_checkpoint ? "True": "False"));
|
|
|
|
#else
|
|
|
|
val = 0; /* Silence Compiler warning */
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if( NULL != file_name ){
|
|
|
|
free(file_name);
|
|
|
|
file_name = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2007-10-09 00:53:02 +04:00
|
|
|
/*
|
|
|
|
* Extract environment variables from a saved file
|
|
|
|
* and place them in the environment.
|
|
|
|
*/
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
static int extract_env_vars(int prev_pid, char * file_name)
|
2007-03-17 02:11:45 +03:00
|
|
|
{
|
2007-10-09 00:53:02 +04:00
|
|
|
int exit_status = OPAL_SUCCESS;
|
|
|
|
FILE *env_data = NULL;
|
2009-05-07 00:11:28 +04:00
|
|
|
int len = OPAL_PATH_MAX;
|
2007-10-09 00:53:02 +04:00
|
|
|
char * tmp_str = NULL;
|
|
|
|
|
2009-05-09 00:51:13 +04:00
|
|
|
if( 0 >= prev_pid ) {
|
2007-10-09 00:53:02 +04:00
|
|
|
opal_output(opal_cr_output,
|
|
|
|
"opal_cr: extract_env_vars: Invalid PID (%d)\n",
|
|
|
|
prev_pid);
|
|
|
|
exit_status = OPAL_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2007-03-17 02:11:45 +03:00
|
|
|
|
2007-10-09 00:53:02 +04:00
|
|
|
if (NULL == (env_data = fopen(file_name, "r")) ) {
|
|
|
|
exit_status = OPAL_ERROR;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Extract an env var */
|
|
|
|
while(!feof(env_data) ) {
|
|
|
|
char **t_set = NULL;
|
2009-05-07 00:11:28 +04:00
|
|
|
len = OPAL_PATH_MAX;
|
2007-10-09 00:53:02 +04:00
|
|
|
|
|
|
|
tmp_str = (char *) malloc(sizeof(char) * len);
|
2009-05-14 04:21:15 +04:00
|
|
|
if( NULL == tmp_str) {
|
|
|
|
exit_status = OPAL_ERR_OUT_OF_RESOURCE;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2007-10-09 00:53:02 +04:00
|
|
|
if( NULL == fgets(tmp_str, len, env_data) ) {
|
|
|
|
exit_status = OPAL_ERROR;
|
|
|
|
goto cleanup;
|
2007-03-17 02:11:45 +03:00
|
|
|
}
|
2007-10-09 00:53:02 +04:00
|
|
|
len = strlen(tmp_str);
|
2008-11-13 00:43:34 +03:00
|
|
|
if(tmp_str[len - 1] == '\n') {
|
2007-10-09 00:53:02 +04:00
|
|
|
tmp_str[len - 1] = '\0';
|
2008-11-13 00:43:34 +03:00
|
|
|
} else {
|
|
|
|
opal_output(opal_cr_output,
|
|
|
|
"opal_cr: extract_env_vars: Error: Parameter too long (%s)\n",
|
|
|
|
tmp_str);
|
2009-01-03 18:12:16 +03:00
|
|
|
free(tmp_str);
|
2009-05-14 04:21:15 +04:00
|
|
|
tmp_str = NULL;
|
2008-11-13 00:43:34 +03:00
|
|
|
continue;
|
|
|
|
}
|
2007-10-09 00:53:02 +04:00
|
|
|
|
|
|
|
if( NULL == (t_set = opal_argv_split(tmp_str, '=')) ) {
|
|
|
|
break;
|
2007-03-17 02:11:45 +03:00
|
|
|
}
|
2007-10-09 00:53:02 +04:00
|
|
|
|
|
|
|
opal_setenv(t_set[0], t_set[1], true, &environ);
|
|
|
|
|
|
|
|
free(tmp_str);
|
|
|
|
tmp_str = NULL;
|
2007-03-17 02:11:45 +03:00
|
|
|
}
|
2007-10-09 00:53:02 +04:00
|
|
|
|
|
|
|
cleanup:
|
|
|
|
if( NULL != env_data ) {
|
|
|
|
fclose(env_data);
|
|
|
|
}
|
|
|
|
unlink(file_name);
|
|
|
|
|
|
|
|
if( NULL != tmp_str ){
|
|
|
|
free(tmp_str);
|
2007-03-17 02:11:45 +03:00
|
|
|
}
|
2007-10-09 00:53:02 +04:00
|
|
|
|
|
|
|
return exit_status;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*****************************************
|
|
|
|
* OPAL CR Entry Point Functionality
|
|
|
|
*****************************************/
|
2008-04-23 04:17:12 +04:00
|
|
|
/*
|
|
|
|
* Used only for debugging SIGPIPE problems
|
|
|
|
*/
|
|
|
|
static void opal_cr_sigpipe_debug_signal_handler (int signo)
|
|
|
|
{
|
|
|
|
int sleeper = 1;
|
|
|
|
|
|
|
|
if( !opal_cr_debug_sigpipe ) {
|
|
|
|
opal_output_verbose(10, opal_cr_output,
|
|
|
|
"opal_cr: sigpipe_debug: Debug SIGPIPE Not enabled :(\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2008-04-24 21:54:22 +04:00
|
|
|
opal_output(0,
|
|
|
|
"opal_cr: sigpipe_debug: Debug SIGPIPE [%d]: PID (%d)\n",
|
|
|
|
signo, getpid());
|
2008-04-23 04:17:12 +04:00
|
|
|
while(sleeper == 1 ) {
|
|
|
|
sleep(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-02-20 01:15:52 +03:00
|
|
|
#if OPAL_ENABLE_FT_THREAD == 1
|
|
|
|
static void* opal_cr_thread_fn(opal_object_t *obj)
|
|
|
|
{
|
|
|
|
/* Sanity Check */
|
|
|
|
if( !opal_cr_thread_use_if_avail ) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2008-09-22 20:49:56 +04:00
|
|
|
if( opal_cr_debug_sigpipe ) {
|
|
|
|
if( SIG_ERR == signal(SIGPIPE, opal_cr_sigpipe_debug_signal_handler) ) {
|
|
|
|
;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-02-20 01:15:52 +03:00
|
|
|
/*
|
|
|
|
* Register this thread with the OPAL CRS
|
|
|
|
*/
|
|
|
|
if( NULL != opal_crs.crs_reg_thread ) {
|
|
|
|
if( OPAL_SUCCESS != opal_crs.crs_reg_thread() ) {
|
|
|
|
opal_output(0, "Error: Thread registration failed\n");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
#if OPAL_ENABLE_CRDEBUG == 1
|
|
|
|
opal_cr_debug_free_threads[1] = opal_thread_get_self();
|
|
|
|
#endif
|
|
|
|
|
2008-02-20 01:15:52 +03:00
|
|
|
/*
|
|
|
|
* Wait to become active
|
|
|
|
*/
|
2008-02-21 01:33:42 +03:00
|
|
|
while( !opal_cr_thread_is_active && !opal_cr_thread_is_done) {
|
2008-02-20 01:15:52 +03:00
|
|
|
sched_yield();
|
|
|
|
}
|
|
|
|
|
2008-02-21 01:33:42 +03:00
|
|
|
if( opal_cr_thread_is_done ) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2008-02-20 01:15:52 +03:00
|
|
|
/*
|
|
|
|
* While active
|
|
|
|
*/
|
2008-10-08 18:19:37 +04:00
|
|
|
while( opal_cr_thread_is_active && !opal_cr_thread_is_done) {
|
2008-02-20 01:15:52 +03:00
|
|
|
/*
|
|
|
|
* While no threads are in the MPI library then try to process
|
|
|
|
* checkpoint requests.
|
|
|
|
*/
|
|
|
|
OPAL_CR_THREAD_LOCK();
|
|
|
|
|
|
|
|
while ( !opal_cr_thread_in_library ) {
|
|
|
|
sched_yield();
|
|
|
|
usleep(opal_cr_thread_sleep_check);
|
|
|
|
|
|
|
|
OPAL_CR_TEST_CHECKPOINT_READY();
|
|
|
|
/* Sanity check */
|
|
|
|
if( OPAL_UNLIKELY(opal_cr_currently_stalled) ) {
|
|
|
|
OPAL_CR_TEST_CHECKPOINT_READY();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* While they are in the MPI library yield
|
|
|
|
*/
|
|
|
|
OPAL_CR_THREAD_UNLOCK();
|
|
|
|
|
|
|
|
while ( opal_cr_thread_in_library && opal_cr_thread_is_active ) {
|
|
|
|
usleep(opal_cr_thread_sleep_wait);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
void opal_cr_thread_init_library(void)
|
|
|
|
{
|
|
|
|
if( !opal_cr_thread_use_if_avail ) {
|
|
|
|
OPAL_CR_TEST_CHECKPOINT_READY();
|
|
|
|
} else {
|
|
|
|
/* Activate the CR Thread */
|
|
|
|
opal_cr_thread_in_library = false;
|
2008-02-21 01:33:42 +03:00
|
|
|
opal_cr_thread_is_done = false;
|
2008-02-20 01:15:52 +03:00
|
|
|
opal_cr_thread_is_active = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void opal_cr_thread_finalize_library(void)
|
|
|
|
{
|
|
|
|
if( !opal_cr_thread_use_if_avail ) {
|
|
|
|
OPAL_CR_TEST_CHECKPOINT_READY();
|
|
|
|
} else {
|
|
|
|
/* Deactivate the CR Thread */
|
2008-02-21 01:33:42 +03:00
|
|
|
opal_cr_thread_is_done = true;
|
2008-10-08 18:19:37 +04:00
|
|
|
opal_cr_thread_is_active = false;
|
|
|
|
OPAL_CR_LOCK();
|
2008-02-20 01:15:52 +03:00
|
|
|
opal_cr_thread_in_library = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void opal_cr_thread_abort_library(void)
|
|
|
|
{
|
|
|
|
if( !opal_cr_thread_use_if_avail ) {
|
|
|
|
OPAL_CR_TEST_CHECKPOINT_READY();
|
|
|
|
} else {
|
|
|
|
/* Deactivate the CR Thread */
|
2008-02-21 01:33:42 +03:00
|
|
|
opal_cr_thread_is_done = true;
|
2008-10-08 18:19:37 +04:00
|
|
|
opal_cr_thread_is_active = false;
|
|
|
|
OPAL_CR_LOCK();
|
2008-02-20 01:15:52 +03:00
|
|
|
opal_cr_thread_in_library = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void opal_cr_thread_enter_library(void)
|
|
|
|
{
|
|
|
|
if( !opal_cr_thread_use_if_avail ) {
|
|
|
|
OPAL_CR_TEST_CHECKPOINT_READY();
|
|
|
|
} else {
|
|
|
|
/* Lock out the CR Thread */
|
|
|
|
OPAL_CR_LOCK();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void opal_cr_thread_exit_library(void)
|
|
|
|
{
|
|
|
|
if( !opal_cr_thread_use_if_avail ) {
|
|
|
|
OPAL_CR_TEST_CHECKPOINT_READY();
|
|
|
|
} else {
|
|
|
|
/* Allow CR Thread to continue */
|
|
|
|
OPAL_CR_UNLOCK();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void opal_cr_thread_noop_progress(void)
|
|
|
|
{
|
|
|
|
if( !opal_cr_thread_use_if_avail ) {
|
|
|
|
OPAL_CR_TEST_CHECKPOINT_READY();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* OPAL_ENABLE_FT_THREAD == 1 */
|
2008-10-16 19:09:00 +04:00
|
|
|
|
|
|
|
static double opal_cr_get_time() {
|
|
|
|
double wtime;
|
|
|
|
|
|
|
|
#if OPAL_TIMER_USEC_NATIVE
|
|
|
|
wtime = (double)opal_timer_base_get_usec() / 1000000.0;
|
|
|
|
#else
|
|
|
|
struct timeval tv;
|
|
|
|
gettimeofday(&tv, NULL);
|
|
|
|
wtime = tv.tv_sec;
|
|
|
|
wtime += (double)tv.tv_usec / 1000000.0;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return wtime;
|
|
|
|
}
|
|
|
|
|
|
|
|
void opal_cr_set_time(int idx)
|
|
|
|
{
|
|
|
|
if(idx < OPAL_CR_TIMER_MAX ) {
|
|
|
|
if( timer_start[idx] <= 0.0 ) {
|
|
|
|
timer_start[idx] = opal_cr_get_time();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void opal_cr_clear_timers(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for(i = 0; i < OPAL_CR_TIMER_MAX; ++i) {
|
|
|
|
timer_start[i] = 0.0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void display_indv_timer_core(double diff, char *str) {
|
|
|
|
double total = 0;
|
|
|
|
double perc = 0;
|
|
|
|
|
|
|
|
total = timer_start[OPAL_CR_TIMER_MAX-1] - timer_start[OPAL_CR_TIMER_ENTRY0];
|
|
|
|
perc = (diff/total) * 100;
|
|
|
|
|
|
|
|
opal_output(0,
|
|
|
|
"opal_cr: timing: %-20s = %10.2f s\t%10.2f s\t%6.2f\n",
|
|
|
|
str,
|
|
|
|
diff,
|
|
|
|
total,
|
|
|
|
perc);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
void opal_cr_display_all_timers(void)
|
|
|
|
{
|
|
|
|
double diff = 0.0;
|
|
|
|
char * label = NULL;
|
|
|
|
|
|
|
|
if( opal_cr_timing_target_rank != opal_cr_timing_my_rank ) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
opal_output(0, "OPAL CR Timing: ******************** Summary Begin\n");
|
|
|
|
|
|
|
|
/********** Entry into the system **********/
|
|
|
|
label = strdup("Start Entry Point");
|
|
|
|
if( opal_cr_timing_barrier_enabled ) {
|
|
|
|
diff = timer_start[OPAL_CR_TIMER_CRCPBR0] - timer_start[OPAL_CR_TIMER_ENTRY0];
|
|
|
|
} else {
|
|
|
|
diff = timer_start[OPAL_CR_TIMER_CRCP0] - timer_start[OPAL_CR_TIMER_ENTRY0];
|
|
|
|
}
|
|
|
|
display_indv_timer_core(diff, label);
|
|
|
|
free(label);
|
|
|
|
|
|
|
|
/********** CRCP Protocol **********/
|
|
|
|
label = strdup("CRCP Protocol");
|
|
|
|
if( opal_cr_timing_barrier_enabled ) {
|
|
|
|
diff = timer_start[OPAL_CR_TIMER_CRCPBR1] - timer_start[OPAL_CR_TIMER_CRCP0];
|
|
|
|
} else {
|
|
|
|
diff = timer_start[OPAL_CR_TIMER_P2P0] - timer_start[OPAL_CR_TIMER_CRCP0];
|
|
|
|
}
|
|
|
|
display_indv_timer_core(diff, label);
|
|
|
|
free(label);
|
|
|
|
|
|
|
|
/********** P2P Suspend **********/
|
|
|
|
label = strdup("P2P Suspend");
|
|
|
|
if( opal_cr_timing_barrier_enabled ) {
|
|
|
|
diff = timer_start[OPAL_CR_TIMER_P2PBR0] - timer_start[OPAL_CR_TIMER_P2P0];
|
|
|
|
} else {
|
|
|
|
diff = timer_start[OPAL_CR_TIMER_CORE0] - timer_start[OPAL_CR_TIMER_P2P0];
|
|
|
|
}
|
|
|
|
display_indv_timer_core(diff, label);
|
|
|
|
free(label);
|
|
|
|
|
|
|
|
/********** Checkpoint to Disk **********/
|
|
|
|
label = strdup("Checkpoint");
|
|
|
|
diff = timer_start[OPAL_CR_TIMER_CORE1] - timer_start[OPAL_CR_TIMER_CORE0];
|
|
|
|
display_indv_timer_core(diff, label);
|
|
|
|
free(label);
|
|
|
|
|
|
|
|
/********** P2P Reactivation **********/
|
|
|
|
label = strdup("P2P Reactivation");
|
|
|
|
if( opal_cr_timing_barrier_enabled ) {
|
|
|
|
diff = timer_start[OPAL_CR_TIMER_P2PBR2] - timer_start[OPAL_CR_TIMER_CORE1];
|
|
|
|
} else {
|
|
|
|
diff = timer_start[OPAL_CR_TIMER_CRCP1] - timer_start[OPAL_CR_TIMER_CORE1];
|
|
|
|
}
|
|
|
|
display_indv_timer_core(diff, label);
|
|
|
|
free(label);
|
|
|
|
|
|
|
|
/********** CRCP Protocol Finalize **********/
|
|
|
|
label = strdup("CRCP Cleanup");
|
|
|
|
if( opal_cr_timing_barrier_enabled ) {
|
|
|
|
diff = timer_start[OPAL_CR_TIMER_COREBR1] - timer_start[OPAL_CR_TIMER_CRCP1];
|
|
|
|
} else {
|
|
|
|
diff = timer_start[OPAL_CR_TIMER_CORE2] - timer_start[OPAL_CR_TIMER_CRCP1];
|
|
|
|
}
|
|
|
|
display_indv_timer_core(diff, label);
|
|
|
|
free(label);
|
|
|
|
|
|
|
|
/********** Exit the system **********/
|
|
|
|
label = strdup("Finish Entry Point");
|
|
|
|
diff = timer_start[OPAL_CR_TIMER_ENTRY4] - timer_start[OPAL_CR_TIMER_CORE2];
|
|
|
|
display_indv_timer_core(diff, label);
|
|
|
|
free(label);
|
|
|
|
|
|
|
|
opal_output(0, "OPAL CR Timing: ******************** Summary End\n");
|
|
|
|
}
|
A number of C/R enhancements per RFC below:
http://www.open-mpi.org/community/lists/devel/2010/07/8240.php
Documentation:
http://osl.iu.edu/research/ft/
Major Changes:
--------------
* Added C/R-enabled Debugging support.
Enabled with the --enable-crdebug flag. See the following website for more information:
http://osl.iu.edu/research/ft/crdebug/
* Added Stable Storage (SStore) framework for checkpoint storage
* 'central' component does a direct to central storage save
* 'stage' component stages checkpoints to central storage while the application continues execution.
* 'stage' supports offline compression of checkpoints before moving (sstore_stage_compress)
* 'stage' supports local caching of checkpoints to improve automatic recovery (sstore_stage_caching)
* Added Compression (compress) framework to support
* Add two new ErrMgr recovery policies
* {{{crmig}}} C/R Process Migration
* {{{autor}}} C/R Automatic Recovery
* Added the {{{ompi-migrate}}} command line tool to support the {{{crmig}}} ErrMgr component
* Added CR MPI Ext functions (enable them with {{{--enable-mpi-ext=cr}}} configure option)
* {{{OMPI_CR_Checkpoint}}} (Fixes trac:2342)
* {{{OMPI_CR_Restart}}}
* {{{OMPI_CR_Migrate}}} (may need some more work for mapping rules)
* {{{OMPI_CR_INC_register_callback}}} (Fixes trac:2192)
* {{{OMPI_CR_Quiesce_start}}}
* {{{OMPI_CR_Quiesce_checkpoint}}}
* {{{OMPI_CR_Quiesce_end}}}
* {{{OMPI_CR_self_register_checkpoint_callback}}}
* {{{OMPI_CR_self_register_restart_callback}}}
* {{{OMPI_CR_self_register_continue_callback}}}
* The ErrMgr predicted_fault() interface has been changed to take an opal_list_t of ErrMgr defined types. This will allow us to better support a wider range of fault prediction services in the future.
* Add a progress meter to:
* FileM rsh (filem_rsh_process_meter)
* SnapC full (snapc_full_progress_meter)
* SStore stage (sstore_stage_progress_meter)
* Added 2 new command line options to ompi-restart
* --showme : Display the full command line that would have been exec'ed.
* --mpirun_opts : Command line options to pass directly to mpirun. (Fixes trac:2413)
* Deprecated some MCA params:
* crs_base_snapshot_dir deprecated, use sstore_stage_local_snapshot_dir
* snapc_base_global_snapshot_dir deprecated, use sstore_base_global_snapshot_dir
* snapc_base_global_shared deprecated, use sstore_stage_global_is_shared
* snapc_base_store_in_place deprecated, replaced with different components of SStore
* snapc_base_global_snapshot_ref deprecated, use sstore_base_global_snapshot_ref
* snapc_base_establish_global_snapshot_dir deprecated, never well supported
* snapc_full_skip_filem deprecated, use sstore_stage_skip_filem
Minor Changes:
--------------
* Fixes trac:1924 : {{{ompi-restart}}} now recognizes path prefixed checkpoint handles and does the right thing.
* Fixes trac:2097 : {{{ompi-info}}} should now report all available CRS components
* Fixes trac:2161 : Manual checkpoint movement. A user can 'mv' a checkpoint directory from the original location to another and still restart from it.
* Fixes trac:2208 : Honor various TMPDIR varaibles instead of forcing {{{/tmp}}}
* Move {{{ompi_cr_continue_like_restart}}} to {{{orte_cr_continue_like_restart}}} to be more flexible in where this should be set.
* opal_crs_base_metadata_write* functions have been moved to SStore to support a wider range of metadata handling functionality.
* Cleanup the CRS framework and components to work with the SStore framework.
* Cleanup the SnapC framework and components to work with the SStore framework (cleans up these code paths considerably).
* Add 'quiesce' hook to CRCP for a future enhancement.
* We now require a BLCR version that supports {{{cr_request_file()}}} or {{{cr_request_checkpoint()}}} in order to make the code more maintainable. Note that {{{cr_request_file}}} has been deprecated since 0.7.0, so we prefer to use {{{cr_request_checkpoint()}}}.
* Add optional application level INC callbacks (registered through the CR MPI Ext interface).
* Increase the {{{opal_cr_thread_sleep_wait}}} parameter to 1000 microseconds to make the C/R thread less aggressive.
* {{{opal-restart}}} now looks for cache directories before falling back on stable storage when asked.
* {{{opal-restart}}} also support local decompression before restarting
* {{{orte-checkpoint}}} now uses the SStore framework to work with the metadata
* {{{orte-restart}}} now uses the SStore framework to work with the metadata
* Remove the {{{orte-restart}}} preload option. This was removed since the user only needs to select the 'stage' component in order to support this functionality.
* Since the '-am' parameter is saved in the metadata, {{{ompi-restart}}} no longer hard codes {{{-am ft-enable-cr}}}.
* Fix {{{hnp}}} ErrMgr so that if a previous component in the stack has 'fixed' the problem, then it should be skipped.
* Make sure to decrement the number of 'num_local_procs' in the orted when one goes away.
* odls now checks the SStore framework to see if it needs to load any checkpoint files before launching (to support 'stage'). This separates the SStore logic from the --preload-[binary|files] options.
* Add unique IDs to the named pipes established between the orted and the app in SnapC. This is to better support migration and automatic recovery activities.
* Improve the checks for 'already checkpointing' error path.
* A a recovery output timer, to show how long it takes to restart a job
* Do a better job of cleaning up the old session directory on restart.
* Add a local module to the autor and crmig ErrMgr components. These small modules prevent the 'orted' component from attempting a local recovery (Which does not work for MPI apps at the moment)
* Add a fix for bounding the checkpointable region between MPI_Init and MPI_Finalize.
This commit was SVN r23587.
The following Trac tickets were found above:
Ticket 1924 --> https://svn.open-mpi.org/trac/ompi/ticket/1924
Ticket 2097 --> https://svn.open-mpi.org/trac/ompi/ticket/2097
Ticket 2161 --> https://svn.open-mpi.org/trac/ompi/ticket/2161
Ticket 2192 --> https://svn.open-mpi.org/trac/ompi/ticket/2192
Ticket 2208 --> https://svn.open-mpi.org/trac/ompi/ticket/2208
Ticket 2342 --> https://svn.open-mpi.org/trac/ompi/ticket/2342
Ticket 2413 --> https://svn.open-mpi.org/trac/ompi/ticket/2413
2010-08-11 00:51:11 +04:00
|
|
|
|
|
|
|
#if OPAL_ENABLE_CRDEBUG == 1
|
|
|
|
int opal_cr_debug_set_current_ckpt_thread_self(void)
|
|
|
|
{
|
|
|
|
int t;
|
|
|
|
|
|
|
|
if( NULL == opal_cr_debug_free_threads ) {
|
|
|
|
opal_cr_debug_num_free_threads = 3;
|
|
|
|
opal_cr_debug_free_threads = (opal_thread_t **)malloc(sizeof(opal_thread_t *) * opal_cr_debug_num_free_threads );
|
|
|
|
for(t = 0; t < opal_cr_debug_num_free_threads; ++t ) {
|
|
|
|
opal_cr_debug_free_threads[t] = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
opal_cr_debug_free_threads[0] = opal_thread_get_self();
|
|
|
|
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
int opal_cr_debug_clear_current_ckpt_thread(void)
|
|
|
|
{
|
|
|
|
opal_cr_debug_free_threads[0] = NULL;
|
|
|
|
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
int MPIR_checkpoint_debugger_detach(void) {
|
|
|
|
/* This function is meant to be a noop function for checkpoint/restart
|
|
|
|
* enabled debugging functionality */
|
|
|
|
#if 0
|
|
|
|
/* Once the debugger can successfully force threads into the function below,
|
|
|
|
* then we can uncomment this line */
|
|
|
|
if( MPIR_debug_with_checkpoint ) {
|
|
|
|
opal_cr_debug_threads_already_waiting = true;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
void MPIR_checkpoint_debugger_signal_handler(int signo)
|
|
|
|
{
|
|
|
|
opal_output_verbose(1, opal_cr_output,
|
|
|
|
"crs: MPIR_checkpoint_debugger_signal_handler(): Enter Debug signal handler...");
|
|
|
|
|
|
|
|
MPIR_checkpoint_debugger_waitpoint();
|
|
|
|
|
|
|
|
opal_output_verbose(1, opal_cr_output,
|
|
|
|
"crs: MPIR_checkpoint_debugger_signal_handler(): Leave Debug signal handler...");
|
|
|
|
}
|
|
|
|
|
|
|
|
void *MPIR_checkpoint_debugger_waitpoint(void)
|
|
|
|
{
|
|
|
|
int t;
|
|
|
|
opal_thread_t *thr = NULL;
|
|
|
|
|
|
|
|
thr = opal_thread_get_self();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Sanity check, if the debugger is not going to attach, then do not wait
|
|
|
|
* Make sure to open the debug gate, so that threads can get out
|
|
|
|
*/
|
|
|
|
if( !MPIR_debug_with_checkpoint ) {
|
|
|
|
opal_output_verbose(1, opal_cr_output,
|
|
|
|
"crs: MPIR_checkpoint_debugger_waitpoint(): Debugger is not attaching... (%d)",
|
|
|
|
(int)thr->t_handle);
|
|
|
|
MPIR_checkpoint_debug_gate = 1;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
opal_output_verbose(1, opal_cr_output,
|
|
|
|
"crs: MPIR_checkpoint_debugger_waitpoint(): Waiting for the Debugger to attach... (%d)",
|
|
|
|
(int)thr->t_handle);
|
|
|
|
MPIR_checkpoint_debug_gate = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Let special threads escape without waiting, they will wait later
|
|
|
|
*/
|
|
|
|
for(t = 0; t < opal_cr_debug_num_free_threads; ++t) {
|
|
|
|
if( opal_cr_debug_free_threads[t] != NULL &&
|
|
|
|
opal_thread_self_compare(opal_cr_debug_free_threads[t]) ) {
|
|
|
|
opal_output_verbose(1, opal_cr_output,
|
|
|
|
"crs: MPIR_checkpoint_debugger_waitpoint(): Checkpointing thread does not wait here... (%d)",
|
|
|
|
(int)thr->t_handle);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Force all other threads into the waiting function,
|
|
|
|
* unless they are already in there, then just return so we do not nest
|
|
|
|
* calls into this wait function and potentially confuse the debugger.
|
|
|
|
*/
|
|
|
|
if( opal_cr_debug_threads_already_waiting ) {
|
|
|
|
opal_output_verbose(1, opal_cr_output,
|
|
|
|
"crs: MPIR_checkpoint_debugger_waitpoint(): Threads are already waiting from debugger detach, do not wait here... (%d)",
|
|
|
|
(int)thr->t_handle);
|
|
|
|
return NULL;
|
|
|
|
} else {
|
|
|
|
opal_output_verbose(1, opal_cr_output,
|
|
|
|
"crs: MPIR_checkpoint_debugger_waitpoint(): Wait... (%d)",
|
|
|
|
(int)thr->t_handle);
|
|
|
|
return MPIR_checkpoint_debugger_breakpoint();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* A tight loop to wait for debugger to release this process from the
|
|
|
|
* breakpoint.
|
|
|
|
*/
|
|
|
|
void *MPIR_checkpoint_debugger_breakpoint(void)
|
|
|
|
{
|
|
|
|
/* spin until debugger attaches and releases us */
|
|
|
|
while (MPIR_checkpoint_debug_gate == 0) {
|
|
|
|
#if defined(__WINDOWS__)
|
|
|
|
Sleep(100); /* milliseconds */
|
|
|
|
#elif defined(HAVE_USLEEP)
|
|
|
|
usleep(100000); /* microseconds */
|
|
|
|
#else
|
|
|
|
sleep(1); /* seconds */
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
opal_cr_debug_threads_already_waiting = false;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
#endif
|