Merging in the jjhursey-ft-cr-stable branch (r13912 : HEAD).
This merge adds Checkpoint/Restart support to Open MPI. The initial frameworks and components support a LAM/MPI-like implementation. This commit follows the risk assessment presented to the Open MPI core development group on Feb. 22, 2007. This commit closes trac:158. More details to follow. This commit was SVN r14051. The following SVN revisions from the original message are invalid or inconsistent and therefore were not cross-referenced: r13912 The following Trac tickets were found above: Ticket 158 --> https://svn.open-mpi.org/trac/ompi/ticket/158
This commit is contained in:
parent
924cb0af11
commit
dadca7da88
@ -1,6 +1,6 @@
|
||||
dnl -*- shell-script -*-
|
||||
dnl
|
||||
dnl Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
dnl Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
dnl University Research and Technology
|
||||
dnl Corporation. All rights reserved.
|
||||
dnl Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -520,6 +520,105 @@ elif test "$with_cross" != "" ; then
|
||||
. "$with_cross"
|
||||
fi
|
||||
|
||||
#
|
||||
# --with-ft=TYPE
|
||||
# TYPE:
|
||||
# - LAM (synonym for 'cr' currently)
|
||||
# - cr
|
||||
# /* General FT sections */
|
||||
# #if OPAL_ENABLE_FT == 0 /* FT Disabled globally */
|
||||
# #if OPAL_ENABLE_FT == 1 /* FT Enabled globally */
|
||||
# /* CR Specific sections */
|
||||
# #if OPAL_ENABLE_FT_CR == 0 /* FT Ckpt/Restart Disabled */
|
||||
# #if OPAL_ENABLE_FT_CR == 1 /* FT Ckpt/Restart Enabled */
|
||||
#
|
||||
AC_MSG_CHECKING([if want fault tolerance])
|
||||
AC_ARG_WITH(ft,
|
||||
[AC_HELP_STRING([--with-ft=TYPE],
|
||||
[Specify the type of fault tolerance to enable. Options: LAM (LAM/MPI-like), cr (Checkpoint/Restart) (default: disabled)])],
|
||||
[ompi_want_ft=1],
|
||||
[ompi_want_ft=0])
|
||||
if test "$with_ft" = "no" -o "$ompi_want_ft" = "0"; then
|
||||
ompi_want_ft=0
|
||||
ompi_want_ft_cr=0
|
||||
AC_MSG_RESULT([Disabled fault tolerance])
|
||||
else
|
||||
ompi_want_ft=1
|
||||
ompi_want_ft_cr=0
|
||||
ompi_want_ft_type=none
|
||||
|
||||
# Default value
|
||||
if test "$with_ft" = "" -o "$with_ft" = "yes"; then
|
||||
ompi_want_ft_type=cr
|
||||
ompi_want_ft_cr=1
|
||||
elif test "$with_ft" = "LAM"; then
|
||||
ompi_want_ft_type=lam
|
||||
ompi_want_ft_cr=1
|
||||
elif test "$with_ft" = "lam"; then
|
||||
ompi_want_ft_type=lam
|
||||
ompi_want_ft_cr=1
|
||||
elif test "$with_ft" = "CR"; then
|
||||
ompi_want_ft_type=cr
|
||||
ompi_want_ft_cr=1
|
||||
elif test "$with_ft" = "cr"; then
|
||||
ompi_want_ft_type=cr
|
||||
ompi_want_ft_cr=1
|
||||
else
|
||||
AC_MSG_RESULT([Unrecognized FT TYPE: $with_ft])
|
||||
AC_MSG_ERROR([Cannot continue])
|
||||
fi
|
||||
AC_MSG_RESULT([Enabled $with_ft ($ompi_want_ft_type)])
|
||||
AC_MSG_WARN([**************************************************])
|
||||
AC_MSG_WARN([*** Fault Tolerance Integration into Open MPI is *])
|
||||
AC_MSG_WARN([*** a research quality implementation, and care *])
|
||||
AC_MSG_WARN([*** should be used when choosing to enable it. *])
|
||||
AC_MSG_WARN([**************************************************])
|
||||
fi
|
||||
AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT], [$ompi_want_ft],
|
||||
[Enable fault tolerance general components and logic])
|
||||
AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT_CR], [$ompi_want_ft_cr],
|
||||
[Enable fault tolerance checkpoint/restart components and logic])
|
||||
AM_CONDITIONAL(WANT_FT, test "$ompi_want_ft" = "1")
|
||||
|
||||
#
|
||||
# Fault Tolerance Components and Logic
|
||||
#
|
||||
# --enable-ft-thread
|
||||
# #if OPAL_ENABLE_FT_THREAD == 0 /* Disabled */
|
||||
# #if OPAL_ENABLE_FT_THREAD == 1 /* Enabled */
|
||||
#
|
||||
AC_MSG_CHECKING([if want fault tolerance thread])
|
||||
AC_ARG_ENABLE([ft_thread],
|
||||
[AC_HELP_STRING([--enable-ft-thread],
|
||||
[Enable fault tolerance thread running inside all processes. Requires progress threads (default: disabled)])])
|
||||
if test "$ompi_want_ft" = "0"; then
|
||||
ompi_want_ft_thread=0
|
||||
AC_MSG_RESULT([Disabled (fault tolerance disabled --without-ft-style)])
|
||||
elif test "$enable_ft_thread" = "yes"; then
|
||||
# This check may not fire since progress threads are checked after this section :/
|
||||
if test "$OMPI_ENABLE_PROGRESS_THREADS" = "0"; then
|
||||
AC_MSG_RESULT([Must enable progress threads to use this option])
|
||||
AC_MSG_ERROR([Cannot continue])
|
||||
else
|
||||
AC_MSG_RESULT([yes])
|
||||
ompi_want_ft_thread=1
|
||||
AC_MSG_WARN([**************************************************])
|
||||
AC_MSG_WARN([*** Fault Tolerance with a thread in Open MPI *])
|
||||
AC_MSG_WARN([*** is an experimental, research quality option. *])
|
||||
AC_MSG_WARN([*** It requires progress threads to be used, and *])
|
||||
AC_MSG_WARN([*** care should be used when enabling these *])
|
||||
AC_MSG_WARN([*** options. *])
|
||||
AC_MSG_WARN([**************************************************])
|
||||
fi
|
||||
else
|
||||
ompi_want_ft_thread=0
|
||||
AC_MSG_RESULT([Disabled])
|
||||
fi
|
||||
AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT_THREAD], [$ompi_want_ft_thread],
|
||||
[Enable fault tolerance thread in Open PAL])
|
||||
AM_CONDITIONAL(WANT_FT_THREAD, test "$ompi_want_ft_thread" = "1")
|
||||
|
||||
|
||||
#
|
||||
# Do we want to install binaries?
|
||||
#
|
||||
|
@ -1,6 +1,6 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -1155,6 +1155,8 @@ AC_CONFIG_FILES([
|
||||
opal/tools/wrappers/Makefile
|
||||
opal/tools/wrappers/opalcc-wrapper-data.txt
|
||||
opal/tools/wrappers/opalc++-wrapper-data.txt
|
||||
opal/tools/opal-checkpoint/Makefile
|
||||
opal/tools/opal-restart/Makefile
|
||||
|
||||
orte/Makefile
|
||||
orte/include/Makefile
|
||||
@ -1170,6 +1172,8 @@ AC_CONFIG_FILES([
|
||||
orte/tools/wrappers/Makefile
|
||||
orte/tools/wrappers/ortecc-wrapper-data.txt
|
||||
orte/tools/wrappers/ortec++-wrapper-data.txt
|
||||
orte/tools/orte-checkpoint/Makefile
|
||||
orte/tools/orte-restart/Makefile
|
||||
orte/tools/orte-ps/Makefile
|
||||
orte/tools/orte-clean/Makefile
|
||||
|
||||
|
@ -19,6 +19,10 @@
|
||||
amca_paramdir = $(AMCA_PARAM_SETS_DIR)
|
||||
dist_amca_param_DATA = amca-param-sets/example.conf
|
||||
|
||||
if WANT_FT
|
||||
dist_amca_param_DATA += amca-param-sets/ft-enable-cr
|
||||
endif
|
||||
|
||||
EXTRA_DIST = \
|
||||
dist/make_dist_tarball \
|
||||
dist/linux/openmpi.spec \
|
||||
|
34
contrib/amca-param-sets/ft-enable-cr
Normal file
34
contrib/amca-param-sets/ft-enable-cr
Normal file
@ -0,0 +1,34 @@
|
||||
#
|
||||
# An Aggregate MCA Parameter Set to enable checkpoint/restart capabilities
|
||||
# for a job.
|
||||
#
|
||||
# Usage:
|
||||
# shell$ mpirun -am ft-enable-cr ./app
|
||||
#
|
||||
|
||||
#
|
||||
# OPAL Parameters
|
||||
# - Select only checkpoint ready components
|
||||
# - Enable Additional FT infrastructure
|
||||
# - Auto-select OPAL CRS component
|
||||
#
|
||||
mca_base_component_distill_checkpoint_ready=1
|
||||
ft_cr_enabled=1
|
||||
crs=
|
||||
|
||||
#
|
||||
# ORTE Parameters
|
||||
# - Wrap the RML
|
||||
# - Use the 'full' Snapshot Coordinator
|
||||
#
|
||||
rml_wrapper=ftrm
|
||||
snapc=full
|
||||
#filem=rsh
|
||||
|
||||
#
|
||||
# OMPI Parameters
|
||||
# - Wrap the PML
|
||||
# - Use the LAM/MPI-like Coordinated Checkpoint/Restart Coordination Protocol
|
||||
#
|
||||
pml_wrapper=crcpw
|
||||
crcp=coord
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
@ -437,6 +437,8 @@ int ompi_ddt_get_pack_description( ompi_datatype_t* datatype,
|
||||
if( NULL == datatype->packed_description ) {
|
||||
if( datatype->flags & DT_FLAG_PREDEFINED ) {
|
||||
datatype->packed_description = malloc( 2 * sizeof(int) );
|
||||
} else if( NULL == args ) {
|
||||
return OMPI_ERROR;
|
||||
} else {
|
||||
datatype->packed_description = malloc( args->total_pack_size );
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -46,8 +46,8 @@ mca_allocator_base_component_t mca_allocator_basic_component = {
|
||||
/* Next the MCA v1.0.0 module meta data */
|
||||
|
||||
{
|
||||
/* Whether the module is checkpointable or not */
|
||||
false
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
mca_allocator_basic_component_init
|
||||
};
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -125,8 +125,8 @@ mca_allocator_base_component_t mca_allocator_bucket_component = {
|
||||
/* Next the MCA v1.0.0 module meta data */
|
||||
|
||||
{
|
||||
/* Whether the module is checkpointable or not */
|
||||
false
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
mca_allocator_bucket_module_init
|
||||
};
|
||||
|
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -22,10 +22,11 @@ headers += \
|
||||
base/bml_base_endpoint.h
|
||||
|
||||
libmca_bml_la_SOURCES += \
|
||||
base/bml_base_btl.c \
|
||||
base/bml_base_btl.c \
|
||||
base/bml_base_btl.h \
|
||||
base/bml_base_endpoint.h \
|
||||
base/bml_base_endpoint.c \
|
||||
base/bml_base_endpoint.c \
|
||||
base/bml_base_init.c \
|
||||
base/bml_base_close.c \
|
||||
base/bml_base_open.c
|
||||
base/bml_base_open.c \
|
||||
base/bml_base_ft.c
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
@ -53,6 +53,7 @@ OMPI_DECLSPEC int mca_bml_base_init(bool enable_progress_threads,
|
||||
OMPI_DECLSPEC int mca_bml_base_close(void);
|
||||
OMPI_DECLSPEC bool mca_bml_base_inited(void);
|
||||
|
||||
OMPI_DECLSPEC int mca_bml_base_ft_event(int state);
|
||||
|
||||
|
||||
/*
|
||||
|
70
ompi/mca/bml/base/bml_base_ft.c
Normal file
70
ompi/mca/bml/base/bml_base_ft.c
Normal file
@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/mca/bml/bml.h"
|
||||
#include "ompi/mca/bml/base/base.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "ompi/mca/btl/base/base.h"
|
||||
#include "ompi/mca/bml/base/bml_base_endpoint.h"
|
||||
#include "ompi/mca/bml/base/bml_base_btl.h"
|
||||
|
||||
int mca_bml_base_ft_event(int state)
|
||||
{
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_RESTART == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_TERM == state ) {
|
||||
;
|
||||
}
|
||||
else {
|
||||
;
|
||||
}
|
||||
|
||||
/*
|
||||
* BML is expected to call ft_event in
|
||||
* - BTL(s)
|
||||
* - MPool(s)
|
||||
* Currently you can't do this from outside a component
|
||||
* So just return Unimplemented
|
||||
*/
|
||||
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_RESTART == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_TERM == state ) {
|
||||
;
|
||||
}
|
||||
else {
|
||||
;
|
||||
}
|
||||
|
||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
@ -38,7 +38,8 @@ mca_bml_base_module_t mca_bml = {
|
||||
NULL, /* bml_register */
|
||||
NULL, /* bml_register_error */
|
||||
NULL, /* bml_finalize*/
|
||||
NULL /* bml_progress */
|
||||
NULL, /* bml_progress */
|
||||
NULL /* FT event */
|
||||
};
|
||||
mca_bml_base_component_t mca_bml_component;
|
||||
|
||||
@ -98,4 +99,3 @@ int mca_bml_base_init( bool enable_progress_threads,
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -34,6 +34,9 @@
|
||||
#include "ompi/types.h"
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
|
||||
#include "opal/mca/crs/crs.h"
|
||||
#include "opal/mca/crs/base/base.h"
|
||||
|
||||
#define OMPI_ENABLE_DEBUG_RELIABILITY 0
|
||||
|
||||
/*
|
||||
@ -445,7 +448,6 @@ typedef struct mca_bml_base_module_t* (*mca_bml_base_component_init_fn_t)(
|
||||
|
||||
typedef int (*mca_bml_base_module_progress_fn_t)(void);
|
||||
|
||||
|
||||
/**
|
||||
* BML component descriptor. Contains component version information
|
||||
* and component open/close/init functions.
|
||||
@ -455,7 +457,6 @@ struct mca_bml_base_component_1_0_0_t {
|
||||
mca_base_component_t bml_version;
|
||||
mca_base_component_data_1_0_0_t bml_data;
|
||||
mca_bml_base_component_init_fn_t bml_init;
|
||||
|
||||
};
|
||||
typedef struct mca_bml_base_component_1_0_0_t mca_bml_base_component_1_0_0_t;
|
||||
typedef struct mca_bml_base_component_1_0_0_t mca_bml_base_component_t;
|
||||
@ -610,7 +611,12 @@ typedef int (*mca_bml_base_module_register_error_cb_fn_t)(
|
||||
mca_btl_base_module_error_cb_fn_t cbfunc
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
* Fault Tolerance Event Notification Function
|
||||
* @param status Checkpoint Status
|
||||
* @return OMPI_SUCCESS or failure status
|
||||
*/
|
||||
typedef int (*mca_bml_base_module_ft_event_fn_t)(int status);
|
||||
|
||||
|
||||
/**
|
||||
@ -638,6 +644,7 @@ struct mca_bml_base_module_t {
|
||||
|
||||
mca_bml_base_module_progress_fn_t bml_progress;
|
||||
|
||||
mca_bml_base_module_ft_event_fn_t bml_ft_event;
|
||||
};
|
||||
typedef struct mca_bml_base_module_t mca_bml_base_module_t;
|
||||
|
||||
|
@ -52,7 +52,8 @@ mca_bml_r2_module_t mca_bml_r2 = {
|
||||
mca_bml_r2_register,
|
||||
mca_bml_r2_register_error,
|
||||
mca_bml_r2_finalize,
|
||||
mca_bml_r2_progress
|
||||
mca_bml_r2_progress,
|
||||
mca_bml_r2_ft_event
|
||||
}
|
||||
|
||||
};
|
||||
@ -797,3 +798,114 @@ int mca_bml_r2_component_fini(void)
|
||||
}
|
||||
|
||||
|
||||
int mca_bml_r2_ft_event(int state) {
|
||||
size_t btl_idx;
|
||||
int ret;
|
||||
ompi_proc_t** procs = NULL;
|
||||
size_t num_procs;
|
||||
|
||||
#if 0
|
||||
opal_output(0, "bml:r2: ft_event: *** R2 BML *** (%d)\n", state);
|
||||
#endif
|
||||
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
/* Do nothing for now */
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
/* Since nothingin Checkpoint, we are fine here */
|
||||
}
|
||||
else if(OPAL_CRS_RESTART == state) {
|
||||
procs = ompi_proc_all(&num_procs);
|
||||
if(NULL == procs)
|
||||
goto END_PRE_RESTART;
|
||||
|
||||
if (OMPI_SUCCESS != (ret = mca_bml_r2_del_procs(num_procs, procs) ) ) {
|
||||
goto END_PRE_RESTART;
|
||||
}
|
||||
|
||||
END_PRE_RESTART:
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_TERM == state ) {
|
||||
;
|
||||
}
|
||||
else {
|
||||
;
|
||||
}
|
||||
|
||||
/*
|
||||
* Call ft_event in:
|
||||
* - BTL
|
||||
* - MPool
|
||||
*/
|
||||
for(btl_idx = 0; btl_idx < mca_bml_r2.num_btl_modules; btl_idx++) {
|
||||
#if 0
|
||||
opal_output(0, "bml:r2: ft_event: Notify the %s BTL.\n",
|
||||
(mca_bml_r2.btl_modules[btl_idx])->btl_component->btl_version.mca_component_name);
|
||||
#endif
|
||||
/*
|
||||
* Close the btl
|
||||
*/
|
||||
if( NULL != (mca_bml_r2.btl_modules[btl_idx])->btl_ft_event) {
|
||||
if(OMPI_SUCCESS != (ret = (mca_bml_r2.btl_modules[btl_idx])->btl_ft_event(state) ) ) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Close its mpool
|
||||
*/
|
||||
if( NULL != (mca_bml_r2.btl_modules[btl_idx])->btl_mpool) {
|
||||
if(OMPI_SUCCESS != (ret = (mca_bml_r2.btl_modules[btl_idx])->btl_mpool->mpool_ft_event(state) ) ) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_RESTART == state) {
|
||||
struct mca_bml_base_endpoint_t ** endpoints = NULL;
|
||||
ompi_bitmap_t reachable;
|
||||
|
||||
OBJ_CONSTRUCT(&reachable, ompi_bitmap_t);
|
||||
if( OMPI_SUCCESS != (ret = ompi_bitmap_init(&reachable, num_procs)) ) {
|
||||
goto END_POST_RESTART;
|
||||
}
|
||||
|
||||
endpoints = (struct mca_bml_base_endpoint_t **) malloc ( num_procs *
|
||||
sizeof(struct mca_bml_base_endpoint_t*));
|
||||
if ( NULL == endpoints ) {
|
||||
goto END_POST_RESTART;
|
||||
}
|
||||
|
||||
/* Don't need to do this again since we still have the
|
||||
* values from PRE_RESTART
|
||||
* procs = ompi_proc_all(&num_procs);
|
||||
*/
|
||||
if (OMPI_SUCCESS != (ret = mca_bml_r2_add_procs(num_procs, procs, endpoints, &reachable) ) ) {
|
||||
goto END_POST_RESTART;
|
||||
}
|
||||
|
||||
END_POST_RESTART:
|
||||
if ( NULL != endpoints ) {
|
||||
free ( endpoints) ;
|
||||
}
|
||||
OBJ_DESTRUCT(&reachable);
|
||||
}
|
||||
else if(OPAL_CRS_TERM == state ) {
|
||||
;
|
||||
}
|
||||
else {
|
||||
;
|
||||
}
|
||||
|
||||
if( NULL != procs)
|
||||
free(procs);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
@ -96,6 +96,8 @@ int mca_bml_r2_finalize( void );
|
||||
|
||||
int mca_bml_r2_component_fini(void);
|
||||
|
||||
int mca_bml_r2_ft_event(int status);
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
@ -46,8 +46,8 @@ mca_bml_base_component_1_0_0_t mca_bml_r2_component = {
|
||||
|
||||
/* Next the MCA v1.0.0 component meta data */
|
||||
{
|
||||
/* Whether the component is checkpointable or not */
|
||||
false
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
mca_bml_r2_component_init
|
||||
};
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
@ -114,6 +114,9 @@
|
||||
#include "ompi/types.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
|
||||
#include "opal/mca/crs/crs.h"
|
||||
#include "opal/mca/crs/base/base.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
@ -544,6 +547,12 @@ typedef void (*mca_btl_base_module_dump_fn_t)(
|
||||
int verbose
|
||||
);
|
||||
|
||||
/**
|
||||
* Fault Tolerance Event Notification Function
|
||||
* @param state Checkpoint Status
|
||||
* @return OMPI_SUCCESS or failure status
|
||||
*/
|
||||
typedef int (*mca_btl_base_module_ft_event_fn_t)(int state);
|
||||
|
||||
/**
|
||||
* BTL module interface functions and attributes.
|
||||
@ -582,6 +591,7 @@ struct mca_btl_base_module_t {
|
||||
/* register a default error handler */
|
||||
mca_btl_base_module_register_error_fn_t btl_register_error;
|
||||
|
||||
mca_btl_base_module_ft_event_fn_t btl_ft_event;
|
||||
};
|
||||
typedef struct mca_btl_base_module_t mca_btl_base_module_t;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -87,8 +87,8 @@ mca_btl_gm_module_t mca_btl_gm_module = {
|
||||
#endif
|
||||
mca_btl_base_dump,
|
||||
NULL, /* mpool */
|
||||
mca_btl_gm_register_error_cb
|
||||
|
||||
mca_btl_gm_register_error_cb,
|
||||
mca_btl_gm_ft_event
|
||||
}
|
||||
};
|
||||
|
||||
@ -956,3 +956,22 @@ int mca_btl_gm_finalize(struct mca_btl_base_module_t* btl)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_btl_gm_ft_event(int state) {
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_RESTART == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_TERM == state ) {
|
||||
;
|
||||
}
|
||||
else {
|
||||
;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -338,6 +338,12 @@ extern mca_btl_base_descriptor_t* mca_btl_gm_prepare_dst(
|
||||
size_t reserve,
|
||||
size_t* size);
|
||||
|
||||
/**
|
||||
* Fault Tolerance Event Notification Function
|
||||
* @param state Checkpoint State
|
||||
* @return OMPI_SUCCESS or failure status
|
||||
*/
|
||||
int mca_btl_gm_ft_event(int state);
|
||||
|
||||
/**
|
||||
* Acquire a send token - queue the fragment if none available
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -73,9 +73,8 @@ mca_btl_gm_component_t mca_btl_gm_component = {
|
||||
/* Next the MCA v1.0.0 component meta data */
|
||||
|
||||
{
|
||||
/* Whether the component is checkpointable or not */
|
||||
|
||||
false
|
||||
/* The component is not checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_NONE
|
||||
},
|
||||
|
||||
mca_btl_gm_component_init,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -63,7 +63,8 @@ mca_btl_mvapi_module_t mca_btl_mvapi_module = {
|
||||
mca_btl_mvapi_get,
|
||||
mca_btl_mvapi_dump,
|
||||
NULL, /* mpool */
|
||||
NULL /* error call back registration */
|
||||
NULL, /* error call back registration */
|
||||
mca_btl_mvapi_ft_event
|
||||
}
|
||||
};
|
||||
|
||||
@ -827,3 +828,23 @@ void mca_btl_mvapi_dump(
|
||||
opal_output( 0, "sd_wqe_hp %d\n", endpoint->sd_wqe_hp );
|
||||
opal_output( 0, "sd_wqe_lp %d\n", endpoint->sd_wqe_lp );
|
||||
}
|
||||
|
||||
int mca_btl_mvapi_ft_event(int state) {
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_RESTART == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_TERM == state ) {
|
||||
;
|
||||
}
|
||||
else {
|
||||
;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -515,6 +515,13 @@ extern void mca_btl_mvapi_dump(
|
||||
|
||||
int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t* mvapi_btl);
|
||||
|
||||
/**
|
||||
* Fault Tolerance Event Notification Function
|
||||
* @param state Checkpoint State
|
||||
* @return OMPI_SUCCESS or failure status
|
||||
*/
|
||||
int mca_btl_mvapi_ft_event(int state);
|
||||
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -77,9 +77,8 @@ mca_btl_mvapi_component_t mca_btl_mvapi_component = {
|
||||
/* Next the MCA v1.0.0 component meta data */
|
||||
|
||||
{
|
||||
/* Whether the component is checkpointable or not */
|
||||
|
||||
false
|
||||
/* The component is not checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_NONE
|
||||
},
|
||||
|
||||
mca_btl_mvapi_component_init,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
@ -477,6 +477,27 @@ int mca_btl_mx_finalize( struct mca_btl_base_module_t* btl )
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int mca_btl_mx_ft_event(int state) {
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_RESTART == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_TERM == state ) {
|
||||
;
|
||||
}
|
||||
else {
|
||||
;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
mca_btl_mx_module_t mca_btl_mx_module = {
|
||||
{
|
||||
&mca_btl_mx_component.super,
|
||||
@ -502,7 +523,7 @@ mca_btl_mx_module_t mca_btl_mx_module = {
|
||||
NULL, /* get */
|
||||
mca_btl_base_dump,
|
||||
NULL, /* mpool */
|
||||
NULL /* register error */
|
||||
NULL, /* register error */
|
||||
mca_btl_mx_ft_event
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
@ -290,6 +290,13 @@ mca_btl_mx_prepare_dst( struct mca_btl_base_module_t* btl,
|
||||
size_t reserve,
|
||||
size_t* size );
|
||||
|
||||
/**
|
||||
* Fault Tolerance Event Notification Function
|
||||
* @param state Checkpoint State
|
||||
* @return OMPI_SUCCESS or failure status
|
||||
*/
|
||||
int mca_btl_mx_ft_event(int state);
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
@ -58,9 +58,8 @@ mca_btl_mx_component_t mca_btl_mx_component = {
|
||||
/* Next the MCA v1.0.0 component meta data */
|
||||
|
||||
{
|
||||
/* Whether the component is checkpointable or not */
|
||||
|
||||
false
|
||||
/* The component is not checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_NONE
|
||||
},
|
||||
|
||||
mca_btl_mx_component_init,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -78,7 +78,8 @@ mca_btl_openib_module_t mca_btl_openib_module = {
|
||||
mca_btl_openib_get,
|
||||
mca_btl_base_dump,
|
||||
NULL, /* mpool */
|
||||
mca_btl_openib_register_error_cb /* error call back registration */
|
||||
mca_btl_openib_register_error_cb, /* error call back registration */
|
||||
mca_btl_openib_ft_event
|
||||
}
|
||||
};
|
||||
|
||||
@ -905,3 +906,23 @@ int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t *openib_btl)
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_btl_openib_ft_event(int state) {
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_RESTART == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_TERM == state ) {
|
||||
;
|
||||
}
|
||||
else {
|
||||
;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -440,6 +440,13 @@ extern void mca_btl_openib_send_frag_return(mca_btl_base_module_t* btl,
|
||||
|
||||
int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t* openib_btl);
|
||||
|
||||
/**
|
||||
* Fault Tolerance Event Notification Function
|
||||
* @param state Checkpoint State
|
||||
* @return OMPI_SUCCESS or failure status
|
||||
*/
|
||||
int mca_btl_openib_ft_event(int state);
|
||||
|
||||
#define BTL_OPENIB_HP_QP 0
|
||||
#define BTL_OPENIB_LP_QP 1
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -111,9 +111,8 @@ mca_btl_openib_component_t mca_btl_openib_component = {
|
||||
/* Next the MCA v1.0.0 component meta data */
|
||||
|
||||
{
|
||||
/* Whether the component is checkpointable or not */
|
||||
|
||||
false
|
||||
/* The component is not checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_NONE
|
||||
},
|
||||
|
||||
btl_openib_component_init,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -66,7 +66,8 @@ mca_btl_portals_module_t mca_btl_portals_module = {
|
||||
mca_btl_portals_get,
|
||||
mca_btl_base_dump,
|
||||
NULL, /* mpool */
|
||||
NULL /* register error */
|
||||
NULL, /* register error */
|
||||
NULL
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
@ -56,9 +56,8 @@ mca_btl_portals_component_t mca_btl_portals_component = {
|
||||
/* Next the MCA v1.0.0 module meta data */
|
||||
|
||||
{
|
||||
/* Whether the module is checkpointable or not */
|
||||
|
||||
false
|
||||
/* The component is not checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_NONE
|
||||
},
|
||||
|
||||
mca_btl_portals_component_init,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
@ -67,7 +67,8 @@ mca_btl_base_module_t mca_btl_self = {
|
||||
mca_btl_self_rdma, /* get */
|
||||
mca_btl_base_dump,
|
||||
NULL, /* mpool */
|
||||
NULL /* register error cb */
|
||||
NULL, /* register error cb */
|
||||
mca_btl_self_ft_event
|
||||
};
|
||||
|
||||
|
||||
@ -399,3 +400,23 @@ int mca_btl_self_rdma( struct mca_btl_base_module_t* btl,
|
||||
des->des_cbfunc(btl,endpoint,des,OMPI_SUCCESS);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_btl_self_ft_event(int state) {
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_RESTART == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_TERM == state ) {
|
||||
;
|
||||
}
|
||||
else {
|
||||
;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
@ -255,7 +255,14 @@ int mca_btl_self_rdma(
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct mca_btl_base_descriptor_t* descriptor
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
* Fault Tolerance Event Notification Function
|
||||
* @param state Checkpoint State
|
||||
* @return OMPI_SUCCESS or failure status
|
||||
*/
|
||||
int mca_btl_self_ft_event(int state);
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
@ -63,8 +63,8 @@ mca_btl_self_component_t mca_btl_self_component = {
|
||||
|
||||
/* Next the MCA v1.0.0 component meta data */
|
||||
{
|
||||
/* Whether the component is checkpointable or not */
|
||||
false
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
|
||||
mca_btl_self_component_init,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
@ -108,7 +108,8 @@ mca_btl_sm_t mca_btl_sm[2] = {
|
||||
NULL, /* get */
|
||||
mca_btl_base_dump,
|
||||
NULL, /* mpool */
|
||||
mca_btl_sm_register_error_cb /* register error */
|
||||
mca_btl_sm_register_error_cb, /* register error */
|
||||
mca_btl_sm_ft_event
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -136,7 +137,8 @@ mca_btl_sm_t mca_btl_sm[2] = {
|
||||
NULL, /* get function */
|
||||
mca_btl_base_dump,
|
||||
NULL, /* mpool */
|
||||
mca_btl_sm_register_error_cb /* register error */
|
||||
mca_btl_sm_register_error_cb, /* register error */
|
||||
mca_btl_sm_ft_event
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -922,3 +924,23 @@ int mca_btl_sm_send(
|
||||
MCA_BTL_SM_FIFO_WRITE(endpoint, endpoint->my_smp_rank, endpoint->peer_smp_rank, frag->hdr, rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
int mca_btl_sm_ft_event(int state) {
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_RESTART == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_TERM == state ) {
|
||||
;
|
||||
}
|
||||
else {
|
||||
;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
@ -345,6 +345,12 @@ extern int mca_btl_sm_send(
|
||||
mca_btl_base_tag_t tag
|
||||
);
|
||||
|
||||
/**
|
||||
* Fault Tolerance Event Notification Function
|
||||
* @param state Checkpoint State
|
||||
* @return OMPI_SUCCESS or failure status
|
||||
*/
|
||||
int mca_btl_sm_ft_event(int state);
|
||||
|
||||
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||
void mca_btl_sm_component_event_thread(opal_object_t*);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
@ -78,8 +78,8 @@ mca_btl_sm_component_t mca_btl_sm_component = {
|
||||
|
||||
/* Next the MCA v1.0.0 component meta data */
|
||||
{
|
||||
/* Whether the component is checkpointable or not */
|
||||
false
|
||||
/* The component is not checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_NONE
|
||||
},
|
||||
|
||||
mca_btl_sm_component_init,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
@ -61,7 +61,8 @@ mca_btl_tcp_module_t mca_btl_t |