1
1

Merging in the jjhursey-ft-cr-stable branch (r13912 : HEAD).

This merge adds Checkpoint/Restart support to Open MPI. The initial
frameworks and components support a LAM/MPI-like implementation.

This commit follows the risk assessment presented to the Open MPI core
development group on Feb. 22, 2007.

This commit closes trac:158

More details to follow.

This commit was SVN r14051.

The following SVN revisions from the original message are invalid or
inconsistent and therefore were not cross-referenced:
  r13912

The following Trac tickets were found above:
  Ticket 158 --> https://svn.open-mpi.org/trac/ompi/ticket/158
Этот коммит содержится в:
Josh Hursey 2007-03-16 23:11:45 +00:00
родитель 924cb0af11
Коммит dadca7da88
691 изменённых файлов: 30217 добавлений и 1182 удалений

Просмотреть файл

@ -1,6 +1,6 @@
dnl -*- shell-script -*-
dnl
dnl Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
dnl Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
dnl University Research and Technology
dnl Corporation. All rights reserved.
dnl Copyright (c) 2004-2005 The University of Tennessee and The University
@ -520,6 +520,105 @@ elif test "$with_cross" != "" ; then
. "$with_cross"
fi
#
# --with-ft=TYPE
#  TYPE:
#    - LAM (synonym for 'cr' currently)
#    - cr
#  Preprocessor symbols defined by this section:
#  /* General FT sections */
#  #if OPAL_ENABLE_FT == 0 /* FT Disabled globally */
#  #if OPAL_ENABLE_FT == 1 /* FT Enabled globally */
#  /* CR Specific sections */
#  #if OPAL_ENABLE_FT_CR == 0 /* FT Ckpt/Restart Disabled */
#  #if OPAL_ENABLE_FT_CR == 1 /* FT Ckpt/Restart Enabled */
#
AC_MSG_CHECKING([if want fault tolerance])
AC_ARG_WITH(ft,
    [AC_HELP_STRING([--with-ft=TYPE],
        [Specify the type of fault tolerance to enable. Options: LAM (LAM/MPI-like), cr (Checkpoint/Restart) (default: disabled)])],
    [ompi_want_ft=1],
    [ompi_want_ft=0])
# The "option given" action above also runs for --without-ft (with_ft=no),
# so "no" has to be checked explicitly here.
# Two separate test invocations are used instead of "test ... -o ...",
# which is not portable across shells.
if test "$with_ft" = "no" || test "$ompi_want_ft" = "0"; then
    ompi_want_ft=0
    ompi_want_ft_cr=0
    AC_MSG_RESULT([Disabled fault tolerance])
else
    ompi_want_ft=1
    ompi_want_ft_cr=0
    ompi_want_ft_type=none
    if test "$with_ft" = "" || test "$with_ft" = "yes"; then
        # No explicit TYPE given: default to checkpoint/restart
        ompi_want_ft_type=cr
        ompi_want_ft_cr=1
    elif test "$with_ft" = "LAM" || test "$with_ft" = "lam"; then
        # LAM/MPI-like fault tolerance also enables the C/R logic
        ompi_want_ft_type=lam
        ompi_want_ft_cr=1
    elif test "$with_ft" = "CR" || test "$with_ft" = "cr"; then
        ompi_want_ft_type=cr
        ompi_want_ft_cr=1
    else
        # Finish the pending "checking ..." line before aborting
        AC_MSG_RESULT([Unrecognized FT TYPE: $with_ft])
        AC_MSG_ERROR([Cannot continue])
    fi
    AC_MSG_RESULT([Enabled $with_ft ($ompi_want_ft_type)])
    AC_MSG_WARN([**************************************************])
    AC_MSG_WARN([*** Fault Tolerance Integration into Open MPI is *])
    AC_MSG_WARN([*** a research quality implementation, and care *])
    AC_MSG_WARN([*** should be used when choosing to enable it. *])
    AC_MSG_WARN([**************************************************])
fi
AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT], [$ompi_want_ft],
    [Enable fault tolerance general components and logic])
AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT_CR], [$ompi_want_ft_cr],
    [Enable fault tolerance checkpoint/restart components and logic])
AM_CONDITIONAL(WANT_FT, test "$ompi_want_ft" = "1")
#
# Fault Tolerance Components and Logic
#
# --enable-ft-thread
#  Preprocessor symbol defined by this section:
#  #if OPAL_ENABLE_FT_THREAD == 0 /* Disabled */
#  #if OPAL_ENABLE_FT_THREAD == 1 /* Enabled */
#
AC_MSG_CHECKING([if want fault tolerance thread])
AC_ARG_ENABLE([ft_thread],
    [AC_HELP_STRING([--enable-ft-thread],
        [Enable fault tolerance thread running inside all processes. Requires progress threads (default: disabled)])])
if test "$ompi_want_ft" = "0"; then
    # The FT thread is meaningless without fault tolerance itself.
    # The option that controls this is --with-ft / --without-ft.
    ompi_want_ft_thread=0
    AC_MSG_RESULT([Disabled (fault tolerance disabled --without-ft)])
elif test "$enable_ft_thread" = "yes"; then
    # This check may not fire since progress threads are checked after
    # this section, so OMPI_ENABLE_PROGRESS_THREADS may still be unset here.
    if test "$OMPI_ENABLE_PROGRESS_THREADS" = "0"; then
        # Finish the pending "checking ..." line before aborting
        AC_MSG_RESULT([Must enable progress threads to use this option])
        AC_MSG_ERROR([Cannot continue])
    else
        AC_MSG_RESULT([yes])
        ompi_want_ft_thread=1
        AC_MSG_WARN([**************************************************])
        AC_MSG_WARN([*** Fault Tolerance with a thread in Open MPI *])
        AC_MSG_WARN([*** is an experimental, research quality option. *])
        AC_MSG_WARN([*** It requires progress threads to be used, and *])
        AC_MSG_WARN([*** care should be used when enabling these *])
        AC_MSG_WARN([*** options. *])
        AC_MSG_WARN([**************************************************])
    fi
else
    ompi_want_ft_thread=0
    AC_MSG_RESULT([Disabled])
fi
AC_DEFINE_UNQUOTED([OPAL_ENABLE_FT_THREAD], [$ompi_want_ft_thread],
    [Enable fault tolerance thread in Open PAL])
AM_CONDITIONAL(WANT_FT_THREAD, test "$ompi_want_ft_thread" = "1")
#
# Do we want to install binaries?
#

Просмотреть файл

@ -1,6 +1,6 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
@ -1155,6 +1155,8 @@ AC_CONFIG_FILES([
opal/tools/wrappers/Makefile
opal/tools/wrappers/opalcc-wrapper-data.txt
opal/tools/wrappers/opalc++-wrapper-data.txt
opal/tools/opal-checkpoint/Makefile
opal/tools/opal-restart/Makefile
orte/Makefile
orte/include/Makefile
@ -1170,6 +1172,8 @@ AC_CONFIG_FILES([
orte/tools/wrappers/Makefile
orte/tools/wrappers/ortecc-wrapper-data.txt
orte/tools/wrappers/ortec++-wrapper-data.txt
orte/tools/orte-checkpoint/Makefile
orte/tools/orte-restart/Makefile
orte/tools/orte-ps/Makefile
orte/tools/orte-clean/Makefile

Просмотреть файл

@ -19,6 +19,10 @@
amca_paramdir = $(AMCA_PARAM_SETS_DIR)
dist_amca_param_DATA = amca-param-sets/example.conf
if WANT_FT
dist_amca_param_DATA += amca-param-sets/ft-enable-cr
endif
EXTRA_DIST = \
dist/make_dist_tarball \
dist/linux/openmpi.spec \

34
contrib/amca-param-sets/ft-enable-cr Обычный файл
Просмотреть файл

@ -0,0 +1,34 @@
#
# An Aggregate MCA Parameter Set to enable checkpoint/restart capabilities
# for a job.
#
# Usage:
# shell$ mpirun -am ft-enable-cr ./app
#
#
# OPAL Parameters
# - Select only checkpoint ready components
# - Enable Additional FT infrastructure
# - Auto-select OPAL CRS component
#
# Restrict component selection to checkpoint ready components
mca_base_component_distill_checkpoint_ready=1
# Turn on the additional FT infrastructure
ft_cr_enabled=1
# Left empty on purpose so the CRS component is auto-selected
crs=
#
# ORTE Parameters
# - Wrap the RML
# - Use the 'full' Snapshot Coordinator
#
rml_wrapper=ftrm
snapc=full
# NOTE(review): the filem setting below is commented out, so it is not
# applied by this parameter set; the reason is not stated here.
#filem=rsh
#
# OMPI Parameters
# - Wrap the PML
# - Use the LAM/MPI-like Coordinated Checkpoint/Restart Coordination Protocol
#
pml_wrapper=crcpw
crcp=coord

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -437,6 +437,8 @@ int ompi_ddt_get_pack_description( ompi_datatype_t* datatype,
if( NULL == datatype->packed_description ) {
if( datatype->flags & DT_FLAG_PREDEFINED ) {
datatype->packed_description = malloc( 2 * sizeof(int) );
} else if( NULL == args ) {
return OMPI_ERROR;
} else {
datatype->packed_description = malloc( args->total_pack_size );
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -46,8 +46,8 @@ mca_allocator_base_component_t mca_allocator_basic_component = {
/* Next the MCA v1.0.0 module meta data */
{
/* Whether the module is checkpointable or not */
false
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
mca_allocator_basic_component_init
};

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -125,8 +125,8 @@ mca_allocator_base_component_t mca_allocator_bucket_component = {
/* Next the MCA v1.0.0 module meta data */
{
/* Whether the module is checkpointable or not */
false
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
mca_allocator_bucket_module_init
};

Просмотреть файл

@ -1,5 +1,5 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
@ -22,10 +22,11 @@ headers += \
base/bml_base_endpoint.h
libmca_bml_la_SOURCES += \
base/bml_base_btl.c \
base/bml_base_btl.c \
base/bml_base_btl.h \
base/bml_base_endpoint.h \
base/bml_base_endpoint.c \
base/bml_base_endpoint.c \
base/bml_base_init.c \
base/bml_base_close.c \
base/bml_base_open.c
base/bml_base_open.c \
base/bml_base_ft.c

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -53,6 +53,7 @@ OMPI_DECLSPEC int mca_bml_base_init(bool enable_progress_threads,
OMPI_DECLSPEC int mca_bml_base_close(void);
OMPI_DECLSPEC bool mca_bml_base_inited(void);
OMPI_DECLSPEC int mca_bml_base_ft_event(int state);
/*

70
ompi/mca/bml/base/bml_base_ft.c Обычный файл
Просмотреть файл

@ -0,0 +1,70 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/mca/bml/bml.h"
#include "ompi/mca/bml/base/base.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
#include "ompi/mca/bml/base/bml_base_endpoint.h"
#include "ompi/mca/bml/base/bml_base_btl.h"
/**
 * Base (component-independent) fault tolerance event handler for the BML.
 *
 * No work is performed for any state: both state dispatches below have
 * empty branches, and the function always reports that the operation is
 * not implemented.
 *
 * @param state  Checkpoint/restart state (compared against OPAL_CRS_*)
 * @return OMPI_ERR_NOT_IMPLEMENTED in every case
 */
int mca_bml_base_ft_event(int state)
{
    /* First per-state dispatch: every branch is empty. */
    if (state == OPAL_CRS_CHECKPOINT) {
        /* nothing to do */
    } else if (state == OPAL_CRS_CONTINUE) {
        /* nothing to do */
    } else if (state == OPAL_CRS_RESTART) {
        /* nothing to do */
    } else if (state == OPAL_CRS_TERM) {
        /* nothing to do */
    } else {
        /* any other state is ignored */
    }

    /*
     * The BML is expected to forward the event to:
     *  - the BTL(s)
     *  - the MPool(s)
     * That currently cannot be done from outside a component,
     * so this function just returns "not implemented".
     */

    /* Second per-state dispatch: every branch is empty as well. */
    if (state == OPAL_CRS_CHECKPOINT) {
        /* nothing to do */
    } else if (state == OPAL_CRS_CONTINUE) {
        /* nothing to do */
    } else if (state == OPAL_CRS_RESTART) {
        /* nothing to do */
    } else if (state == OPAL_CRS_TERM) {
        /* nothing to do */
    } else {
        /* any other state is ignored */
    }

    return OMPI_ERR_NOT_IMPLEMENTED;
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -38,7 +38,8 @@ mca_bml_base_module_t mca_bml = {
NULL, /* bml_register */
NULL, /* bml_register_error */
NULL, /* bml_finalize*/
NULL /* bml_progress */
NULL, /* bml_progress */
NULL /* FT event */
};
mca_bml_base_component_t mca_bml_component;
@ -98,4 +99,3 @@ int mca_bml_base_init( bool enable_progress_threads,
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -34,6 +34,9 @@
#include "ompi/types.h"
#include "ompi/class/ompi_free_list.h"
#include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h"
#define OMPI_ENABLE_DEBUG_RELIABILITY 0
/*
@ -445,7 +448,6 @@ typedef struct mca_bml_base_module_t* (*mca_bml_base_component_init_fn_t)(
typedef int (*mca_bml_base_module_progress_fn_t)(void);
/**
* BML component descriptor. Contains component version information
* and component open/close/init functions.
@ -455,7 +457,6 @@ struct mca_bml_base_component_1_0_0_t {
mca_base_component_t bml_version;
mca_base_component_data_1_0_0_t bml_data;
mca_bml_base_component_init_fn_t bml_init;
};
typedef struct mca_bml_base_component_1_0_0_t mca_bml_base_component_1_0_0_t;
typedef struct mca_bml_base_component_1_0_0_t mca_bml_base_component_t;
@ -610,7 +611,12 @@ typedef int (*mca_bml_base_module_register_error_cb_fn_t)(
mca_btl_base_module_error_cb_fn_t cbfunc
);
/**
* Fault Tolerance Event Notification Function
* @param status Checkpoint Status
* @return OMPI_SUCCESS or failure status
*/
typedef int (*mca_bml_base_module_ft_event_fn_t)(int status);
/**
@ -638,6 +644,7 @@ struct mca_bml_base_module_t {
mca_bml_base_module_progress_fn_t bml_progress;
mca_bml_base_module_ft_event_fn_t bml_ft_event;
};
typedef struct mca_bml_base_module_t mca_bml_base_module_t;

Просмотреть файл

@ -52,7 +52,8 @@ mca_bml_r2_module_t mca_bml_r2 = {
mca_bml_r2_register,
mca_bml_r2_register_error,
mca_bml_r2_finalize,
mca_bml_r2_progress
mca_bml_r2_progress,
mca_bml_r2_ft_event
}
};
@ -797,3 +798,114 @@ int mca_bml_r2_component_fini(void)
}
int mca_bml_r2_ft_event(int state) {
size_t btl_idx;
int ret;
ompi_proc_t** procs = NULL;
size_t num_procs;
#if 0
opal_output(0, "bml:r2: ft_event: *** R2 BML *** (%d)\n", state);
#endif
if(OPAL_CRS_CHECKPOINT == state) {
/* Do nothing for now */
}
else if(OPAL_CRS_CONTINUE == state) {
/* Since nothingin Checkpoint, we are fine here */
}
else if(OPAL_CRS_RESTART == state) {
procs = ompi_proc_all(&num_procs);
if(NULL == procs)
goto END_PRE_RESTART;
if (OMPI_SUCCESS != (ret = mca_bml_r2_del_procs(num_procs, procs) ) ) {
goto END_PRE_RESTART;
}
END_PRE_RESTART:
;
}
else if(OPAL_CRS_TERM == state ) {
;
}
else {
;
}
/*
* Call ft_event in:
* - BTL
* - MPool
*/
for(btl_idx = 0; btl_idx < mca_bml_r2.num_btl_modules; btl_idx++) {
#if 0
opal_output(0, "bml:r2: ft_event: Notify the %s BTL.\n",
(mca_bml_r2.btl_modules[btl_idx])->btl_component->btl_version.mca_component_name);
#endif
/*
* Close the btl
*/
if( NULL != (mca_bml_r2.btl_modules[btl_idx])->btl_ft_event) {
if(OMPI_SUCCESS != (ret = (mca_bml_r2.btl_modules[btl_idx])->btl_ft_event(state) ) ) {
continue;
}
}
/*
* Close its mpool
*/
if( NULL != (mca_bml_r2.btl_modules[btl_idx])->btl_mpool) {
if(OMPI_SUCCESS != (ret = (mca_bml_r2.btl_modules[btl_idx])->btl_mpool->mpool_ft_event(state) ) ) {
continue;
}
}
}
if(OPAL_CRS_CHECKPOINT == state) {
;
}
else if(OPAL_CRS_CONTINUE == state) {
;
}
else if(OPAL_CRS_RESTART == state) {
struct mca_bml_base_endpoint_t ** endpoints = NULL;
ompi_bitmap_t reachable;
OBJ_CONSTRUCT(&reachable, ompi_bitmap_t);
if( OMPI_SUCCESS != (ret = ompi_bitmap_init(&reachable, num_procs)) ) {
goto END_POST_RESTART;
}
endpoints = (struct mca_bml_base_endpoint_t **) malloc ( num_procs *
sizeof(struct mca_bml_base_endpoint_t*));
if ( NULL == endpoints ) {
goto END_POST_RESTART;
}
/* Don't need to do this again since we still have the
* values from PRE_RESTART
* procs = ompi_proc_all(&num_procs);
*/
if (OMPI_SUCCESS != (ret = mca_bml_r2_add_procs(num_procs, procs, endpoints, &reachable) ) ) {
goto END_POST_RESTART;
}
END_POST_RESTART:
if ( NULL != endpoints ) {
free ( endpoints) ;
}
OBJ_DESTRUCT(&reachable);
}
else if(OPAL_CRS_TERM == state ) {
;
}
else {
;
}
if( NULL != procs)
free(procs);
return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -96,6 +96,8 @@ int mca_bml_r2_finalize( void );
int mca_bml_r2_component_fini(void);
int mca_bml_r2_ft_event(int status);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -46,8 +46,8 @@ mca_bml_base_component_1_0_0_t mca_bml_r2_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
mca_bml_r2_component_init
};

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -114,6 +114,9 @@
#include "ompi/types.h"
#include "ompi/mca/mpool/mpool.h"
#include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
@ -544,6 +547,12 @@ typedef void (*mca_btl_base_module_dump_fn_t)(
int verbose
);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint Status
* @return OMPI_SUCCESS or failure status
*/
typedef int (*mca_btl_base_module_ft_event_fn_t)(int state);
/**
* BTL module interface functions and attributes.
@ -582,6 +591,7 @@ struct mca_btl_base_module_t {
/* register a default error handler */
mca_btl_base_module_register_error_fn_t btl_register_error;
mca_btl_base_module_ft_event_fn_t btl_ft_event;
};
typedef struct mca_btl_base_module_t mca_btl_base_module_t;

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -87,8 +87,8 @@ mca_btl_gm_module_t mca_btl_gm_module = {
#endif
mca_btl_base_dump,
NULL, /* mpool */
mca_btl_gm_register_error_cb
mca_btl_gm_register_error_cb,
mca_btl_gm_ft_event
}
};
@ -956,3 +956,22 @@ int mca_btl_gm_finalize(struct mca_btl_base_module_t* btl)
return OMPI_SUCCESS;
}
/*
 * Fault tolerance event hook of the GM BTL.
 * Every branch of the state dispatch is empty, so no GM-specific work is
 * performed and OMPI_SUCCESS is returned for any state.
 */
int mca_btl_gm_ft_event(int state)
{
    if (state == OPAL_CRS_CHECKPOINT) {
        /* nothing to do */
    } else if (state == OPAL_CRS_CONTINUE) {
        /* nothing to do */
    } else if (state == OPAL_CRS_RESTART) {
        /* nothing to do */
    } else if (state == OPAL_CRS_TERM) {
        /* nothing to do */
    } else {
        /* any other state is ignored */
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -338,6 +338,12 @@ extern mca_btl_base_descriptor_t* mca_btl_gm_prepare_dst(
size_t reserve,
size_t* size);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OMPI_SUCCESS or failure status
*/
int mca_btl_gm_ft_event(int state);
/**
* Acquire a send token - queue the fragment if none available

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -73,9 +73,8 @@ mca_btl_gm_component_t mca_btl_gm_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
mca_btl_gm_component_init,

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -63,7 +63,8 @@ mca_btl_mvapi_module_t mca_btl_mvapi_module = {
mca_btl_mvapi_get,
mca_btl_mvapi_dump,
NULL, /* mpool */
NULL /* error call back registration */
NULL, /* error call back registration */
mca_btl_mvapi_ft_event
}
};
@ -827,3 +828,23 @@ void mca_btl_mvapi_dump(
opal_output( 0, "sd_wqe_hp %d\n", endpoint->sd_wqe_hp );
opal_output( 0, "sd_wqe_lp %d\n", endpoint->sd_wqe_lp );
}
/*
 * Fault tolerance event hook of the MVAPI BTL.
 * Every branch of the state dispatch is empty, so no MVAPI-specific work
 * is performed and OMPI_SUCCESS is returned for any state.
 */
int mca_btl_mvapi_ft_event(int state)
{
    if (state == OPAL_CRS_CHECKPOINT) {
        /* nothing to do */
    } else if (state == OPAL_CRS_CONTINUE) {
        /* nothing to do */
    } else if (state == OPAL_CRS_RESTART) {
        /* nothing to do */
    } else if (state == OPAL_CRS_TERM) {
        /* nothing to do */
    } else {
        /* any other state is ignored */
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -515,6 +515,13 @@ extern void mca_btl_mvapi_dump(
int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t* mvapi_btl);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OMPI_SUCCESS or failure status
*/
int mca_btl_mvapi_ft_event(int state);
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -77,9 +77,8 @@ mca_btl_mvapi_component_t mca_btl_mvapi_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
mca_btl_mvapi_component_init,

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -477,6 +477,27 @@ int mca_btl_mx_finalize( struct mca_btl_base_module_t* btl )
return OMPI_SUCCESS;
}
/*
 * Fault tolerance event hook of the MX BTL.
 * Every branch of the state dispatch is empty, so no MX-specific work is
 * performed and OMPI_SUCCESS is returned for any state.
 */
int mca_btl_mx_ft_event(int state)
{
    if (state == OPAL_CRS_CHECKPOINT) {
        /* nothing to do */
    } else if (state == OPAL_CRS_CONTINUE) {
        /* nothing to do */
    } else if (state == OPAL_CRS_RESTART) {
        /* nothing to do */
    } else if (state == OPAL_CRS_TERM) {
        /* nothing to do */
    } else {
        /* any other state is ignored */
    }

    return OMPI_SUCCESS;
}
mca_btl_mx_module_t mca_btl_mx_module = {
{
&mca_btl_mx_component.super,
@ -502,7 +523,7 @@ mca_btl_mx_module_t mca_btl_mx_module = {
NULL, /* get */
mca_btl_base_dump,
NULL, /* mpool */
NULL /* register error */
NULL, /* register error */
mca_btl_mx_ft_event
}
};

Просмотреть файл

@ -1,6 +1,6 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -290,6 +290,13 @@ mca_btl_mx_prepare_dst( struct mca_btl_base_module_t* btl,
size_t reserve,
size_t* size );
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OMPI_SUCCESS or failure status
*/
int mca_btl_mx_ft_event(int state);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -58,9 +58,8 @@ mca_btl_mx_component_t mca_btl_mx_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
mca_btl_mx_component_init,

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -78,7 +78,8 @@ mca_btl_openib_module_t mca_btl_openib_module = {
mca_btl_openib_get,
mca_btl_base_dump,
NULL, /* mpool */
mca_btl_openib_register_error_cb /* error call back registration */
mca_btl_openib_register_error_cb, /* error call back registration */
mca_btl_openib_ft_event
}
};
@ -905,3 +906,23 @@ int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t *openib_btl)
return OMPI_SUCCESS;
}
/*
 * Fault tolerance event hook of the OpenIB BTL.
 * Every branch of the state dispatch is empty, so no OpenIB-specific work
 * is performed and OMPI_SUCCESS is returned for any state.
 */
int mca_btl_openib_ft_event(int state)
{
    if (state == OPAL_CRS_CHECKPOINT) {
        /* nothing to do */
    } else if (state == OPAL_CRS_CONTINUE) {
        /* nothing to do */
    } else if (state == OPAL_CRS_RESTART) {
        /* nothing to do */
    } else if (state == OPAL_CRS_TERM) {
        /* nothing to do */
    } else {
        /* any other state is ignored */
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -440,6 +440,13 @@ extern void mca_btl_openib_send_frag_return(mca_btl_base_module_t* btl,
int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t* openib_btl);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OMPI_SUCCESS or failure status
*/
int mca_btl_openib_ft_event(int state);
#define BTL_OPENIB_HP_QP 0
#define BTL_OPENIB_LP_QP 1

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -111,9 +111,8 @@ mca_btl_openib_component_t mca_btl_openib_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
btl_openib_component_init,

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -66,7 +66,8 @@ mca_btl_portals_module_t mca_btl_portals_module = {
mca_btl_portals_get,
mca_btl_base_dump,
NULL, /* mpool */
NULL /* register error */
NULL, /* register error */
NULL
},
};

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -56,9 +56,8 @@ mca_btl_portals_component_t mca_btl_portals_component = {
/* Next the MCA v1.0.0 module meta data */
{
/* Whether the module is checkpointable or not */
false
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
mca_btl_portals_component_init,

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -67,7 +67,8 @@ mca_btl_base_module_t mca_btl_self = {
mca_btl_self_rdma, /* get */
mca_btl_base_dump,
NULL, /* mpool */
NULL /* register error cb */
NULL, /* register error cb */
mca_btl_self_ft_event
};
@ -399,3 +400,23 @@ int mca_btl_self_rdma( struct mca_btl_base_module_t* btl,
des->des_cbfunc(btl,endpoint,des,OMPI_SUCCESS);
return OMPI_SUCCESS;
}
/*
 * Fault tolerance event hook of the self (loopback) BTL.
 * Every branch of the state dispatch is empty, so no work is performed
 * and OMPI_SUCCESS is returned for any state.
 */
int mca_btl_self_ft_event(int state)
{
    if (state == OPAL_CRS_CHECKPOINT) {
        /* nothing to do */
    } else if (state == OPAL_CRS_CONTINUE) {
        /* nothing to do */
    } else if (state == OPAL_CRS_RESTART) {
        /* nothing to do */
    } else if (state == OPAL_CRS_TERM) {
        /* nothing to do */
    } else {
        /* any other state is ignored */
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -255,7 +255,14 @@ int mca_btl_self_rdma(
struct mca_btl_base_endpoint_t* endpoint,
struct mca_btl_base_descriptor_t* descriptor
);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OMPI_SUCCESS or failure status
*/
int mca_btl_self_ft_event(int state);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -63,8 +63,8 @@ mca_btl_self_component_t mca_btl_self_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
mca_btl_self_component_init,

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -108,7 +108,8 @@ mca_btl_sm_t mca_btl_sm[2] = {
NULL, /* get */
mca_btl_base_dump,
NULL, /* mpool */
mca_btl_sm_register_error_cb /* register error */
mca_btl_sm_register_error_cb, /* register error */
mca_btl_sm_ft_event
}
},
{
@ -136,7 +137,8 @@ mca_btl_sm_t mca_btl_sm[2] = {
NULL, /* get function */
mca_btl_base_dump,
NULL, /* mpool */
mca_btl_sm_register_error_cb /* register error */
mca_btl_sm_register_error_cb, /* register error */
mca_btl_sm_ft_event
}
}
};
@ -922,3 +924,23 @@ int mca_btl_sm_send(
MCA_BTL_SM_FIFO_WRITE(endpoint, endpoint->my_smp_rank, endpoint->peer_smp_rank, frag->hdr, rc);
return rc;
}
/*
 * Fault tolerance event hook of the shared memory (sm) BTL.
 * Every branch of the state dispatch is empty, so no work is performed
 * and OMPI_SUCCESS is returned for any state.
 */
int mca_btl_sm_ft_event(int state)
{
    if (state == OPAL_CRS_CHECKPOINT) {
        /* nothing to do */
    } else if (state == OPAL_CRS_CONTINUE) {
        /* nothing to do */
    } else if (state == OPAL_CRS_RESTART) {
        /* nothing to do */
    } else if (state == OPAL_CRS_TERM) {
        /* nothing to do */
    } else {
        /* any other state is ignored */
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,6 +1,6 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -345,6 +345,12 @@ extern int mca_btl_sm_send(
mca_btl_base_tag_t tag
);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OMPI_SUCCESS or failure status
*/
int mca_btl_sm_ft_event(int state);
#if OMPI_ENABLE_PROGRESS_THREADS == 1
void mca_btl_sm_component_event_thread(opal_object_t*);

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -78,8 +78,8 @@ mca_btl_sm_component_t mca_btl_sm_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
mca_btl_sm_component_init,

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -61,7 +61,8 @@ mca_btl_tcp_module_t mca_btl_tcp_module = {
NULL, /* get */
mca_btl_base_dump,
NULL, /* mpool */
NULL /* register error */
NULL, /* register error */
mca_btl_tcp_ft_event
}
};
@ -499,3 +500,31 @@ int mca_btl_tcp_finalize(struct mca_btl_base_module_t* btl)
return OMPI_SUCCESS;
}
/*
 * Fault tolerance event notification for the TCP BTL.
 *
 * No state performs any work at present; the function always reports
 * success.
 *
 * @param state Checkpoint state (compared against the OPAL_CRS_* values)
 * @return OMPI_SUCCESS
 */
int mca_btl_tcp_ft_event(int state)
{
    switch (state) {
    case OPAL_CRS_CHECKPOINT:
        /*
         * Do not remove the endpoints here: we will likely be
         * continuing from this point, so there is no need to do
         * much work.
         */
        break;
    case OPAL_CRS_CONTINUE:
        /* No action on continue. */
        break;
    case OPAL_CRS_RESTART:
        /* TODO: flush all of the endpoints (nothing is done yet). */
        break;
    case OPAL_CRS_TERM:
        /* No action on terminate. */
        break;
    default:
        /* Unrecognized states are ignored. */
        break;
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,6 +1,6 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -53,7 +53,7 @@ extern "C" {
/**
* Infiniband (TCP) BTL component.
* TCP BTL component.
*/
struct mca_btl_tcp_component_t {
@ -320,6 +320,13 @@ extern mca_btl_base_descriptor_t* mca_btl_tcp_prepare_dst(
size_t* size);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OMPI_SUCCESS or failure status
*/
int mca_btl_tcp_ft_event(int state);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -88,9 +88,8 @@ mca_btl_tcp_component_t mca_btl_tcp_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
mca_btl_tcp_component_init,

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -57,7 +57,8 @@ mca_btl_template_module_t mca_btl_template_module = {
NULL, /* get */
NULL, /*dump */
NULL, /* mpool */
NULL /* register error cb */
NULL, /* register error cb */
mca_btl_template_ft_event
}
};
@ -415,3 +416,22 @@ int mca_btl_template_finalize(struct mca_btl_base_module_t* btl)
return OMPI_SUCCESS;
}
/*
 * Fault tolerance event notification for the template BTL.
 *
 * Each state has its own (currently empty) case so that a BTL derived
 * from this template has an obvious place to add its handling. The
 * function always reports success.
 *
 * @param state Checkpoint state (compared against the OPAL_CRS_* values)
 * @return OMPI_SUCCESS
 */
int mca_btl_template_ft_event(int state)
{
    switch (state) {
    case OPAL_CRS_CHECKPOINT:
        /* No action on checkpoint. */
        break;
    case OPAL_CRS_CONTINUE:
        /* No action on continue. */
        break;
    case OPAL_CRS_RESTART:
        /* No action on restart. */
        break;
    case OPAL_CRS_TERM:
        /* No action on terminate. */
        break;
    default:
        /* Unrecognized states are ignored. */
        break;
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,6 +1,6 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -304,6 +304,12 @@ extern mca_btl_base_descriptor_t* mca_btl_template_prepare_dst(
size_t reserve,
size_t* size);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OMPI_SUCCESS or failure status
*/
int mca_btl_template_ft_event(int state);
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -57,9 +57,8 @@ mca_btl_template_component_t mca_btl_template_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
mca_btl_template_component_init,

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -69,7 +69,8 @@ mca_btl_udapl_module_t mca_btl_udapl_module = {
NULL, /* get */
mca_btl_base_dump,
NULL, /* mpool */
NULL /* register error cb */
NULL, /* register error cb */
mca_btl_udapl_ft_event
}
};
@ -828,3 +829,22 @@ int mca_btl_udapl_get(
return OMPI_ERR_NOT_IMPLEMENTED;
}
/*
 * Fault tolerance event notification for the uDAPL BTL.
 *
 * Every state, recognized or not, is currently a no-op; the function
 * always reports success.
 *
 * @param state Checkpoint state (compared against the OPAL_CRS_* values)
 * @return OMPI_SUCCESS
 */
int mca_btl_udapl_ft_event(int state)
{
    switch (state) {
    case OPAL_CRS_CHECKPOINT:
        /* No action on checkpoint. */
        break;
    case OPAL_CRS_CONTINUE:
        /* No action on continue. */
        break;
    case OPAL_CRS_RESTART:
        /* No action on restart. */
        break;
    case OPAL_CRS_TERM:
        /* No action on terminate. */
        break;
    default:
        /* Unrecognized states are ignored. */
        break;
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -385,7 +385,12 @@ extern mca_btl_base_descriptor_t* mca_btl_udapl_prepare_dst(
size_t reserve,
size_t* size);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OMPI_SUCCESS or failure status
*/
int mca_btl_udapl_ft_event(int state);
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -77,9 +77,8 @@ mca_btl_udapl_component_t mca_btl_udapl_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
mca_btl_udapl_component_init,

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -254,6 +254,7 @@ extern "C" {
int root,
struct ompi_communicator_t *comm);
int mca_coll_basic_ft_event(int status);
/* Utility functions */

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -78,9 +78,8 @@ const mca_coll_base_component_1_0_0_t mca_coll_basic_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
true
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* Initialization / querying functions */

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -55,7 +55,8 @@ static const mca_coll_base_module_1_0_0_t intra_linear = {
mca_coll_basic_reduce_scatter_intra,
mca_coll_basic_scan_intra,
mca_coll_basic_scatter_intra,
mca_coll_basic_scatterv_intra
mca_coll_basic_scatterv_intra,
mca_coll_basic_ft_event
};
@ -88,7 +89,8 @@ static const mca_coll_base_module_1_0_0_t intra_log = {
mca_coll_basic_reduce_scatter_intra,
mca_coll_basic_scan_intra,
mca_coll_basic_scatter_intra,
mca_coll_basic_scatterv_intra
mca_coll_basic_scatterv_intra,
mca_coll_basic_ft_event
};
@ -119,7 +121,8 @@ static const mca_coll_base_module_1_0_0_t inter_linear = {
mca_coll_basic_reduce_scatter_inter,
NULL,
mca_coll_basic_scatter_inter,
mca_coll_basic_scatterv_inter
mca_coll_basic_scatterv_inter,
mca_coll_basic_ft_event
};
@ -222,3 +225,23 @@ mca_coll_basic_module_finalize(struct ompi_communicator_t *comm)
comm->c_coll_basic_data = NULL;
return OMPI_SUCCESS;
}
/*
 * Fault tolerance event notification for the basic coll component.
 *
 * Every state, recognized or not, is currently a no-op; the function
 * always reports success.
 *
 * @param state Checkpoint state (compared against the OPAL_CRS_* values)
 * @return OMPI_SUCCESS
 */
int mca_coll_basic_ft_event(int state)
{
    switch (state) {
    case OPAL_CRS_CHECKPOINT:
        /* No action on checkpoint. */
        break;
    case OPAL_CRS_CONTINUE:
        /* No action on continue. */
        break;
    case OPAL_CRS_RESTART:
        /* No action on restart. */
        break;
    case OPAL_CRS_TERM:
        /* No action on terminate. */
        break;
    default:
        /* Unrecognized states are ignored. */
        break;
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -25,6 +25,8 @@
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h"
/*
* Forward declaration
@ -115,6 +117,14 @@ typedef int (*mca_coll_base_module_scatterv_fn_t)
int root, struct ompi_communicator_t *comm);
/**
* Fault Tolerance Awareness function
* @param state Checkpoint state
* @return OMPI_SUCCESS or failure status
*/
typedef int (*mca_coll_base_module_ft_event_fn_t) (int state);
/*
* Structure for coll v1.0.0 components
* Chained to MCA v1.0.0
@ -164,6 +174,8 @@ struct mca_coll_base_module_1_0_0_t {
mca_coll_base_module_scan_fn_t coll_scan;
mca_coll_base_module_scatter_fn_t coll_scatter;
mca_coll_base_module_scatterv_fn_t coll_scatterv;
mca_coll_base_module_ft_event_fn_t ft_event;
};
typedef struct mca_coll_base_module_1_0_0_t mca_coll_base_module_1_0_0_t;

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -209,6 +209,8 @@ extern "C" {
struct ompi_datatype_t *rdtype, int root,
struct ompi_communicator_t *comm);
int mca_coll_demo_ft_event(int status);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -80,9 +80,8 @@ const mca_coll_base_component_1_0_0_t mca_coll_demo_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
true
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* Initialization / querying functions */

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -55,7 +55,8 @@ static const mca_coll_base_module_1_0_0_t intra = {
mca_coll_demo_reduce_scatter_intra,
mca_coll_demo_scan_intra,
mca_coll_demo_scatter_intra,
mca_coll_demo_scatterv_intra
mca_coll_demo_scatterv_intra,
mca_coll_demo_ft_event
};
@ -86,7 +87,8 @@ static const mca_coll_base_module_1_0_0_t inter = {
mca_coll_demo_reduce_scatter_inter,
NULL,
mca_coll_demo_scatter_inter,
mca_coll_demo_scatterv_inter
mca_coll_demo_scatterv_inter,
mca_coll_demo_ft_event
};
@ -145,3 +147,23 @@ int mca_coll_demo_module_finalize(struct ompi_communicator_t *comm)
{
return OMPI_SUCCESS;
}
/*
 * Fault tolerance event notification for the demo coll component.
 *
 * Every state, recognized or not, is currently a no-op; the function
 * always reports success.
 *
 * The parameter must be named `state` because the body compares `state`
 * against the OPAL_CRS_* values. C parameter names are not part of the
 * function type, so existing prototypes remain compatible.
 *
 * @param state Checkpoint state (compared against the OPAL_CRS_* values)
 * @return OMPI_SUCCESS
 */
int mca_coll_demo_ft_event(int state)
{
    switch (state) {
    case OPAL_CRS_CHECKPOINT:
        /* No action on checkpoint. */
        break;
    case OPAL_CRS_CONTINUE:
        /* No action on continue. */
        break;
    case OPAL_CRS_RESTART:
        /* No action on restart. */
        break;
    case OPAL_CRS_TERM:
        /* No action on terminate. */
        break;
    default:
        /* Unrecognized states are ignored. */
        break;
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -78,7 +78,8 @@ static const mca_coll_base_module_1_0_0_t intra = {
NULL, /* reduce_scatter */
NULL, /* scan */
NULL, /* scatter */
NULL /* scatterv */
NULL, /* scatterv */
mca_coll_hierarch_ft_event
};
@ -107,6 +108,7 @@ static const mca_coll_base_module_1_0_0_t null_intra = {
NULL,
NULL,
NULL,
NULL,
NULL
};
@ -771,3 +773,22 @@ static void mca_coll_hierarch_dump_struct ( struct mca_coll_base_comm_t *c)
return;
}
/*
 * Fault tolerance event notification for the hierarch coll component.
 *
 * Every state, recognized or not, is currently a no-op; the function
 * always reports success.
 *
 * @param state Checkpoint state (compared against the OPAL_CRS_* values)
 * @return OMPI_SUCCESS
 */
int mca_coll_hierarch_ft_event(int state)
{
    switch (state) {
    case OPAL_CRS_CHECKPOINT:
        /* No action on checkpoint. */
        break;
    case OPAL_CRS_CONTINUE:
        /* No action on continue. */
        break;
    case OPAL_CRS_RESTART:
        /* No action on restart. */
        break;
    case OPAL_CRS_TERM:
        /* No action on terminate. */
        break;
    default:
        /* Unrecognized states are ignored. */
        break;
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -310,8 +310,7 @@ int mca_coll_hierarch_reduce_tmp(void *sbuf, void *rbuf, int count,
struct ompi_op_t *op,
int root, struct ompi_communicator_t *comm);
int mca_coll_hierarch_ft_event(int status);
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -81,8 +81,8 @@ const mca_coll_base_component_1_0_0_t mca_coll_hierarch_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
true
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* Initialization / querying functions */

Просмотреть файл

@ -331,9 +331,8 @@ const mca_coll_base_component_1_0_0_t mca_coll_libnbc_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
true
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* Initialization / querying functions */

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -127,6 +127,8 @@ extern int mca_coll_self_priority_param;
struct ompi_datatype_t *rdtype, int root,
struct ompi_communicator_t *comm);
int mca_coll_self_ft_event(int state);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -78,9 +78,8 @@ const mca_coll_base_component_1_0_0_t mca_coll_self_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
true
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* Initialization / querying functions */

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -56,7 +56,8 @@ static const mca_coll_base_module_1_0_0_t module = {
mca_coll_self_reduce_scatter_intra,
mca_coll_self_scan_intra,
mca_coll_self_scatter_intra,
mca_coll_self_scatterv_intra
mca_coll_self_scatterv_intra,
mca_coll_self_ft_event
};
@ -117,3 +118,23 @@ int mca_coll_self_module_finalize(struct ompi_communicator_t *comm)
{
return OMPI_SUCCESS;
}
/*
 * Fault tolerance event notification for the self coll component.
 *
 * Every state, recognized or not, is currently a no-op; the function
 * always reports success.
 *
 * @param state Checkpoint state (compared against the OPAL_CRS_* values)
 * @return OMPI_SUCCESS
 */
int mca_coll_self_ft_event(int state)
{
    switch (state) {
    case OPAL_CRS_CHECKPOINT:
        /* No action on checkpoint. */
        break;
    case OPAL_CRS_CONTINUE:
        /* No action on continue. */
        break;
    case OPAL_CRS_RESTART:
        /* No action on restart. */
        break;
    case OPAL_CRS_TERM:
        /* No action on terminate. */
        break;
    default:
        /* Unrecognized states are ignored. */
        break;
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -429,6 +429,8 @@ extern "C" {
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype, int root,
struct ompi_communicator_t *comm);
int mca_coll_sm_ft_event(int state);
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -81,9 +81,8 @@ mca_coll_sm_component_t mca_coll_sm_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
true
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
/* Initialization / querying functions */

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -103,7 +103,8 @@ static const mca_coll_base_module_1_0_0_t module = {
NULL,
NULL,
NULL,
NULL
NULL,
mca_coll_sm_ft_event
};
@ -793,3 +794,23 @@ int mca_coll_sm_bootstrap_finalize(void)
return OMPI_SUCCESS;
}
/*
 * Fault tolerance event notification for the sm coll component.
 *
 * Every state, recognized or not, is currently a no-op; the function
 * always reports success.
 *
 * @param state Checkpoint state (compared against the OPAL_CRS_* values)
 * @return OMPI_SUCCESS
 */
int mca_coll_sm_ft_event(int state)
{
    switch (state) {
    case OPAL_CRS_CHECKPOINT:
        /* No action on checkpoint. */
        break;
    case OPAL_CRS_CONTINUE:
        /* No action on continue. */
        break;
    case OPAL_CRS_RESTART:
        /* No action on restart. */
        break;
    case OPAL_CRS_TERM:
        /* No action on terminate. */
        break;
    default:
        /* Unrecognized states are ignored. */
        break;
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -272,6 +272,7 @@ extern int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT];
int ompi_coll_tuned_scatterv_inter_dec_fixed(SCATTERV_ARGS);
int ompi_coll_tuned_scatterv_inter_dec_dynamic(SCATTERV_ARGS);
int mca_coll_tuned_ft_event(int state);
/* Utility functions */

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -96,11 +96,9 @@ mca_coll_tuned_component_t mca_coll_tuned_component = {
},
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
true
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* Initialization / querying functions */

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -83,7 +83,8 @@ static const mca_coll_base_module_1_0_0_t intra_fixed = {
ompi_coll_tuned_scatter_intra_dec_fixed,
/* NULL, */
/* ompi_coll_tuned_scatterv_intra_dec_fixed */
NULL
NULL,
mca_coll_tuned_ft_event
};
static const mca_coll_base_module_1_0_0_t intra_dynamic = {
@ -126,7 +127,8 @@ static const mca_coll_base_module_1_0_0_t intra_dynamic = {
ompi_coll_tuned_scatter_intra_dec_dynamic,
/* NULL, */
/* ompi_coll_tuned_scatterv_intra_dec_dynamic */
NULL
NULL,
mca_coll_tuned_ft_event
};
/*
@ -175,7 +177,8 @@ static const mca_coll_base_module_1_0_0_t inter_fixed = {
/* ompi_coll_tuned_scatter_inter_dec_fixed, */
NULL,
/* ompi_coll_tuned_scatterv_inter_dec_fixed */
NULL
NULL,
NULL
};
static const mca_coll_base_module_1_0_0_t inter_dynamic = {
@ -218,7 +221,9 @@ static const mca_coll_base_module_1_0_0_t inter_dynamic = {
/* ompi_coll_tuned_scatter_inter_dec_dynamic, */
NULL,
/* ompi_coll_tuned_scatterv_inter_dec_dynamic */
NULL
NULL,
NULL
};
/*
@ -558,3 +563,22 @@ int ompi_coll_tuned_module_finalize(struct ompi_communicator_t *comm)
return OMPI_SUCCESS;
}
/*
 * Fault tolerance event notification for the tuned coll component.
 *
 * Every state, recognized or not, is currently a no-op; the function
 * always reports success.
 *
 * @param state Checkpoint state (compared against the OPAL_CRS_* values)
 * @return OMPI_SUCCESS
 */
int mca_coll_tuned_ft_event(int state)
{
    switch (state) {
    case OPAL_CRS_CHECKPOINT:
        /* No action on checkpoint. */
        break;
    case OPAL_CRS_CONTINUE:
        /* No action on continue. */
        break;
    case OPAL_CRS_RESTART:
        /* No action on restart. */
        break;
    case OPAL_CRS_TERM:
        /* No action on terminate. */
        break;
    default:
        /* Unrecognized states are ignored. */
        break;
    }

    return OMPI_SUCCESS;
}

45
ompi/mca/crcp/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,45 @@
#
# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# main library setup
noinst_LTLIBRARIES = libmca_crcp.la
libmca_crcp_la_SOURCES =
# header setup
nobase_ompi_HEADERS =
# local files
headers = crcp.h
libmca_crcp_la_SOURCES += $(headers)
# Manual pages
man_MANS = ompi_crcp.7
EXTRA_DIST = ompi_crcp.7
# Conditionally install the header files
if WANT_INSTALL_HEADERS
nobase_ompi_HEADERS += $(headers)
ompidir = $(includedir)/openmpi/ompi/mca/crcp
else
ompidir = $(includedir)
endif
include base/Makefile.am
distclean-local:
rm -f base/static-components.h

28
ompi/mca/crcp/base/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,28 @@
#
# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
dist_pkgdata_DATA = base/help-ompi-crcp-base.txt
headers += \
base/base.h
libmca_crcp_la_SOURCES += \
base/crcp_base_open.c \
base/crcp_base_close.c \
base/crcp_base_select.c \
base/crcp_base_fns.c

216
ompi/mca/crcp/base/base.h Обычный файл
Просмотреть файл

@ -0,0 +1,216 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_CRCP_BASE_H
#define OMPI_CRCP_BASE_H
#include "ompi_config.h"
#include "ompi/constants.h"
#include "orte/mca/rml/rml.h"
#include "orte/dss/dss.h"
#include "ompi/mca/crcp/crcp.h"
/*
* Global functions for MCA overall CRCP
*/
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
* Initialize the CRCP MCA framework
*
* @retval OMPI_SUCCESS Upon success
* @retval OMPI_ERROR Upon failures
*
* This function is invoked during ompi_init();
*/
OMPI_DECLSPEC int ompi_crcp_base_open(void);
/**
* Select an available component.
*
* @retval OMPI_SUCCESS Upon Success
* @retval OMPI_NOT_FOUND If no component can be selected
* @retval OMPI_ERROR Upon other failure
*
*/
OMPI_DECLSPEC int ompi_crcp_base_select(void);
/**
* Finalize the CRCP MCA framework
*
* @retval OMPI_SUCCESS Upon success
* @retval OMPI_ERROR Upon failures
*
* This function is invoked during ompi_finalize();
*/
OMPI_DECLSPEC int ompi_crcp_base_close(void);
/**
* 'None' component functions
* These are to be used when no component is selected.
* They just return success, and empty strings as necessary.
*/
int ompi_crcp_base_none_open(void);
int ompi_crcp_base_none_close(void);
int ompi_crcp_base_module_init(void);
int ompi_crcp_base_module_finalize(void);
/* PML Interface */
ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_enable( bool enable, ompi_crcp_base_pml_state_t* );
ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_add_comm( struct ompi_communicator_t* comm, ompi_crcp_base_pml_state_t* );
ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_del_comm( struct ompi_communicator_t* comm, ompi_crcp_base_pml_state_t* );
ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_add_procs( struct ompi_proc_t **procs, size_t nprocs, ompi_crcp_base_pml_state_t* );
ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_del_procs( struct ompi_proc_t **procs, size_t nprocs, ompi_crcp_base_pml_state_t* );
ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_progress(ompi_crcp_base_pml_state_t*);
ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_iprobe(int dst, int tag, struct ompi_communicator_t* comm, int *matched, ompi_status_public_t* status, ompi_crcp_base_pml_state_t* );
ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_probe( int dst, int tag, struct ompi_communicator_t* comm, ompi_status_public_t* status, ompi_crcp_base_pml_state_t* );
ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_isend_init( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag,
mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request, ompi_crcp_base_pml_state_t* );
ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_isend( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag,
mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, struct ompi_request_t **request, ompi_crcp_base_pml_state_t* );
ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_send( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag,
mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm, ompi_crcp_base_pml_state_t* );
ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_irecv_init( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag,
struct ompi_communicator_t* comm, struct ompi_request_t **request, ompi_crcp_base_pml_state_t*);
ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_irecv( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag,
struct ompi_communicator_t* comm, struct ompi_request_t **request, ompi_crcp_base_pml_state_t* );
ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_recv( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag,
struct ompi_communicator_t* comm, ompi_status_public_t* status, ompi_crcp_base_pml_state_t*);
ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_dump( struct ompi_communicator_t* comm, int verbose, ompi_crcp_base_pml_state_t* );
ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_start( size_t count, ompi_request_t** requests, ompi_crcp_base_pml_state_t* );
ompi_crcp_base_pml_state_t* ompi_crcp_base_none_pml_ft_event(int state, ompi_crcp_base_pml_state_t*);
/* Request Interface */
int ompi_crcp_base_none_request_complete( struct ompi_request_t *request );
/* BTL Interface */
/*
 * Prototypes of the "none" BTL wrapper functions.
 *
 * Every function takes an ompi_crcp_base_btl_state_t* as its last argument
 * and returns an ompi_crcp_base_btl_state_t*.  The definitions in
 * crcp_base_fns.c set the state's error_code to OMPI_SUCCESS and return the
 * same state pointer without touching any other argument.
 */
ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_add_procs( struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t** procs,
struct mca_btl_base_endpoint_t** endpoints,
struct ompi_bitmap_t* reachable,
ompi_crcp_base_btl_state_t* );
ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_del_procs( struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t** procs,
struct mca_btl_base_endpoint_t**,
ompi_crcp_base_btl_state_t*);
ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_register( struct mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_module_recv_cb_fn_t cbfunc,
void* cbdata,
ompi_crcp_base_btl_state_t*);
ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_finalize( struct mca_btl_base_module_t* btl,
ompi_crcp_base_btl_state_t*);
ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_alloc( struct mca_btl_base_module_t* btl,
size_t size,
ompi_crcp_base_btl_state_t*);
ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_free( struct mca_btl_base_module_t* btl,
mca_btl_base_descriptor_t* descriptor,
ompi_crcp_base_btl_state_t*);
ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_prepare_src( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration,
struct ompi_convertor_t* convertor,
size_t reserve,
size_t* size,
ompi_crcp_base_btl_state_t*);
ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_prepare_dst( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration,
struct ompi_convertor_t* convertor,
size_t reserve,
size_t* size,
ompi_crcp_base_btl_state_t*);
ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_send( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_btl_base_descriptor_t* descriptor,
mca_btl_base_tag_t tag,
ompi_crcp_base_btl_state_t*);
ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_put( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_btl_base_descriptor_t* descriptor,
ompi_crcp_base_btl_state_t*);
ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_get( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_btl_base_descriptor_t* descriptor,
ompi_crcp_base_btl_state_t*);
ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_dump( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
int verbose,
ompi_crcp_base_btl_state_t*);
ompi_crcp_base_btl_state_t*
ompi_crcp_base_none_btl_ft_event(int state,
ompi_crcp_base_btl_state_t*);
/* Utility Functions */
/* Shuts down and re-creates the PML/BML/BTL stack of this process;
 * returns OMPI_SUCCESS or the error code of the step that failed. */
OMPI_DECLSPEC int ompi_crcp_base_reboot_pml(ompi_crcp_base_pml_state_t* pml_state);
/* opal_output stream id of the framework; -1 when verbosity is 0. */
OMPI_DECLSPEC extern int ompi_crcp_base_output;
/* List filled by mca_base_components_open() in ompi_crcp_base_open(). */
OMPI_DECLSPEC extern opal_list_t ompi_crcp_base_components_available;
/* Copy of the component chosen by ompi_crcp_base_select(). */
OMPI_DECLSPEC extern ompi_crcp_base_component_t ompi_crcp_base_selected_component;
/* Copy of the module (function table) chosen by ompi_crcp_base_select(). */
OMPI_DECLSPEC extern ompi_crcp_base_module_t ompi_crcp;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* OMPI_CRCP_BASE_H */

39
ompi/mca/crcp/base/crcp_base_close.c Обычный файл
Просмотреть файл

@ -0,0 +1,39 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/crcp/crcp.h"
#include "ompi/mca/crcp/base/base.h"
/**
 * Shut down the CRCP framework.
 *
 * Finalizes the selected module (if one was ever selected) and then closes
 * every component that is still on the list of available components.
 *
 * @return OMPI_SUCCESS always.
 */
int ompi_crcp_base_close(void)
{
    /* Close the selected component.
     * ompi_crcp starts out with NULL function pointers and is only filled
     * in by ompi_crcp_base_select(); guard the call so that closing the
     * framework without a prior selection does not jump through NULL. */
    if (NULL != ompi_crcp.crcp_finalize) {
        ompi_crcp.crcp_finalize();
    }

    /* Close all available modules that are open */
    mca_base_components_close(ompi_crcp_base_output,
                              &ompi_crcp_base_components_available,
                              NULL);

    return OMPI_SUCCESS;
}

617
ompi/mca/crcp/base/crcp_base_fns.c Обычный файл
Просмотреть файл

@ -0,0 +1,617 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#if HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#if HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <time.h>
#include <libgen.h>
#include <ctype.h>
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/util/output.h"
#include "opal/util/os_dirpath.h"
#include "orte/mca/smr/smr.h"
#include "orte/mca/gpr/gpr.h"
#include "ompi/communicator/communicator.h"
#include "ompi/proc/proc.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h"
#include "ompi/mca/crcp/crcp.h"
#include "ompi/mca/crcp/base/base.h"
#include "ompi/mca/bml/bml.h"
#include "ompi/mca/bml/base/base.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/pml/base/base.h"
#include "ompi/mca/pml/base/pml_base_module_exchange.h"
#include "ompi/mca/pml/base/pml_base_request.h"
/******************
* Local Functions
******************/
/******************
* Object stuff
*
* Class definitions for the PML and BTL state objects that are threaded
* through the wrapper functions below.  Both name ompi_free_list_item_t as
* their second argument (presumably the parent class) and pass NULL for the
* remaining two slots (presumably constructor and destructor -- confirm
* against the OBJ_CLASS_INSTANCE definition).
******************/
OBJ_CLASS_INSTANCE(ompi_crcp_base_pml_state_t,
ompi_free_list_item_t,
NULL,
NULL
);
OBJ_CLASS_INSTANCE(ompi_crcp_base_btl_state_t,
ompi_free_list_item_t,
NULL,
NULL
);
/***********************
* None component stuff
************************/
/**
 * Component-open hook of the built-in "none" component; nothing to set up.
 *
 * @return OMPI_SUCCESS always.
 */
int
ompi_crcp_base_none_open(void)
{
    return OMPI_SUCCESS;
}
/**
 * Component-close hook of the built-in "none" component; nothing to release.
 *
 * @return OMPI_SUCCESS always.
 */
int
ompi_crcp_base_none_close(void)
{
    return OMPI_SUCCESS;
}
/**
 * Initialization function installed in the "none" module table; does nothing.
 *
 * @return OMPI_SUCCESS always.
 */
int
ompi_crcp_base_module_init(void)
{
    return OMPI_SUCCESS;
}
/**
 * Finalization function installed in the "none" module table; does nothing.
 *
 * @return OMPI_SUCCESS always.
 */
int
ompi_crcp_base_module_finalize(void)
{
    return OMPI_SUCCESS;
}
/****************
* PML Wrapper
****************/
/**
 * "none" stub for the PML enable wrapper.
 *
 * @param enable    Ignored.
 * @param pml_state State object supplied by the caller (must not be NULL).
 * @return pml_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_pml_state_t *
ompi_crcp_base_none_pml_enable(bool enable,
                               ompi_crcp_base_pml_state_t *pml_state)
{
    pml_state->error_code = OMPI_SUCCESS;

    return pml_state;
}
/**
 * "none" stub for the PML add_comm wrapper.
 *
 * @param comm      Ignored.
 * @param pml_state State object supplied by the caller (must not be NULL).
 * @return pml_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_pml_state_t *
ompi_crcp_base_none_pml_add_comm(struct ompi_communicator_t *comm,
                                 ompi_crcp_base_pml_state_t *pml_state)
{
    pml_state->error_code = OMPI_SUCCESS;

    return pml_state;
}
/**
 * "none" stub for the PML del_comm wrapper.
 *
 * @param comm      Ignored.
 * @param pml_state State object supplied by the caller (must not be NULL).
 * @return pml_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_pml_state_t *
ompi_crcp_base_none_pml_del_comm(struct ompi_communicator_t *comm,
                                 ompi_crcp_base_pml_state_t *pml_state)
{
    pml_state->error_code = OMPI_SUCCESS;

    return pml_state;
}
/**
 * "none" stub for the PML add_procs wrapper.
 *
 * @param procs     Ignored.
 * @param nprocs    Ignored.
 * @param pml_state State object supplied by the caller (must not be NULL).
 * @return pml_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_pml_state_t *
ompi_crcp_base_none_pml_add_procs(struct ompi_proc_t **procs,
                                  size_t nprocs,
                                  ompi_crcp_base_pml_state_t *pml_state)
{
    pml_state->error_code = OMPI_SUCCESS;

    return pml_state;
}
/**
 * "none" stub for the PML del_procs wrapper.
 *
 * @param procs     Ignored.
 * @param nprocs    Ignored.
 * @param pml_state State object supplied by the caller (must not be NULL).
 * @return pml_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_pml_state_t *
ompi_crcp_base_none_pml_del_procs(struct ompi_proc_t **procs,
                                  size_t nprocs,
                                  ompi_crcp_base_pml_state_t *pml_state)
{
    pml_state->error_code = OMPI_SUCCESS;

    return pml_state;
}
/**
 * "none" stub for the PML progress wrapper.
 *
 * @param pml_state State object supplied by the caller (must not be NULL).
 * @return pml_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_pml_state_t *
ompi_crcp_base_none_pml_progress(ompi_crcp_base_pml_state_t *pml_state)
{
    pml_state->error_code = OMPI_SUCCESS;

    return pml_state;
}
/**
 * "none" stub for the PML iprobe wrapper.
 *
 * dst, tag, comm, matched and status are ignored (matched and status are
 * not written).
 *
 * @param pml_state State object supplied by the caller (must not be NULL).
 * @return pml_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_pml_state_t *
ompi_crcp_base_none_pml_iprobe(int dst, int tag,
                               struct ompi_communicator_t *comm,
                               int *matched, ompi_status_public_t *status,
                               ompi_crcp_base_pml_state_t *pml_state)
{
    pml_state->error_code = OMPI_SUCCESS;

    return pml_state;
}
/**
 * "none" stub for the PML probe wrapper.
 *
 * dst, tag, comm and status are ignored (status is not written).
 *
 * @param pml_state State object supplied by the caller (must not be NULL).
 * @return pml_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_pml_state_t *
ompi_crcp_base_none_pml_probe(int dst, int tag,
                              struct ompi_communicator_t *comm,
                              ompi_status_public_t *status,
                              ompi_crcp_base_pml_state_t *pml_state)
{
    pml_state->error_code = OMPI_SUCCESS;

    return pml_state;
}
/**
 * "none" stub for the PML isend_init wrapper.
 *
 * All message arguments (buf, count, datatype, dst, tag, mode, comm,
 * request) are ignored.
 *
 * @param pml_state State object supplied by the caller (must not be NULL).
 * @return pml_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_pml_state_t *
ompi_crcp_base_none_pml_isend_init(void *buf, size_t count,
                                   ompi_datatype_t *datatype,
                                   int dst, int tag,
                                   mca_pml_base_send_mode_t mode,
                                   struct ompi_communicator_t *comm,
                                   struct ompi_request_t **request,
                                   ompi_crcp_base_pml_state_t *pml_state)
{
    pml_state->error_code = OMPI_SUCCESS;

    return pml_state;
}
/**
 * "none" stub for the PML isend wrapper.
 *
 * All message arguments (buf, count, datatype, dst, tag, mode, comm,
 * request) are ignored.
 *
 * @param pml_state State object supplied by the caller (must not be NULL).
 * @return pml_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_pml_state_t *
ompi_crcp_base_none_pml_isend(void *buf, size_t count,
                              ompi_datatype_t *datatype,
                              int dst, int tag,
                              mca_pml_base_send_mode_t mode,
                              struct ompi_communicator_t *comm,
                              struct ompi_request_t **request,
                              ompi_crcp_base_pml_state_t *pml_state)
{
    pml_state->error_code = OMPI_SUCCESS;

    return pml_state;
}
/**
 * "none" stub for the PML send wrapper.
 *
 * All message arguments (buf, count, datatype, dst, tag, mode, comm) are
 * ignored.
 *
 * @param pml_state State object supplied by the caller (must not be NULL).
 * @return pml_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_pml_state_t *
ompi_crcp_base_none_pml_send(void *buf, size_t count,
                             ompi_datatype_t *datatype,
                             int dst, int tag,
                             mca_pml_base_send_mode_t mode,
                             struct ompi_communicator_t *comm,
                             ompi_crcp_base_pml_state_t *pml_state)
{
    pml_state->error_code = OMPI_SUCCESS;

    return pml_state;
}
/**
 * "none" stub for the PML irecv_init wrapper.
 *
 * All message arguments (buf, count, datatype, src, tag, comm, request) are
 * ignored.
 *
 * @param pml_state State object supplied by the caller (must not be NULL).
 * @return pml_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_pml_state_t *
ompi_crcp_base_none_pml_irecv_init(void *buf, size_t count,
                                   ompi_datatype_t *datatype,
                                   int src, int tag,
                                   struct ompi_communicator_t *comm,
                                   struct ompi_request_t **request,
                                   ompi_crcp_base_pml_state_t *pml_state)
{
    pml_state->error_code = OMPI_SUCCESS;

    return pml_state;
}
/**
 * "none" stub for the PML irecv wrapper.
 *
 * All message arguments (buf, count, datatype, src, tag, comm, request) are
 * ignored.
 *
 * @param pml_state State object supplied by the caller (must not be NULL).
 * @return pml_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_pml_state_t *
ompi_crcp_base_none_pml_irecv(void *buf, size_t count,
                              ompi_datatype_t *datatype,
                              int src, int tag,
                              struct ompi_communicator_t *comm,
                              struct ompi_request_t **request,
                              ompi_crcp_base_pml_state_t *pml_state)
{
    pml_state->error_code = OMPI_SUCCESS;

    return pml_state;
}
/**
 * "none" stub for the PML recv wrapper.
 *
 * All message arguments (buf, count, datatype, src, tag, comm, status) are
 * ignored; status is not written.
 *
 * @param pml_state State object supplied by the caller (must not be NULL).
 * @return pml_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_pml_state_t *
ompi_crcp_base_none_pml_recv(void *buf, size_t count,
                             ompi_datatype_t *datatype,
                             int src, int tag,
                             struct ompi_communicator_t *comm,
                             ompi_status_public_t *status,
                             ompi_crcp_base_pml_state_t *pml_state)
{
    pml_state->error_code = OMPI_SUCCESS;

    return pml_state;
}
/**
 * "none" stub for the PML dump wrapper.
 *
 * @param comm      Ignored.
 * @param verbose   Ignored.
 * @param pml_state State object supplied by the caller (must not be NULL).
 * @return pml_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_pml_state_t *
ompi_crcp_base_none_pml_dump(struct ompi_communicator_t *comm,
                             int verbose,
                             ompi_crcp_base_pml_state_t *pml_state)
{
    pml_state->error_code = OMPI_SUCCESS;

    return pml_state;
}
/**
 * "none" stub for the PML start wrapper.
 *
 * @param count     Ignored.
 * @param requests  Ignored.
 * @param pml_state State object supplied by the caller (must not be NULL).
 * @return pml_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_pml_state_t *
ompi_crcp_base_none_pml_start(size_t count,
                              ompi_request_t **requests,
                              ompi_crcp_base_pml_state_t *pml_state)
{
    pml_state->error_code = OMPI_SUCCESS;

    return pml_state;
}
/**
 * "none" stub for the PML ft_event wrapper.
 *
 * @param state     Ignored.
 * @param pml_state State object supplied by the caller (must not be NULL).
 * @return pml_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_pml_state_t *
ompi_crcp_base_none_pml_ft_event(int state,
                                 ompi_crcp_base_pml_state_t *pml_state)
{
    pml_state->error_code = OMPI_SUCCESS;

    return pml_state;
}
/********************
* Request Interface
********************/
/**
 * "none" stub for the request-completion wrapper.
 *
 * @param request Ignored.
 * @return OMPI_SUCCESS always.
 */
int
ompi_crcp_base_none_request_complete(struct ompi_request_t *request)
{
    return OMPI_SUCCESS;
}
/********************
* BTL Interface
********************/
/**
 * "none" stub for the BTL add_procs wrapper.
 *
 * btl, nprocs, procs, endpoints and reachable are ignored.
 *
 * @param btl_state State object supplied by the caller (must not be NULL).
 * @return btl_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_none_btl_add_procs(struct mca_btl_base_module_t *btl,
                                  size_t nprocs,
                                  struct ompi_proc_t **procs,
                                  struct mca_btl_base_endpoint_t **endpoints,
                                  struct ompi_bitmap_t *reachable,
                                  ompi_crcp_base_btl_state_t *btl_state)
{
    btl_state->error_code = OMPI_SUCCESS;

    return btl_state;
}
/**
 * "none" stub for the BTL del_procs wrapper.
 *
 * btl, nprocs, procs and endpoints are ignored.
 *
 * @param btl_state State object supplied by the caller (must not be NULL).
 * @return btl_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_none_btl_del_procs(struct mca_btl_base_module_t *btl,
                                  size_t nprocs,
                                  struct ompi_proc_t **procs,
                                  struct mca_btl_base_endpoint_t **endpoints,
                                  ompi_crcp_base_btl_state_t *btl_state)
{
    btl_state->error_code = OMPI_SUCCESS;

    return btl_state;
}
/**
 * "none" stub for the BTL register wrapper.
 *
 * btl, tag, cbfunc and cbdata are ignored; no callback is registered here.
 *
 * @param btl_state State object supplied by the caller (must not be NULL).
 * @return btl_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_none_btl_register(struct mca_btl_base_module_t *btl,
                                 mca_btl_base_tag_t tag,
                                 mca_btl_base_module_recv_cb_fn_t cbfunc,
                                 void *cbdata,
                                 ompi_crcp_base_btl_state_t *btl_state)
{
    btl_state->error_code = OMPI_SUCCESS;

    return btl_state;
}
/**
 * "none" stub for the BTL finalize wrapper.
 *
 * @param btl       Ignored.
 * @param btl_state State object supplied by the caller (must not be NULL).
 * @return btl_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_none_btl_finalize(struct mca_btl_base_module_t *btl,
                                 ompi_crcp_base_btl_state_t *btl_state)
{
    btl_state->error_code = OMPI_SUCCESS;

    return btl_state;
}
/**
 * "none" stub for the BTL alloc wrapper; performs no allocation itself.
 *
 * @param btl       Ignored.
 * @param size      Ignored.
 * @param btl_state State object supplied by the caller (must not be NULL).
 * @return btl_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_none_btl_alloc(struct mca_btl_base_module_t *btl,
                              size_t size,
                              ompi_crcp_base_btl_state_t *btl_state)
{
    btl_state->error_code = OMPI_SUCCESS;

    return btl_state;
}
/**
 * "none" stub for the BTL free wrapper; the descriptor is not touched.
 *
 * @param btl        Ignored.
 * @param descriptor Ignored.
 * @param btl_state  State object supplied by the caller (must not be NULL).
 * @return btl_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_none_btl_free(struct mca_btl_base_module_t *btl,
                             mca_btl_base_descriptor_t *descriptor,
                             ompi_crcp_base_btl_state_t *btl_state)
{
    btl_state->error_code = OMPI_SUCCESS;

    return btl_state;
}
/**
 * "none" stub for the BTL prepare_src wrapper.
 *
 * btl, endpoint, registration, convertor, reserve and size are ignored
 * (*size is not modified).
 *
 * @param btl_state State object supplied by the caller (must not be NULL).
 * @return btl_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_none_btl_prepare_src(struct mca_btl_base_module_t *btl,
                                    struct mca_btl_base_endpoint_t *endpoint,
                                    mca_mpool_base_registration_t *registration,
                                    struct ompi_convertor_t *convertor,
                                    size_t reserve,
                                    size_t *size,
                                    ompi_crcp_base_btl_state_t *btl_state)
{
    btl_state->error_code = OMPI_SUCCESS;

    return btl_state;
}
/**
 * "none" stub for the BTL prepare_dst wrapper.
 *
 * btl, endpoint, registration, convertor, reserve and size are ignored
 * (*size is not modified).
 *
 * @param btl_state State object supplied by the caller (must not be NULL).
 * @return btl_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_none_btl_prepare_dst(struct mca_btl_base_module_t *btl,
                                    struct mca_btl_base_endpoint_t *endpoint,
                                    mca_mpool_base_registration_t *registration,
                                    struct ompi_convertor_t *convertor,
                                    size_t reserve,
                                    size_t *size,
                                    ompi_crcp_base_btl_state_t *btl_state)
{
    btl_state->error_code = OMPI_SUCCESS;

    return btl_state;
}
/**
 * "none" stub for the BTL send wrapper.
 *
 * btl, endpoint, descriptor and tag are ignored.
 *
 * @param btl_state State object supplied by the caller (must not be NULL).
 * @return btl_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_none_btl_send(struct mca_btl_base_module_t *btl,
                             struct mca_btl_base_endpoint_t *endpoint,
                             struct mca_btl_base_descriptor_t *descriptor,
                             mca_btl_base_tag_t tag,
                             ompi_crcp_base_btl_state_t *btl_state)
{
    btl_state->error_code = OMPI_SUCCESS;

    return btl_state;
}
/**
 * "none" stub for the BTL put wrapper.
 *
 * btl, endpoint and descriptor are ignored.
 *
 * @param btl_state State object supplied by the caller (must not be NULL).
 * @return btl_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_none_btl_put(struct mca_btl_base_module_t *btl,
                            struct mca_btl_base_endpoint_t *endpoint,
                            struct mca_btl_base_descriptor_t *descriptor,
                            ompi_crcp_base_btl_state_t *btl_state)
{
    btl_state->error_code = OMPI_SUCCESS;

    return btl_state;
}
/**
 * "none" stub for the BTL get wrapper.
 *
 * btl, endpoint and descriptor are ignored.
 *
 * @param btl_state State object supplied by the caller (must not be NULL).
 * @return btl_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_none_btl_get(struct mca_btl_base_module_t *btl,
                            struct mca_btl_base_endpoint_t *endpoint,
                            struct mca_btl_base_descriptor_t *descriptor,
                            ompi_crcp_base_btl_state_t *btl_state)
{
    btl_state->error_code = OMPI_SUCCESS;

    return btl_state;
}
/**
 * "none" stub for the BTL dump wrapper.
 *
 * btl, endpoint and verbose are ignored; nothing is printed.
 *
 * @param btl_state State object supplied by the caller (must not be NULL).
 * @return btl_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_none_btl_dump(struct mca_btl_base_module_t *btl,
                             struct mca_btl_base_endpoint_t *endpoint,
                             int verbose,
                             ompi_crcp_base_btl_state_t *btl_state)
{
    btl_state->error_code = OMPI_SUCCESS;

    return btl_state;
}
/**
 * "none" stub for the BTL ft_event wrapper.
 *
 * @param state     Ignored.
 * @param btl_state State object supplied by the caller (must not be NULL).
 * @return btl_state, with error_code set to OMPI_SUCCESS.
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_none_btl_ft_event(int state,
                                 ompi_crcp_base_btl_state_t *btl_state)
{
    btl_state->error_code = OMPI_SUCCESS;

    return btl_state;
}
/********************
* Utility functions
********************/
/**
 * Tear down and re-create the PML/BML/BTL stack of the calling process.
 *
 * Sequence, as implemented below:
 *   1. disable the PML, remove every valid communicator and every known
 *      process from it;
 *   2. close the PML framework, finalize the BML and the PML modex;
 *   3. reset the per-process modex/bml/pml/hostname/flags fields;
 *   4. re-open the PML framework, re-initialize and exchange the modex,
 *      re-select a PML and pass stage gate 1;
 *   5. enable the PML, add the processes and communicators back and pass
 *      stage gate 2.
 *
 * @param pml_state Currently unused.
 *
 * @return OMPI_SUCCESS on success, otherwise the error code of the first
 *         step that failed (OMPI_ERROR if ompi_proc_world() returned NULL).
 *         On failure the stack is left in whatever intermediate state had
 *         been reached; no rollback is attempted.
 */
int ompi_crcp_base_reboot_pml(ompi_crcp_base_pml_state_t* pml_state) {
    int ret;
    ompi_proc_t** procs = NULL;
    size_t nprocs = 0;
    const char *error_msg = NULL;
    int return_code = OMPI_SUCCESS;
    ompi_communicator_t *tmp_comm;
    int comm_size = 0;
    uint32_t c = 0;

    opal_output_verbose(5, ompi_crcp_base_output,
                        "crcp:coord: reboot_pml(): PML/BML/BTL are rebooting [PML = %s]",
                        mca_pml_base_selected_component.pmlm_version.mca_component_name);

    opal_output_verbose(25, ompi_crcp_base_output,
                        "crcp:coord: reboot_pml(): Disable PML");
    /* Disable the PML */
    if( OMPI_SUCCESS != (ret = MCA_PML_CALL(enable(false)) ) ) {
        error_msg = "PML control failed";
        return_code = ret;
        goto cleanup;
    }

    /* Get all the processes that we know about */
    nprocs = 0;
    if (NULL == (procs = ompi_proc_world(&nprocs))) {
        error_msg = "ompi_proc_world() failed";
        /* 'ret' still holds the success code of the previous call here,
         * so it must not be used as the error value. */
        return_code = OMPI_ERROR;
        goto cleanup;
    }

    opal_output_verbose(25, ompi_crcp_base_output,
                        "crcp:coord: reboot_pml(): Delete all communicators from the PML");
    /* Get all the communicators that we know about */
    comm_size = ompi_pointer_array_get_size(&ompi_mpi_communicators);
    for( c = 0; c < (uint32_t)comm_size; ++c) {
        tmp_comm = ompi_comm_lookup(c);
        if( ompi_comm_invalid(tmp_comm) ) {
            continue;
        }
        if( OMPI_SUCCESS != (ret = MCA_PML_CALL(del_comm(tmp_comm)) ) ) {
            error_msg = "PML del comm failed";
            return_code = ret;
            goto cleanup;
        }
    }

    opal_output_verbose(25, ompi_crcp_base_output,
                        "crcp:coord: reboot_pml(): Delete all %d processes from the PML",
                        (int)nprocs);
    /* Delete all the procs */
    if( OMPI_SUCCESS != (ret = MCA_PML_CALL(del_procs(procs, nprocs)) ) ) {
        error_msg = "PML del procs failed";
        return_code = ret;
        goto cleanup;
    }

    /* Shutdown the PML/BML/BTL */
    opal_output_verbose(25, ompi_crcp_base_output,
                        "crcp:coord: reboot_pml(): Shutdown the PML");
    mca_pml_base_close();

    opal_output_verbose(25, ompi_crcp_base_output,
                        "crcp:coord: reboot_pml(): Shutdown the BML");
    mca_bml.bml_finalize();

    if (OMPI_SUCCESS != (ret = mca_pml_base_modex_finalize())) {
        error_msg = "PML base_modex_finalize failed";
        return_code = ret;
        goto cleanup;
    }

    /* Refresh the ompi_proc structures &
     * Since they are pointed to by the communicators then they are updated as well */
    opal_output_verbose(25, ompi_crcp_base_output,
                        "crcp:coord: reboot_pml(): Refresh Process List");
    for(c = 0; c < nprocs; ++c) {
        if( procs[c]->proc_modex != NULL ) {
            OBJ_RELEASE(procs[c]->proc_modex);
            procs[c]->proc_modex = NULL;
        }
        if( procs[c]->proc_bml != NULL ) {
            OBJ_RELEASE( procs[c]->proc_bml);
            procs[c]->proc_bml = NULL;
        }
        if( procs[c]->proc_pml != NULL ) {
            free( procs[c]->proc_pml);
            procs[c]->proc_pml = NULL;
        }
        if( procs[c]->proc_hostname != NULL ) {
            free( procs[c]->proc_hostname );
            procs[c]->proc_hostname = NULL;
        }
        /*procs[c]->proc_arch = ompi_mpi_local_arch;*/
        procs[c]->proc_flags = 0;
    }

    /* Restart the PML/BML/BTL */
    opal_output_verbose(25, ompi_crcp_base_output,
                        "crcp:coord: reboot_pml(): PML base Open");
    if (OMPI_SUCCESS != (ret = mca_pml_base_open())) {
        error_msg = "PML base_open failed";
        return_code = ret;
        goto cleanup;
    }

    opal_output_verbose(25, ompi_crcp_base_output,
                        "crcp:coord: reboot_pml(): PML init modex");
    if (OMPI_SUCCESS != (ret = mca_pml_base_modex_init())) {
        error_msg = "PML base_modex_init failed";
        return_code = ret;
        goto cleanup;
    }

    opal_output_verbose(25, ompi_crcp_base_output,
                        "crcp:coord: reboot_pml(): PML base select");
    if (OMPI_SUCCESS !=
        (ret = mca_pml_base_select(OMPI_ENABLE_PROGRESS_THREADS,
                                   OMPI_ENABLE_MPI_THREADS))) {
        error_msg = "PML base_select failed";
        return_code = ret;
        goto cleanup;
    }

    opal_output_verbose(25, ompi_crcp_base_output,
                        "crcp:coord: reboot_pml(): PML modex exchange");
    if (OMPI_SUCCESS != (ret = mca_pml_base_modex_exchange())) {
        error_msg = "PML base_modex_exchange failed";
        return_code = ret;
        goto cleanup;
    }

    opal_output_verbose(25, ompi_crcp_base_output,
                        "crcp:coord: reboot_pml(): Enter Stage Gate 1");
    if (ORTE_SUCCESS != (ret = orte_smr.set_proc_state(orte_process_info.my_name,
                                                       ORTE_PROC_STATE_AT_STG1, 0))) {
        error_msg = "PML Stage Gate 1 SOH failed";
        return_code = ret;
        goto cleanup;
    }
    if (ORTE_SUCCESS != (ret = orte_rml.xcast(ORTE_PROC_MY_NAME->jobid, true,
                                              NULL, orte_gpr.deliver_notify_msg))) {
        error_msg = "PML RML.Xcast(Stage1) failed";
        return_code = ret;
        goto cleanup;
    }

    /* Enable the PML */
    opal_output_verbose(25, ompi_crcp_base_output,
                        "crcp:coord: reboot_pml(): PML Enable");
    /* JJH RETURN HERE -- mca_bml_r2.btl_modules is corrupted. I bet it is not removed properly....*/
    if( OMPI_SUCCESS != (ret = MCA_PML_CALL(enable(true)) ) ) {
        error_msg = "PML control failed";
        return_code = ret;
        goto cleanup;
    }

    /* Add back the processes */
    opal_output_verbose(25, ompi_crcp_base_output,
                        "crcp:coord: reboot_pml(): PML get new list of processes");
    /* The array obtained before the shutdown is no longer needed; release
     * it before fetching a fresh one so that it is not leaked. */
    free(procs);
    procs = NULL;
    /* Get all the processes that we know about */
    nprocs = 0;
    if (NULL == (procs = ompi_proc_world(&nprocs))) {
        error_msg = "ompi_proc_world() failed";
        return_code = OMPI_ERROR;
        goto cleanup;
    }

    /* Same verbosity level as every other step message of this function */
    opal_output_verbose(25, ompi_crcp_base_output,
                        "crcp:coord: reboot_pml(): PML Add back the %d processes",
                        (int)nprocs);
    if( OMPI_SUCCESS != (ret = MCA_PML_CALL(add_procs(procs, nprocs)) ) ) {
        error_msg = "PML add procs failed";
        return_code = ret;
        goto cleanup;
    }
    free(procs);
    procs = NULL;   /* avoid a second free() in the cleanup section */

    /* Add back the communicators */
    opal_output_verbose(25, ompi_crcp_base_output,
                        "crcp:coord: reboot_pml(): PML Add back the communicators");
    for( c = 0; c < (uint32_t)comm_size; ++c) {
        tmp_comm = ompi_comm_lookup(c);
        if( ompi_comm_invalid(tmp_comm) ) {
            continue;
        }
        if( OMPI_SUCCESS != (ret = MCA_PML_CALL(add_comm(tmp_comm)) ) ) {
            error_msg = "PML add comm failed";
            return_code = ret;
            goto cleanup;
        }
    }

    opal_output_verbose(25, ompi_crcp_base_output,
                        "crcp:coord: reboot_pml(): Enter Stage Gate 2");
    if (ORTE_SUCCESS != (ret = orte_smr.set_proc_state(orte_process_info.my_name,
                                                       ORTE_PROC_STATE_AT_STG2, 0))) {
        error_msg = "PML Stage Gate 2 SOH failed";
        return_code = ret;
        goto cleanup;
    }
    if (ORTE_SUCCESS != (ret = orte_rml.xcast(ORTE_PROC_MY_NAME->jobid, false,
                                              NULL, orte_gpr.deliver_notify_msg))) {
        error_msg = "PML RML.Xcast(Stage2) failed";
        return_code = ret;
        goto cleanup;
    }

    opal_output_verbose(5, ompi_crcp_base_output,
                        "crcp:coord: reboot_pml(): PML/BML/BTL have been rebooted [PML = %s]",
                        mca_pml_base_selected_component.pmlm_version.mca_component_name);

 cleanup:
    /* Report which step failed; previously error_msg was set but never used */
    if (NULL != error_msg) {
        opal_output_verbose(1, ompi_crcp_base_output,
                            "crcp:coord: reboot_pml(): %s (return code = %d)",
                            error_msg, return_code);
    }

    /* Only the array is owned here, not the proc objects it points to;
     * free(NULL) is a no-op on the success path. */
    free(procs);

    return return_code;
}

89
ompi/mca/crcp/base/crcp_base_open.c Обычный файл
Просмотреть файл

@ -0,0 +1,89 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/crcp/crcp.h"
#include "ompi/mca/crcp/base/base.h"
#include "ompi/mca/crcp/base/static-components.h"
/*
* Globals
*
* Definitions of the framework-wide objects used by the base open/select/
* close functions in this directory.
*/
/* opal_output stream id; set in ompi_crcp_base_open(), -1 means "no stream" */
OMPI_DECLSPEC int ompi_crcp_base_output = -1;
/* Active module table; the function pointers stay NULL until
 * ompi_crcp_base_select() copies the winning module into it. */
OMPI_DECLSPEC ompi_crcp_base_module_t ompi_crcp = {
NULL, /* crcp_init */
NULL /* crcp_finalize */
};
/* Components opened by mca_base_components_open() in ompi_crcp_base_open() */
opal_list_t ompi_crcp_base_components_available;
/* Copy of the component picked by ompi_crcp_base_select() */
ompi_crcp_base_component_t ompi_crcp_base_selected_component;
/**
 * Open the CRCP framework.
 *
 * Registers the "crcp_base_verbose" and "crcp" MCA parameters, sets up the
 * framework output stream according to the verbosity, and opens every
 * available CRCP component (static ones plus whatever the MCA base finds).
 *
 * @return OMPI_SUCCESS if the components could be opened, OMPI_ERROR
 *         otherwise.
 */
int ompi_crcp_base_open(void)
{
    int verbosity;
    int open_rc;
    char *requested = NULL;

    /* Debugging/Verbose output */
    mca_base_param_reg_int_name("crcp_base",
                                "verbose",
                                "Verbosity level of the CRCP framework",
                                false, false,
                                0, &verbosity);
    ompi_crcp_base_output = (0 != verbosity) ? opal_output_open(NULL) : -1;

    /*
     * Which CRCP component to open
     *  - NULL or "" = auto-select
     *  - "none"     = Empty component
     *  - ow. select that specific component
     * Note: the default is NULL here so that ompi_info works correctly;
     *       the 'real' default is set in base_select.c
     */
    mca_base_param_reg_string_name("crcp", NULL,
                                   "Which CRCP component to use (empty = auto-select)",
                                   false, false,
                                   NULL, &requested);
    /* The value is only looked up for registration purposes; free(NULL)
     * is a no-op, so no guard is needed. */
    free(requested);

    /* Open up all available components */
    open_rc = mca_base_components_open("crcp",
                                       ompi_crcp_base_output,
                                       mca_crcp_base_static_components,
                                       &ompi_crcp_base_components_available,
                                       true);

    return (OPAL_SUCCESS == open_rc) ? OMPI_SUCCESS : OMPI_ERROR;
}

241
ompi/mca/crcp/base/crcp_base_select.c Обычный файл
Просмотреть файл

@ -0,0 +1,241 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/mca/crcp/crcp.h"
#include "ompi/mca/crcp/base/base.h"
/*
 * Built-in fallback component.  ompi_crcp_base_select() points
 * best_component at this object when the user asked for "none" or when no
 * component on the available list was selected.  Its query function is
 * NULL; the matching function table is none_module below.
 */
static ompi_crcp_base_component_t none_component = {
/* Handle the general mca_component_t struct containing
* meta information about the component itself
*/
{
OMPI_CRCP_BASE_VERSION_1_0_0,
/* Component name and version */
"none",
OMPI_MAJOR_VERSION,
OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION,
/* Component open and close functions */
ompi_crcp_base_none_open,
ompi_crcp_base_none_close
},
/* Next the MCA v1.0.0 component meta data */
{
/* Is the component checkpointable ? */
true
},
/* Query Function */
NULL,
/* Verbosity level */
0,
/* opal_output handler */
-1,
/* Default priority */
1
};
/*
 * Function table of the built-in "none" component.  Every entry is one of
 * the ompi_crcp_base_none_* / ompi_crcp_base_module_* stubs, which only
 * report OMPI_SUCCESS.  The initializer is positional, so the order of the
 * entries must match the field order of ompi_crcp_base_module_t.
 */
static ompi_crcp_base_module_t none_module = {
/** Initialization Function */
ompi_crcp_base_module_init,
/** Finalization Function */
ompi_crcp_base_module_finalize,
/** PML Wrapper */
ompi_crcp_base_none_pml_enable,
ompi_crcp_base_none_pml_add_comm,
ompi_crcp_base_none_pml_del_comm,
ompi_crcp_base_none_pml_add_procs,
ompi_crcp_base_none_pml_del_procs,
ompi_crcp_base_none_pml_progress,
ompi_crcp_base_none_pml_iprobe,
ompi_crcp_base_none_pml_probe,
ompi_crcp_base_none_pml_isend_init,
ompi_crcp_base_none_pml_isend,
ompi_crcp_base_none_pml_send,
ompi_crcp_base_none_pml_irecv_init,
ompi_crcp_base_none_pml_irecv,
ompi_crcp_base_none_pml_recv,
ompi_crcp_base_none_pml_dump,
ompi_crcp_base_none_pml_start,
ompi_crcp_base_none_pml_ft_event,
/** Request Wrapper */
ompi_crcp_base_none_request_complete,
/** BTL Wrapper */
ompi_crcp_base_none_btl_add_procs,
ompi_crcp_base_none_btl_del_procs,
ompi_crcp_base_none_btl_register,
ompi_crcp_base_none_btl_finalize,
ompi_crcp_base_none_btl_alloc,
ompi_crcp_base_none_btl_free,
ompi_crcp_base_none_btl_prepare_src,
ompi_crcp_base_none_btl_prepare_dst,
ompi_crcp_base_none_btl_send,
ompi_crcp_base_none_btl_put,
ompi_crcp_base_none_btl_get,
ompi_crcp_base_none_btl_dump,
ompi_crcp_base_none_btl_ft_event
};
/**
 * Select the CRCP component/module to use for this process.
 *
 * The "crcp" MCA parameter (default "none") drives the selection:
 *  - empty value: query every available component, pick the highest
 *    priority, fall back to the built-in "none" component if none answers;
 *  - value starting with "none": use the built-in "none" component;
 *  - anything else: only components whose name starts with the value are
 *    queried, and it is an error if none of them is selected.
 *
 * The winner is copied into ompi_crcp_base_selected_component / ompi_crcp,
 * all other components are closed, and the winner's crcp_init() is called.
 *
 * @return OMPI_SUCCESS on success; OMPI_ERROR if an explicitly requested
 *         component could not be selected or if crcp_init() failed.
 */
int ompi_crcp_base_select(void)
{
    int exit_status = OMPI_SUCCESS;
    int priority = 0, best_priority = -1;
    opal_list_item_t *item = NULL;
    mca_base_component_list_item_t *cli = NULL;
    ompi_crcp_base_component_t *component = NULL, *best_component = NULL;
    ompi_crcp_base_module_t *module = NULL, *best_module = NULL;
    char *crcp_include_list = NULL;
    bool fail_on_non_selection = false;

    /* Register the framework MCA param and look it up.
     * The default is passed as a plain literal: a strdup()'ed default
     * would never be released by this function. */
    mca_base_param_reg_string_name("crcp", NULL,
                                   "Which CRCP component to use (empty = auto-select)",
                                   false, false,
                                   "none", &crcp_include_list);

    if (NULL == crcp_include_list || 0 == strlen(crcp_include_list)) {
        opal_output_verbose(20, ompi_crcp_base_output,
                            "crcp:select: auto-selecting");
    } else {
        opal_output_verbose(20, ompi_crcp_base_output,
                            "crcp:select: looking for %s component", crcp_include_list);
        if(0 == strncmp(crcp_include_list, "none", strlen("none")) ) {
            goto do_none_comp;
        }
        else {
            fail_on_non_selection = true;
        }
    }

    /* Traverse the list of available components;
     * calling their init functions
     */
    for (item  = opal_list_get_first(&ompi_crcp_base_components_available);
         item != opal_list_get_end(&ompi_crcp_base_components_available);
         item  = opal_list_get_next(item) ) {
        cli = (mca_base_component_list_item_t *) item;
        component = (ompi_crcp_base_component_t *) cli->cli_component;

        /* If there is an include list -
         * the item must be in the list to be included :)
         */
        if (NULL != crcp_include_list &&
            0 < strlen(crcp_include_list) &&
            0 != strncmp(component->crcp_version.mca_component_name,
                         crcp_include_list, strlen(crcp_include_list)) ) {
            opal_output_verbose(20, ompi_crcp_base_output,
                                "crcp:select: Skipping %s component",
                                component->crcp_version.mca_component_name);
            continue;
        }

        if (NULL == component->crcp_query) {
            opal_output_verbose(20, ompi_crcp_base_output,
                                "crcp:select: No init function! Ignoring component %s",
                                component->crcp_version.mca_component_name );
            continue;
        }

        opal_output_verbose(20, ompi_crcp_base_output,
                            "crcp:select: Initializing component %s",
                            component->crcp_version.mca_component_name);

        module = component->crcp_query(&priority);
        if (NULL == module) {
            opal_output_verbose(20, ompi_crcp_base_output,
                                "crcp:select: Init returned failure for component %s",
                                component->crcp_version.mca_component_name );
            continue;
        }

        opal_output_verbose(20, ompi_crcp_base_output,
                            "crcp:select: Init returned priority %d",
                            priority);
        if (priority > best_priority) {
            best_priority  = priority;
            best_component = component;
            best_module    = module;
        }
    }

    /* Finished querying all components.
     * Check for the bozo case.
     */
 do_none_comp:
    if (NULL == best_component ) {
        if( fail_on_non_selection ) {
            exit_status = OMPI_ERROR;
            goto cleanup;
        }
        else {
            opal_output_verbose(20, ompi_crcp_base_output,
                                "crcp:select: No component found, using the base component.  ;(");
            best_component = &none_component;
            best_module    = &none_module;
        }
    }

    /* Go through the list and close
     * the non-selected components
     */
    mca_base_components_close(0, /* We must pass it 0, to keep it from closing it */
                              &ompi_crcp_base_components_available,
                              (mca_base_component_t *) best_component);

    /* Save the winner.  best_module is never NULL here: it is set together
     * with best_component, either from a non-NULL query result or to
     * &none_module. */
    ompi_crcp_base_selected_component = *best_component;
    ompi_crcp = *best_module;
    opal_output_verbose(15, ompi_crcp_base_output,
                        "crcp:select: Component %s selected",
                        best_component->crcp_version.mca_component_name);

    /* Initialize the winner */
    if (OMPI_SUCCESS != ompi_crcp.crcp_init( )) {
        exit_status = OMPI_ERROR;
        goto cleanup;
    }

 cleanup:
    /* Released on every path, including the error returns; free(NULL) is
     * a no-op. */
    free(crcp_include_list);

    return exit_status;
}

20
ompi/mca/crcp/base/help-ompi-crcp-base.txt Обычный файл
Просмотреть файл

@ -0,0 +1,20 @@
-*- text -*-
#
# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English general help file for the OMPI CRCP framework.
#

51
ompi/mca/crcp/coord/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,51 @@
#
# Copyright (c) 2004-2007 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Help text file shipped and installed with the component.
dist_pkgdata_DATA = help-ompi-crcp-coord.txt
# Headers and sources shared by the DSO and the static build below.
sources = \
crcp_coord.h \
crcp_coord_pml.h \
crcp_coord_btl.h \
crcp_coord_component.c \
crcp_coord_module.c \
crcp_coord_btl.c \
crcp_coord_pml.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
# Exactly one of component_install / component_noinst is non-empty.
if OMPI_BUILD_crcp_coord_DSO
component_noinst =
component_install = mca_crcp_coord.la
else
component_noinst = libmca_crcp_coord.la
component_install =
endif
# DSO build: installed into $(libdir)/openmpi and linked against the three
# Open MPI libraries listed in LIBADD.
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_crcp_coord_la_SOURCES = $(sources)
mca_crcp_coord_la_LDFLAGS = -module -avoid-version
mca_crcp_coord_la_LIBADD = \
$(top_ompi_builddir)/ompi/libmpi.la \
$(top_ompi_builddir)/orte/libopen-rte.la \
$(top_ompi_builddir)/opal/libopen-pal.la
# Static build: convenience library that is not installed on its own.
noinst_LTLIBRARIES = $(component_noinst)
libmca_crcp_coord_la_SOURCES = $(sources)
libmca_crcp_coord_la_LDFLAGS = -module -avoid-version

25
ompi/mca/crcp/coord/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,25 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2007 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_crcp_coord_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
# Runs action-if-found ($1) when fault tolerance was requested at configure
# time (ompi_want_ft = 1, set by the --with-ft handling), otherwise runs
# action-if-not-found ($2).
AC_DEFUN([MCA_crcp_coord_CONFIG],[
# If we don't want FT, don't compile this component
AS_IF([test "$ompi_want_ft" = "1"],
[$1],
[$2])
])dnl

19
ompi/mca/crcp/coord/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,19 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2007 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Source file of this component (also listed in Makefile.am's sources);
# presumably used by the build system to locate the component directory.
PARAM_INIT_FILE=crcp_coord_component.c
# Files the component asks to have generated -- presumably by configure
# from the matching *.in templates; confirm against the MCA build docs.
PARAM_CONFIG_FILES="Makefile"

69
ompi/mca/crcp/coord/crcp_coord.h Обычный файл
Просмотреть файл

@ -0,0 +1,69 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Coord CRCP component
*
*/
#ifndef MCA_CRCP_COORD_EXPORT_H
#define MCA_CRCP_COORD_EXPORT_H
#include "ompi_config.h"
#include "opal/mca/mca.h"
#include "ompi/mca/crcp/crcp.h"
#include "ompi/communicator/communicator.h"
#include "orte/mca/ns/ns.h"
#include "opal/runtime/opal_cr.h"
#include "opal/threads/mutex.h"
#include "opal/threads/condition.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/*
* Local Component structures
*/
/* Component descriptor of the "coord" CRCP component; it currently adds
 * no fields beyond the embedded base component. */
struct ompi_crcp_coord_component_t {
ompi_crcp_base_component_t super; /** Base CRCP component */
};
typedef struct ompi_crcp_coord_component_t ompi_crcp_coord_component_t;
/* The single component instance (defined outside this header). */
extern ompi_crcp_coord_component_t mca_crcp_coord_component;
/*
* Module functions
*/
/* Component query: returns the module table and reports a priority through
 * *priority. */
ompi_crcp_base_module_1_0_0_t *
ompi_crcp_coord_component_query(int *priority);
/* Module-level init/finalize pair. */
int ompi_crcp_coord_module_init(void);
int ompi_crcp_coord_module_finalize(void);
/* Init/finalize pair of the PML part of the component. */
int ompi_crcp_coord_pml_init(void);
int ompi_crcp_coord_pml_finalize(void);
/* Init/finalize pair of the BTL part of the component; both currently
 * just return OMPI_SUCCESS (see crcp_coord_btl.c). */
int ompi_crcp_coord_btl_init(void);
int ompi_crcp_coord_btl_finalize(void);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* MCA_CRCP_COORD_EXPORT_H */

182
ompi/mca/crcp/coord/crcp_coord_btl.c Обычный файл
Просмотреть файл

@ -0,0 +1,182 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <sys/types.h>
#include <unistd.h>
#include "opal/runtime/opal_cr.h"
#include "opal/event/event.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "opal/util/argv.h"
#include "opal/util/opal_environ.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
#include "ompi/mca/crcp/crcp.h"
#include "ompi/mca/crcp/base/base.h"
#include "crcp_coord.h"
#include "crcp_coord_btl.h"
/**
 * BTL-side initialization hook of the coord component.
 * There is no BTL state to prepare at the moment.
 *
 * @return OMPI_SUCCESS, unconditionally.
 */
int ompi_crcp_coord_btl_init(void)
{
    /* Nothing to set up. */
    return OMPI_SUCCESS;
}
/**
 * BTL-side finalization hook of the coord component.
 * There is no BTL state to release at the moment.
 *
 * @return OMPI_SUCCESS, unconditionally.
 */
int ompi_crcp_coord_btl_finalize(void)
{
    /* Nothing to tear down. */
    return OMPI_SUCCESS;
}
/**
 * Coord stub for the BTL add_procs coordination point.
 * No BTL-level work is done: the state is flagged successful and
 * handed straight back. All other arguments are ignored.
 *
 * @param btl_state  Caller-supplied state; its error_code is overwritten.
 * @return btl_state
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_add_procs(struct mca_btl_base_module_t *btl,
                                   size_t nprocs,
                                   struct ompi_proc_t **procs,
                                   struct mca_btl_base_endpoint_t **endpoints,
                                   struct ompi_bitmap_t *reachable,
                                   ompi_crcp_base_btl_state_t *btl_state)
{
    ompi_crcp_base_btl_state_t *const result = btl_state;

    result->error_code = OMPI_SUCCESS;
    return result;
}
/**
 * Coord stub for the BTL del_procs coordination point.
 * No BTL-level work is done: the state is flagged successful and
 * handed straight back. All other arguments are ignored.
 *
 * @param btl_state  Caller-supplied state; its error_code is overwritten.
 * @return btl_state
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_del_procs(struct mca_btl_base_module_t *btl,
                                   size_t nprocs,
                                   struct ompi_proc_t **procs,
                                   struct mca_btl_base_endpoint_t **endpoints,
                                   ompi_crcp_base_btl_state_t *btl_state)
{
    ompi_crcp_base_btl_state_t *const result = btl_state;

    result->error_code = OMPI_SUCCESS;
    return result;
}
/**
 * Coord stub for the BTL register coordination point.
 * No BTL-level work is done: the state is flagged successful and
 * handed straight back. All other arguments are ignored.
 *
 * @param btl_state  Caller-supplied state; its error_code is overwritten.
 * @return btl_state
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_register(struct mca_btl_base_module_t *btl,
                                  mca_btl_base_tag_t tag,
                                  mca_btl_base_module_recv_cb_fn_t cbfunc,
                                  void *cbdata,
                                  ompi_crcp_base_btl_state_t *btl_state)
{
    ompi_crcp_base_btl_state_t *const result = btl_state;

    result->error_code = OMPI_SUCCESS;
    return result;
}
/**
 * Coord stub for the BTL finalize coordination point.
 * No BTL-level work is done: the state is flagged successful and
 * handed straight back. The btl argument is ignored.
 *
 * @param btl_state  Caller-supplied state; its error_code is overwritten.
 * @return btl_state
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_finalize(struct mca_btl_base_module_t *btl,
                                  ompi_crcp_base_btl_state_t *btl_state)
{
    ompi_crcp_base_btl_state_t *const result = btl_state;

    result->error_code = OMPI_SUCCESS;
    return result;
}
/**
 * Coord stub for the BTL alloc coordination point.
 * No BTL-level work is done: the state is flagged successful and
 * handed straight back. All other arguments are ignored.
 *
 * @param btl_state  Caller-supplied state; its error_code is overwritten.
 * @return btl_state
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_alloc(struct mca_btl_base_module_t *btl,
                               size_t size,
                               ompi_crcp_base_btl_state_t *btl_state)
{
    ompi_crcp_base_btl_state_t *const result = btl_state;

    result->error_code = OMPI_SUCCESS;
    return result;
}
/**
 * Coord stub for the BTL free coordination point.
 * No BTL-level work is done: the state is flagged successful and
 * handed straight back. All other arguments are ignored.
 *
 * @param btl_state  Caller-supplied state; its error_code is overwritten.
 * @return btl_state
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_free(struct mca_btl_base_module_t *btl,
                              mca_btl_base_descriptor_t *descriptor,
                              ompi_crcp_base_btl_state_t *btl_state)
{
    ompi_crcp_base_btl_state_t *const result = btl_state;

    result->error_code = OMPI_SUCCESS;
    return result;
}
/**
 * Coord stub for the BTL prepare_src coordination point.
 * No BTL-level work is done: the state is flagged successful and
 * handed straight back. All other arguments are ignored.
 *
 * @param btl_state  Caller-supplied state; its error_code is overwritten.
 * @return btl_state
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_prepare_src(struct mca_btl_base_module_t *btl,
                                     struct mca_btl_base_endpoint_t *endpoint,
                                     mca_mpool_base_registration_t *registration,
                                     struct ompi_convertor_t *convertor,
                                     size_t reserve,
                                     size_t *size,
                                     ompi_crcp_base_btl_state_t *btl_state)
{
    ompi_crcp_base_btl_state_t *const result = btl_state;

    result->error_code = OMPI_SUCCESS;
    return result;
}
/**
 * Coord stub for the BTL prepare_dst coordination point.
 * No BTL-level work is done: the state is flagged successful and
 * handed straight back. All other arguments are ignored.
 *
 * @param btl_state  Caller-supplied state; its error_code is overwritten.
 * @return btl_state
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_prepare_dst(struct mca_btl_base_module_t *btl,
                                     struct mca_btl_base_endpoint_t *endpoint,
                                     mca_mpool_base_registration_t *registration,
                                     struct ompi_convertor_t *convertor,
                                     size_t reserve,
                                     size_t *size,
                                     ompi_crcp_base_btl_state_t *btl_state)
{
    ompi_crcp_base_btl_state_t *const result = btl_state;

    result->error_code = OMPI_SUCCESS;
    return result;
}
/**
 * Coord stub for the BTL send coordination point.
 * No BTL-level work is done: the state is flagged successful and
 * handed straight back. All other arguments are ignored.
 *
 * @param btl_state  Caller-supplied state; its error_code is overwritten.
 * @return btl_state
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_send(struct mca_btl_base_module_t *btl,
                              struct mca_btl_base_endpoint_t *endpoint,
                              struct mca_btl_base_descriptor_t *descriptor,
                              mca_btl_base_tag_t tag,
                              ompi_crcp_base_btl_state_t *btl_state)
{
    ompi_crcp_base_btl_state_t *const result = btl_state;

    result->error_code = OMPI_SUCCESS;
    return result;
}
/**
 * Coord stub for the BTL put coordination point.
 * No BTL-level work is done: the state is flagged successful and
 * handed straight back. All other arguments are ignored.
 *
 * @param btl_state  Caller-supplied state; its error_code is overwritten.
 * @return btl_state
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_put(struct mca_btl_base_module_t *btl,
                             struct mca_btl_base_endpoint_t *endpoint,
                             struct mca_btl_base_descriptor_t *descriptor,
                             ompi_crcp_base_btl_state_t *btl_state)
{
    ompi_crcp_base_btl_state_t *const result = btl_state;

    result->error_code = OMPI_SUCCESS;
    return result;
}
/**
 * Coord stub for the BTL get coordination point.
 * No BTL-level work is done: the state is flagged successful and
 * handed straight back. All other arguments are ignored.
 *
 * @param btl_state  Caller-supplied state; its error_code is overwritten.
 * @return btl_state
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_get(struct mca_btl_base_module_t *btl,
                             struct mca_btl_base_endpoint_t *endpoint,
                             struct mca_btl_base_descriptor_t *descriptor,
                             ompi_crcp_base_btl_state_t *btl_state)
{
    ompi_crcp_base_btl_state_t *const result = btl_state;

    result->error_code = OMPI_SUCCESS;
    return result;
}
/**
 * Coord stub for the BTL dump coordination point.
 * No BTL-level work is done: the state is flagged successful and
 * handed straight back. All other arguments are ignored.
 *
 * @param btl_state  Caller-supplied state; its error_code is overwritten.
 * @return btl_state
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_dump(struct mca_btl_base_module_t *btl,
                              struct mca_btl_base_endpoint_t *endpoint,
                              int verbose,
                              ompi_crcp_base_btl_state_t *btl_state)
{
    ompi_crcp_base_btl_state_t *const result = btl_state;

    result->error_code = OMPI_SUCCESS;
    return result;
}
/**
 * Coord stub for the BTL ft_event coordination point.
 * No BTL-level work is done: the state is flagged successful and
 * handed straight back. The state argument is ignored.
 *
 * @param btl_state  Caller-supplied state; its error_code is overwritten.
 * @return btl_state
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_ft_event(int state,
                                  ompi_crcp_base_btl_state_t *btl_state)
{
    ompi_crcp_base_btl_state_t *const result = btl_state;

    result->error_code = OMPI_SUCCESS;
    return result;
}

133
ompi/mca/crcp/coord/crcp_coord_btl.h Обычный файл
Просмотреть файл

@ -0,0 +1,133 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Coord CRCP component
*
*/
#ifndef MCA_CRCP_COORD_BTL_EXPORT_H
#define MCA_CRCP_COORD_BTL_EXPORT_H
#include "ompi_config.h"
#include "opal/mca/mca.h"
#include "ompi/mca/crcp/crcp.h"
#include "ompi/communicator/communicator.h"
#include "orte/mca/ns/ns.h"
#include "opal/runtime/opal_cr.h"
#include "opal/threads/mutex.h"
#include "opal/threads/condition.h"
#include "ompi/mca/crcp/coord/crcp_coord.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/*
 * BTL Coordination functions
 *
 * Every function receives a caller-supplied ompi_crcp_base_btl_state_t
 * as its last argument and returns a pointer of the same type.
 * The definitions live in crcp_coord_btl.c.
 */
ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_add_procs(struct mca_btl_base_module_t *btl,
                                   size_t nprocs,
                                   struct ompi_proc_t **procs,
                                   struct mca_btl_base_endpoint_t **endpoints,
                                   struct ompi_bitmap_t *reachable,
                                   ompi_crcp_base_btl_state_t *btl_state);

ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_del_procs(struct mca_btl_base_module_t *btl,
                                   size_t nprocs,
                                   struct ompi_proc_t **procs,
                                   struct mca_btl_base_endpoint_t **endpoints,
                                   ompi_crcp_base_btl_state_t *btl_state);

ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_register(struct mca_btl_base_module_t *btl,
                                  mca_btl_base_tag_t tag,
                                  mca_btl_base_module_recv_cb_fn_t cbfunc,
                                  void *cbdata,
                                  ompi_crcp_base_btl_state_t *btl_state);

ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_finalize(struct mca_btl_base_module_t *btl,
                                  ompi_crcp_base_btl_state_t *btl_state);

ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_alloc(struct mca_btl_base_module_t *btl,
                               size_t size,
                               ompi_crcp_base_btl_state_t *btl_state);

ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_free(struct mca_btl_base_module_t *btl,
                              mca_btl_base_descriptor_t *descriptor,
                              ompi_crcp_base_btl_state_t *btl_state);

ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_prepare_src(struct mca_btl_base_module_t *btl,
                                     struct mca_btl_base_endpoint_t *endpoint,
                                     mca_mpool_base_registration_t *registration,
                                     struct ompi_convertor_t *convertor,
                                     size_t reserve,
                                     size_t *size,
                                     ompi_crcp_base_btl_state_t *btl_state);

ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_prepare_dst(struct mca_btl_base_module_t *btl,
                                     struct mca_btl_base_endpoint_t *endpoint,
                                     mca_mpool_base_registration_t *registration,
                                     struct ompi_convertor_t *convertor,
                                     size_t reserve,
                                     size_t *size,
                                     ompi_crcp_base_btl_state_t *btl_state);

ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_send(struct mca_btl_base_module_t *btl,
                              struct mca_btl_base_endpoint_t *endpoint,
                              struct mca_btl_base_descriptor_t *descriptor,
                              mca_btl_base_tag_t tag,
                              ompi_crcp_base_btl_state_t *btl_state);

ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_put(struct mca_btl_base_module_t *btl,
                             struct mca_btl_base_endpoint_t *endpoint,
                             struct mca_btl_base_descriptor_t *descriptor,
                             ompi_crcp_base_btl_state_t *btl_state);

ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_get(struct mca_btl_base_module_t *btl,
                             struct mca_btl_base_endpoint_t *endpoint,
                             struct mca_btl_base_descriptor_t *descriptor,
                             ompi_crcp_base_btl_state_t *btl_state);

ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_dump(struct mca_btl_base_module_t *btl,
                              struct mca_btl_base_endpoint_t *endpoint,
                              int verbose,
                              ompi_crcp_base_btl_state_t *btl_state);

ompi_crcp_base_btl_state_t *
ompi_crcp_base_coord_btl_ft_event(int state,
                                  ompi_crcp_base_btl_state_t *btl_state);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* MCA_CRCP_COORD_BTL_EXPORT_H */

126
ompi/mca/crcp/coord/crcp_coord_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,126 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/mca/crcp/crcp.h"
#include "ompi/mca/crcp/base/base.h"
#include "crcp_coord.h"
/*
 * Public string for version number.
 * The text is assembled at compile time by concatenating the literal
 * below with OMPI_VERSION.
 */
const char *ompi_crcp_coord_component_version_string =
"OMPI CRCP coord MCA component version " OMPI_VERSION;
/*
 * Local functionality: the component open/close callbacks that are
 * installed in mca_crcp_coord_component below.
 */
static int crcp_coord_open(void);
static int crcp_coord_close(void);
/*
 * Instantiate the public struct with all of our public information
 * and pointer to our public functions in it.
 *
 * The initializer is positional, so the entries must stay in the
 * order of the fields they fill.
 */
ompi_crcp_coord_component_t mca_crcp_coord_component = {
/* First do the base component stuff */
{
/* Handle the general mca_component_t struct containing
 * meta information about the component itself
 */
{
OMPI_CRCP_BASE_VERSION_1_0_0,
/* Component name and version */
"coord",
OMPI_MAJOR_VERSION,
OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION,
/* Component open and close functions */
crcp_coord_open,
crcp_coord_close
},
/* Next the MCA v1.0.0 component meta data */
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* Query Function */
ompi_crcp_coord_component_query,
/* Verbosity level (default for the "verbose" MCA parameter) */
0,
/* opal_output handler (-1 until crcp_coord_open() picks a stream) */
-1,
/* Default priority (default for the "priority" MCA parameter) */
20
}
};
static int crcp_coord_open(void)
{
/*
* This should be the last componet to ever get used since
* it doesn't do anything.
*/
mca_base_param_reg_int(&mca_crcp_coord_component.super.crcp_version,
"priority",
"Priority of the CRCP coord component",
false, false,
mca_crcp_coord_component.super.priority,
&mca_crcp_coord_component.super.priority);
mca_base_param_reg_int(&mca_crcp_coord_component.super.crcp_version,
"verbose",
"Verbose level for the CRCP coord component",
false, false,
mca_crcp_coord_component.super.verbose,
&mca_crcp_coord_component.super.verbose);
/* If there is a custom verbose level for this component than use it
* otherwise take our parents level and output channel
*/
if ( 0 != mca_crcp_coord_component.super.verbose) {
mca_crcp_coord_component.super.output_handle = opal_output_open(NULL);
opal_output_set_verbosity(mca_crcp_coord_component.super.output_handle,
mca_crcp_coord_component.super.verbose);
} else {
mca_crcp_coord_component.super.output_handle = ompi_crcp_base_output;
}
/*
* Debug Output
*/
opal_output_verbose(10, mca_crcp_coord_component.super.output_handle,
"crcp:coord: open()");
opal_output_verbose(20, mca_crcp_coord_component.super.output_handle,
"crcp:coord: open: priority = %d",
mca_crcp_coord_component.super.priority);
opal_output_verbose(20, mca_crcp_coord_component.super.output_handle,
"crcp:coord: open: verbosity = %d",
mca_crcp_coord_component.super.verbose);
return OMPI_SUCCESS;
}
/**
 * Component close callback.
 *
 * Logs the close and releases the private output stream that
 * crcp_coord_open() creates when the component has its own verbose
 * level. The framework's shared stream is never closed from here.
 *
 * @return OMPI_SUCCESS, unconditionally.
 */
static int crcp_coord_close(void)
{
    int handle = mca_crcp_coord_component.super.output_handle;

    opal_output_verbose(10, handle,
                        "crcp:coord: close()");

    /*
     * Mirror the condition used in crcp_coord_open(): a stream was opened
     * only when verbose != 0. Reset the handle to its initial value so a
     * second close (or late logging) cannot touch a released stream.
     */
    if (0 != mca_crcp_coord_component.super.verbose && handle >= 0) {
        opal_output_close(handle);
        mca_crcp_coord_component.super.output_handle = -1;
    }

    return OMPI_SUCCESS;
}

156
ompi/mca/crcp/coord/crcp_coord_module.c Обычный файл
Просмотреть файл

@ -0,0 +1,156 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <sys/types.h>
#include <unistd.h>
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "opal/util/argv.h"
#include "opal/util/opal_environ.h"
#include "ompi/mca/crcp/crcp.h"
#include "ompi/mca/crcp/base/base.h"
#include "crcp_coord.h"
#include "crcp_coord_pml.h"
#include "crcp_coord_btl.h"
/*
 * Coord module: the function table handed out by
 * ompi_crcp_coord_component_query().
 *
 * The initializer is positional, so the entries must stay in the
 * order of the fields they fill.
 */
static ompi_crcp_base_module_t loc_module = {
/** Initialization Function */
ompi_crcp_coord_module_init,
/** Finalization Function */
ompi_crcp_coord_module_finalize,
/** PML Wrapper */
ompi_crcp_coord_pml_enable,
ompi_crcp_coord_pml_add_comm,
ompi_crcp_coord_pml_del_comm,
ompi_crcp_coord_pml_add_procs,
ompi_crcp_coord_pml_del_procs,
ompi_crcp_coord_pml_progress,
ompi_crcp_coord_pml_iprobe,
ompi_crcp_coord_pml_probe,
ompi_crcp_coord_pml_isend_init,
ompi_crcp_coord_pml_isend,
ompi_crcp_coord_pml_send,
ompi_crcp_coord_pml_irecv_init,
ompi_crcp_coord_pml_irecv,
ompi_crcp_coord_pml_recv,
ompi_crcp_coord_pml_dump,
ompi_crcp_coord_pml_start,
ompi_crcp_coord_pml_ft_event,
/* Request Functions */
ompi_crcp_coord_request_complete,
/* BTL Wrapper Functions */
/* NOTE(review): every BTL slot is NULL although crcp_coord_btl.c
 * defines ompi_crcp_base_coord_btl_* stubs -- confirm this is intended. */
NULL, /* btl_add_procs */
NULL, /* btl_del_procs */
NULL, /* btl_register */
NULL, /* btl_finalize */
NULL, /* btl_alloc */
NULL, /* btl_free */
NULL, /* btl_prepare_src */
NULL, /* btl_prepare_dst */
NULL, /* btl_send */
NULL, /* btl_put */
NULL, /* btl_get */
NULL, /* btl_dump */
NULL /* btl_ft_event */
};
/************************************
 * Locally Global vars & functions :)
 *
 * Three sets (global, send, recv) of lock / condition / counter /
 * block flag. They are declared extern in crcp_coord_pml.h, where the
 * OMPI_CRCP_COORD_FT_*_CS_* macros operate on them.
 ************************************/
opal_mutex_t ompi_crcp_coord_ft_global_cs_lock;
opal_condition_t ompi_crcp_coord_ft_global_cs_cond;
int ompi_crcp_coord_ft_global_cs_count;
bool ompi_crcp_coord_ft_global_cs_block;
opal_mutex_t ompi_crcp_coord_ft_send_cs_lock;
opal_condition_t ompi_crcp_coord_ft_send_cs_cond;
int ompi_crcp_coord_ft_send_cs_count;
bool ompi_crcp_coord_ft_send_cs_block;
opal_mutex_t ompi_crcp_coord_ft_recv_cs_lock;
opal_condition_t ompi_crcp_coord_ft_recv_cs_cond;
int ompi_crcp_coord_ft_recv_cs_count;
bool ompi_crcp_coord_ft_recv_cs_block;
/************************
* Function Definitions
************************/
/*
* MCA Functions
*/
/**
 * Component query function.
 *
 * @param priority  Out: receives the component's current priority.
 * @return Pointer to the coord module function table.
 */
ompi_crcp_base_module_1_0_0_t *
ompi_crcp_coord_component_query(int *priority)
{
    const int component_priority = mca_crcp_coord_component.super.priority;

    opal_output_verbose(10, mca_crcp_coord_component.super.output_handle,
                        "crcp:coord: component_query()");

    *priority = component_priority;

    return &loc_module;
}
/**
 * Module initialization: runs the PML and then the BTL init hook.
 *
 * Both hooks are always invoked, in that order. Their return codes
 * are no longer discarded.
 *
 * @return OMPI_SUCCESS when both hooks succeed, otherwise the error
 *         code of the first hook that failed.
 */
int ompi_crcp_coord_module_init(void)
{
    int pml_rc;
    int btl_rc;

    opal_output_verbose(10, mca_crcp_coord_component.super.output_handle,
                        "crcp:coord: module_init()");

    pml_rc = ompi_crcp_coord_pml_init();
    btl_rc = ompi_crcp_coord_btl_init();

    /* Report the earliest failure; btl_rc is OMPI_SUCCESS if nothing failed. */
    return (OMPI_SUCCESS != pml_rc) ? pml_rc : btl_rc;
}
/**
 * Module finalization: runs the PML and then the BTL finalize hook.
 *
 * Both hooks are always invoked, in that order, so a failure in the
 * first cannot prevent the second from running. Their return codes
 * are no longer discarded.
 *
 * @return OMPI_SUCCESS when both hooks succeed, otherwise the error
 *         code of the first hook that failed.
 */
int ompi_crcp_coord_module_finalize(void)
{
    int pml_rc;
    int btl_rc;

    opal_output_verbose(10, mca_crcp_coord_component.super.output_handle,
                        "crcp:coord: module_finalize()");

    pml_rc = ompi_crcp_coord_pml_finalize();
    btl_rc = ompi_crcp_coord_btl_finalize();

    /* Report the earliest failure; btl_rc is OMPI_SUCCESS if nothing failed. */
    return (OMPI_SUCCESS != pml_rc) ? pml_rc : btl_rc;
}
/******************
* Local functions
******************/

3752
ompi/mca/crcp/coord/crcp_coord_pml.c Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

582
ompi/mca/crcp/coord/crcp_coord_pml.h Обычный файл
Просмотреть файл

@ -0,0 +1,582 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Coord CRCP component
*
*/
#ifndef MCA_CRCP_COORD_PML_EXPORT_H
#define MCA_CRCP_COORD_PML_EXPORT_H
#include "ompi_config.h"
#include "opal/mca/mca.h"
#include "ompi/mca/crcp/crcp.h"
#include "ompi/communicator/communicator.h"
#include "orte/mca/ns/ns.h"
#include "opal/runtime/opal_cr.h"
#include "opal/threads/mutex.h"
#include "opal/threads/condition.h"
#include "ompi/mca/crcp/coord/crcp_coord.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/*
 * PML Coordination functions
 *
 * These are the functions installed in the "PML Wrapper" slots of the
 * coord module. Each takes a caller-supplied ompi_crcp_base_pml_state_t
 * as its last argument and returns a pointer of the same type.
 * The leading arguments appear to mirror the matching PML entry point
 * (TODO confirm against the PML interface).
 * NOTE(review): the rank argument of iprobe/probe is named "dst" although
 * the receive functions call theirs "src" -- confirm the intended meaning.
 */
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_enable
( bool enable, ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_add_comm
( struct ompi_communicator_t* comm,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_del_comm
( struct ompi_communicator_t* comm,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_add_procs
( struct ompi_proc_t **procs, size_t nprocs,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_del_procs
( struct ompi_proc_t **procs, size_t nprocs,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_progress
(ompi_crcp_base_pml_state_t* pml_state);
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_iprobe
(int dst, int tag, struct ompi_communicator_t* comm,
int *matched, ompi_status_public_t* status,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_probe
( int dst, int tag, struct ompi_communicator_t* comm,
ompi_status_public_t* status,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_isend_init
( void *buf, size_t count, ompi_datatype_t *datatype,
int dst, int tag, mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm,
struct ompi_request_t **request,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_isend
( void *buf, size_t count, ompi_datatype_t *datatype,
int dst, int tag, mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm,
struct ompi_request_t **request,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_send
( void *buf, size_t count, ompi_datatype_t *datatype,
int dst, int tag, mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_irecv_init
( void *buf, size_t count, ompi_datatype_t *datatype,
int src, int tag, struct ompi_communicator_t* comm,
struct ompi_request_t **request,
ompi_crcp_base_pml_state_t* pml_state);
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_irecv
( void *buf, size_t count, ompi_datatype_t *datatype,
int src, int tag, struct ompi_communicator_t* comm,
struct ompi_request_t **request,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_recv
( void *buf, size_t count, ompi_datatype_t *datatype,
int src, int tag, struct ompi_communicator_t* comm,
ompi_status_public_t* status,
ompi_crcp_base_pml_state_t* pml_state);
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_dump
( struct ompi_communicator_t* comm, int verbose,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_start
( size_t count, ompi_request_t** requests,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_ft_event
(int state, ompi_crcp_base_pml_state_t* pml_state);
/*
 * Request function: installed in the "Request Functions" slot of the
 * coord module.
 */
int ompi_crcp_coord_request_complete(struct ompi_request_t *request);
/***********************************
 * Globally Defined Variables
 * (declarations only; the definitions are not in this header)
 ***********************************/
/*
 * List of messages received from ANY_SOURCE
 * -- ompi_crcp_coord_pml_message_ref_t
 */
OMPI_MODULE_DECLSPEC extern opal_list_t unknown_recv_from_list;
OMPI_MODULE_DECLSPEC extern opal_list_t unknown_persist_recv_list;
/*
 * List of pending ACKs to drained messages
 * -- ompi_crcp_coord_pml_drain_msg_ack_ref_t
 */
OMPI_MODULE_DECLSPEC extern opal_list_t drained_msg_ack_list;
/*
 * List of drained messages to match against
 * -- ompi_crcp_coord_pml_message_ref_t
 */
OMPI_MODULE_DECLSPEC extern opal_list_t drained_msg_list;
/*
 * List of known processes
 * -- ompi_crcp_coord_pml_bookmark_proc_t
 */
OMPI_MODULE_DECLSPEC extern opal_list_t ompi_crcp_coord_pml_procs;
/*
 * Message reference: the bookkeeping record kept for one message.
 * Being an opal_list_item_t, it can be linked into the message lists
 * declared in this header.
 */
struct ompi_crcp_coord_pml_message_ref_t {
/** This is a list object */
opal_list_item_t super;
/** Sequence Number of this message */
uint64_t msg_id;
/** Buffer for data */
void * buffer;
/** Count for data */
size_t count;
/** Datatype */
struct ompi_datatype_t * datatype;
/** Quick reference to the size of the datatype */
size_t ddt_size;
/** Message Tag */
int tag;
/** Peer rank to which it was sent/received, if known */
int rank;
/** Communicator pointer */
ompi_communicator_t* comm;
/** Message Mode */
mca_pml_base_send_mode_t mode;
/** Is this an asynchronous message? */
bool async;
/** Receive Request */
ompi_request_t *request;
/** Status */
ompi_status_public_t status;
/** Peer which we received from */
orte_process_name_t proc_name;
/* Sample movement of values (mirrored for send):
 *                   Recv()  iRecv()  irecv_init()  start()  req_complete()
 * * Pre:
 *   matched        = false   false    false         ---      ---
 *   done           = false   false    false         ---      true
 *   active         = true    true     false         true     false
 *   already_posted = true    true     true          ---      ---
 * * Post:
 *   matched        = false   false    false         ---      ---
 *   done           = true    false    false         false    true
 *   active         = false   true     false         true     false
 *   already_posted = true    true     true          ---      ---
 * * Drain
 *   already_posted = false -> true when posted irecv
 */
/** Has this message been matched by the peer?
 * true = peer confirmed the receipt of this message
 * false = unknown if peer has received this message or not
 */
bool matched;
/** Is this message complete WRT PML semantics?
 * true = message done on this side (send or receive)
 * false = message still in process (sending or receiving)
 */
bool done;
/** Is the message actively being worked on?
 * true = Message is !done, and is in the progress cycle
 * false = Message is !done and is *not* in the progress cycle ( [send/recv]_init requests)
 */
bool active;
/** Has this message been posted?
 * true = message was posted (Send or recv)
 * false = message was not yet posted.
 * Used when trying to figure out which messages the drain protocol needs to post, and
 * which messages have already been posted for it.
 */
bool already_posted;
/** Suggested Rank that this should be matched to
 * This is used when rank = ANY_SOURCE and we need to
 * drain it to a specific peer
 */
int suggested_rank;
};
typedef struct ompi_crcp_coord_pml_message_ref_t ompi_crcp_coord_pml_message_ref_t;
OBJ_CLASS_DECLARATION(ompi_crcp_coord_pml_message_ref_t);
/* Constructor / destructor declared for the class above. */
void ompi_crcp_coord_pml_message_ref_construct(ompi_crcp_coord_pml_message_ref_t *msg_ref);
void ompi_crcp_coord_pml_message_ref_destruct( ompi_crcp_coord_pml_message_ref_t *msg_ref);
/*
 * A structure for a single process
 * Contains:
 * - List of sent messages to this peer
 * - List of received messages from this peer
 * - Message totals
 */
struct ompi_crcp_coord_pml_bookmark_proc_t {
/** This is a list object */
opal_list_item_t super;
/** Name of peer */
orte_process_name_t proc_name;
/*
 * Just to control concurrent access to some of these counters,
 * and the PML
 */
opal_mutex_t lock;
opal_condition_t cond;
/** List of messages sent to this peer */
opal_list_t send_list; /**< pml_send */
opal_list_t isend_list; /**< pml_isend */
opal_list_t send_init_list; /**< pml_isend_init */
/** List of messages received from this peer */
opal_list_t recv_list; /**< pml_recv */
opal_list_t irecv_list; /**< pml_irecv */
opal_list_t recv_init_list; /**< pml_irecv_init */
/*
 * These are totals over all communicators provided for convenience.
 *
 * If we are P_n and this structure represents P_m then:
 * - total_* = P_n --> P_m
 * - matched_* = P_n <-- P_m
 * Where P_n --> P_m means:
 * the number of messages P_n knows that it has sent/recv to/from P_m
 * And P_n <-- P_m means:
 * the number of messages P_m told us that it has sent/recv to/from P_n
 *
 * How total* are used:
 * Send:
 * Before put on the wire: ++total
 * Recv:
 * Once completed: ++total
 */
/** Total Number of messages sent */
uint32_t total_send_msgs;
uint32_t total_isend_msgs;
uint32_t total_send_init_msgs;
uint32_t matched_send_msgs;
uint32_t matched_isend_msgs;
uint32_t matched_send_init_msgs;
/** Total Number of messages received */
uint32_t total_recv_msgs;
uint32_t total_irecv_msgs;
uint32_t total_recv_init_msgs;
uint32_t matched_recv_msgs;
uint32_t matched_irecv_msgs;
uint32_t matched_recv_init_msgs;
};
typedef struct ompi_crcp_coord_pml_bookmark_proc_t ompi_crcp_coord_pml_bookmark_proc_t;
OBJ_CLASS_DECLARATION(ompi_crcp_coord_pml_bookmark_proc_t);
/* Constructor / destructor declared for the class above. */
void ompi_crcp_coord_pml_bookmark_proc_construct(ompi_crcp_coord_pml_bookmark_proc_t *bkm_proc);
void ompi_crcp_coord_pml_bookmark_proc_destruct( ompi_crcp_coord_pml_bookmark_proc_t *bkm_proc);
/*
 * Coord-specific PML state: the generic CRCP PML state plus pointers
 * to another PML state, a peer bookmark and a message reference.
 * (How prev_ptr, peer_ref and msg_ref are populated is not visible in
 * this header -- see crcp_coord_pml.c.)
 */
struct ompi_crcp_coord_pml_state_t {
ompi_crcp_base_pml_state_t super;
ompi_crcp_base_pml_state_t *prev_ptr;
ompi_crcp_coord_pml_bookmark_proc_t *peer_ref;
ompi_crcp_coord_pml_message_ref_t *msg_ref;
};
typedef struct ompi_crcp_coord_pml_state_t ompi_crcp_coord_pml_state_t;
/***************
 * A bit of locking, it's good for you
 ***************/
/*
 * Any thread can call this when entering a critical section.
 * This is not strictly a critical section, but a protected
 * section of code while checkpointing is occurring.
 *
 *  bvar  block flag: while it is true the caller waits on cond
 *  ctr   counter of threads inside the section (incremented here)
 *  lock  opal_mutex_t protecting bvar and ctr
 *  cond  opal_condition_t used together with lock
 *
 * Every argument is parenthesized so that an expression argument cannot
 * change the meaning of the expansion. The expansion stays a plain
 * brace block so that existing call sites, with or without a trailing
 * ';', keep compiling.
 */
#if OPAL_ENABLE_FT == 1
#define OMPI_CRCP_COORD_CS_ENTER(bvar, ctr, lock, cond)   \
    {                                                     \
        opal_mutex_lock(&(lock));                         \
        while ((bvar)) {                                  \
            opal_condition_wait(&(cond), &(lock));        \
        }                                                 \
        (ctr)++;                                          \
        opal_mutex_unlock(&(lock));                       \
    }
#else
#define OMPI_CRCP_COORD_CS_ENTER(bvar, ctr, lock, cond) ;
#endif
/*
 * Any thread can call this when exiting a critical section.
 *
 *  ctr   counter of threads inside the section (decremented here)
 *  lock  opal_mutex_t protecting ctr
 *  cond  opal_condition_t used together with lock
 *
 * The condition is shared between threads blocked in
 * OMPI_CRCP_COORD_CS_ENTER and the thread blocked in
 * OMPI_CRCP_COORD_CS_RESTRICT. Waking a single arbitrary waiter could
 * pick an ENTER waiter, which goes straight back to sleep, and leave the
 * restricting thread waiting forever. Hence all waiters are woken and
 * each one re-evaluates its own predicate.
 */
#if OPAL_ENABLE_FT == 1
#define OMPI_CRCP_COORD_CS_EXIT(ctr, lock, cond)          \
    {                                                     \
        opal_mutex_lock(&(lock));                         \
        (ctr)--;                                          \
        opal_condition_broadcast(&(cond));                \
        opal_mutex_unlock(&(lock));                       \
    }
#else
#define OMPI_CRCP_COORD_CS_EXIT(ctr, lock, cond) ;
#endif
/*
 * Checkpoint protocol calls this to restrict processes
 * from entering a specific critical section.
 *
 *  bvar     block flag, set to true here
 *  ctr      counter of threads currently inside the section
 *  lock     opal_mutex_t protecting bvar and ctr
 *  cond     opal_condition_t used together with lock
 *  wait     when non-zero, block until ctr has dropped to zero
 *  dbg_str  label for the section; not used by the expansion
 *
 * ctr and wait are parenthesized: an argument such as "a || b" would
 * otherwise regroup with the "&&" of the loop condition.
 */
#if OPAL_ENABLE_FT == 1
#define OMPI_CRCP_COORD_CS_RESTRICT(bvar, ctr, lock, cond, wait, dbg_str) \
    {                                                     \
        opal_mutex_lock(&(lock));                         \
        (bvar) = true;                                    \
        while ((ctr) > 0 && (wait)) {                     \
            opal_condition_wait(&(cond), &(lock));        \
        }                                                 \
        opal_mutex_unlock(&(lock));                       \
    }
#else
#define OMPI_CRCP_COORD_CS_RESTRICT(bvar, ctr, lock, cond, wait, dbg_str) ;
#endif
/*
 * Checkpoint protocol calls this to release all the blocking
 * threads so that they may enter the critical section.
 *
 *  bvar  block flag, cleared here
 *  lock  opal_mutex_t protecting bvar
 *  cond  opal_condition_t the blocked threads wait on
 *
 * Several threads may be parked in OMPI_CRCP_COORD_CS_ENTER, and
 * entering the section does not signal the condition again. A single
 * signal would therefore release only one of them, so the condition is
 * broadcast to wake every waiter.
 */
#if OPAL_ENABLE_FT == 1
#define OMPI_CRCP_COORD_CS_RELEASE(bvar, lock, cond)      \
    {                                                     \
        opal_mutex_lock(&(lock));                         \
        (bvar) = false;                                   \
        opal_condition_broadcast(&(cond));                \
        opal_mutex_unlock(&(lock));                       \
    }
#else
#define OMPI_CRCP_COORD_CS_RELEASE(bvar, lock, cond) ;
#endif
/*
 * Some short cuts.
 *
 * Declarations of the lock / condition / counter / block-flag sets used
 * by the OMPI_CRCP_COORD_FT_{GLOBAL,SEND,RECV}_* macros below.
 * NOTE(review): these use OPAL_DECLSPEC while the list declarations
 * earlier in this header use OMPI_MODULE_DECLSPEC -- confirm which
 * decoration is intended for variables defined in this component.
 */
OPAL_DECLSPEC extern opal_mutex_t ompi_crcp_coord_ft_global_cs_lock;
OPAL_DECLSPEC extern opal_condition_t ompi_crcp_coord_ft_global_cs_cond;
OPAL_DECLSPEC extern int ompi_crcp_coord_ft_global_cs_count;
OPAL_DECLSPEC extern bool ompi_crcp_coord_ft_global_cs_block;
OPAL_DECLSPEC extern opal_mutex_t ompi_crcp_coord_ft_send_cs_lock;
OPAL_DECLSPEC extern opal_condition_t ompi_crcp_coord_ft_send_cs_cond;
OPAL_DECLSPEC extern int ompi_crcp_coord_ft_send_cs_count;
OPAL_DECLSPEC extern bool ompi_crcp_coord_ft_send_cs_block;
OPAL_DECLSPEC extern opal_mutex_t ompi_crcp_coord_ft_recv_cs_lock;
OPAL_DECLSPEC extern opal_condition_t ompi_crcp_coord_ft_recv_cs_cond;
OPAL_DECLSPEC extern int ompi_crcp_coord_ft_recv_cs_count;
OPAL_DECLSPEC extern bool ompi_crcp_coord_ft_recv_cs_block;
#if OPAL_ENABLE_FT == 1 && OPAL_ENABLE_FT_THREAD == 1
/* Global stuff */
/*
 * Wrappers that apply the generic OMPI_CRCP_COORD_CS_* macros to the
 * ompi_crcp_coord_ft_global_cs_* variables.
 * The CS_* wrappers already end in ';', so a call site that adds its own
 * ';' produces one extra empty statement.
 */
/* Construct the lock and condition; reset the counter and block flag. */
#define OMPI_CRCP_COORD_FT_GLOBAL_INIT() \
{ \
OBJ_CONSTRUCT(&ompi_crcp_coord_ft_global_cs_lock, opal_mutex_t); \
OBJ_CONSTRUCT(&ompi_crcp_coord_ft_global_cs_cond, opal_condition_t); \
ompi_crcp_coord_ft_global_cs_count = 0; \
ompi_crcp_coord_ft_global_cs_block = false; \
}
/* Destruct the lock and condition; reset the counter and block flag. */
#define OMPI_CRCP_COORD_FT_GLOBAL_FINALIZE() \
{ \
OBJ_DESTRUCT(&ompi_crcp_coord_ft_global_cs_lock); \
OBJ_DESTRUCT(&ompi_crcp_coord_ft_global_cs_cond); \
ompi_crcp_coord_ft_global_cs_count = 0; \
ompi_crcp_coord_ft_global_cs_block = false; \
}
#define OMPI_CRCP_COORD_FT_GLOBAL_CS_ENTER() \
OMPI_CRCP_COORD_CS_ENTER(ompi_crcp_coord_ft_global_cs_block, \
ompi_crcp_coord_ft_global_cs_count, \
ompi_crcp_coord_ft_global_cs_lock, \
ompi_crcp_coord_ft_global_cs_cond);
#define OMPI_CRCP_COORD_FT_GLOBAL_CS_EXIT() \
OMPI_CRCP_COORD_CS_EXIT(ompi_crcp_coord_ft_global_cs_count, \
ompi_crcp_coord_ft_global_cs_lock, \
ompi_crcp_coord_ft_global_cs_cond);
#define OMPI_CRCP_COORD_FT_GLOBAL_CS_RESTRICT(wait) \
OMPI_CRCP_COORD_CS_RESTRICT(ompi_crcp_coord_ft_global_cs_block, \
ompi_crcp_coord_ft_global_cs_count, \
ompi_crcp_coord_ft_global_cs_lock, \
ompi_crcp_coord_ft_global_cs_cond, \
wait, \
"CRCP GLOBAL");
#define OMPI_CRCP_COORD_FT_GLOBAL_CS_RELEASE() \
OMPI_CRCP_COORD_CS_RELEASE(ompi_crcp_coord_ft_global_cs_block, \
ompi_crcp_coord_ft_global_cs_lock, \
ompi_crcp_coord_ft_global_cs_cond);
/* Send stuff */
/*
 * Wrappers that apply the generic OMPI_CRCP_COORD_CS_* macros to the
 * ompi_crcp_coord_ft_send_cs_* variables.
 * The CS_* wrappers already end in ';', so a call site that adds its own
 * ';' produces one extra empty statement.
 */
/* Construct the lock and condition; reset the counter and block flag. */
#define OMPI_CRCP_COORD_FT_SEND_INIT() \
{ \
OBJ_CONSTRUCT(&ompi_crcp_coord_ft_send_cs_lock, opal_mutex_t); \
OBJ_CONSTRUCT(&ompi_crcp_coord_ft_send_cs_cond, opal_condition_t); \
ompi_crcp_coord_ft_send_cs_count = 0; \
ompi_crcp_coord_ft_send_cs_block = false; \
}
/* Destruct the lock and condition; reset the counter and block flag. */
#define OMPI_CRCP_COORD_FT_SEND_FINALIZE() \
{ \
OBJ_DESTRUCT(&ompi_crcp_coord_ft_send_cs_lock); \
OBJ_DESTRUCT(&ompi_crcp_coord_ft_send_cs_cond); \
ompi_crcp_coord_ft_send_cs_count = 0; \
ompi_crcp_coord_ft_send_cs_block = false; \
}
#define OMPI_CRCP_COORD_FT_SEND_CS_ENTER() \
OMPI_CRCP_COORD_CS_ENTER(ompi_crcp_coord_ft_send_cs_block, \
ompi_crcp_coord_ft_send_cs_count, \
ompi_crcp_coord_ft_send_cs_lock, \
ompi_crcp_coord_ft_send_cs_cond);
#define OMPI_CRCP_COORD_FT_SEND_CS_EXIT() \
OMPI_CRCP_COORD_CS_EXIT(ompi_crcp_coord_ft_send_cs_count, \
ompi_crcp_coord_ft_send_cs_lock, \
ompi_crcp_coord_ft_send_cs_cond);
#define OMPI_CRCP_COORD_FT_SEND_CS_RESTRICT(wait) \
OMPI_CRCP_COORD_CS_RESTRICT(ompi_crcp_coord_ft_send_cs_block, \
ompi_crcp_coord_ft_send_cs_count, \
ompi_crcp_coord_ft_send_cs_lock, \
ompi_crcp_coord_ft_send_cs_cond, \
wait, \
"CRCP SEND");
#define OMPI_CRCP_COORD_FT_SEND_CS_RELEASE() \
OMPI_CRCP_COORD_CS_RELEASE(ompi_crcp_coord_ft_send_cs_block, \
ompi_crcp_coord_ft_send_cs_lock, \
ompi_crcp_coord_ft_send_cs_cond);
/* Receive stuff */
/*
 * Wrappers that apply the generic OMPI_CRCP_COORD_CS_* macros to the
 * ompi_crcp_coord_ft_recv_cs_* variables.
 * The CS_* wrappers already end in ';', so a call site that adds its own
 * ';' produces one extra empty statement.
 */
/* Construct the lock and condition; reset the counter and block flag. */
#define OMPI_CRCP_COORD_FT_RECV_INIT() \
{ \
OBJ_CONSTRUCT(&ompi_crcp_coord_ft_recv_cs_lock, opal_mutex_t); \
OBJ_CONSTRUCT(&ompi_crcp_coord_ft_recv_cs_cond, opal_condition_t); \
ompi_crcp_coord_ft_recv_cs_count = 0; \
ompi_crcp_coord_ft_recv_cs_block = false; \
}
/* Destruct the lock and condition; reset the counter and block flag. */
#define OMPI_CRCP_COORD_FT_RECV_FINALIZE() \
{ \
OBJ_DESTRUCT(&ompi_crcp_coord_ft_recv_cs_lock); \
OBJ_DESTRUCT(&ompi_crcp_coord_ft_recv_cs_cond); \
ompi_crcp_coord_ft_recv_cs_count = 0; \
ompi_crcp_coord_ft_recv_cs_block = false; \
}
#define OMPI_CRCP_COORD_FT_RECV_CS_ENTER() \
OMPI_CRCP_COORD_CS_ENTER(ompi_crcp_coord_ft_recv_cs_block, \
ompi_crcp_coord_ft_recv_cs_count, \
ompi_crcp_coord_ft_recv_cs_lock, \
ompi_crcp_coord_ft_recv_cs_cond);
#define OMPI_CRCP_COORD_FT_RECV_CS_EXIT() \
OMPI_CRCP_COORD_CS_EXIT(ompi_crcp_coord_ft_recv_cs_count, \
ompi_crcp_coord_ft_recv_cs_lock, \
ompi_crcp_coord_ft_recv_cs_cond);
#define OMPI_CRCP_COORD_FT_RECV_CS_RESTRICT(wait) \
OMPI_CRCP_COORD_CS_RESTRICT(ompi_crcp_coord_ft_recv_cs_block, \
ompi_crcp_coord_ft_recv_cs_count, \
ompi_crcp_coord_ft_recv_cs_lock, \
ompi_crcp_coord_ft_recv_cs_cond, \
wait, \
"CRCP RECV");
#define OMPI_CRCP_COORD_FT_RECV_CS_RELEASE() \
OMPI_CRCP_COORD_CS_RELEASE(ompi_crcp_coord_ft_recv_cs_block, \
ompi_crcp_coord_ft_recv_cs_lock, \
ompi_crcp_coord_ft_recv_cs_cond);
#else
/*
 * Fault tolerance or its thread support is disabled: every wrapper
 * expands to a lone ';' (an empty statement).
 */
#define OMPI_CRCP_COORD_FT_GLOBAL_INIT() ;
#define OMPI_CRCP_COORD_FT_GLOBAL_FINALIZE() ;
#define OMPI_CRCP_COORD_FT_GLOBAL_CS_ENTER() ;
#define OMPI_CRCP_COORD_FT_GLOBAL_CS_EXIT() ;
#define OMPI_CRCP_COORD_FT_GLOBAL_CS_RESTRICT(wait) ;
#define OMPI_CRCP_COORD_FT_GLOBAL_CS_RELEASE() ;
#define OMPI_CRCP_COORD_FT_SEND_INIT() ;
#define OMPI_CRCP_COORD_FT_SEND_FINALIZE() ;
#define OMPI_CRCP_COORD_FT_SEND_CS_ENTER() ;
#define OMPI_CRCP_COORD_FT_SEND_CS_EXIT() ;
#define OMPI_CRCP_COORD_FT_SEND_CS_RESTRICT(wait) ;
#define OMPI_CRCP_COORD_FT_SEND_CS_RELEASE() ;
#define OMPI_CRCP_COORD_FT_RECV_INIT() ;
#define OMPI_CRCP_COORD_FT_RECV_FINALIZE() ;
#define OMPI_CRCP_COORD_FT_RECV_CS_ENTER() ;
#define OMPI_CRCP_COORD_FT_RECV_CS_EXIT() ;
#define OMPI_CRCP_COORD_FT_RECV_CS_RESTRICT(wait) ;
#define OMPI_CRCP_COORD_FT_RECV_CS_RELEASE() ;
#endif /* OPAL_ENABLE_FT == 1 && OPAL_ENABLE_FT_THREAD == 1 */
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* MCA_CRCP_COORD_PML_EXPORT_H */

Просмотреть файл

@ -0,0 +1,20 @@
-*- text -*-
#
# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English general help file for ORTE SnapC framework.
#

372
ompi/mca/crcp/crcp.h Обычный файл
Просмотреть файл

@ -0,0 +1,372 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Checkpoint/Restart Coordination Protocol (CRCP) Interface
*
*/
#ifndef MCA_CRCP_H
#define MCA_CRCP_H
#include "ompi_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h"
#include "opal/class/opal_object.h"
#include "ompi/datatype/datatype.h"
#include "ompi/request/request.h"
#include "ompi/class/ompi_free_list.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/pml/base/base.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
/**
* Query function for CRCP components.
* Returns a priority to rank it against other available CRCP components.
*
* @param priority Pointer through which the priority is reported
* (presumably an output parameter -- confirm against callers)
* @return Pointer to an ompi_crcp_base_module_1_0_0_t
*/
typedef struct ompi_crcp_base_module_1_0_0_t *
(*ompi_crcp_base_component_query_1_0_0_fn_t)
(int *priority);
/**
* Module initialization function.
* Takes no arguments.
* Returns OMPI_SUCCESS
*/
typedef int (*ompi_crcp_base_module_init_fn_t)
(void);
/**
* Module finalization function.
* Takes no arguments.
* Returns OMPI_SUCCESS
*/
typedef int (*ompi_crcp_base_module_finalize_fn_t)
(void);
/************************
* PML Wrapper hooks
* PML Wrapper is the CRCPW PML component
************************/
/**
* To allow us to work before and after a PML command
*
* NOTE(review): the meanings noted on the enumerators below are inferred
* from their names only; confirm against the wrapper PML implementation.
*/
enum ompi_crcp_base_pml_states_t {
OMPI_CRCP_PML_PRE, /* presumably: hook invoked before the wrapped PML call */
OMPI_CRCP_PML_POST, /* presumably: hook invoked after the wrapped PML call */
OMPI_CRCP_PML_SKIP, /* presumably: wrapped PML call should be skipped */
OMPI_CRCP_PML_DONE /* presumably: operation already completed by the hook */
};
typedef enum ompi_crcp_base_pml_states_t ompi_crcp_base_pml_states_t;
/**
* State object passed into and returned from every PML wrapper hook.
* It extends ompi_free_list_item_t (first member 'super').
*/
struct ompi_crcp_base_pml_state_t {
/** Base free-list item */
ompi_free_list_item_t super;
/** One of the ompi_crcp_base_pml_states_t values */
ompi_crcp_base_pml_states_t state;
/** Error code carried with the state */
int error_code;
/** PML component being wrapped */
mca_pml_base_component_t *wrapped_pml_component;
/** PML module being wrapped */
mca_pml_base_module_t *wrapped_pml_module;
};
typedef struct ompi_crcp_base_pml_state_t ompi_crcp_base_pml_state_t;
OBJ_CLASS_DECLARATION(ompi_crcp_base_pml_state_t);
/*
* PML wrapper hook signatures.
*
* Every hook takes an ompi_crcp_base_pml_state_t* as its last parameter and
* returns an ompi_crcp_base_pml_state_t*. The leading parameters appear to
* mirror the corresponding PML interface function (enable, add_comm, isend,
* ...) -- confirm against ompi/mca/pml/pml.h.
*/
typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_enable_fn_t)
(bool enable, ompi_crcp_base_pml_state_t* );
typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_add_comm_fn_t)
( struct ompi_communicator_t* comm , ompi_crcp_base_pml_state_t*);
typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_del_comm_fn_t)
( struct ompi_communicator_t* comm , ompi_crcp_base_pml_state_t*);
typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_add_procs_fn_t)
( struct ompi_proc_t **procs, size_t nprocs , ompi_crcp_base_pml_state_t*);
typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_del_procs_fn_t)
( struct ompi_proc_t **procs, size_t nprocs , ompi_crcp_base_pml_state_t*);
typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_progress_fn_t)
(ompi_crcp_base_pml_state_t*);
/* NOTE(review): the first parameter of the two probe hooks is named 'dst',
* whereas the receive hooks below use 'src' -- confirm which is intended. */
typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_iprobe_fn_t)
(int dst, int tag, struct ompi_communicator_t* comm, int *matched,
ompi_status_public_t* status, ompi_crcp_base_pml_state_t* );
typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_probe_fn_t)
( int dst, int tag, struct ompi_communicator_t* comm,
ompi_status_public_t* status, ompi_crcp_base_pml_state_t* );
/* Send-side hooks: buffer, count, datatype, destination, tag, send mode,
* communicator; the two non-blocking variants also take a request handle. */
typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_isend_init_fn_t)
( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag,
mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm,
struct ompi_request_t **request, ompi_crcp_base_pml_state_t* );
typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_isend_fn_t)
( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag,
mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm,
struct ompi_request_t **request, ompi_crcp_base_pml_state_t* );
typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_send_fn_t)
( void *buf, size_t count, ompi_datatype_t *datatype, int dst, int tag,
mca_pml_base_send_mode_t mode, struct ompi_communicator_t* comm,
ompi_crcp_base_pml_state_t* );
/* Receive-side hooks: buffer, count, datatype, source, tag, communicator;
* the non-blocking variants take a request handle, the blocking one a status. */
typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_irecv_init_fn_t)
( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag,
struct ompi_communicator_t* comm, struct ompi_request_t **request,
ompi_crcp_base_pml_state_t*);
typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_irecv_fn_t)
( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag,
struct ompi_communicator_t* comm, struct ompi_request_t **request,
ompi_crcp_base_pml_state_t* );
typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_recv_fn_t)
( void *buf, size_t count, ompi_datatype_t *datatype, int src, int tag,
struct ompi_communicator_t* comm, ompi_status_public_t* status,
ompi_crcp_base_pml_state_t*);
typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_dump_fn_t)
( struct ompi_communicator_t* comm, int verbose, ompi_crcp_base_pml_state_t* );
typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_start_fn_t)
( size_t count, ompi_request_t** requests, ompi_crcp_base_pml_state_t* );
/* Fault-tolerance event hook; 'state' is an int checkpoint state. */
typedef ompi_crcp_base_pml_state_t* (*ompi_crcp_base_pml_ft_event_fn_t)
(int state, ompi_crcp_base_pml_state_t*);
/* Request Interface */
/* Unlike the hooks above, this takes only the request and returns an int. */
typedef int (*ompi_crcp_base_request_complete_fn_t)
(struct ompi_request_t *request);
/************************
* BTL Wrapper hooks
* JJH: Wrapper BTL not currently implemented.
************************/
/**
* To allow us to work before and after a BTL command
*
* NOTE(review): the meanings noted on the enumerators below are inferred
* from their names only; the banner above this section states that the
* wrapper BTL is not currently implemented.
*/
enum ompi_crcp_base_btl_states_t {
OMPI_CRCP_BTL_PRE, /* presumably: hook invoked before the wrapped BTL call */
OMPI_CRCP_BTL_POST, /* presumably: hook invoked after the wrapped BTL call */
OMPI_CRCP_BTL_SKIP, /* presumably: wrapped BTL call should be skipped */
OMPI_CRCP_BTL_DONE /* presumably: operation already completed by the hook */
};
typedef enum ompi_crcp_base_btl_states_t ompi_crcp_base_btl_states_t;
/**
* State object passed into and returned from every BTL wrapper hook.
* It extends ompi_free_list_item_t (first member 'super') and, unlike the
* PML state, also carries a BTL descriptor pointer.
*/
struct ompi_crcp_base_btl_state_t {
/** Base free-list item */
ompi_free_list_item_t super;
/** One of the ompi_crcp_base_btl_states_t values */
ompi_crcp_base_btl_states_t state;
/** Error code carried with the state */
int error_code;
/** BTL descriptor associated with this state */
mca_btl_base_descriptor_t* des;
/** BTL component being wrapped */
mca_btl_base_component_t *wrapped_btl_component;
/** BTL module being wrapped */
mca_btl_base_module_t *wrapped_btl_module;
};
typedef struct ompi_crcp_base_btl_state_t ompi_crcp_base_btl_state_t;
OBJ_CLASS_DECLARATION(ompi_crcp_base_btl_state_t);
/*
* BTL wrapper hook signatures.
*
* Every hook takes an ompi_crcp_base_btl_state_t* as its last parameter and
* returns an ompi_crcp_base_btl_state_t*. The leading parameters appear to
* mirror the corresponding BTL module function -- confirm against
* ompi/mca/btl/btl.h.
*
* NOTE(review): these typedefs use the mca_crcp_ prefix while the PML hook
* typedefs in this header use ompi_crcp_.
*/
typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_add_procs_fn_t)
( struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t** procs,
struct mca_btl_base_endpoint_t** endpoints,
struct ompi_bitmap_t* reachable,
ompi_crcp_base_btl_state_t* );
typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_del_procs_fn_t)
( struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t** procs,
struct mca_btl_base_endpoint_t**,
ompi_crcp_base_btl_state_t*);
typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_register_fn_t)
( struct mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_module_recv_cb_fn_t cbfunc,
void* cbdata,
ompi_crcp_base_btl_state_t*);
typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_finalize_fn_t)
( struct mca_btl_base_module_t* btl,
ompi_crcp_base_btl_state_t*);
typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_alloc_fn_t)
( struct mca_btl_base_module_t* btl,
size_t size,
ompi_crcp_base_btl_state_t*);
typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_free_fn_t)
( struct mca_btl_base_module_t* btl,
mca_btl_base_descriptor_t* descriptor,
ompi_crcp_base_btl_state_t*);
/* Shared by both the prepare_src and prepare_dst slots of the module struct. */
typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_prepare_fn_t)
( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
mca_mpool_base_registration_t* registration,
struct ompi_convertor_t* convertor,
size_t reserve,
size_t* size,
ompi_crcp_base_btl_state_t*);
typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_send_fn_t)
( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_btl_base_descriptor_t* descriptor,
mca_btl_base_tag_t tag,
ompi_crcp_base_btl_state_t*);
typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_put_fn_t)
( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_btl_base_descriptor_t* descriptor,
ompi_crcp_base_btl_state_t*);
typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_get_fn_t)
( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_btl_base_descriptor_t* descriptor,
ompi_crcp_base_btl_state_t*);
typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_dump_fn_t)
( struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
int verbose,
ompi_crcp_base_btl_state_t*);
/* Fault-tolerance event hook; 'state' is an int checkpoint state. */
typedef ompi_crcp_base_btl_state_t* (*mca_crcp_base_btl_module_ft_event_fn_t)
(int state,
ompi_crcp_base_btl_state_t*);
/**
* Structure for CRCP v1.0.0 components.
*
* Holds the MCA base component and data blocks, the component's query
* function, and its verbosity, output handle and default priority.
*/
struct ompi_crcp_base_component_1_0_0_t {
/** MCA base component */
mca_base_component_t crcp_version;
/** MCA base data */
mca_base_component_data_1_0_0_t crcp_data;
/** Component Query for Selection Function */
ompi_crcp_base_component_query_1_0_0_fn_t crcp_query;
/** Verbosity Level */
int verbose;
/** Output Handle for opal_output */
int output_handle;
/** Default Priority */
int priority;
};
/* Versioned name and the unversioned alias refer to the same struct. */
typedef struct ompi_crcp_base_component_1_0_0_t ompi_crcp_base_component_1_0_0_t;
typedef struct ompi_crcp_base_component_1_0_0_t ompi_crcp_base_component_t;
/**
* Structure for CRCP v1.0.0 modules
*
* A table of function pointers: module init/finalize, one hook per wrapped
* PML operation, the request-complete callback, and one hook per wrapped
* BTL operation.
*/
struct ompi_crcp_base_module_1_0_0_t {
/** Initialization Function */
ompi_crcp_base_module_init_fn_t crcp_init;
/** Finalization Function */
ompi_crcp_base_module_finalize_fn_t crcp_finalize;
/* PML Wrapper Functions ****************************/
ompi_crcp_base_pml_enable_fn_t pml_enable;
ompi_crcp_base_pml_add_comm_fn_t pml_add_comm;
ompi_crcp_base_pml_del_comm_fn_t pml_del_comm;
ompi_crcp_base_pml_add_procs_fn_t pml_add_procs;
ompi_crcp_base_pml_del_procs_fn_t pml_del_procs;
ompi_crcp_base_pml_progress_fn_t pml_progress;
ompi_crcp_base_pml_iprobe_fn_t pml_iprobe;
ompi_crcp_base_pml_probe_fn_t pml_probe;
ompi_crcp_base_pml_isend_init_fn_t pml_isend_init;
ompi_crcp_base_pml_isend_fn_t pml_isend;
ompi_crcp_base_pml_send_fn_t pml_send;
ompi_crcp_base_pml_irecv_init_fn_t pml_irecv_init;
ompi_crcp_base_pml_irecv_fn_t pml_irecv;
ompi_crcp_base_pml_recv_fn_t pml_recv;
ompi_crcp_base_pml_dump_fn_t pml_dump;
ompi_crcp_base_pml_start_fn_t pml_start;
ompi_crcp_base_pml_ft_event_fn_t pml_ft_event;
/* Request complete Function ****************************/
/* May be NULL: OMPI_CRCP_REQUEST_COMPLETE checks for NULL before calling. */
ompi_crcp_base_request_complete_fn_t request_complete;
/* BTL Wrapper Functions ****************************/
mca_crcp_base_btl_module_add_procs_fn_t btl_add_procs;
mca_crcp_base_btl_module_del_procs_fn_t btl_del_procs;
mca_crcp_base_btl_module_register_fn_t btl_register;
mca_crcp_base_btl_module_finalize_fn_t btl_finalize;
mca_crcp_base_btl_module_alloc_fn_t btl_alloc;
mca_crcp_base_btl_module_free_fn_t btl_free;
/* prepare_src and prepare_dst share one function-pointer type */
mca_crcp_base_btl_module_prepare_fn_t btl_prepare_src;
mca_crcp_base_btl_module_prepare_fn_t btl_prepare_dst;
mca_crcp_base_btl_module_send_fn_t btl_send;
mca_crcp_base_btl_module_put_fn_t btl_put;
mca_crcp_base_btl_module_get_fn_t btl_get;
mca_crcp_base_btl_module_dump_fn_t btl_dump;
mca_crcp_base_btl_module_ft_event_fn_t btl_ft_event;
};
/* Versioned name and the unversioned alias refer to the same struct. */
typedef struct ompi_crcp_base_module_1_0_0_t ompi_crcp_base_module_1_0_0_t;
typedef struct ompi_crcp_base_module_1_0_0_t ompi_crcp_base_module_t;
/* Global module instance, defined elsewhere; the
* OMPI_CRCP_REQUEST_COMPLETE macro dispatches through it. */
OMPI_DECLSPEC extern ompi_crcp_base_module_t ompi_crcp;
/**
* Macro for use in components that are of type CRCP v1.0.0
*
* Expands to a comma-separated initializer fragment: MCA_BASE_VERSION_1_0_0
* followed by the framework name "crcp" and the version triple 1, 0, 0.
*/
#define OMPI_CRCP_BASE_VERSION_1_0_0 \
/* CRCP v1.0 is chained to MCA v1.0 */ \
MCA_BASE_VERSION_1_0_0, \
/* CRCP v1.0 */ \
"crcp", 1, 0, 0
/**
* Macro to call the CRCP Request Complete function
*
* When OPAL_ENABLE_FT == 1 it calls ompi_crcp.request_complete(req) if that
* pointer is non-NULL; the call's int return value is discarded. Otherwise
* it expands to a lone ';'.
*
* NOTE: the FT-enabled expansion is a bare 'if' statement, so do not use
* this macro as the unbraced body of an if/else -- a following 'else'
* would bind to the macro's 'if' or fail to compile.
*/
#if OPAL_ENABLE_FT == 1
#define OMPI_CRCP_REQUEST_COMPLETE(req) \
if( NULL != ompi_crcp.request_complete) { \
ompi_crcp.request_complete(req); \
}
#else
#define OMPI_CRCP_REQUEST_COMPLETE(req) ;
#endif
#endif /* MCA_CRCP_H */

93
ompi/mca/crcp/ompi_crcp.7 Обычный файл
Просмотреть файл

@ -0,0 +1,93 @@
.\"
.\" Man page for OMPI's CRCP Functionality
.\"
.\" .TH name section center-footer left-footer center-header
.TH OMPI_CRCP 7 "March 2007" "Open MPI" "OPEN MPI CRCP OVERVIEW"
.\" **************************
.\" Name Section
.\" **************************
.SH NAME
.
Open MPI MCA Checkpoint/Restart Coordination Protocol (CRCP) Framework \- Overview of Open MPI's CRCP
framework, and selected modules.
.
.\" **************************
.\" Description Section
.\" **************************
.SH DESCRIPTION
.
.PP
The CRCP Framework is used by Open MPI for the encapsulation of various
Checkpoint/Restart Coordination Protocols (e.g., Coordinated, Uncoordinated,
Message/Communication Induced, ...).
.
.\" **************************
.\" General Process Requirements Section
.\" **************************
.SH GENERAL PROCESS REQUIREMENTS
.PP
In order for a process to use the Open MPI CRCP components it must adhere to a
few programmatic requirements.
.PP
First, the program must call \fIMPI_INIT\fR early in its execution.
.PP
The program must call \fIMPI_FINALIZE\fR before termination.
.PP
A user may checkpoint a parallel application with the ompi-checkpoint(1)
command, and later restart it with the ompi-restart(1) command.
.
.\" **********************************
.\" Available Components Section
.\" **********************************
.SH AVAILABLE COMPONENTS
.PP
Open MPI currently ships with one CRCP component: \fIcoord\fR.
.
.PP
The following MCA parameters apply to all components:
.
.TP 4
crcp_base_verbose
Set the verbosity level for all components. Default is 0, or silent except on error.
.
.\" Coord Component
.\" ******************
.SS coord CRCP Component
.PP
The \fIcoord\fR component implements a Coordinated Checkpoint/Restart
Coordination Protocol similar to the one implemented in LAM/MPI.
.
.PP
The \fIcoord\fR component has the following MCA parameters:
.
.TP 4
crcp_coord_priority
The component's priority to use when selecting the most appropriate component
for a run.
.
.TP 4
crcp_coord_verbose
Set the verbosity level for this component. Default is 0, or silent except on
error.
.
.\" Special 'none' option
.\" ************************
.SS none CRCP Component
.PP
The \fInone\fP component simply selects no CRCP component. All of the CRCP
function calls return immediately with ORTE_SUCCESS.
.
.PP
This component is the last component to be selected by default. This means that if
another component is available, and the \fInone\fP component was not explicitly
requested, then Open MPI will attempt to activate all of the available components
before falling back to this component.
.
.\" **************************
.\" See Also Section
.\" **************************
.
.SH SEE ALSO
ompi-checkpoint(1), ompi-restart(1), opal-checkpoint(1), opal-restart(1),
orte_snapc(7), orte_filem(7), opal_crs(7)
.

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -98,8 +98,8 @@ mca_io_base_component_1_0_0_t mca_io_romio_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* Additional number of bytes required for this component's

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -130,6 +130,12 @@ int mca_mpool_gm_retain(
);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OMPI_SUCCESS or failure status
*/
int mca_mpool_gm_ft_event(int state);
void* mca_common_gm_segment_alloc(
struct mca_mpool_base_module_t* module,
@ -141,18 +147,3 @@ void* mca_common_gm_segment_alloc(
}
#endif
#endif

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -55,8 +55,8 @@ mca_mpool_gm_component_t mca_mpool_gm_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
mca_mpool_gm_init

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -44,6 +44,7 @@ void mca_mpool_gm_module_init(mca_mpool_gm_module_t* mpool)
mpool->super.mpool_retain = mca_mpool_gm_retain;
mpool->super.mpool_release = mca_mpool_gm_release;
mpool->super.mpool_finalize = NULL;
mpool->super.mpool_ft_event = mca_mpool_gm_ft_event;
mpool->super.rcache =
mca_rcache_base_module_create(mca_mpool_gm_component.rcache_name);
mpool->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM;
@ -218,26 +219,22 @@ int mca_mpool_gm_retain(
return OMPI_SUCCESS;
}
/**
 * Fault tolerance event notification for the gm mpool.
 *
 * Placeholder: no checkpoint state requires any work yet, so every state,
 * recognized or not, results in a successful return.
 *
 * @param state Checkpoint state, compared against the OPAL_CRS_* values
 * @return OMPI_SUCCESS in all cases
 */
int mca_mpool_gm_ft_event(int state)
{
    switch (state) {
    case OPAL_CRS_CHECKPOINT:
    case OPAL_CRS_CONTINUE:
    case OPAL_CRS_RESTART:
    case OPAL_CRS_TERM:
        /* nothing to do for any of the known states */
        break;
    default:
        /* unknown states are ignored as well */
        break;
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,5 +1,5 @@
/**
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -26,6 +26,9 @@
#include "ompi/class/ompi_free_list.h"
#include "ompi/class/ompi_pointer_array.h"
#include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h"
#define MCA_MPOOL_FLAGS_CACHE_BYPASS 0x1
#define MCA_MPOOL_FLAGS_PERSIST 0x2
#define MCA_MPOOL_FLAGS_MPI_ALLOC_MEM 0x4
@ -130,6 +133,14 @@ typedef void* (*mca_mpool_base_module_address_fn_t)(struct mca_mpool_base_module
typedef void (*mca_mpool_base_module_finalize_fn_t)(struct mca_mpool_base_module_t*);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OMPI_SUCCESS or failure status
*/
typedef int (*mca_mpool_base_module_ft_event_fn_t)(int state);
/**
* mpool component descriptor. Contains component version information
* and open/close/init functions.
@ -166,6 +177,7 @@ struct mca_mpool_base_module_t {
mca_mpool_base_module_release_fn_t mpool_release; /**< release a registration from the cache */
mca_mpool_base_module_release_memory_fn_t mpool_release_memory; /**< release memor region from the cache */
mca_mpool_base_module_finalize_fn_t mpool_finalize; /**< finalize */
mca_mpool_base_module_ft_event_fn_t mpool_ft_event; /**< ft_event */
struct mca_rcache_base_module_t *rcache; /* the rcache associated with this mpool */
uint32_t flags; /**< mpool flags */
};

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -144,23 +144,15 @@ int mca_mpool_mvapi_retain(
);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OMPI_SUCCESS or failure status
*/
int mca_mpool_mvapi_ft_event(int state);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -53,8 +53,8 @@ mca_mpool_mvapi_component_t mca_mpool_mvapi_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
mca_mpool_mvapi_init

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -52,6 +52,7 @@ void mca_mpool_mvapi_module_init(mca_mpool_mvapi_module_t* mpool)
mpool->super.mpool_retain = mca_mpool_mvapi_retain;
mpool->super.mpool_release = mca_mpool_mvapi_release;
mpool->super.mpool_finalize = NULL;
mpool->super.mpool_ft_event = mca_mpool_mvapi_ft_event;
mpool->super.rcache =
mca_rcache_base_module_create(mca_mpool_mvapi_component.rcache_name);
mpool->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM;
@ -253,3 +254,22 @@ int mca_mpool_mvapi_retain(struct mca_mpool_base_module_t* mpool,
return OMPI_SUCCESS;
}
/**
 * Fault tolerance event notification for the mvapi mpool.
 *
 * Placeholder: no checkpoint state requires any work yet, so every state,
 * recognized or not, results in a successful return.
 *
 * @param state Checkpoint state, compared against the OPAL_CRS_* values
 * @return OMPI_SUCCESS in all cases
 */
int mca_mpool_mvapi_ft_event(int state)
{
    switch (state) {
    case OPAL_CRS_CHECKPOINT:
    case OPAL_CRS_CONTINUE:
    case OPAL_CRS_RESTART:
    case OPAL_CRS_TERM:
        /* nothing to do for any of the known states */
        break;
    default:
        /* unknown states are ignored as well */
        break;
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -142,24 +142,15 @@ int mca_mpool_openib_retain(
);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OMPI_SUCCESS or failure status
*/
int mca_mpool_openib_ft_event(int state);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -55,8 +55,8 @@ mca_mpool_openib_component_t mca_mpool_openib_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
mca_mpool_openib_init

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -47,6 +47,7 @@ void mca_mpool_openib_module_init(mca_mpool_openib_module_t* mpool)
mpool->super.mpool_retain = mca_mpool_openib_retain;
mpool->super.mpool_release = mca_mpool_openib_release;
mpool->super.mpool_finalize = NULL;
mpool->super.mpool_ft_event = mca_mpool_openib_ft_event;
mpool->super.rcache =
mca_rcache_base_module_create(mca_mpool_openib_component.rcache_name);
mpool->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM;
@ -227,3 +228,23 @@ int mca_mpool_openib_retain(struct mca_mpool_base_module_t* mpool,
OPAL_THREAD_ADD32(&registration->ref_count, 1);
return OMPI_SUCCESS;
}
/**
 * Fault tolerance event notification for the openib mpool.
 *
 * Placeholder: no checkpoint state requires any work yet, so every state,
 * recognized or not, results in a successful return.
 *
 * @param state Checkpoint state, compared against the OPAL_CRS_* values
 * @return OMPI_SUCCESS in all cases
 */
int mca_mpool_openib_ft_event(int state)
{
    switch (state) {
    case OPAL_CRS_CHECKPOINT:
    case OPAL_CRS_CONTINUE:
    case OPAL_CRS_RESTART:
    case OPAL_CRS_TERM:
        /* nothing to do for any of the known states */
        break;
    default:
        /* unknown states are ignored as well */
        break;
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -86,24 +86,15 @@ void mca_mpool_sm_free(
void * addr,
mca_mpool_base_registration_t* registration);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OMPI_SUCCESS or failure status
*/
int mca_mpool_sm_ft_event(int state);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
@ -60,8 +60,8 @@ mca_mpool_sm_component_t mca_mpool_sm_component = {
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
mca_mpool_sm_init

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
@ -38,6 +38,7 @@ void mca_mpool_sm_module_init(mca_mpool_sm_module_t* mpool)
mpool->super.mpool_deregister = NULL;
mpool->super.mpool_release_memory = NULL;
mpool->super.mpool_finalize = NULL;
mpool->super.mpool_ft_event = mca_mpool_sm_ft_event;
mpool->super.flags = 0;
}
@ -85,3 +86,23 @@ void mca_mpool_sm_free(mca_mpool_base_module_t* mpool, void * addr,
mca_mpool_sm_module_t* mpool_sm = (mca_mpool_sm_module_t*)mpool;
mpool_sm->sm_allocator->alc_free(mpool_sm->sm_allocator, addr);
}
/**
 * Fault tolerance event notification for the sm mpool.
 *
 * Placeholder: no checkpoint state requires any work yet, so every state,
 * recognized or not, results in a successful return.
 *
 * @param state Checkpoint state, compared against the OPAL_CRS_* values
 * @return OMPI_SUCCESS in all cases
 */
int mca_mpool_sm_ft_event(int state)
{
    switch (state) {
    case OPAL_CRS_CHECKPOINT:
    case OPAL_CRS_CONTINUE:
    case OPAL_CRS_RESTART:
    case OPAL_CRS_TERM:
        /* nothing to do for any of the known states */
        break;
    default:
        /* unknown states are ignored as well */
        break;
    }

    return OMPI_SUCCESS;
}

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше