Replace the old coordinated component ('coord') with a much more refined version ('bkmrk').
The new component fixes a number of problems with the old component. The core algorithm is the same, but by changing the data structures a bit we have improved performance and memory utilization. There are still a couple of corner cases that need some work. However, I did not want to delay bringing this into the trunk (and v1.3 branch) for much longer. This commit was SVN r19537.
Этот коммит содержится в:
родитель
1ad9d0459e
Коммит
36185ad964
@ -32,10 +32,10 @@ snapc=full
|
||||
#
|
||||
# OMPI Parameters
|
||||
# - Wrap the PML
|
||||
# - Use the LAM/MPI-like Coordinated Checkpoint/Restart Coordination Protocol
|
||||
# - Use a Bookmark Exchange Fully Coordinated Checkpoint/Restart Coordination Protocol
|
||||
#
|
||||
pml_wrapper=crcpw
|
||||
crcp=coord
|
||||
crcp=bkmrk
|
||||
|
||||
#
|
||||
# Temporary fix to force the event engine to use poll to behave well with BLCR
|
||||
|
@ -14,32 +14,32 @@
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
dist_pkgdata_DATA = help-ompi-crcp-coord.txt
|
||||
dist_pkgdata_DATA = help-ompi-crcp-bkmrk.txt
|
||||
|
||||
sources = \
|
||||
crcp_coord.h \
|
||||
crcp_coord_pml.h \
|
||||
crcp_coord_component.c \
|
||||
crcp_coord_module.c \
|
||||
crcp_coord_pml.c
|
||||
crcp_bkmrk.h \
|
||||
crcp_bkmrk_pml.h \
|
||||
crcp_bkmrk_component.c \
|
||||
crcp_bkmrk_module.c \
|
||||
crcp_bkmrk_pml.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if OMPI_BUILD_crcp_coord_DSO
|
||||
if OMPI_BUILD_crcp_bkmrk_DSO
|
||||
component_noinst =
|
||||
component_install = mca_crcp_coord.la
|
||||
component_install = mca_crcp_bkmrk.la
|
||||
else
|
||||
component_noinst = libmca_crcp_coord.la
|
||||
component_noinst = libmca_crcp_bkmrk.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_crcp_coord_la_SOURCES = $(sources)
|
||||
mca_crcp_coord_la_LDFLAGS = -module -avoid-version
|
||||
mca_crcp_bkmrk_la_SOURCES = $(sources)
|
||||
mca_crcp_bkmrk_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_crcp_coord_la_SOURCES = $(sources)
|
||||
libmca_crcp_coord_la_LDFLAGS = -module -avoid-version
|
||||
libmca_crcp_bkmrk_la_SOURCES = $(sources)
|
||||
libmca_crcp_bkmrk_la_LDFLAGS = -module -avoid-version
|
@ -15,9 +15,9 @@
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_crcp_coord_CONFIG([action-if-found], [action-if-not-found])
|
||||
# MCA_crcp_bkmrk_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_crcp_coord_CONFIG],[
|
||||
AC_DEFUN([MCA_crcp_bkmrk_CONFIG],[
|
||||
# If we don't want FT, don't compile this component
|
||||
AS_IF([test "$ompi_want_ft" = "1"],
|
||||
[$1],
|
@ -15,5 +15,5 @@
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
PARAM_INIT_FILE=crcp_coord_component.c
|
||||
PARAM_INIT_FILE=crcp_bkmrk_component.c
|
||||
PARAM_CONFIG_FILES="Makefile"
|
@ -17,12 +17,12 @@
|
||||
/**
|
||||
* @file
|
||||
*
|
||||
* Coord CRCP component
|
||||
* Hoke CRCP component
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MCA_CRCP_COORD_EXPORT_H
|
||||
#define MCA_CRCP_COORD_EXPORT_H
|
||||
#ifndef MCA_CRCP_HOKE_EXPORT_H
|
||||
#define MCA_CRCP_HOKE_EXPORT_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
@ -42,11 +42,11 @@ extern "C" {
|
||||
/*
|
||||
* Local Component structures
|
||||
*/
|
||||
struct ompi_crcp_coord_component_t {
|
||||
struct ompi_crcp_bkmrk_component_t {
|
||||
ompi_crcp_base_component_t super; /** Base CRCP component */
|
||||
};
|
||||
typedef struct ompi_crcp_coord_component_t ompi_crcp_coord_component_t;
|
||||
OMPI_MODULE_DECLSPEC extern ompi_crcp_coord_component_t mca_crcp_coord_component;
|
||||
typedef struct ompi_crcp_bkmrk_component_t ompi_crcp_bkmrk_component_t;
|
||||
OMPI_MODULE_DECLSPEC extern ompi_crcp_bkmrk_component_t mca_crcp_bkmrk_component;
|
||||
|
||||
/*
|
||||
* Local variables
|
||||
@ -56,15 +56,15 @@ extern "C" {
|
||||
/*
|
||||
* Module functions
|
||||
*/
|
||||
int ompi_crcp_coord_component_query(mca_base_module_t **module, int *priority);
|
||||
int ompi_crcp_coord_module_init(void);
|
||||
int ompi_crcp_coord_module_finalize(void);
|
||||
int ompi_crcp_bkmrk_component_query(mca_base_module_t **module, int *priority);
|
||||
int ompi_crcp_bkmrk_module_init(void);
|
||||
int ompi_crcp_bkmrk_module_finalize(void);
|
||||
|
||||
int ompi_crcp_coord_pml_init(void);
|
||||
int ompi_crcp_coord_pml_finalize(void);
|
||||
int ompi_crcp_bkmrk_pml_init(void);
|
||||
int ompi_crcp_bkmrk_pml_finalize(void);
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* MCA_CRCP_COORD_EXPORT_H */
|
||||
#endif /* MCA_CRCP_HOKE_EXPORT_H */
|
@ -23,7 +23,7 @@
|
||||
|
||||
#include "opal/runtime/opal_cr.h"
|
||||
#include "opal/event/event.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/output.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/opal_environ.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
@ -35,14 +35,14 @@
|
||||
#include "ompi/mca/crcp/crcp.h"
|
||||
#include "ompi/mca/crcp/base/base.h"
|
||||
|
||||
#include "crcp_coord.h"
|
||||
#include "crcp_coord_btl.h"
|
||||
#include "crcp_bkmrk.h"
|
||||
#include "crcp_bkmrk_btl.h"
|
||||
|
||||
int ompi_crcp_coord_btl_init(void) {
|
||||
int ompi_crcp_bkmrk_btl_init(void) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_crcp_coord_btl_finalize(void) {
|
||||
int ompi_crcp_bkmrk_btl_finalize(void) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -17,12 +17,12 @@
|
||||
/**
|
||||
* @file
|
||||
*
|
||||
* Coord CRCP component
|
||||
* Hoke CRCP component
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MCA_CRCP_COORD_BTL_EXPORT_H
|
||||
#define MCA_CRCP_COORD_BTL_EXPORT_H
|
||||
#ifndef MCA_CRCP_HOKE_BTL_EXPORT_H
|
||||
#define MCA_CRCP_HOKE_BTL_EXPORT_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
@ -33,7 +33,7 @@
|
||||
#include "opal/threads/mutex.h"
|
||||
#include "opal/threads/condition.h"
|
||||
|
||||
#include "ompi/mca/crcp/coord/crcp_coord.h"
|
||||
#include "ompi/mca/crcp/bkmrk/crcp_bkmrk.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
@ -129,4 +129,4 @@ extern "C" {
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* MCA_CRCP_COORD_BTL_EXPORT_H */
|
||||
#endif /* MCA_CRCP_HOKE_BTL_EXPORT_H */
|
@ -16,31 +16,31 @@
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "opal/util/output.h"
|
||||
|
||||
#include "ompi/mca/crcp/crcp.h"
|
||||
#include "ompi/mca/crcp/base/base.h"
|
||||
#include "crcp_coord.h"
|
||||
#include "crcp_bkmrk.h"
|
||||
|
||||
/*
|
||||
* Public string for version number
|
||||
*/
|
||||
const char *ompi_crcp_coord_component_version_string =
|
||||
"OMPI CRCP coord MCA component version " OMPI_VERSION;
|
||||
const char *ompi_crcp_bkmrk_component_version_string =
|
||||
"OMPI CRCP bkmrk MCA component version " OMPI_VERSION;
|
||||
|
||||
int timing_enabled = 0;
|
||||
|
||||
/*
|
||||
* Local functionality
|
||||
*/
|
||||
static int crcp_coord_open(void);
|
||||
static int crcp_coord_close(void);
|
||||
static int crcp_bkmrk_open(void);
|
||||
static int crcp_bkmrk_close(void);
|
||||
|
||||
/*
|
||||
* Instantiate the public struct with all of our public information
|
||||
* and pointer to our public functions in it
|
||||
*/
|
||||
ompi_crcp_coord_component_t mca_crcp_coord_component = {
|
||||
ompi_crcp_bkmrk_component_t mca_crcp_bkmrk_component = {
|
||||
/* First do the base component stuff */
|
||||
{
|
||||
/* Handle the general mca_component_t struct containing
|
||||
@ -48,17 +48,16 @@ ompi_crcp_coord_component_t mca_crcp_coord_component = {
|
||||
*/
|
||||
{
|
||||
OMPI_CRCP_BASE_VERSION_2_0_0,
|
||||
|
||||
/* Component name and version */
|
||||
"coord",
|
||||
"bkmrk",
|
||||
OMPI_MAJOR_VERSION,
|
||||
OMPI_MINOR_VERSION,
|
||||
OMPI_RELEASE_VERSION,
|
||||
|
||||
/* Component open and close functions */
|
||||
crcp_coord_open,
|
||||
crcp_coord_close,
|
||||
ompi_crcp_coord_component_query
|
||||
crcp_bkmrk_open,
|
||||
crcp_bkmrk_close,
|
||||
ompi_crcp_bkmrk_component_query
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
@ -70,11 +69,11 @@ ompi_crcp_coord_component_t mca_crcp_coord_component = {
|
||||
/* opal_output handler */
|
||||
-1,
|
||||
/* Default priority */
|
||||
10
|
||||
20
|
||||
}
|
||||
};
|
||||
|
||||
static int crcp_coord_open(void)
|
||||
static int crcp_bkmrk_open(void)
|
||||
{
|
||||
int val;
|
||||
|
||||
@ -82,31 +81,31 @@ static int crcp_coord_open(void)
|
||||
* This should be the last component to ever get used since
|
||||
* it doesn't do anything.
|
||||
*/
|
||||
mca_base_param_reg_int(&mca_crcp_coord_component.super.base_version,
|
||||
mca_base_param_reg_int(&mca_crcp_bkmrk_component.super.base_version,
|
||||
"priority",
|
||||
"Priority of the CRCP coord component",
|
||||
"Priority of the CRCP bkmrk component",
|
||||
false, false,
|
||||
mca_crcp_coord_component.super.priority,
|
||||
&mca_crcp_coord_component.super.priority);
|
||||
mca_crcp_bkmrk_component.super.priority,
|
||||
&mca_crcp_bkmrk_component.super.priority);
|
||||
|
||||
mca_base_param_reg_int(&mca_crcp_coord_component.super.base_version,
|
||||
mca_base_param_reg_int(&mca_crcp_bkmrk_component.super.base_version,
|
||||
"verbose",
|
||||
"Verbose level for the CRCP coord component",
|
||||
"Verbose level for the CRCP bkmrk component",
|
||||
false, false,
|
||||
mca_crcp_coord_component.super.verbose,
|
||||
&mca_crcp_coord_component.super.verbose);
|
||||
mca_crcp_bkmrk_component.super.verbose,
|
||||
&mca_crcp_bkmrk_component.super.verbose);
|
||||
/* If there is a custom verbose level for this component then use it
|
||||
* otherwise take our parents level and output channel
|
||||
*/
|
||||
if ( 0 != mca_crcp_coord_component.super.verbose) {
|
||||
mca_crcp_coord_component.super.output_handle = opal_output_open(NULL);
|
||||
opal_output_set_verbosity(mca_crcp_coord_component.super.output_handle,
|
||||
mca_crcp_coord_component.super.verbose);
|
||||
if ( 0 != mca_crcp_bkmrk_component.super.verbose) {
|
||||
mca_crcp_bkmrk_component.super.output_handle = opal_output_open(NULL);
|
||||
opal_output_set_verbosity(mca_crcp_bkmrk_component.super.output_handle,
|
||||
mca_crcp_bkmrk_component.super.verbose);
|
||||
} else {
|
||||
mca_crcp_coord_component.super.output_handle = ompi_crcp_base_output;
|
||||
mca_crcp_bkmrk_component.super.output_handle = ompi_crcp_base_output;
|
||||
}
|
||||
|
||||
mca_base_param_reg_int(&mca_crcp_coord_component.super.base_version,
|
||||
mca_base_param_reg_int(&mca_crcp_bkmrk_component.super.base_version,
|
||||
"timing",
|
||||
"Enable Performance timing",
|
||||
false, false,
|
||||
@ -117,22 +116,22 @@ static int crcp_coord_open(void)
|
||||
/*
|
||||
* Debug Output
|
||||
*/
|
||||
opal_output_verbose(10, mca_crcp_coord_component.super.output_handle,
|
||||
"crcp:coord: open()");
|
||||
opal_output_verbose(20, mca_crcp_coord_component.super.output_handle,
|
||||
"crcp:coord: open: priority = %d",
|
||||
mca_crcp_coord_component.super.priority);
|
||||
opal_output_verbose(20, mca_crcp_coord_component.super.output_handle,
|
||||
"crcp:coord: open: verbosity = %d",
|
||||
mca_crcp_coord_component.super.verbose);
|
||||
opal_output_verbose(10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: open()");
|
||||
opal_output_verbose(20, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: open: priority = %d",
|
||||
mca_crcp_bkmrk_component.super.priority);
|
||||
opal_output_verbose(20, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: open: verbosity = %d",
|
||||
mca_crcp_bkmrk_component.super.verbose);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int crcp_coord_close(void)
|
||||
static int crcp_bkmrk_close(void)
|
||||
{
|
||||
opal_output_verbose(10, mca_crcp_coord_component.super.output_handle,
|
||||
"crcp:coord: close()");
|
||||
opal_output_verbose(10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: close()");
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
@ -24,57 +24,56 @@
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/opal_environ.h"
|
||||
|
||||
#include "ompi/mca/crcp/crcp.h"
|
||||
#include "ompi/mca/crcp/base/base.h"
|
||||
|
||||
#include "crcp_coord.h"
|
||||
#include "crcp_coord_pml.h"
|
||||
#include "crcp_bkmrk.h"
|
||||
#include "crcp_bkmrk_pml.h"
|
||||
|
||||
/*
|
||||
* Coord module
|
||||
*/
|
||||
static ompi_crcp_base_module_t loc_module = {
|
||||
/** Initialization Function */
|
||||
ompi_crcp_coord_module_init,
|
||||
ompi_crcp_bkmrk_module_init,
|
||||
/** Finalization Function */
|
||||
ompi_crcp_coord_module_finalize,
|
||||
ompi_crcp_bkmrk_module_finalize,
|
||||
|
||||
/** PML Wrapper */
|
||||
ompi_crcp_coord_pml_enable,
|
||||
ompi_crcp_bkmrk_pml_enable,
|
||||
|
||||
ompi_crcp_coord_pml_add_comm,
|
||||
ompi_crcp_coord_pml_del_comm,
|
||||
ompi_crcp_bkmrk_pml_add_comm,
|
||||
ompi_crcp_bkmrk_pml_del_comm,
|
||||
|
||||
ompi_crcp_coord_pml_add_procs,
|
||||
ompi_crcp_coord_pml_del_procs,
|
||||
ompi_crcp_bkmrk_pml_add_procs,
|
||||
ompi_crcp_bkmrk_pml_del_procs,
|
||||
|
||||
ompi_crcp_coord_pml_progress,
|
||||
ompi_crcp_bkmrk_pml_progress,
|
||||
|
||||
ompi_crcp_coord_pml_iprobe,
|
||||
ompi_crcp_coord_pml_probe,
|
||||
ompi_crcp_bkmrk_pml_iprobe,
|
||||
ompi_crcp_bkmrk_pml_probe,
|
||||
|
||||
ompi_crcp_coord_pml_isend_init,
|
||||
ompi_crcp_coord_pml_isend,
|
||||
ompi_crcp_coord_pml_send,
|
||||
ompi_crcp_bkmrk_pml_isend_init,
|
||||
ompi_crcp_bkmrk_pml_isend,
|
||||
ompi_crcp_bkmrk_pml_send,
|
||||
|
||||
ompi_crcp_coord_pml_irecv_init,
|
||||
ompi_crcp_coord_pml_irecv,
|
||||
ompi_crcp_coord_pml_recv,
|
||||
ompi_crcp_bkmrk_pml_irecv_init,
|
||||
ompi_crcp_bkmrk_pml_irecv,
|
||||
ompi_crcp_bkmrk_pml_recv,
|
||||
|
||||
ompi_crcp_coord_pml_dump,
|
||||
ompi_crcp_coord_pml_start,
|
||||
ompi_crcp_bkmrk_pml_dump,
|
||||
ompi_crcp_bkmrk_pml_start,
|
||||
|
||||
ompi_crcp_coord_pml_ft_event,
|
||||
ompi_crcp_bkmrk_pml_ft_event,
|
||||
|
||||
/* Request Functions */
|
||||
ompi_crcp_coord_request_complete,
|
||||
ompi_crcp_bkmrk_request_complete,
|
||||
|
||||
/* BTL Wrapper Functions */
|
||||
NULL, /* btl_add_procs */
|
||||
@ -102,33 +101,33 @@ static ompi_crcp_base_module_t loc_module = {
|
||||
/*
|
||||
* MCA Functions
|
||||
*/
|
||||
int ompi_crcp_coord_component_query(mca_base_module_t **module, int *priority)
|
||||
int ompi_crcp_bkmrk_component_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
opal_output_verbose(10, mca_crcp_coord_component.super.output_handle,
|
||||
"crcp:coord: component_query()");
|
||||
opal_output_verbose(10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: component_query()");
|
||||
|
||||
*priority = mca_crcp_coord_component.super.priority;
|
||||
*priority = mca_crcp_bkmrk_component.super.priority;
|
||||
*module = (mca_base_module_t *)&loc_module;
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_crcp_coord_module_init(void)
|
||||
int ompi_crcp_bkmrk_module_init(void)
|
||||
{
|
||||
opal_output_verbose(10, mca_crcp_coord_component.super.output_handle,
|
||||
"crcp:coord: module_init()");
|
||||
opal_output_verbose(10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: module_init()");
|
||||
|
||||
ompi_crcp_coord_pml_init();
|
||||
ompi_crcp_bkmrk_pml_init();
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_crcp_coord_module_finalize(void)
|
||||
int ompi_crcp_bkmrk_module_finalize(void)
|
||||
{
|
||||
opal_output_verbose(10, mca_crcp_coord_component.super.output_handle,
|
||||
"crcp:coord: module_finalize()");
|
||||
opal_output_verbose(10, mca_crcp_bkmrk_component.super.output_handle,
|
||||
"crcp:bkmrk: module_finalize()");
|
||||
|
||||
ompi_crcp_coord_pml_finalize();
|
||||
ompi_crcp_bkmrk_pml_finalize();
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
6452
ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
Обычный файл
6452
ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c
Обычный файл
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -17,12 +17,12 @@
|
||||
/**
|
||||
* @file
|
||||
*
|
||||
* Coord CRCP component
|
||||
* Hoke CRCP component
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MCA_CRCP_COORD_PML_EXPORT_H
|
||||
#define MCA_CRCP_COORD_PML_EXPORT_H
|
||||
#ifndef MCA_CRCP_HOKE_PML_EXPORT_H
|
||||
#define MCA_CRCP_HOKE_PML_EXPORT_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
@ -34,7 +34,7 @@
|
||||
#include "opal/threads/condition.h"
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
|
||||
#include "ompi/mca/crcp/coord/crcp_coord.h"
|
||||
#include "ompi/mca/crcp/bkmrk/crcp_bkmrk.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
@ -43,89 +43,89 @@ extern "C" {
|
||||
/*
|
||||
* PML Coordination functions
|
||||
*/
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_enable
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_enable
|
||||
( bool enable, ompi_crcp_base_pml_state_t* pml_state );
|
||||
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_add_comm
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_add_comm
|
||||
( struct ompi_communicator_t* comm,
|
||||
ompi_crcp_base_pml_state_t* pml_state );
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_del_comm
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_del_comm
|
||||
( struct ompi_communicator_t* comm,
|
||||
ompi_crcp_base_pml_state_t* pml_state );
|
||||
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_add_procs
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_add_procs
|
||||
( struct ompi_proc_t **procs, size_t nprocs,
|
||||
ompi_crcp_base_pml_state_t* pml_state );
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_del_procs
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_del_procs
|
||||
( struct ompi_proc_t **procs, size_t nprocs,
|
||||
ompi_crcp_base_pml_state_t* pml_state );
|
||||
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_progress
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_progress
|
||||
(ompi_crcp_base_pml_state_t* pml_state);
|
||||
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_iprobe
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_iprobe
|
||||
(int dst, int tag, struct ompi_communicator_t* comm,
|
||||
int *matched, ompi_status_public_t* status,
|
||||
ompi_crcp_base_pml_state_t* pml_state );
|
||||
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_probe
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_probe
|
||||
( int dst, int tag, struct ompi_communicator_t* comm,
|
||||
ompi_status_public_t* status,
|
||||
ompi_crcp_base_pml_state_t* pml_state );
|
||||
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_isend_init
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_isend_init
|
||||
( void *buf, size_t count, ompi_datatype_t *datatype,
|
||||
int dst, int tag, mca_pml_base_send_mode_t mode,
|
||||
struct ompi_communicator_t* comm,
|
||||
struct ompi_request_t **request,
|
||||
ompi_crcp_base_pml_state_t* pml_state );
|
||||
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_isend
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_isend
|
||||
( void *buf, size_t count, ompi_datatype_t *datatype,
|
||||
int dst, int tag, mca_pml_base_send_mode_t mode,
|
||||
struct ompi_communicator_t* comm,
|
||||
struct ompi_request_t **request,
|
||||
ompi_crcp_base_pml_state_t* pml_state );
|
||||
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_send
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_send
|
||||
( void *buf, size_t count, ompi_datatype_t *datatype,
|
||||
int dst, int tag, mca_pml_base_send_mode_t mode,
|
||||
struct ompi_communicator_t* comm,
|
||||
ompi_crcp_base_pml_state_t* pml_state );
|
||||
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_irecv_init
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_irecv_init
|
||||
( void *buf, size_t count, ompi_datatype_t *datatype,
|
||||
int src, int tag, struct ompi_communicator_t* comm,
|
||||
struct ompi_request_t **request,
|
||||
ompi_crcp_base_pml_state_t* pml_state);
|
||||
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_irecv
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_irecv
|
||||
( void *buf, size_t count, ompi_datatype_t *datatype,
|
||||
int src, int tag, struct ompi_communicator_t* comm,
|
||||
struct ompi_request_t **request,
|
||||
ompi_crcp_base_pml_state_t* pml_state );
|
||||
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_recv
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_recv
|
||||
( void *buf, size_t count, ompi_datatype_t *datatype,
|
||||
int src, int tag, struct ompi_communicator_t* comm,
|
||||
ompi_status_public_t* status,
|
||||
ompi_crcp_base_pml_state_t* pml_state);
|
||||
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_dump
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_dump
|
||||
( struct ompi_communicator_t* comm, int verbose,
|
||||
ompi_crcp_base_pml_state_t* pml_state );
|
||||
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_start
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_start
|
||||
( size_t count, ompi_request_t** requests,
|
||||
ompi_crcp_base_pml_state_t* pml_state );
|
||||
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_ft_event
|
||||
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event
|
||||
(int state, ompi_crcp_base_pml_state_t* pml_state);
|
||||
|
||||
/*
|
||||
* Request function
|
||||
*/
|
||||
int ompi_crcp_coord_request_complete(struct ompi_request_t *request);
|
||||
int ompi_crcp_bkmrk_request_complete(struct ompi_request_t *request);
|
||||
|
||||
/***********************************
|
||||
* Globally Defined Structures
|
||||
@ -133,7 +133,7 @@ extern "C" {
|
||||
/*
|
||||
* Types of Messages
|
||||
*/
|
||||
enum ompi_crcp_coord_pml_message_type_t {
|
||||
enum ompi_crcp_bkmrk_pml_message_type_t {
|
||||
COORD_MSG_TYPE_UNKNOWN, /* 0 Unknown type */
|
||||
COORD_MSG_TYPE_B_SEND, /* 1 Blocking Send */
|
||||
COORD_MSG_TYPE_I_SEND, /* 2 Non-Blocking Send */
|
||||
@ -142,12 +142,60 @@ extern "C" {
|
||||
COORD_MSG_TYPE_I_RECV, /* 5 Non-Blocking Recv */
|
||||
COORD_MSG_TYPE_P_RECV /* 6 Persistent Recv */
|
||||
};
|
||||
typedef enum ompi_crcp_coord_pml_message_type_t ompi_crcp_coord_pml_message_type_t;
|
||||
typedef enum ompi_crcp_bkmrk_pml_message_type_t ompi_crcp_bkmrk_pml_message_type_t;
|
||||
|
||||
/*
|
||||
* Message Reference
|
||||
* A list structure to contain {buffer, request, status} sets
|
||||
*
|
||||
* send/recv type | Buffer | Request | Status | Active
|
||||
* ---------------+--------+---------+--------+--------
|
||||
* Blocking | No | No | No | No
|
||||
* Non-Blocking | No | Yes | Yes | No
|
||||
* Persistent | Yes | Yes | Yes | Yes
|
||||
*
|
||||
* No : Does not require this field
|
||||
* Yes: Does require this field
|
||||
*/
|
||||
struct ompi_crcp_coord_pml_message_ref_t {
|
||||
struct ompi_crcp_bkmrk_pml_message_content_ref_t {
|
||||
/** This is a list object */
|
||||
opal_list_item_t super;
|
||||
|
||||
/** Buffer for data */
|
||||
void * buffer;
|
||||
|
||||
/* Request for this message */
|
||||
ompi_request_t *request;
|
||||
|
||||
/** Status */
|
||||
ompi_status_public_t status;
|
||||
|
||||
/** Active ? */
|
||||
bool active;
|
||||
|
||||
/** Done ? - Only useful in Drain*/
|
||||
bool done;
|
||||
|
||||
/** Already_posted ? - Only useful in Drain */
|
||||
bool already_posted;
|
||||
|
||||
/** Drained */
|
||||
bool already_drained;
|
||||
|
||||
/** JJH XXX Debug counter*/
|
||||
uint64_t msg_id;
|
||||
};
|
||||
typedef struct ompi_crcp_bkmrk_pml_message_content_ref_t ompi_crcp_bkmrk_pml_message_content_ref_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_message_content_ref_t);
|
||||
void ompi_crcp_bkmrk_pml_message_content_ref_construct(ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref);
|
||||
void ompi_crcp_bkmrk_pml_message_content_ref_destruct( ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref);
|
||||
|
||||
/*
|
||||
* Drain Message Reference
|
||||
* - The first section of this structure should match
|
||||
* ompi_crcp_bkmrk_pml_traffic_message_ref_t exactly.
|
||||
*/
|
||||
struct ompi_crcp_bkmrk_pml_drain_message_ref_t {
|
||||
/** This is a list object */
|
||||
opal_list_item_t super;
|
||||
|
||||
@ -155,11 +203,7 @@ extern "C" {
|
||||
uint64_t msg_id;
|
||||
|
||||
/** Type of message this references */
|
||||
ompi_crcp_coord_pml_message_type_t msg_type;
|
||||
|
||||
|
||||
/** Buffer for data */
|
||||
void * buffer;
|
||||
ompi_crcp_bkmrk_pml_message_type_t msg_type;
|
||||
|
||||
/** Count for data */
|
||||
size_t count;
|
||||
@ -179,12 +223,89 @@ extern "C" {
|
||||
/** Communicator pointer */
|
||||
ompi_communicator_t* comm;
|
||||
|
||||
/** Receive Request */
|
||||
ompi_request_t *request;
|
||||
/** Message Contents */
|
||||
opal_list_t msg_contents;
|
||||
|
||||
/** Status */
|
||||
ompi_status_public_t status;
|
||||
/** Peer which we received from */
|
||||
orte_process_name_t proc_name;
|
||||
|
||||
/** Is this message complete WRT PML semantics?
|
||||
* true = message done on this side (send or receive)
|
||||
* false = message still in process (sending or receiving)
|
||||
*/
|
||||
int done;
|
||||
|
||||
/** Is the message actively being worked on?
|
||||
* true = Message is !done, and is in the progress cycle
|
||||
* false = Message is !done and is *not* in the progress cycle ( [send/recv]_init requests)
|
||||
*/
|
||||
int active;
|
||||
|
||||
/** Has this message been posted?
|
||||
* true = message was posted (Send or recv)
|
||||
* false = message was not yet posted.
|
||||
* Used when trying to figure out which messages the drain protocol needs to post, and
|
||||
* which message have already been posted for it.
|
||||
*/
|
||||
int already_posted;
|
||||
|
||||
};
|
||||
typedef struct ompi_crcp_bkmrk_pml_drain_message_ref_t ompi_crcp_bkmrk_pml_drain_message_ref_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_drain_message_ref_t);
|
||||
void ompi_crcp_bkmrk_pml_drain_message_ref_construct(ompi_crcp_bkmrk_pml_drain_message_ref_t *msg_ref);
|
||||
void ompi_crcp_bkmrk_pml_drain_message_ref_destruct( ompi_crcp_bkmrk_pml_drain_message_ref_t *msg_ref);
|
||||
|
||||
/*
|
||||
* List of Pending ACKs to drained messages
|
||||
*/
|
||||
struct ompi_crcp_bkmrk_pml_drain_message_ack_ref_t {
|
||||
/** This is a list object */
|
||||
opal_list_item_t super;
|
||||
|
||||
/** Complete flag */
|
||||
bool complete;
|
||||
|
||||
/** Peer which we received from */
|
||||
orte_process_name_t peer;
|
||||
};
|
||||
typedef struct ompi_crcp_bkmrk_pml_drain_message_ack_ref_t ompi_crcp_bkmrk_pml_drain_message_ack_ref_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_drain_message_ack_ref_t);
|
||||
void ompi_crcp_bkmrk_pml_drain_message_ack_ref_construct(ompi_crcp_bkmrk_pml_drain_message_ack_ref_t *msg_ack_ref);
|
||||
void ompi_crcp_bkmrk_pml_drain_message_ack_ref_destruct( ompi_crcp_bkmrk_pml_drain_message_ack_ref_t *msg_ack_ref);
|
||||
|
||||
/*
|
||||
* Regular Traffic Message Reference
|
||||
* Tracks message signature {count, datatype_size, tag, comm, peer}
|
||||
*/
|
||||
struct ompi_crcp_bkmrk_pml_traffic_message_ref_t {
|
||||
/** This is a list object */
|
||||
opal_list_item_t super;
|
||||
|
||||
/** Sequence Number of this message */
|
||||
uint64_t msg_id;
|
||||
|
||||
/** Type of message this references */
|
||||
ompi_crcp_bkmrk_pml_message_type_t msg_type;
|
||||
|
||||
/** Count for data */
|
||||
size_t count;
|
||||
|
||||
/** Quick reference to the size of the datatype */
|
||||
size_t ddt_size;
|
||||
|
||||
/** Message Tag */
|
||||
int tag;
|
||||
|
||||
/** Peer rank to which it was sent/recv'ed if known */
|
||||
int rank;
|
||||
|
||||
/** Communicator pointer */
|
||||
ompi_communicator_t* comm;
|
||||
|
||||
/** Message Contents */
|
||||
opal_list_t msg_contents;
|
||||
|
||||
/** Peer which we received from */
|
||||
orte_process_name_t proc_name;
|
||||
@ -205,42 +326,42 @@ extern "C" {
|
||||
* already_posted = false -> true when posted irecv
|
||||
*/
|
||||
/** Has this message been matched by the peer?
|
||||
* true = peer confirmed the receipt of this message
|
||||
* - Resolved during bookmark exchange
|
||||
* true = peer confirmed the receipt of this message
|
||||
* false = unknown if peer has received this message or not
|
||||
*/
|
||||
bool matched;
|
||||
int matched;
|
||||
|
||||
/** Is this message complete WRT PML semantics?
|
||||
* true = message done on this side (send or receive)
|
||||
* - Is it not in-flight?
|
||||
* true = message done on this side (send or receive)
|
||||
* false = message still in process (sending or receiving)
|
||||
*/
|
||||
bool done;
|
||||
int done;
|
||||
|
||||
/** Is the message actively being worked on?
|
||||
* true = Message is !done, and is in the progress cycle
|
||||
* - Known to be in-flight?
|
||||
* true = Message is !done, and is in the progress cycle
|
||||
* false = Message is !done and is *not* in the progress cycle ( [send/recv]_init requests)
|
||||
*/
|
||||
bool active;
|
||||
int active;
|
||||
|
||||
/** Has this message been posted?
|
||||
* true = message was posted (Send or recv)
|
||||
* false = message was not yet posted.
|
||||
* Used when trying to figure out which messages the drain protocol needs to post, and
|
||||
* which message have already been posted for it.
|
||||
/** How many times a persistent send/recv has been posted, but not activated.
|
||||
*
|
||||
*/
|
||||
bool already_posted;
|
||||
int posted;
|
||||
|
||||
/** Suggested Rank that this should be matched to
|
||||
* This is used when rank = ANY_SOURCE and we need to
|
||||
* drain it to a specific peer
|
||||
/** Actively drained
|
||||
* These are messages that are active, and being drained. So if we checkpoint while the drain
|
||||
* list is not empty then we do not try to count these messages more than once.
|
||||
*/
|
||||
int suggested_rank;
|
||||
int active_drain;
|
||||
};
|
||||
typedef struct ompi_crcp_coord_pml_message_ref_t ompi_crcp_coord_pml_message_ref_t;
|
||||
typedef struct ompi_crcp_bkmrk_pml_traffic_message_ref_t ompi_crcp_bkmrk_pml_traffic_message_ref_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(ompi_crcp_coord_pml_message_ref_t);
|
||||
void ompi_crcp_coord_pml_message_ref_construct(ompi_crcp_coord_pml_message_ref_t *msg_ref);
|
||||
void ompi_crcp_coord_pml_message_ref_destruct( ompi_crcp_coord_pml_message_ref_t *msg_ref);
|
||||
OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_traffic_message_ref_t);
|
||||
void ompi_crcp_bkmrk_pml_traffic_message_ref_construct(ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref);
|
||||
void ompi_crcp_bkmrk_pml_traffic_message_ref_destruct( ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref);
|
||||
|
||||
/*
|
||||
* A structure for a single process
|
||||
@ -249,7 +370,7 @@ extern "C" {
|
||||
* - List of received message from this peer
|
||||
* - Message totals
|
||||
*/
|
||||
struct ompi_crcp_coord_pml_peer_ref_t {
|
||||
struct ompi_crcp_bkmrk_pml_peer_ref_t {
|
||||
/** This is a list object */
|
||||
opal_list_item_t super;
|
||||
|
||||
@ -266,6 +387,9 @@ extern "C" {
|
||||
opal_list_t irecv_list; /**< pml_irecv */
|
||||
opal_list_t recv_init_list; /**< pml_irecv_init */
|
||||
|
||||
/** List of messages drained from this peer */
|
||||
opal_list_t drained_list;
|
||||
|
||||
/*
|
||||
* These are totals over all communicators provided for convenience.
|
||||
*
|
||||
@ -284,42 +408,37 @@ extern "C" {
|
||||
* Once completed: ++total
|
||||
*/
|
||||
/** Total Number of messages sent */
|
||||
uint32_t total_send_msgs;
|
||||
uint32_t total_isend_msgs;
|
||||
uint32_t total_send_init_msgs;
|
||||
uint32_t matched_send_msgs;
|
||||
uint32_t matched_isend_msgs;
|
||||
uint32_t matched_send_init_msgs;
|
||||
uint32_t total_msgs_sent;
|
||||
uint32_t matched_msgs_sent;
|
||||
|
||||
/** Total Number of messages received */
|
||||
uint32_t total_recv_msgs;
|
||||
uint32_t total_irecv_msgs;
|
||||
uint32_t total_recv_init_msgs;
|
||||
uint32_t matched_recv_msgs;
|
||||
uint32_t matched_irecv_msgs;
|
||||
uint32_t matched_recv_init_msgs;
|
||||
uint32_t total_msgs_recvd;
|
||||
uint32_t matched_msgs_recvd;
|
||||
|
||||
/** Total Number of messages drained */
|
||||
uint32_t total_drained_msgs;
|
||||
|
||||
/** If peer is expecting an ACK after draining the messages */
|
||||
bool ack_required;
|
||||
};
|
||||
typedef struct ompi_crcp_coord_pml_peer_ref_t ompi_crcp_coord_pml_peer_ref_t;
|
||||
typedef struct ompi_crcp_bkmrk_pml_peer_ref_t ompi_crcp_bkmrk_pml_peer_ref_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(ompi_crcp_coord_pml_peer_ref_t);
|
||||
void ompi_crcp_coord_pml_peer_ref_construct(ompi_crcp_coord_pml_peer_ref_t *bkm_proc);
|
||||
void ompi_crcp_coord_pml_peer_ref_destruct( ompi_crcp_coord_pml_peer_ref_t *bkm_proc);
|
||||
OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_peer_ref_t);
|
||||
void ompi_crcp_bkmrk_pml_peer_ref_construct(ompi_crcp_bkmrk_pml_peer_ref_t *bkm_proc);
|
||||
void ompi_crcp_bkmrk_pml_peer_ref_destruct( ompi_crcp_bkmrk_pml_peer_ref_t *bkm_proc);
|
||||
|
||||
/*
|
||||
* Local version of the PML state
|
||||
*/
|
||||
struct ompi_crcp_coord_pml_state_t {
|
||||
struct ompi_crcp_bkmrk_pml_state_t {
|
||||
ompi_crcp_base_pml_state_t p_super;
|
||||
ompi_crcp_base_pml_state_t *prev_ptr;
|
||||
|
||||
ompi_crcp_coord_pml_peer_ref_t *peer_ref;
|
||||
ompi_crcp_coord_pml_message_ref_t *msg_ref;
|
||||
ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref;
|
||||
ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref;
|
||||
};
|
||||
typedef struct ompi_crcp_coord_pml_state_t ompi_crcp_coord_pml_state_t;
|
||||
OBJ_CLASS_DECLARATION(ompi_crcp_coord_pml_state_t);
|
||||
typedef struct ompi_crcp_bkmrk_pml_state_t ompi_crcp_bkmrk_pml_state_t;
|
||||
OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_state_t);
|
||||
|
||||
/***********************************
|
||||
* Globally Defined Variables
|
||||
@ -327,10 +446,10 @@ extern "C" {
|
||||
/*
|
||||
* List of known peers
|
||||
*/
|
||||
extern opal_list_t ompi_crcp_coord_pml_peer_refs;
|
||||
extern opal_list_t ompi_crcp_bkmrk_pml_peer_refs;
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* MCA_CRCP_COORD_PML_EXPORT_H */
|
||||
#endif /* MCA_CRCP_HOKE_PML_EXPORT_H */
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Загрузка…
x
Ссылка в новой задаче
Block a user