1
1

Replace the old coordinated component ('coord') with a much more refined version ('bkmrk').

The new component fixes a number of problems with the old component. The core algorithm is the same, but by changing the data structures a bit we have improved performance and memory utilization.

There are still a couple of corner cases that need some work. However, I did not want to delay bringing this into the trunk (and v1.3 branch) for too much longer.

This commit was SVN r19537.
Этот коммит содержится в:
Josh Hursey 2008-09-10 18:29:17 +00:00
родитель 1ad9d0459e
Коммит 36185ad964
13 изменённых файлов: 6763 добавлений и 5318 удалений

Просмотреть файл

@ -32,10 +32,10 @@ snapc=full
#
# OMPI Parameters
# - Wrap the PML
# - Use the LAM/MPI-like Coordinated Checkpoint/Restart Coordination Protocol
# - Use a Bookmark Exchange Fully Coordinated Checkpoint/Restart Coordination Protocol
#
pml_wrapper=crcpw
crcp=coord
crcp=bkmrk
#
# Temporary fix to force the event engine to use poll to behave well with BLCR

Просмотреть файл

@ -14,32 +14,32 @@
# $HEADER$
#
dist_pkgdata_DATA = help-ompi-crcp-coord.txt
dist_pkgdata_DATA = help-ompi-crcp-bkmrk.txt
sources = \
crcp_coord.h \
crcp_coord_pml.h \
crcp_coord_component.c \
crcp_coord_module.c \
crcp_coord_pml.c
crcp_bkmrk.h \
crcp_bkmrk_pml.h \
crcp_bkmrk_component.c \
crcp_bkmrk_module.c \
crcp_bkmrk_pml.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_crcp_coord_DSO
if OMPI_BUILD_crcp_bkmrk_DSO
component_noinst =
component_install = mca_crcp_coord.la
component_install = mca_crcp_bkmrk.la
else
component_noinst = libmca_crcp_coord.la
component_noinst = libmca_crcp_bkmrk.la
component_install =
endif
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_crcp_coord_la_SOURCES = $(sources)
mca_crcp_coord_la_LDFLAGS = -module -avoid-version
mca_crcp_bkmrk_la_SOURCES = $(sources)
mca_crcp_bkmrk_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_crcp_coord_la_SOURCES = $(sources)
libmca_crcp_coord_la_LDFLAGS = -module -avoid-version
libmca_crcp_bkmrk_la_SOURCES = $(sources)
libmca_crcp_bkmrk_la_LDFLAGS = -module -avoid-version

Просмотреть файл

@ -15,9 +15,9 @@
# $HEADER$
#
# MCA_crcp_coord_CONFIG([action-if-found], [action-if-not-found])
# MCA_crcp_bkmrk_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_crcp_coord_CONFIG],[
AC_DEFUN([MCA_crcp_bkmrk_CONFIG],[
# If we don't want FT, don't compile this component
AS_IF([test "$ompi_want_ft" = "1"],
[$1],

Просмотреть файл

@ -15,5 +15,5 @@
# $HEADER$
#
PARAM_INIT_FILE=crcp_coord_component.c
PARAM_INIT_FILE=crcp_bkmrk_component.c
PARAM_CONFIG_FILES="Makefile"

Просмотреть файл

@ -17,12 +17,12 @@
/**
* @file
*
* Coord CRCP component
* Hoke CRCP component
*
*/
#ifndef MCA_CRCP_COORD_EXPORT_H
#define MCA_CRCP_COORD_EXPORT_H
#ifndef MCA_CRCP_HOKE_EXPORT_H
#define MCA_CRCP_HOKE_EXPORT_H
#include "ompi_config.h"
@ -42,11 +42,11 @@ extern "C" {
/*
* Local Component structures
*/
struct ompi_crcp_coord_component_t {
struct ompi_crcp_bkmrk_component_t {
ompi_crcp_base_component_t super; /** Base CRCP component */
};
typedef struct ompi_crcp_coord_component_t ompi_crcp_coord_component_t;
OMPI_MODULE_DECLSPEC extern ompi_crcp_coord_component_t mca_crcp_coord_component;
typedef struct ompi_crcp_bkmrk_component_t ompi_crcp_bkmrk_component_t;
OMPI_MODULE_DECLSPEC extern ompi_crcp_bkmrk_component_t mca_crcp_bkmrk_component;
/*
* Local variables
@ -56,15 +56,15 @@ extern "C" {
/*
* Module functions
*/
int ompi_crcp_coord_component_query(mca_base_module_t **module, int *priority);
int ompi_crcp_coord_module_init(void);
int ompi_crcp_coord_module_finalize(void);
int ompi_crcp_bkmrk_component_query(mca_base_module_t **module, int *priority);
int ompi_crcp_bkmrk_module_init(void);
int ompi_crcp_bkmrk_module_finalize(void);
int ompi_crcp_coord_pml_init(void);
int ompi_crcp_coord_pml_finalize(void);
int ompi_crcp_bkmrk_pml_init(void);
int ompi_crcp_bkmrk_pml_finalize(void);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* MCA_CRCP_COORD_EXPORT_H */
#endif /* MCA_CRCP_HOKE_EXPORT_H */

Просмотреть файл

@ -23,7 +23,7 @@
#include "opal/runtime/opal_cr.h"
#include "opal/event/event.h"
#include "orte/util/show_help.h"
#include "orte/util/output.h"
#include "opal/util/argv.h"
#include "opal/util/opal_environ.h"
#include "opal/mca/base/mca_base_param.h"
@ -35,14 +35,14 @@
#include "ompi/mca/crcp/crcp.h"
#include "ompi/mca/crcp/base/base.h"
#include "crcp_coord.h"
#include "crcp_coord_btl.h"
#include "crcp_bkmrk.h"
#include "crcp_bkmrk_btl.h"
int ompi_crcp_coord_btl_init(void) {
int ompi_crcp_bkmrk_btl_init(void) {
return OMPI_SUCCESS;
}
int ompi_crcp_coord_btl_finalize(void) {
int ompi_crcp_bkmrk_btl_finalize(void) {
return OMPI_SUCCESS;
}

Просмотреть файл

@ -17,12 +17,12 @@
/**
* @file
*
* Coord CRCP component
* Hoke CRCP component
*
*/
#ifndef MCA_CRCP_COORD_BTL_EXPORT_H
#define MCA_CRCP_COORD_BTL_EXPORT_H
#ifndef MCA_CRCP_HOKE_BTL_EXPORT_H
#define MCA_CRCP_HOKE_BTL_EXPORT_H
#include "ompi_config.h"
@ -33,7 +33,7 @@
#include "opal/threads/mutex.h"
#include "opal/threads/condition.h"
#include "ompi/mca/crcp/coord/crcp_coord.h"
#include "ompi/mca/crcp/bkmrk/crcp_bkmrk.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
@ -129,4 +129,4 @@ extern "C" {
}
#endif
#endif /* MCA_CRCP_COORD_BTL_EXPORT_H */
#endif /* MCA_CRCP_HOKE_BTL_EXPORT_H */

Просмотреть файл

@ -16,31 +16,31 @@
#include "ompi_config.h"
#include "orte/util/show_help.h"
#include "opal/util/output.h"
#include "ompi/mca/crcp/crcp.h"
#include "ompi/mca/crcp/base/base.h"
#include "crcp_coord.h"
#include "crcp_bkmrk.h"
/*
* Public string for version number
*/
const char *ompi_crcp_coord_component_version_string =
"OMPI CRCP coord MCA component version " OMPI_VERSION;
const char *ompi_crcp_bkmrk_component_version_string =
"OMPI CRCP bkmrk MCA component version " OMPI_VERSION;
int timing_enabled = 0;
/*
* Local functionality
*/
static int crcp_coord_open(void);
static int crcp_coord_close(void);
static int crcp_bkmrk_open(void);
static int crcp_bkmrk_close(void);
/*
* Instantiate the public struct with all of our public information
* and pointer to our public functions in it
*/
ompi_crcp_coord_component_t mca_crcp_coord_component = {
ompi_crcp_bkmrk_component_t mca_crcp_bkmrk_component = {
/* First do the base component stuff */
{
/* Handle the general mca_component_t struct containing
@ -48,17 +48,16 @@ ompi_crcp_coord_component_t mca_crcp_coord_component = {
*/
{
OMPI_CRCP_BASE_VERSION_2_0_0,
/* Component name and version */
"coord",
"bkmrk",
OMPI_MAJOR_VERSION,
OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION,
/* Component open and close functions */
crcp_coord_open,
crcp_coord_close,
ompi_crcp_coord_component_query
crcp_bkmrk_open,
crcp_bkmrk_close,
ompi_crcp_bkmrk_component_query
},
{
/* The component is checkpoint ready */
@ -70,11 +69,11 @@ ompi_crcp_coord_component_t mca_crcp_coord_component = {
/* opal_output handler */
-1,
/* Default priority */
10
20
}
};
static int crcp_coord_open(void)
static int crcp_bkmrk_open(void)
{
int val;
@ -82,31 +81,31 @@ static int crcp_coord_open(void)
* This should be the last component to ever get used since
* it doesn't do anything.
*/
mca_base_param_reg_int(&mca_crcp_coord_component.super.base_version,
mca_base_param_reg_int(&mca_crcp_bkmrk_component.super.base_version,
"priority",
"Priority of the CRCP coord component",
"Priority of the CRCP bkmrk component",
false, false,
mca_crcp_coord_component.super.priority,
&mca_crcp_coord_component.super.priority);
mca_crcp_bkmrk_component.super.priority,
&mca_crcp_bkmrk_component.super.priority);
mca_base_param_reg_int(&mca_crcp_coord_component.super.base_version,
mca_base_param_reg_int(&mca_crcp_bkmrk_component.super.base_version,
"verbose",
"Verbose level for the CRCP coord component",
"Verbose level for the CRCP bkmrk component",
false, false,
mca_crcp_coord_component.super.verbose,
&mca_crcp_coord_component.super.verbose);
mca_crcp_bkmrk_component.super.verbose,
&mca_crcp_bkmrk_component.super.verbose);
/* If there is a custom verbose level for this component then use it
* otherwise take our parents level and output channel
*/
if ( 0 != mca_crcp_coord_component.super.verbose) {
mca_crcp_coord_component.super.output_handle = opal_output_open(NULL);
opal_output_set_verbosity(mca_crcp_coord_component.super.output_handle,
mca_crcp_coord_component.super.verbose);
if ( 0 != mca_crcp_bkmrk_component.super.verbose) {
mca_crcp_bkmrk_component.super.output_handle = opal_output_open(NULL);
opal_output_set_verbosity(mca_crcp_bkmrk_component.super.output_handle,
mca_crcp_bkmrk_component.super.verbose);
} else {
mca_crcp_coord_component.super.output_handle = ompi_crcp_base_output;
mca_crcp_bkmrk_component.super.output_handle = ompi_crcp_base_output;
}
mca_base_param_reg_int(&mca_crcp_coord_component.super.base_version,
mca_base_param_reg_int(&mca_crcp_bkmrk_component.super.base_version,
"timing",
"Enable Performance timing",
false, false,
@ -117,22 +116,22 @@ static int crcp_coord_open(void)
/*
* Debug Output
*/
opal_output_verbose(10, mca_crcp_coord_component.super.output_handle,
"crcp:coord: open()");
opal_output_verbose(20, mca_crcp_coord_component.super.output_handle,
"crcp:coord: open: priority = %d",
mca_crcp_coord_component.super.priority);
opal_output_verbose(20, mca_crcp_coord_component.super.output_handle,
"crcp:coord: open: verbosity = %d",
mca_crcp_coord_component.super.verbose);
opal_output_verbose(10, mca_crcp_bkmrk_component.super.output_handle,
"crcp:bkmrk: open()");
opal_output_verbose(20, mca_crcp_bkmrk_component.super.output_handle,
"crcp:bkmrk: open: priority = %d",
mca_crcp_bkmrk_component.super.priority);
opal_output_verbose(20, mca_crcp_bkmrk_component.super.output_handle,
"crcp:bkmrk: open: verbosity = %d",
mca_crcp_bkmrk_component.super.verbose);
return OMPI_SUCCESS;
}
static int crcp_coord_close(void)
static int crcp_bkmrk_close(void)
{
opal_output_verbose(10, mca_crcp_coord_component.super.output_handle,
"crcp:coord: close()");
opal_output_verbose(10, mca_crcp_bkmrk_component.super.output_handle,
"crcp:bkmrk: close()");
return OMPI_SUCCESS;
}

Просмотреть файл

@ -24,57 +24,56 @@
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "orte/util/show_help.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/show_help.h"
#include "opal/util/argv.h"
#include "opal/util/opal_environ.h"
#include "ompi/mca/crcp/crcp.h"
#include "ompi/mca/crcp/base/base.h"
#include "crcp_coord.h"
#include "crcp_coord_pml.h"
#include "crcp_bkmrk.h"
#include "crcp_bkmrk_pml.h"
/*
* Coord module
*/
static ompi_crcp_base_module_t loc_module = {
/** Initialization Function */
ompi_crcp_coord_module_init,
ompi_crcp_bkmrk_module_init,
/** Finalization Function */
ompi_crcp_coord_module_finalize,
ompi_crcp_bkmrk_module_finalize,
/** PML Wrapper */
ompi_crcp_coord_pml_enable,
ompi_crcp_bkmrk_pml_enable,
ompi_crcp_coord_pml_add_comm,
ompi_crcp_coord_pml_del_comm,
ompi_crcp_bkmrk_pml_add_comm,
ompi_crcp_bkmrk_pml_del_comm,
ompi_crcp_coord_pml_add_procs,
ompi_crcp_coord_pml_del_procs,
ompi_crcp_bkmrk_pml_add_procs,
ompi_crcp_bkmrk_pml_del_procs,
ompi_crcp_coord_pml_progress,
ompi_crcp_bkmrk_pml_progress,
ompi_crcp_coord_pml_iprobe,
ompi_crcp_coord_pml_probe,
ompi_crcp_bkmrk_pml_iprobe,
ompi_crcp_bkmrk_pml_probe,
ompi_crcp_coord_pml_isend_init,
ompi_crcp_coord_pml_isend,
ompi_crcp_coord_pml_send,
ompi_crcp_bkmrk_pml_isend_init,
ompi_crcp_bkmrk_pml_isend,
ompi_crcp_bkmrk_pml_send,
ompi_crcp_coord_pml_irecv_init,
ompi_crcp_coord_pml_irecv,
ompi_crcp_coord_pml_recv,
ompi_crcp_bkmrk_pml_irecv_init,
ompi_crcp_bkmrk_pml_irecv,
ompi_crcp_bkmrk_pml_recv,
ompi_crcp_coord_pml_dump,
ompi_crcp_coord_pml_start,
ompi_crcp_bkmrk_pml_dump,
ompi_crcp_bkmrk_pml_start,
ompi_crcp_coord_pml_ft_event,
ompi_crcp_bkmrk_pml_ft_event,
/* Request Functions */
ompi_crcp_coord_request_complete,
ompi_crcp_bkmrk_request_complete,
/* BTL Wrapper Functions */
NULL, /* btl_add_procs */
@ -102,33 +101,33 @@ static ompi_crcp_base_module_t loc_module = {
/*
* MCA Functions
*/
int ompi_crcp_coord_component_query(mca_base_module_t **module, int *priority)
int ompi_crcp_bkmrk_component_query(mca_base_module_t **module, int *priority)
{
opal_output_verbose(10, mca_crcp_coord_component.super.output_handle,
"crcp:coord: component_query()");
opal_output_verbose(10, mca_crcp_bkmrk_component.super.output_handle,
"crcp:bkmrk: component_query()");
*priority = mca_crcp_coord_component.super.priority;
*priority = mca_crcp_bkmrk_component.super.priority;
*module = (mca_base_module_t *)&loc_module;
return ORTE_SUCCESS;
}
int ompi_crcp_coord_module_init(void)
int ompi_crcp_bkmrk_module_init(void)
{
opal_output_verbose(10, mca_crcp_coord_component.super.output_handle,
"crcp:coord: module_init()");
opal_output_verbose(10, mca_crcp_bkmrk_component.super.output_handle,
"crcp:bkmrk: module_init()");
ompi_crcp_coord_pml_init();
ompi_crcp_bkmrk_pml_init();
return OMPI_SUCCESS;
}
int ompi_crcp_coord_module_finalize(void)
int ompi_crcp_bkmrk_module_finalize(void)
{
opal_output_verbose(10, mca_crcp_coord_component.super.output_handle,
"crcp:coord: module_finalize()");
opal_output_verbose(10, mca_crcp_bkmrk_component.super.output_handle,
"crcp:bkmrk: module_finalize()");
ompi_crcp_coord_pml_finalize();
ompi_crcp_bkmrk_pml_finalize();
return OMPI_SUCCESS;
}

6452
ompi/mca/crcp/bkmrk/crcp_bkmrk_pml.c Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -17,12 +17,12 @@
/**
* @file
*
* Coord CRCP component
* Hoke CRCP component
*
*/
#ifndef MCA_CRCP_COORD_PML_EXPORT_H
#define MCA_CRCP_COORD_PML_EXPORT_H
#ifndef MCA_CRCP_HOKE_PML_EXPORT_H
#define MCA_CRCP_HOKE_PML_EXPORT_H
#include "ompi_config.h"
@ -34,7 +34,7 @@
#include "opal/threads/condition.h"
#include "ompi/class/ompi_free_list.h"
#include "ompi/mca/crcp/coord/crcp_coord.h"
#include "ompi/mca/crcp/bkmrk/crcp_bkmrk.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
@ -43,89 +43,89 @@ extern "C" {
/*
* PML Coordination functions
*/
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_enable
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_enable
( bool enable, ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_add_comm
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_add_comm
( struct ompi_communicator_t* comm,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_del_comm
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_del_comm
( struct ompi_communicator_t* comm,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_add_procs
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_add_procs
( struct ompi_proc_t **procs, size_t nprocs,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_del_procs
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_del_procs
( struct ompi_proc_t **procs, size_t nprocs,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_progress
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_progress
(ompi_crcp_base_pml_state_t* pml_state);
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_iprobe
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_iprobe
(int dst, int tag, struct ompi_communicator_t* comm,
int *matched, ompi_status_public_t* status,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_probe
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_probe
( int dst, int tag, struct ompi_communicator_t* comm,
ompi_status_public_t* status,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_isend_init
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_isend_init
( void *buf, size_t count, ompi_datatype_t *datatype,
int dst, int tag, mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm,
struct ompi_request_t **request,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_isend
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_isend
( void *buf, size_t count, ompi_datatype_t *datatype,
int dst, int tag, mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm,
struct ompi_request_t **request,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_send
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_send
( void *buf, size_t count, ompi_datatype_t *datatype,
int dst, int tag, mca_pml_base_send_mode_t mode,
struct ompi_communicator_t* comm,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_irecv_init
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_irecv_init
( void *buf, size_t count, ompi_datatype_t *datatype,
int src, int tag, struct ompi_communicator_t* comm,
struct ompi_request_t **request,
ompi_crcp_base_pml_state_t* pml_state);
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_irecv
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_irecv
( void *buf, size_t count, ompi_datatype_t *datatype,
int src, int tag, struct ompi_communicator_t* comm,
struct ompi_request_t **request,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_recv
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_recv
( void *buf, size_t count, ompi_datatype_t *datatype,
int src, int tag, struct ompi_communicator_t* comm,
ompi_status_public_t* status,
ompi_crcp_base_pml_state_t* pml_state);
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_dump
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_dump
( struct ompi_communicator_t* comm, int verbose,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_start
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_start
( size_t count, ompi_request_t** requests,
ompi_crcp_base_pml_state_t* pml_state );
ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_ft_event
ompi_crcp_base_pml_state_t* ompi_crcp_bkmrk_pml_ft_event
(int state, ompi_crcp_base_pml_state_t* pml_state);
/*
* Request function
*/
int ompi_crcp_coord_request_complete(struct ompi_request_t *request);
int ompi_crcp_bkmrk_request_complete(struct ompi_request_t *request);
/***********************************
* Globally Defined Structures
@ -133,7 +133,7 @@ extern "C" {
/*
* Types of Messages
*/
enum ompi_crcp_coord_pml_message_type_t {
enum ompi_crcp_bkmrk_pml_message_type_t {
COORD_MSG_TYPE_UNKNOWN, /* 0 Unknown type */
COORD_MSG_TYPE_B_SEND, /* 1 Blocking Send */
COORD_MSG_TYPE_I_SEND, /* 2 Non-Blocking Send */
@ -142,12 +142,60 @@ extern "C" {
COORD_MSG_TYPE_I_RECV, /* 5 Non-Blocking Recv */
COORD_MSG_TYPE_P_RECV /* 6 Persistent Recv */
};
typedef enum ompi_crcp_coord_pml_message_type_t ompi_crcp_coord_pml_message_type_t;
typedef enum ompi_crcp_bkmrk_pml_message_type_t ompi_crcp_bkmrk_pml_message_type_t;
/*
* Message Reference
* A list structure to contain {buffer, request, status} sets
*
* send/recv type | Buffer | Request | Status | Active
* ---------------+--------+---------+--------+--------
* Blocking | No | No | No | No
* Non-Blocking | No | Yes | Yes | No
* Persistent | Yes | Yes | Yes | Yes
*
* No : Does not require this field
* Yes: Does require this field
*/
struct ompi_crcp_coord_pml_message_ref_t {
struct ompi_crcp_bkmrk_pml_message_content_ref_t {
/** This is a list object */
opal_list_item_t super;
/** Buffer for data */
void * buffer;
/* Request for this message */
ompi_request_t *request;
/** Status */
ompi_status_public_t status;
/** Active ? */
bool active;
/** Done ? - Only useful in Drain*/
bool done;
/** Already_posted ? - Only useful in Drain */
bool already_posted;
/** Drained */
bool already_drained;
/** JJH XXX Debug counter*/
uint64_t msg_id;
};
typedef struct ompi_crcp_bkmrk_pml_message_content_ref_t ompi_crcp_bkmrk_pml_message_content_ref_t;
OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_message_content_ref_t);
void ompi_crcp_bkmrk_pml_message_content_ref_construct(ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref);
void ompi_crcp_bkmrk_pml_message_content_ref_destruct( ompi_crcp_bkmrk_pml_message_content_ref_t *content_ref);
/*
* Drain Message Reference
* - The first section of this structure should match
* ompi_crcp_bkmrk_pml_traffic_message_ref_t exactly.
*/
struct ompi_crcp_bkmrk_pml_drain_message_ref_t {
/** This is a list object */
opal_list_item_t super;
@ -155,11 +203,7 @@ extern "C" {
uint64_t msg_id;
/** Type of message this references */
ompi_crcp_coord_pml_message_type_t msg_type;
/** Buffer for data */
void * buffer;
ompi_crcp_bkmrk_pml_message_type_t msg_type;
/** Count for data */
size_t count;
@ -179,12 +223,89 @@ extern "C" {
/** Communicator pointer */
ompi_communicator_t* comm;
/** Receive Request */
ompi_request_t *request;
/** Message Contents */
opal_list_t msg_contents;
/** Status */
ompi_status_public_t status;
/** Peer which we received from */
orte_process_name_t proc_name;
/** Is this message complete WRT PML semantics?
* true = message done on this side (send or receive)
* false = message still in process (sending or receiving)
*/
int done;
/** Is the message actively being worked on?
* true = Message is !done, and is in the progress cycle
* false = Message is !done and is *not* in the progress cycle ( [send/recv]_init requests)
*/
int active;
/** Has this message been posted?
* true = message was posted (Send or recv)
* false = message was not yet posted.
* Used when trying to figure out which messages the drain protocol needs to post, and
* which messages have already been posted for it.
*/
int already_posted;
};
typedef struct ompi_crcp_bkmrk_pml_drain_message_ref_t ompi_crcp_bkmrk_pml_drain_message_ref_t;
OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_drain_message_ref_t);
void ompi_crcp_bkmrk_pml_drain_message_ref_construct(ompi_crcp_bkmrk_pml_drain_message_ref_t *msg_ref);
void ompi_crcp_bkmrk_pml_drain_message_ref_destruct( ompi_crcp_bkmrk_pml_drain_message_ref_t *msg_ref);
/*
* List of Pending ACKs to drained messages
*/
struct ompi_crcp_bkmrk_pml_drain_message_ack_ref_t {
/** This is a list object */
opal_list_item_t super;
/** Complete flag */
bool complete;
/** Peer which we received from */
orte_process_name_t peer;
};
typedef struct ompi_crcp_bkmrk_pml_drain_message_ack_ref_t ompi_crcp_bkmrk_pml_drain_message_ack_ref_t;
OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_drain_message_ack_ref_t);
void ompi_crcp_bkmrk_pml_drain_message_ack_ref_construct(ompi_crcp_bkmrk_pml_drain_message_ack_ref_t *msg_ack_ref);
void ompi_crcp_bkmrk_pml_drain_message_ack_ref_destruct( ompi_crcp_bkmrk_pml_drain_message_ack_ref_t *msg_ack_ref);
/*
* Regular Traffic Message Reference
* Tracks message signature {count, datatype_size, tag, comm, peer}
*/
struct ompi_crcp_bkmrk_pml_traffic_message_ref_t {
/** This is a list object */
opal_list_item_t super;
/** Sequence Number of this message */
uint64_t msg_id;
/** Type of message this references */
ompi_crcp_bkmrk_pml_message_type_t msg_type;
/** Count for data */
size_t count;
/** Quick reference to the size of the datatype */
size_t ddt_size;
/** Message Tag */
int tag;
/** Peer rank to which it was sent/recv'ed if known */
int rank;
/** Communicator pointer */
ompi_communicator_t* comm;
/** Message Contents */
opal_list_t msg_contents;
/** Peer which we received from */
orte_process_name_t proc_name;
@ -205,42 +326,42 @@ extern "C" {
* already_posted = false -> true when posted irecv
*/
/** Has this message been matched by the peer?
* true = peer confirmed the receipt of this message
* - Resolved during bookmark exchange
* true = peer confirmed the receipt of this message
* false = unknown if peer has received this message or not
*/
bool matched;
int matched;
/** Is this message complete WRT PML semantics?
* true = message done on this side (send or receive)
* - Is it not in-flight?
* true = message done on this side (send or receive)
* false = message still in process (sending or receiving)
*/
bool done;
int done;
/** Is the message actively being worked on?
* true = Message is !done, and is in the progress cycle
* - Known to be in-flight?
* true = Message is !done, and is in the progress cycle
* false = Message is !done and is *not* in the progress cycle ( [send/recv]_init requests)
*/
bool active;
int active;
/** Has this message been posted?
* true = message was posted (Send or recv)
* false = message was not yet posted.
* Used when trying to figure out which messages the drain protocol needs to post, and
* which message have already been posted for it.
/** How many times a persistent send/recv has been posted, but not activated.
*
*/
bool already_posted;
int posted;
/** Suggested Rank that this should be matched to
* This is used when rank = ANY_SOURCE and we need to
* drain it to a specific peer
/** Actively drained
* These are messages that are active, and being drained. So if we checkpoint while the drain
* list is not empty then we do not try to count these messages more than once.
*/
int suggested_rank;
int active_drain;
};
typedef struct ompi_crcp_coord_pml_message_ref_t ompi_crcp_coord_pml_message_ref_t;
typedef struct ompi_crcp_bkmrk_pml_traffic_message_ref_t ompi_crcp_bkmrk_pml_traffic_message_ref_t;
OBJ_CLASS_DECLARATION(ompi_crcp_coord_pml_message_ref_t);
void ompi_crcp_coord_pml_message_ref_construct(ompi_crcp_coord_pml_message_ref_t *msg_ref);
void ompi_crcp_coord_pml_message_ref_destruct( ompi_crcp_coord_pml_message_ref_t *msg_ref);
OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_traffic_message_ref_t);
void ompi_crcp_bkmrk_pml_traffic_message_ref_construct(ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref);
void ompi_crcp_bkmrk_pml_traffic_message_ref_destruct( ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref);
/*
* A structure for a single process
@ -249,7 +370,7 @@ extern "C" {
* - List of received message from this peer
* - Message totals
*/
struct ompi_crcp_coord_pml_peer_ref_t {
struct ompi_crcp_bkmrk_pml_peer_ref_t {
/** This is a list object */
opal_list_item_t super;
@ -266,6 +387,9 @@ extern "C" {
opal_list_t irecv_list; /**< pml_irecv */
opal_list_t recv_init_list; /**< pml_irecv_init */
/** List of messages drained from this peer */
opal_list_t drained_list;
/*
* These are totals over all communicators provided for convenience.
*
@ -284,42 +408,37 @@ extern "C" {
* Once completed: ++total
*/
/** Total Number of messages sent */
uint32_t total_send_msgs;
uint32_t total_isend_msgs;
uint32_t total_send_init_msgs;
uint32_t matched_send_msgs;
uint32_t matched_isend_msgs;
uint32_t matched_send_init_msgs;
uint32_t total_msgs_sent;
uint32_t matched_msgs_sent;
/** Total Number of messages received */
uint32_t total_recv_msgs;
uint32_t total_irecv_msgs;
uint32_t total_recv_init_msgs;
uint32_t matched_recv_msgs;
uint32_t matched_irecv_msgs;
uint32_t matched_recv_init_msgs;
uint32_t total_msgs_recvd;
uint32_t matched_msgs_recvd;
/** Total Number of messages drained */
uint32_t total_drained_msgs;
/** If peer is expecting an ACK after draining the messages */
bool ack_required;
};
typedef struct ompi_crcp_coord_pml_peer_ref_t ompi_crcp_coord_pml_peer_ref_t;
typedef struct ompi_crcp_bkmrk_pml_peer_ref_t ompi_crcp_bkmrk_pml_peer_ref_t;
OBJ_CLASS_DECLARATION(ompi_crcp_coord_pml_peer_ref_t);
void ompi_crcp_coord_pml_peer_ref_construct(ompi_crcp_coord_pml_peer_ref_t *bkm_proc);
void ompi_crcp_coord_pml_peer_ref_destruct( ompi_crcp_coord_pml_peer_ref_t *bkm_proc);
OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_peer_ref_t);
void ompi_crcp_bkmrk_pml_peer_ref_construct(ompi_crcp_bkmrk_pml_peer_ref_t *bkm_proc);
void ompi_crcp_bkmrk_pml_peer_ref_destruct( ompi_crcp_bkmrk_pml_peer_ref_t *bkm_proc);
/*
* Local version of the PML state
*/
struct ompi_crcp_coord_pml_state_t {
struct ompi_crcp_bkmrk_pml_state_t {
ompi_crcp_base_pml_state_t p_super;
ompi_crcp_base_pml_state_t *prev_ptr;
ompi_crcp_coord_pml_peer_ref_t *peer_ref;
ompi_crcp_coord_pml_message_ref_t *msg_ref;
ompi_crcp_bkmrk_pml_peer_ref_t *peer_ref;
ompi_crcp_bkmrk_pml_traffic_message_ref_t *msg_ref;
};
typedef struct ompi_crcp_coord_pml_state_t ompi_crcp_coord_pml_state_t;
OBJ_CLASS_DECLARATION(ompi_crcp_coord_pml_state_t);
typedef struct ompi_crcp_bkmrk_pml_state_t ompi_crcp_bkmrk_pml_state_t;
OBJ_CLASS_DECLARATION(ompi_crcp_bkmrk_pml_state_t);
/***********************************
* Globally Defined Variables
@ -327,10 +446,10 @@ extern "C" {
/*
* List of known peers
*/
extern opal_list_t ompi_crcp_coord_pml_peer_refs;
extern opal_list_t ompi_crcp_bkmrk_pml_peer_refs;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* MCA_CRCP_COORD_PML_EXPORT_H */
#endif /* MCA_CRCP_HOKE_PML_EXPORT_H */

Разница между файлами не показана из-за своего большого размера Загрузить разницу