1
1

Update the PML base send init macro to take a convertor_flags parameter (discussed with George).

Update the csum pml module - still not quite right, but closer.

Modify the LANL platform files to keep pace.

This commit was SVN r20859.
Этот коммит содержится в:
Ralph Castain 2009-03-24 19:12:53 +00:00
родитель 36a813415d
Коммит f72e3ba9f9
25 изменённых файлов: 149 добавлений и 194 удалений

Просмотреть файл

@ -3,7 +3,7 @@ enable_mem_debug=yes
enable_mem_profile=yes
enable_debug_symbols=yes
enable_binaries=yes
enable_heterogeneous=yes
enable_heterogeneous=no
enable_picky=yes
enable_debug=yes
enable_shared=yes
@ -29,4 +29,4 @@ with_threads=posix
with_valgrind=no
LDFLAGS=-L/opt/PBS/lib64
with_openib_control_hdr_padding=yes
with_dst_checksum=yes

Просмотреть файл

@ -74,7 +74,6 @@ orte_allocation_required = 1
## MPI behavior
mpi_leave_pinned = 0
pml = csum
pml_csum_enable_csum = 1
btl_openib_flags = 50
## Protect looped collectives

Просмотреть файл

@ -3,7 +3,7 @@ enable_mem_debug=yes
enable_mem_profile=yes
enable_debug_symbols=yes
enable_binaries=yes
enable_heterogeneous=yes
enable_heterogeneous=no
enable_picky=yes
enable_debug=yes
enable_shared=yes
@ -30,4 +30,5 @@ with_valgrind=no
LDFLAGS=-L/opt/PBS/lib64
CFLAGS="-I/opt/panfs/include"
with_openib_control_hdr_padding=yes
with_dst_checksum=yes

Просмотреть файл

@ -74,7 +74,6 @@ orte_allocation_required = 1
## MPI behavior
mpi_leave_pinned = 0
pml = csum
pml_csum_enable_csum = 1
btl_openib_flags = 50
## Protect looped collectives

Просмотреть файл

@ -3,7 +3,7 @@ enable_mem_debug=no
enable_mem_profile=no
enable_debug_symbols=no
enable_binaries=yes
enable_heterogeneous=yes
enable_heterogeneous=no
enable_debug=no
enable_shared=yes
enable_static=yes
@ -29,4 +29,5 @@ with_threads=posix
with_valgrind=no
LDFLAGS=-L/opt/PBS/lib64
with_openib_control_hdr_padding=yes
with_dst_checksum=yes

Просмотреть файл

@ -74,7 +74,6 @@ orte_allocation_required = 1
## MPI behavior
mpi_leave_pinned = 0
pml = csum
pml_csum_enable_csum = 1
btl_openib_flags = 50
## Protect looped collectives

Просмотреть файл

@ -3,7 +3,7 @@ enable_mem_debug=no
enable_mem_profile=no
enable_debug_symbols=no
enable_binaries=yes
enable_heterogeneous=yes
enable_heterogeneous=no
enable_debug=no
enable_shared=yes
enable_static=yes
@ -30,4 +30,5 @@ with_valgrind=no
LDFLAGS=-L/opt/PBS/lib64
CFLAGS="-I/opt/panfs/include"
with_openib_control_hdr_padding=yes
with_dst_checksum=yes

Просмотреть файл

@ -74,7 +74,6 @@ orte_allocation_required = 1
## MPI behavior
mpi_leave_pinned = 0
pml = csum
pml_csum_enable_csum = 1
btl_openib_flags = 50
## Protect looped collectives

Просмотреть файл

@ -71,6 +71,16 @@ orte_tmpdir_base = /tmp
## from inadvertent job executions
orte_allocation_required = 1
## MPI behavior
mpi_leave_pinned = 0
## Protect looped collectives
coll_sync_priority = 100
coll_sync_barrier_before = 1000
## Activate hierarchical collectives
coll_hierarch_priority = 90
## Add the interface for out-of-band communication
## and set it up
oob_tcp_if_include=ib0

Просмотреть файл

@ -71,6 +71,16 @@ orte_tmpdir_base = /tmp
## from inadvertent job executions
orte_allocation_required = 1
## MPI behavior
mpi_leave_pinned = 0
## Protect looped collectives
coll_sync_priority = 100
coll_sync_barrier_before = 1000
## Activate hierarchical collectives
coll_hierarch_priority = 90
## Add the interface for out-of-band communication
## and set it up
oob_tcp_if_include=ib0

Просмотреть файл

@ -71,6 +71,16 @@ orte_tmpdir_base = /tmp
## from inadvertent job executions
orte_allocation_required = 1
## MPI behavior
mpi_leave_pinned = 0
## Protect looped collectives
coll_sync_priority = 100
coll_sync_barrier_before = 1000
## Activate hierarchical collectives
coll_hierarch_priority = 90
## Add the interface for out-of-band communication
## and set it up
oob_tcp_if_include=ib0

Просмотреть файл

@ -71,6 +71,16 @@ orte_tmpdir_base = /tmp
## from inadvertent job executions
orte_allocation_required = 1
## MPI behavior
mpi_leave_pinned = 0
## Protect looped collectives
coll_sync_priority = 100
coll_sync_barrier_before = 1000
## Activate hierarchical collectives
coll_hierarch_priority = 90
## Add the interface for out-of-band communication
## and set it up
oob_tcp_if_include=ib0

Просмотреть файл

@ -47,15 +47,16 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION( mca_pml_base_send_request_t );
/**
* Initialize a send request with call parameters.
*
* @param request (IN) Send request
* @param addr (IN) User buffer
* @param count (IN) Number of elements of indicated datatype.
* @param datatype (IN) User defined datatype
* @param peer (IN) Destination rank
* @param tag (IN) User defined tag
* @param comm (IN) Communicator
* @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
* @param persistent (IN) Is request persistent.
* @param request (IN) Send request
* @param addr (IN) User buffer
* @param count (IN) Number of elements of indicated datatype.
* @param datatype (IN) User defined datatype
* @param peer (IN) Destination rank
* @param tag (IN) User defined tag
* @param comm (IN) Communicator
* @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
* @param persistent (IN) Is request persistent.
* @param convertor_flags (IN) Flags to pass to convertor
*
* Perform any one-time initialization. Note that per-use initialization
* is done in the send request start routine.
@ -69,7 +70,8 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION( mca_pml_base_send_request_t );
tag, \
comm, \
mode, \
persistent) \
persistent, \
convertor_flags) \
{ \
/* increment reference counts */ \
OBJ_RETAIN(comm); \
@ -100,7 +102,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION( mca_pml_base_send_request_t );
(request)->req_base.req_datatype, \
(request)->req_base.req_count, \
(request)->req_base.req_addr, \
0, \
convertor_flags, \
&(request)->req_base.req_convertor ); \
ompi_convertor_get_packed_size( &(request)->req_base.req_convertor, \
&((request)->req_bytes_packed) );\

Просмотреть файл

@ -17,8 +17,6 @@
# $HEADER$
#
dist_pkgdata_DATA = help-pml-csum.txt
EXTRA_DIST = post_configure.sh pml_csum_endpoint.c pml_csum_endpoint.h
csum_sources = \

Просмотреть файл

@ -1,31 +0,0 @@
# -*- text -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2009 IBM Corporation. All rights reserved.
# Copyright (c) 2009 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English general help file for Open MPI.
#
[pml:checksum-not-enabled]
Warning: This build of Open MPI was specifically configured
with support for the alternate checksum algorithm, but the
support was not enabled by the proper MCA parameter. You should
set pml_csum_enable_csum to enable checksum operation.
While your application will be allowed to proceed, please be
advised that you will not be protected from data errors.

Просмотреть файл

@ -432,8 +432,7 @@ int mca_pml_csum_send_fin( ompi_proc_t* proc,
mca_btl_base_descriptor_t* fin;
mca_pml_csum_fin_hdr_t* hdr;
int rc;
bool do_csum = mca_pml_csum.enable_csum &&
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
bool do_csum = bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
mca_bml_base_alloc(bml_btl, &fin, order, sizeof(mca_pml_csum_fin_hdr_t),
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);

Просмотреть файл

@ -81,9 +81,6 @@ struct mca_pml_csum_t {
char* allocator_name;
mca_allocator_base_module_t* allocator;
uint32_t unexpected_limit;
/*Enable or Disable checksum*/
bool enable_csum;
};
typedef struct mca_pml_csum_t mca_pml_csum_t;

Просмотреть файл

@ -142,14 +142,6 @@ static int mca_pml_csum_component_open(void)
return OMPI_ERROR;
}
/* default is not to checksum all data */
mca_pml_csum.enable_csum =
mca_pml_csum_param_register_int("enable_csum", 0);
/*
ompi_convertor_checksum_enable(mca_pml_csum.enable_csum);
*/
mca_pml_csum.enabled = false;
return mca_bml_base_open();
}
@ -174,35 +166,27 @@ mca_pml_csum_component_init( int* priority,
bool enable_progress_threads,
bool enable_mpi_threads )
{
/* if the alternative csum was defined and enable_csum set, then we must
* be selected
*/
#if defined (OMPI_CSUM_DST)
if (mca_pml_csum.enable_csum) {
*priority = 100;
opal_output_verbose( 10, 0, "in csum, my priority is 0\n");
if(OMPI_SUCCESS != mca_bml_base_init( enable_progress_threads,
enable_mpi_threads)) {
*priority = 0;
return NULL;
}
/* Set this here (vs in component_open()) because
ompi_mpi_leave_pinned* may have been set after MCA params were
read (e.g., by the openib btl) */
mca_pml_csum.leave_pinned = (1 == ompi_mpi_leave_pinned);
mca_pml_csum.leave_pinned_pipeline = (int) ompi_mpi_leave_pinned_pipeline;
return &mca_pml_csum.super;
} else {
/* select us only if we are specified */
if((*priority) > 0) {
*priority = 0;
orte_show_help("help-pml-csum.txt", "pml:checksum-not-enabled", true);
return NULL;
}
#else
*priority = 0;
return NULL;
#endif
if(OMPI_SUCCESS != mca_bml_base_init( enable_progress_threads,
enable_mpi_threads)) {
return NULL;
}
/* Set this here (vs in component_open()) because
ompi_mpi_leave_pinned* may have been set after MCA params were
read (e.g., by the openib btl) */
mca_pml_csum.leave_pinned = (1 == ompi_mpi_leave_pinned);
mca_pml_csum.leave_pinned_pipeline = (int) ompi_mpi_leave_pinned_pipeline;
return &mca_pml_csum.super;
}
int mca_pml_csum_component_fini(void)

Просмотреть файл

@ -73,20 +73,37 @@ typedef struct mca_pml_csum_common_hdr_t mca_pml_csum_common_hdr_t;
struct mca_pml_csum_match_hdr_t {
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
uint16_t hdr_ctx; /**< communicator index */
uint16_t hdr_seq; /**< message sequence number */
int32_t hdr_src; /**< source rank */
int32_t hdr_tag; /**< user tag */
uint16_t hdr_seq; /**< message sequence number */
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t hdr_padding[12]; /**< explicitly pad to 16-byte boundary. Compilers seem to already prefer to do this, but make it explicit just in case */
#endif
uint32_t hdr_csum; /**< checksum over data */
};
#define OMPI_PML_CSUM_MATCH_HDR_LEN 20
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
#define OMPI_PML_CSUM_MATCH_HDR_LEN 32
#else
#define OMPI_PML_CSUM_MATCH_HDR_LEN 20
#endif
typedef struct mca_pml_csum_match_hdr_t mca_pml_csum_match_hdr_t;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT && OMPI_ENABLE_DEBUG
#define MCA_PML_CSUM_MATCH_HDR_FILL(h) \
do { \
(h).hdr_padding[0] = 0; \
(h).hdr_padding[1] = 0; \
do { \
(h).hdr_padding[0] = 0; \
(h).hdr_padding[1] = 0; \
(h).hdr_padding[2] = 0; \
(h).hdr_padding[3] = 0; \
(h).hdr_padding[4] = 0; \
(h).hdr_padding[5] = 0; \
(h).hdr_padding[6] = 0; \
(h).hdr_padding[7] = 0; \
(h).hdr_padding[8] = 0; \
(h).hdr_padding[9] = 0; \
(h).hdr_padding[10] = 0; \
(h).hdr_padding[11] = 0; \
} while(0)
#else
#define MCA_PML_CSUM_MATCH_HDR_FILL(h)
@ -193,10 +210,13 @@ do { \
*/
struct mca_pml_csum_frag_hdr_t {
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
uint32_t hdr_csum;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t hdr_padding[2]; /**< align to 16-byte boundary */
#endif
uint64_t hdr_frag_offset; /**< offset into message */
ompi_ptr_t hdr_src_req; /**< pointer to source request */
ompi_ptr_t hdr_dst_req; /**< pointer to matched receive */
uint32_t hdr_csum;
};
typedef struct mca_pml_csum_frag_hdr_t mca_pml_csum_frag_hdr_t;
@ -205,10 +225,6 @@ typedef struct mca_pml_csum_frag_hdr_t mca_pml_csum_frag_hdr_t;
do { \
(h).hdr_padding[0] = 0; \
(h).hdr_padding[1] = 0; \
(h).hdr_padding[2] = 0; \
(h).hdr_padding[3] = 0; \
(h).hdr_padding[4] = 0; \
(h).hdr_padding[5] = 0; \
} while(0)
#else
#define MCA_PML_CSUM_FRAG_HDR_FILL(h)
@ -281,6 +297,9 @@ do { \
struct mca_pml_csum_rdma_hdr_t {
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
uint8_t hdr_padding[2]; /** two to pad out the hdr to a 4 byte alignment. hdr_req will then be 8 byte aligned after 4 for hdr_seg_cnt */
#endif
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
ompi_ptr_t hdr_req; /**< destination request */
ompi_ptr_t hdr_des; /**< source descriptor */

Просмотреть файл

@ -119,7 +119,7 @@ void mca_pml_csum_recv_frag_callback_match(mca_btl_base_module_t* btl,
mca_pml_csum_recv_frag_t* frag = NULL;
size_t num_segments = des->des_dst_cnt;
size_t bytes_received = 0;
uint16_t csum_received, csum;
uint16_t csum_received, csum=0;
uint32_t csum_data;
bool do_csum = btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
@ -254,8 +254,8 @@ void mca_pml_csum_recv_frag_callback_match(mca_btl_base_module_t* btl,
match->req_recv.req_base.req_datatype);
);
}
if (do_csum) {
csum_data = (bytes_received > 0) ? match->req_recv.req_base.req_convertor.checksum : 0;
if (do_csum && bytes_received > 0) {
csum_data = match->req_recv.req_base.req_convertor.checksum;
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s Received \'match\' with data csum:0x%x, header csum:0x%04x, size:%lu\n",
@ -391,8 +391,7 @@ void mca_pml_csum_recv_frag_callback_frag(mca_btl_base_module_t* btl,
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
mca_pml_csum_recv_request_t* recvreq;
uint16_t csum_received, csum;
bool do_csum = mca_pml_csum.enable_csum &&
(btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
bool do_csum = btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
return;
@ -425,8 +424,7 @@ void mca_pml_csum_recv_frag_callback_put(mca_btl_base_module_t* btl,
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
mca_pml_csum_send_request_t* sendreq;
uint16_t csum_received, csum;
bool do_csum = mca_pml_csum.enable_csum &&
(btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
bool do_csum = btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
return;
@ -462,8 +460,7 @@ void mca_pml_csum_recv_frag_callback_fin(mca_btl_base_module_t* btl,
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
mca_btl_base_descriptor_t* rdma;
uint16_t csum_received, csum;
bool do_csum = mca_pml_csum.enable_csum &&
(btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
bool do_csum = btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
return;

Просмотреть файл

@ -471,9 +471,8 @@ void mca_pml_csum_recv_request_progress_frag( mca_pml_csum_recv_request_t* recvr
recvreq->req_recv.req_base.req_datatype);
);
if (do_csum) {
csum = (bytes_received > 0)?
recvreq->req_recv.req_base.req_convertor.checksum : 0;
if (do_csum && bytes_received > 0) {
csum = recvreq->req_recv.req_base.req_convertor.checksum;
OPAL_OUTPUT_VERBOSE((0, mca_pml_base_output,
"%s Received \'frag\' with data csum:0x%x, header csum:0x%04x, size:%lu\n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), csum, hdr->hdr_common.hdr_csum, (unsigned long)bytes_received));
@ -615,9 +614,8 @@ void mca_pml_csum_recv_request_progress_rndv( mca_pml_csum_recv_request_t* recvr
recvreq->req_recv.req_base.req_datatype);
);
}
if (do_csum) {
csum = (bytes_received > 0)?
recvreq->req_recv.req_base.req_convertor.checksum : 0;
if (do_csum && bytes_received > 0) {
csum = recvreq->req_recv.req_base.req_convertor.checksum;
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s Received \'rndv\' with data csum:0x%x, header csum:0x%04x, size:%lu\n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), csum, hdr->hdr_common.hdr_csum, (unsigned long)bytes_received));
@ -674,8 +672,8 @@ void mca_pml_csum_recv_request_progress_match( mca_pml_csum_recv_request_t* recv
data_offset,
bytes_received,
bytes_delivered);
if (do_csum) {
csum = (bytes_received > 0) ? recvreq->req_recv.req_base.req_convertor.checksum : 0;
if (do_csum && bytes_received > 0) {
csum = recvreq->req_recv.req_base.req_convertor.checksum;
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
"%s Received \'match\' with data csum:0x%x, header csum:0x%04x, size:%lu\n",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), csum, hdr->hdr_common.hdr_csum, (unsigned long)bytes_received));

Просмотреть файл

@ -199,8 +199,7 @@ static inline void prepare_recv_req_converter(mca_pml_csum_recv_request_t *req)
{
mca_bml_base_endpoint_t* endpoint =
req->req_recv.req_base.req_proc->proc_bml;
bool do_csum = mca_pml_csum.enable_csum &&
(endpoint->btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM);
bool do_csum = endpoint->btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM;
if( req->req_recv.req_base.req_datatype->size | req->req_recv.req_base.req_count ) {
ompi_convertor_copy_and_prepare_for_recv(

Просмотреть файл

@ -692,7 +692,7 @@ int mca_pml_csum_send_request_start_rdma( mca_pml_csum_send_request_t* sendreq,
bml_btl = sendreq->req_rdma[0].bml_btl;
do_csum = mca_pml_csum.enable_csum && (bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
do_csum = bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
if((sendreq->req_rdma_cnt == 1) && (bml_btl->btl_flags & MCA_BTL_FLAGS_GET)) {
mca_mpool_base_registration_t* reg = sendreq->req_rdma[0].btl_reg;
@ -855,8 +855,7 @@ int mca_pml_csum_send_request_start_rndv( mca_pml_csum_send_request_t* sendreq,
mca_btl_base_segment_t* segment;
mca_pml_csum_hdr_t* hdr;
int rc;
bool do_csum = mca_pml_csum.enable_csum &&
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
bool do_csum = bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
/* prepare descriptor */
if(size == 0) {
@ -1122,8 +1121,7 @@ cannot_pack:
des->des_cbfunc = mca_pml_csum_frag_completion;
des->des_cbdata = sendreq;
do_csum = mca_pml_csum.enable_csum &&
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
do_csum = bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
/* setup header */
hdr = (mca_pml_csum_frag_hdr_t*)des->des_src->seg_addr.pval;

Просмотреть файл

@ -132,77 +132,32 @@ get_request_from_send_pending(mca_pml_csum_send_pending_t *type)
}
#define MCA_PML_CSUM_SEND_REQUEST_INIT( sendreq, \
buf, \
count, \
datatype, \
dst, \
tag, \
comm, \
sendmode, \
persistent) \
{ \
MCA_PML_CSUM_BASE_SEND_REQUEST_INIT(&sendreq->req_send, \
buf, \
count, \
datatype, \
dst, \
tag, \
comm, \
sendmode, \
persistent); \
(sendreq)->req_recv.pval = NULL; \
#define MCA_PML_CSUM_SEND_REQUEST_INIT(sendreq, \
buf, \
count, \
datatype, \
dst, \
tag, \
comm, \
sendmode, \
persistent) \
{ \
mca_bml_base_endpoint_t* endpoint = \
sendreq->req_send.req_base.req_proc->proc_bml; \
bool do_csum = endpoint->btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM; \
MCA_PML_BASE_SEND_REQUEST_INIT(&sendreq->req_send, \
buf, \
count, \
datatype, \
dst, \
tag, \
comm, \
sendmode, \
persistent, \
do_csum ? CONVERTOR_WITH_CHECKSUM: 0); \
(sendreq)->req_recv.pval = NULL; \
}
#define MCA_PML_CSUM_BASE_SEND_REQUEST_INIT( request, \
addr, \
count, \
datatype, \
peer, \
tag, \
comm, \
mode, \
persistent) \
{ \
mca_bml_base_endpoint_t* endpoint = \
sendreq->req_send.req_base.req_proc->proc_bml; \
bool do_csum = mca_pml_csum.enable_csum && \
(endpoint->btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM); \
/* increment reference counts */ \
OBJ_RETAIN(comm); \
\
OMPI_REQUEST_INIT(&(request)->req_base.req_ompi, persistent); \
(request)->req_base.req_ompi.req_mpi_object.comm = comm; \
(request)->req_addr = addr; \
(request)->req_send_mode = mode; \
(request)->req_base.req_addr = addr; \
(request)->req_base.req_count = count; \
(request)->req_base.req_datatype = datatype; \
(request)->req_base.req_peer = (int32_t)peer; \
(request)->req_base.req_tag = (int32_t)tag; \
(request)->req_base.req_comm = comm; \
/* (request)->req_base.req_proc is set on request allocation */ \
(request)->req_base.req_pml_complete = OPAL_INT_TO_BOOL(persistent); \
(request)->req_base.req_free_called = false; \
(request)->req_base.req_ompi.req_status._cancelled = 0; \
(request)->req_bytes_packed = 0; \
\
/* initialize datatype convertor for this request */ \
if( count > 0 ) { \
OBJ_RETAIN(datatype); \
/* We will create a convertor specialized for the */ \
/* remote architecture and prepared with the datatype. */ \
ompi_convertor_copy_and_prepare_for_send( \
(request)->req_base.req_proc->proc_convertor, \
(request)->req_base.req_datatype, \
(request)->req_base.req_count, \
(request)->req_base.req_addr, \
(do_csum ? CONVERTOR_WITH_CHECKSUM: 0), \
&(request)->req_base.req_convertor ); \
ompi_convertor_get_packed_size( &(request)->req_base.req_convertor, \
&((request)->req_bytes_packed) );\
} \
}
static inline void mca_pml_csum_free_rdma_resources(mca_pml_csum_send_request_t* sendreq)
{

Просмотреть файл

@ -147,7 +147,8 @@ get_request_from_send_pending(mca_pml_ob1_send_pending_t *type)
tag, \
comm, \
sendmode, \
persistent); \
persistent, \
0); \
(sendreq)->req_recv.pval = NULL; \
}