Update the PML base send init macro to take a convertor_flags argument (discussed with George).
Update the csum pml module - still not quite right, but closer. Modify the LANL platform files to keep pace. This commit was SVN r20859.
Этот коммит содержится в:
родитель
36a813415d
Коммит
f72e3ba9f9
@ -3,7 +3,7 @@ enable_mem_debug=yes
|
||||
enable_mem_profile=yes
|
||||
enable_debug_symbols=yes
|
||||
enable_binaries=yes
|
||||
enable_heterogeneous=yes
|
||||
enable_heterogeneous=no
|
||||
enable_picky=yes
|
||||
enable_debug=yes
|
||||
enable_shared=yes
|
||||
@ -29,4 +29,4 @@ with_threads=posix
|
||||
with_valgrind=no
|
||||
LDFLAGS=-L/opt/PBS/lib64
|
||||
with_openib_control_hdr_padding=yes
|
||||
|
||||
with_dst_checksum=yes
|
||||
|
@ -74,7 +74,6 @@ orte_allocation_required = 1
|
||||
## MPI behavior
|
||||
mpi_leave_pinned = 0
|
||||
pml = csum
|
||||
pml_csum_enable_csum = 1
|
||||
btl_openib_flags = 50
|
||||
|
||||
## Protect looped collectives
|
||||
|
@ -3,7 +3,7 @@ enable_mem_debug=yes
|
||||
enable_mem_profile=yes
|
||||
enable_debug_symbols=yes
|
||||
enable_binaries=yes
|
||||
enable_heterogeneous=yes
|
||||
enable_heterogeneous=no
|
||||
enable_picky=yes
|
||||
enable_debug=yes
|
||||
enable_shared=yes
|
||||
@ -30,4 +30,5 @@ with_valgrind=no
|
||||
LDFLAGS=-L/opt/PBS/lib64
|
||||
CFLAGS="-I/opt/panfs/include"
|
||||
with_openib_control_hdr_padding=yes
|
||||
with_dst_checksum=yes
|
||||
|
||||
|
@ -74,7 +74,6 @@ orte_allocation_required = 1
|
||||
## MPI behavior
|
||||
mpi_leave_pinned = 0
|
||||
pml = csum
|
||||
pml_csum_enable_csum = 1
|
||||
btl_openib_flags = 50
|
||||
|
||||
## Protect looped collectives
|
||||
|
@ -3,7 +3,7 @@ enable_mem_debug=no
|
||||
enable_mem_profile=no
|
||||
enable_debug_symbols=no
|
||||
enable_binaries=yes
|
||||
enable_heterogeneous=yes
|
||||
enable_heterogeneous=no
|
||||
enable_debug=no
|
||||
enable_shared=yes
|
||||
enable_static=yes
|
||||
@ -29,4 +29,5 @@ with_threads=posix
|
||||
with_valgrind=no
|
||||
LDFLAGS=-L/opt/PBS/lib64
|
||||
with_openib_control_hdr_padding=yes
|
||||
with_dst_checksum=yes
|
||||
|
||||
|
@ -74,7 +74,6 @@ orte_allocation_required = 1
|
||||
## MPI behavior
|
||||
mpi_leave_pinned = 0
|
||||
pml = csum
|
||||
pml_csum_enable_csum = 1
|
||||
btl_openib_flags = 50
|
||||
|
||||
## Protect looped collectives
|
||||
|
@ -3,7 +3,7 @@ enable_mem_debug=no
|
||||
enable_mem_profile=no
|
||||
enable_debug_symbols=no
|
||||
enable_binaries=yes
|
||||
enable_heterogeneous=yes
|
||||
enable_heterogeneous=no
|
||||
enable_debug=no
|
||||
enable_shared=yes
|
||||
enable_static=yes
|
||||
@ -30,4 +30,5 @@ with_valgrind=no
|
||||
LDFLAGS=-L/opt/PBS/lib64
|
||||
CFLAGS="-I/opt/panfs/include"
|
||||
with_openib_control_hdr_padding=yes
|
||||
with_dst_checksum=yes
|
||||
|
||||
|
@ -74,7 +74,6 @@ orte_allocation_required = 1
|
||||
## MPI behavior
|
||||
mpi_leave_pinned = 0
|
||||
pml = csum
|
||||
pml_csum_enable_csum = 1
|
||||
btl_openib_flags = 50
|
||||
|
||||
## Protect looped collectives
|
||||
|
@ -71,6 +71,16 @@ orte_tmpdir_base = /tmp
|
||||
## from inadvertent job executions
|
||||
orte_allocation_required = 1
|
||||
|
||||
## MPI behavior
|
||||
mpi_leave_pinned = 0
|
||||
|
||||
## Protect looped collectives
|
||||
coll_sync_priority = 100
|
||||
coll_sync_barrier_before = 1000
|
||||
|
||||
## Activate hierarchical collectives
|
||||
coll_hierarch_priority = 90
|
||||
|
||||
## Add the interface for out-of-band communication
|
||||
## and set it up
|
||||
oob_tcp_if_include=ib0
|
||||
|
@ -71,6 +71,16 @@ orte_tmpdir_base = /tmp
|
||||
## from inadvertent job executions
|
||||
orte_allocation_required = 1
|
||||
|
||||
## MPI behavior
|
||||
mpi_leave_pinned = 0
|
||||
|
||||
## Protect looped collectives
|
||||
coll_sync_priority = 100
|
||||
coll_sync_barrier_before = 1000
|
||||
|
||||
## Activate hierarchical collectives
|
||||
coll_hierarch_priority = 90
|
||||
|
||||
## Add the interface for out-of-band communication
|
||||
## and set it up
|
||||
oob_tcp_if_include=ib0
|
||||
|
@ -71,6 +71,16 @@ orte_tmpdir_base = /tmp
|
||||
## from inadvertent job executions
|
||||
orte_allocation_required = 1
|
||||
|
||||
## MPI behavior
|
||||
mpi_leave_pinned = 0
|
||||
|
||||
## Protect looped collectives
|
||||
coll_sync_priority = 100
|
||||
coll_sync_barrier_before = 1000
|
||||
|
||||
## Activate hierarchical collectives
|
||||
coll_hierarch_priority = 90
|
||||
|
||||
## Add the interface for out-of-band communication
|
||||
## and set it up
|
||||
oob_tcp_if_include=ib0
|
||||
|
@ -71,6 +71,16 @@ orte_tmpdir_base = /tmp
|
||||
## from inadvertent job executions
|
||||
orte_allocation_required = 1
|
||||
|
||||
## MPI behavior
|
||||
mpi_leave_pinned = 0
|
||||
|
||||
## Protect looped collectives
|
||||
coll_sync_priority = 100
|
||||
coll_sync_barrier_before = 1000
|
||||
|
||||
## Activate hierarchical collectives
|
||||
coll_hierarch_priority = 90
|
||||
|
||||
## Add the interface for out-of-band communication
|
||||
## and set it up
|
||||
oob_tcp_if_include=ib0
|
||||
|
@ -47,15 +47,16 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION( mca_pml_base_send_request_t );
|
||||
/**
|
||||
* Initialize a send request with call parameters.
|
||||
*
|
||||
* @param request (IN) Send request
|
||||
* @param addr (IN) User buffer
|
||||
* @param count (IN) Number of elements of indicated datatype.
|
||||
* @param datatype (IN) User defined datatype
|
||||
* @param peer (IN) Destination rank
|
||||
* @param tag (IN) User defined tag
|
||||
* @param comm (IN) Communicator
|
||||
* @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
|
||||
* @param persistent (IN) Is request persistent.
|
||||
* @param request (IN) Send request
|
||||
* @param addr (IN) User buffer
|
||||
* @param count (IN) Number of elements of indicated datatype.
|
||||
* @param datatype (IN) User defined datatype
|
||||
* @param peer (IN) Destination rank
|
||||
* @param tag (IN) User defined tag
|
||||
* @param comm (IN) Communicator
|
||||
* @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
|
||||
* @param persistent (IN) Is request persistent.
|
||||
* @param convertor_flags (IN) Flags to pass to convertor
|
||||
*
|
||||
* Perform any one-time initialization. Note that per-use initialization
|
||||
* is done in the send request start routine.
|
||||
@ -69,7 +70,8 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION( mca_pml_base_send_request_t );
|
||||
tag, \
|
||||
comm, \
|
||||
mode, \
|
||||
persistent) \
|
||||
persistent, \
|
||||
convertor_flags) \
|
||||
{ \
|
||||
/* increment reference counts */ \
|
||||
OBJ_RETAIN(comm); \
|
||||
@ -100,7 +102,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION( mca_pml_base_send_request_t );
|
||||
(request)->req_base.req_datatype, \
|
||||
(request)->req_base.req_count, \
|
||||
(request)->req_base.req_addr, \
|
||||
0, \
|
||||
convertor_flags, \
|
||||
&(request)->req_base.req_convertor ); \
|
||||
ompi_convertor_get_packed_size( &(request)->req_base.req_convertor, \
|
||||
&((request)->req_bytes_packed) );\
|
||||
|
@ -17,8 +17,6 @@
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
dist_pkgdata_DATA = help-pml-csum.txt
|
||||
|
||||
EXTRA_DIST = post_configure.sh pml_csum_endpoint.c pml_csum_endpoint.h
|
||||
|
||||
csum_sources = \
|
||||
|
@ -1,31 +0,0 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009 IBM Corporation. All rights reserved.
|
||||
# Copyright (c) 2009 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the US/English general help file for Open MPI.
|
||||
#
|
||||
[pml:checksum-not-enabled]
|
||||
Warning: This build of Open MPI was specifically configured
|
||||
with support for the alternate checksum algorithm, but the
|
||||
support was not enabled by the proper MCA parameter. You should
|
||||
set pml_csum_enable_csum to enable checksum operation.
|
||||
|
||||
While your application will be allowed to proceed, please be
|
||||
advised that you will not be protected from data errors.
|
@ -432,8 +432,7 @@ int mca_pml_csum_send_fin( ompi_proc_t* proc,
|
||||
mca_btl_base_descriptor_t* fin;
|
||||
mca_pml_csum_fin_hdr_t* hdr;
|
||||
int rc;
|
||||
bool do_csum = mca_pml_csum.enable_csum &&
|
||||
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
bool do_csum = bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
|
||||
mca_bml_base_alloc(bml_btl, &fin, order, sizeof(mca_pml_csum_fin_hdr_t),
|
||||
MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
|
||||
|
||||
|
@ -81,9 +81,6 @@ struct mca_pml_csum_t {
|
||||
char* allocator_name;
|
||||
mca_allocator_base_module_t* allocator;
|
||||
uint32_t unexpected_limit;
|
||||
|
||||
/*Enable or Disable checksum*/
|
||||
bool enable_csum;
|
||||
};
|
||||
typedef struct mca_pml_csum_t mca_pml_csum_t;
|
||||
|
||||
|
@ -142,14 +142,6 @@ static int mca_pml_csum_component_open(void)
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* default is not to checksum all data */
|
||||
mca_pml_csum.enable_csum =
|
||||
mca_pml_csum_param_register_int("enable_csum", 0);
|
||||
/*
|
||||
ompi_convertor_checksum_enable(mca_pml_csum.enable_csum);
|
||||
*/
|
||||
|
||||
mca_pml_csum.enabled = false;
|
||||
return mca_bml_base_open();
|
||||
}
|
||||
|
||||
@ -174,35 +166,27 @@ mca_pml_csum_component_init( int* priority,
|
||||
bool enable_progress_threads,
|
||||
bool enable_mpi_threads )
|
||||
{
|
||||
/* if the alternative csum was defined and enable_csum set, then we must
|
||||
* be selected
|
||||
*/
|
||||
#if defined (OMPI_CSUM_DST)
|
||||
if (mca_pml_csum.enable_csum) {
|
||||
*priority = 100;
|
||||
opal_output_verbose( 10, 0, "in csum, my priority is 0\n");
|
||||
|
||||
if(OMPI_SUCCESS != mca_bml_base_init( enable_progress_threads,
|
||||
enable_mpi_threads)) {
|
||||
*priority = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Set this here (vs in component_open()) because
|
||||
ompi_mpi_leave_pinned* may have been set after MCA params were
|
||||
read (e.g., by the openib btl) */
|
||||
mca_pml_csum.leave_pinned = (1 == ompi_mpi_leave_pinned);
|
||||
mca_pml_csum.leave_pinned_pipeline = (int) ompi_mpi_leave_pinned_pipeline;
|
||||
|
||||
return &mca_pml_csum.super;
|
||||
} else {
|
||||
/* select us only if we are specified */
|
||||
if((*priority) > 0) {
|
||||
*priority = 0;
|
||||
orte_show_help("help-pml-csum.txt", "pml:checksum-not-enabled", true);
|
||||
return NULL;
|
||||
}
|
||||
#else
|
||||
*priority = 0;
|
||||
return NULL;
|
||||
#endif
|
||||
|
||||
if(OMPI_SUCCESS != mca_bml_base_init( enable_progress_threads,
|
||||
enable_mpi_threads)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Set this here (vs in component_open()) because
|
||||
ompi_mpi_leave_pinned* may have been set after MCA params were
|
||||
read (e.g., by the openib btl) */
|
||||
mca_pml_csum.leave_pinned = (1 == ompi_mpi_leave_pinned);
|
||||
mca_pml_csum.leave_pinned_pipeline = (int) ompi_mpi_leave_pinned_pipeline;
|
||||
|
||||
return &mca_pml_csum.super;
|
||||
}
|
||||
|
||||
int mca_pml_csum_component_fini(void)
|
||||
|
@ -73,20 +73,37 @@ typedef struct mca_pml_csum_common_hdr_t mca_pml_csum_common_hdr_t;
|
||||
struct mca_pml_csum_match_hdr_t {
|
||||
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
|
||||
uint16_t hdr_ctx; /**< communicator index */
|
||||
uint16_t hdr_seq; /**< message sequence number */
|
||||
int32_t hdr_src; /**< source rank */
|
||||
int32_t hdr_tag; /**< user tag */
|
||||
uint16_t hdr_seq; /**< message sequence number */
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[12]; /**< explicitly pad to 16-byte boundary. Compilers seem to already prefer to do this, but make it explicit just in case */
|
||||
#endif
|
||||
uint32_t hdr_csum; /**< checksum over data */
|
||||
};
|
||||
#define OMPI_PML_CSUM_MATCH_HDR_LEN 20
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
#define OMPI_PML_CSUM_MATCH_HDR_LEN 32
|
||||
#else
|
||||
#define OMPI_PML_CSUM_MATCH_HDR_LEN 20
|
||||
#endif
|
||||
|
||||
typedef struct mca_pml_csum_match_hdr_t mca_pml_csum_match_hdr_t;
|
||||
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT && OMPI_ENABLE_DEBUG
|
||||
#define MCA_PML_CSUM_MATCH_HDR_FILL(h) \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
(h).hdr_padding[2] = 0; \
|
||||
(h).hdr_padding[3] = 0; \
|
||||
(h).hdr_padding[4] = 0; \
|
||||
(h).hdr_padding[5] = 0; \
|
||||
(h).hdr_padding[6] = 0; \
|
||||
(h).hdr_padding[7] = 0; \
|
||||
(h).hdr_padding[8] = 0; \
|
||||
(h).hdr_padding[9] = 0; \
|
||||
(h).hdr_padding[10] = 0; \
|
||||
(h).hdr_padding[11] = 0; \
|
||||
} while(0)
|
||||
#else
|
||||
#define MCA_PML_CSUM_MATCH_HDR_FILL(h)
|
||||
@ -193,10 +210,13 @@ do { \
|
||||
*/
|
||||
struct mca_pml_csum_frag_hdr_t {
|
||||
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
|
||||
uint32_t hdr_csum;
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[2]; /**< align to 16-byte boundary */
|
||||
#endif
|
||||
uint64_t hdr_frag_offset; /**< offset into message */
|
||||
ompi_ptr_t hdr_src_req; /**< pointer to source request */
|
||||
ompi_ptr_t hdr_dst_req; /**< pointer to matched receive */
|
||||
uint32_t hdr_csum;
|
||||
};
|
||||
typedef struct mca_pml_csum_frag_hdr_t mca_pml_csum_frag_hdr_t;
|
||||
|
||||
@ -205,10 +225,6 @@ typedef struct mca_pml_csum_frag_hdr_t mca_pml_csum_frag_hdr_t;
|
||||
do { \
|
||||
(h).hdr_padding[0] = 0; \
|
||||
(h).hdr_padding[1] = 0; \
|
||||
(h).hdr_padding[2] = 0; \
|
||||
(h).hdr_padding[3] = 0; \
|
||||
(h).hdr_padding[4] = 0; \
|
||||
(h).hdr_padding[5] = 0; \
|
||||
} while(0)
|
||||
#else
|
||||
#define MCA_PML_CSUM_FRAG_HDR_FILL(h)
|
||||
@ -281,6 +297,9 @@ do { \
|
||||
|
||||
struct mca_pml_csum_rdma_hdr_t {
|
||||
mca_pml_csum_common_hdr_t hdr_common; /**< common attributes */
|
||||
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
uint8_t hdr_padding[2]; /** two to pad out the hdr to a 4 byte alignment. hdr_req will then be 8 byte aligned after 4 for hdr_seg_cnt */
|
||||
#endif
|
||||
uint32_t hdr_seg_cnt; /**< number of segments for rdma */
|
||||
ompi_ptr_t hdr_req; /**< destination request */
|
||||
ompi_ptr_t hdr_des; /**< source descriptor */
|
||||
|
@ -119,7 +119,7 @@ void mca_pml_csum_recv_frag_callback_match(mca_btl_base_module_t* btl,
|
||||
mca_pml_csum_recv_frag_t* frag = NULL;
|
||||
size_t num_segments = des->des_dst_cnt;
|
||||
size_t bytes_received = 0;
|
||||
uint16_t csum_received, csum;
|
||||
uint16_t csum_received, csum=0;
|
||||
uint32_t csum_data;
|
||||
bool do_csum = btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
|
||||
|
||||
@ -254,8 +254,8 @@ void mca_pml_csum_recv_frag_callback_match(mca_btl_base_module_t* btl,
|
||||
match->req_recv.req_base.req_datatype);
|
||||
);
|
||||
}
|
||||
if (do_csum) {
|
||||
csum_data = (bytes_received > 0) ? match->req_recv.req_base.req_convertor.checksum : 0;
|
||||
if (do_csum && bytes_received > 0) {
|
||||
csum_data = match->req_recv.req_base.req_convertor.checksum;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
|
||||
"%s Received \'match\' with data csum:0x%x, header csum:0x%04x, size:%lu\n",
|
||||
@ -391,8 +391,7 @@ void mca_pml_csum_recv_frag_callback_frag(mca_btl_base_module_t* btl,
|
||||
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
|
||||
mca_pml_csum_recv_request_t* recvreq;
|
||||
uint16_t csum_received, csum;
|
||||
bool do_csum = mca_pml_csum.enable_csum &&
|
||||
(btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
bool do_csum = btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
|
||||
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
|
||||
return;
|
||||
@ -425,8 +424,7 @@ void mca_pml_csum_recv_frag_callback_put(mca_btl_base_module_t* btl,
|
||||
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
|
||||
mca_pml_csum_send_request_t* sendreq;
|
||||
uint16_t csum_received, csum;
|
||||
bool do_csum = mca_pml_csum.enable_csum &&
|
||||
(btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
bool do_csum = btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
|
||||
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
|
||||
return;
|
||||
@ -462,8 +460,7 @@ void mca_pml_csum_recv_frag_callback_fin(mca_btl_base_module_t* btl,
|
||||
mca_pml_csum_hdr_t* hdr = (mca_pml_csum_hdr_t*)segments->seg_addr.pval;
|
||||
mca_btl_base_descriptor_t* rdma;
|
||||
uint16_t csum_received, csum;
|
||||
bool do_csum = mca_pml_csum.enable_csum &&
|
||||
(btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
bool do_csum = btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
|
||||
|
||||
if( OPAL_UNLIKELY(segments->seg_len < sizeof(mca_pml_csum_common_hdr_t)) ) {
|
||||
return;
|
||||
|
@ -471,9 +471,8 @@ void mca_pml_csum_recv_request_progress_frag( mca_pml_csum_recv_request_t* recvr
|
||||
recvreq->req_recv.req_base.req_datatype);
|
||||
);
|
||||
|
||||
if (do_csum) {
|
||||
csum = (bytes_received > 0)?
|
||||
recvreq->req_recv.req_base.req_convertor.checksum : 0;
|
||||
if (do_csum && bytes_received > 0) {
|
||||
csum = recvreq->req_recv.req_base.req_convertor.checksum;
|
||||
OPAL_OUTPUT_VERBOSE((0, mca_pml_base_output,
|
||||
"%s Received \'frag\' with data csum:0x%x, header csum:0x%04x, size:%lu\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), csum, hdr->hdr_common.hdr_csum, (unsigned long)bytes_received));
|
||||
@ -615,9 +614,8 @@ void mca_pml_csum_recv_request_progress_rndv( mca_pml_csum_recv_request_t* recvr
|
||||
recvreq->req_recv.req_base.req_datatype);
|
||||
);
|
||||
}
|
||||
if (do_csum) {
|
||||
csum = (bytes_received > 0)?
|
||||
recvreq->req_recv.req_base.req_convertor.checksum : 0;
|
||||
if (do_csum && bytes_received > 0) {
|
||||
csum = recvreq->req_recv.req_base.req_convertor.checksum;
|
||||
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
|
||||
"%s Received \'rndv\' with data csum:0x%x, header csum:0x%04x, size:%lu\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), csum, hdr->hdr_common.hdr_csum, (unsigned long)bytes_received));
|
||||
@ -674,8 +672,8 @@ void mca_pml_csum_recv_request_progress_match( mca_pml_csum_recv_request_t* recv
|
||||
data_offset,
|
||||
bytes_received,
|
||||
bytes_delivered);
|
||||
if (do_csum) {
|
||||
csum = (bytes_received > 0) ? recvreq->req_recv.req_base.req_convertor.checksum : 0;
|
||||
if (do_csum && bytes_received > 0) {
|
||||
csum = recvreq->req_recv.req_base.req_convertor.checksum;
|
||||
OPAL_OUTPUT_VERBOSE((1, mca_pml_base_output,
|
||||
"%s Received \'match\' with data csum:0x%x, header csum:0x%04x, size:%lu\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), csum, hdr->hdr_common.hdr_csum, (unsigned long)bytes_received));
|
||||
|
@ -199,8 +199,7 @@ static inline void prepare_recv_req_converter(mca_pml_csum_recv_request_t *req)
|
||||
{
|
||||
mca_bml_base_endpoint_t* endpoint =
|
||||
req->req_recv.req_base.req_proc->proc_bml;
|
||||
bool do_csum = mca_pml_csum.enable_csum &&
|
||||
(endpoint->btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
bool do_csum = endpoint->btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM;
|
||||
|
||||
if( req->req_recv.req_base.req_datatype->size | req->req_recv.req_base.req_count ) {
|
||||
ompi_convertor_copy_and_prepare_for_recv(
|
||||
|
@ -692,7 +692,7 @@ int mca_pml_csum_send_request_start_rdma( mca_pml_csum_send_request_t* sendreq,
|
||||
|
||||
bml_btl = sendreq->req_rdma[0].bml_btl;
|
||||
|
||||
do_csum = mca_pml_csum.enable_csum && (bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
do_csum = bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
|
||||
|
||||
if((sendreq->req_rdma_cnt == 1) && (bml_btl->btl_flags & MCA_BTL_FLAGS_GET)) {
|
||||
mca_mpool_base_registration_t* reg = sendreq->req_rdma[0].btl_reg;
|
||||
@ -855,8 +855,7 @@ int mca_pml_csum_send_request_start_rndv( mca_pml_csum_send_request_t* sendreq,
|
||||
mca_btl_base_segment_t* segment;
|
||||
mca_pml_csum_hdr_t* hdr;
|
||||
int rc;
|
||||
bool do_csum = mca_pml_csum.enable_csum &&
|
||||
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
bool do_csum = bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
|
||||
|
||||
/* prepare descriptor */
|
||||
if(size == 0) {
|
||||
@ -1122,8 +1121,7 @@ cannot_pack:
|
||||
des->des_cbfunc = mca_pml_csum_frag_completion;
|
||||
des->des_cbdata = sendreq;
|
||||
|
||||
do_csum = mca_pml_csum.enable_csum &&
|
||||
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
do_csum = bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM;
|
||||
|
||||
/* setup header */
|
||||
hdr = (mca_pml_csum_frag_hdr_t*)des->des_src->seg_addr.pval;
|
||||
|
@ -132,77 +132,32 @@ get_request_from_send_pending(mca_pml_csum_send_pending_t *type)
|
||||
}
|
||||
|
||||
|
||||
#define MCA_PML_CSUM_SEND_REQUEST_INIT( sendreq, \
|
||||
buf, \
|
||||
count, \
|
||||
datatype, \
|
||||
dst, \
|
||||
tag, \
|
||||
comm, \
|
||||
sendmode, \
|
||||
persistent) \
|
||||
{ \
|
||||
MCA_PML_CSUM_BASE_SEND_REQUEST_INIT(&sendreq->req_send, \
|
||||
buf, \
|
||||
count, \
|
||||
datatype, \
|
||||
dst, \
|
||||
tag, \
|
||||
comm, \
|
||||
sendmode, \
|
||||
persistent); \
|
||||
(sendreq)->req_recv.pval = NULL; \
|
||||
#define MCA_PML_CSUM_SEND_REQUEST_INIT(sendreq, \
|
||||
buf, \
|
||||
count, \
|
||||
datatype, \
|
||||
dst, \
|
||||
tag, \
|
||||
comm, \
|
||||
sendmode, \
|
||||
persistent) \
|
||||
{ \
|
||||
mca_bml_base_endpoint_t* endpoint = \
|
||||
sendreq->req_send.req_base.req_proc->proc_bml; \
|
||||
bool do_csum = endpoint->btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM; \
|
||||
MCA_PML_BASE_SEND_REQUEST_INIT(&sendreq->req_send, \
|
||||
buf, \
|
||||
count, \
|
||||
datatype, \
|
||||
dst, \
|
||||
tag, \
|
||||
comm, \
|
||||
sendmode, \
|
||||
persistent, \
|
||||
do_csum ? CONVERTOR_WITH_CHECKSUM: 0); \
|
||||
(sendreq)->req_recv.pval = NULL; \
|
||||
}
|
||||
|
||||
#define MCA_PML_CSUM_BASE_SEND_REQUEST_INIT( request, \
|
||||
addr, \
|
||||
count, \
|
||||
datatype, \
|
||||
peer, \
|
||||
tag, \
|
||||
comm, \
|
||||
mode, \
|
||||
persistent) \
|
||||
{ \
|
||||
mca_bml_base_endpoint_t* endpoint = \
|
||||
sendreq->req_send.req_base.req_proc->proc_bml; \
|
||||
bool do_csum = mca_pml_csum.enable_csum && \
|
||||
(endpoint->btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM); \
|
||||
/* increment reference counts */ \
|
||||
OBJ_RETAIN(comm); \
|
||||
\
|
||||
OMPI_REQUEST_INIT(&(request)->req_base.req_ompi, persistent); \
|
||||
(request)->req_base.req_ompi.req_mpi_object.comm = comm; \
|
||||
(request)->req_addr = addr; \
|
||||
(request)->req_send_mode = mode; \
|
||||
(request)->req_base.req_addr = addr; \
|
||||
(request)->req_base.req_count = count; \
|
||||
(request)->req_base.req_datatype = datatype; \
|
||||
(request)->req_base.req_peer = (int32_t)peer; \
|
||||
(request)->req_base.req_tag = (int32_t)tag; \
|
||||
(request)->req_base.req_comm = comm; \
|
||||
/* (request)->req_base.req_proc is set on request allocation */ \
|
||||
(request)->req_base.req_pml_complete = OPAL_INT_TO_BOOL(persistent); \
|
||||
(request)->req_base.req_free_called = false; \
|
||||
(request)->req_base.req_ompi.req_status._cancelled = 0; \
|
||||
(request)->req_bytes_packed = 0; \
|
||||
\
|
||||
/* initialize datatype convertor for this request */ \
|
||||
if( count > 0 ) { \
|
||||
OBJ_RETAIN(datatype); \
|
||||
/* We will create a convertor specialized for the */ \
|
||||
/* remote architecture and prepared with the datatype. */ \
|
||||
ompi_convertor_copy_and_prepare_for_send( \
|
||||
(request)->req_base.req_proc->proc_convertor, \
|
||||
(request)->req_base.req_datatype, \
|
||||
(request)->req_base.req_count, \
|
||||
(request)->req_base.req_addr, \
|
||||
(do_csum ? CONVERTOR_WITH_CHECKSUM: 0), \
|
||||
&(request)->req_base.req_convertor ); \
|
||||
ompi_convertor_get_packed_size( &(request)->req_base.req_convertor, \
|
||||
&((request)->req_bytes_packed) );\
|
||||
} \
|
||||
}
|
||||
|
||||
static inline void mca_pml_csum_free_rdma_resources(mca_pml_csum_send_request_t* sendreq)
|
||||
{
|
||||
|
@ -147,7 +147,8 @@ get_request_from_send_pending(mca_pml_ob1_send_pending_t *type)
|
||||
tag, \
|
||||
comm, \
|
||||
sendmode, \
|
||||
persistent); \
|
||||
persistent, \
|
||||
0); \
|
||||
(sendreq)->req_recv.pval = NULL; \
|
||||
}
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user