DR will now checksum on a per-BTL basis (see MCA_BTL_FLAGS_NEED_CSUM). We
still always send ACKs; teasing apart completion for the ACK/no-ACK cases looks like a pain in the ... This commit was SVN r10530.
Этот коммит содержится в:
родитель
2cf73912e2
Коммит
e6cd8db0e5
@ -252,6 +252,8 @@ struct mca_bml_base_endpoint_t {
|
||||
mca_bml_base_btl_array_t btl_send; /**< array of btls to use for remaining fragments */
|
||||
mca_bml_base_btl_array_t btl_rdma; /**< array of btls that support (prefer) rdma */
|
||||
mca_bml_base_endpoint_copy_fn_t copy;
|
||||
uint32_t btl_flags_or; /**< the bitwise OR of the btl flags */
|
||||
uint32_t btl_flags_and; /**< the bitwise AND of the btl flags */
|
||||
};
|
||||
typedef struct mca_bml_base_endpoint_t mca_bml_base_endpoint_t;
|
||||
|
||||
|
@ -265,7 +265,9 @@ int mca_bml_r2_add_procs(
|
||||
bml_endpoint->btl_rdma_size = -1;
|
||||
bml_endpoint->super.proc_ompi = proc;
|
||||
proc->proc_pml = (struct mca_pml_proc_t*) bml_endpoint;
|
||||
|
||||
|
||||
bml_endpoint->btl_flags_and = 0;
|
||||
bml_endpoint->btl_flags_or = 0;
|
||||
}
|
||||
|
||||
bml_endpoints[p] =(mca_bml_base_endpoint_t*) proc->proc_pml;
|
||||
@ -324,6 +326,11 @@ int mca_bml_r2_add_procs(
|
||||
*/
|
||||
bml_btl->btl_flags |= MCA_BTL_FLAGS_SEND;
|
||||
}
|
||||
/**
|
||||
* calculate the bitwise OR and AND of the btl flags
|
||||
*/
|
||||
bml_endpoint->btl_flags_or |= bml_btl->btl_flags;
|
||||
bml_endpoint->btl_flags_and &= bml_btl->btl_flags;
|
||||
}
|
||||
}
|
||||
if(btl_inuse > 0 && NULL != btl->btl_component->btl_progress) {
|
||||
@ -372,7 +379,7 @@ int mca_bml_r2_add_procs(
|
||||
latency = btl->btl_latency;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* (1) set the weight of each btl as a percentage of overall bandwidth
|
||||
* (2) copy all btl instances at the highest priority ranking into the
|
||||
* list of btls used for first fragments
|
||||
|
@ -146,7 +146,7 @@ typedef uint8_t mca_btl_base_tag_t;
|
||||
|
||||
/* btl transport is reliable */
|
||||
#define MCA_BTL_FLAGS_NEED_ACK 0x10
|
||||
#define MCA_BTL_FLAGS_NEED_CSUM 0xB
|
||||
#define MCA_BTL_FLAGS_NEED_CSUM 0x20
|
||||
|
||||
/* Default exclusivity levels */
|
||||
#define MCA_BTL_EXCLUSIVITY_HIGH 64*1024 /* internal loopback */
|
||||
|
@ -40,14 +40,14 @@
|
||||
|
||||
|
||||
|
||||
#define MCA_PML_DR_HDR_VALIDATE_ACK(hdr, type) \
|
||||
#define MCA_PML_DR_HDR_VALIDATE_ACK(do_csum, hdr, type) \
|
||||
do { \
|
||||
mca_pml_dr_endpoint_t* ep; \
|
||||
if(mca_pml_dr.enable_csum) { \
|
||||
if(do_csum) { \
|
||||
uint16_t csum = opal_csum(hdr, sizeof(type)); \
|
||||
if(hdr->hdr_common.hdr_csum != csum) { \
|
||||
MCA_PML_DR_DEBUG(0, (0, "%s:%d: invalid header checksum: 0x%04x != 0x%04x\n", \
|
||||
__FILE__, __LINE__, hdr->hdr_common.hdr_csum, csum)); \
|
||||
MCA_PML_DR_DEBUG(0, (0, "%s:%d: invalid header checksum: 0x%04x != 0x%04x\n", \
|
||||
__FILE__, __LINE__, hdr->hdr_common.hdr_csum, csum)); \
|
||||
return; \
|
||||
} \
|
||||
} \
|
||||
@ -109,6 +109,8 @@ void mca_pml_dr_recv_frag_callback(
|
||||
mca_pml_dr_endpoint_t *ep;
|
||||
ompi_communicator_t* ompi_comm;
|
||||
uint16_t csum;
|
||||
bool do_csum = mca_pml_dr.enable_csum &&
|
||||
(btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
|
||||
if(segments->seg_len < sizeof(mca_pml_dr_common_hdr_t)) {
|
||||
return;
|
||||
@ -117,7 +119,7 @@ void mca_pml_dr_recv_frag_callback(
|
||||
switch(hdr->hdr_common.hdr_type) {
|
||||
case MCA_PML_DR_HDR_TYPE_MATCH:
|
||||
{
|
||||
if(mca_pml_dr.enable_csum) {
|
||||
if(do_csum) {
|
||||
csum = opal_csum(hdr, sizeof(mca_pml_dr_match_hdr_t));
|
||||
if(hdr->hdr_common.hdr_csum != csum) {
|
||||
MCA_PML_DR_DEBUG(0,(0, "%s:%d: invalid header checksum: 0x%04x != 0x%04x\n",
|
||||
@ -160,18 +162,19 @@ void mca_pml_dr_recv_frag_callback(
|
||||
}
|
||||
case MCA_PML_DR_HDR_TYPE_MATCH_ACK:
|
||||
{
|
||||
MCA_PML_DR_HDR_VALIDATE_ACK(hdr, mca_pml_dr_ack_hdr_t);
|
||||
MCA_PML_DR_HDR_VALIDATE_ACK(do_csum, hdr, mca_pml_dr_ack_hdr_t);
|
||||
mca_pml_dr_send_request_match_ack(btl, &hdr->hdr_ack);
|
||||
break;
|
||||
}
|
||||
case MCA_PML_DR_HDR_TYPE_RNDV:
|
||||
{
|
||||
if(mca_pml_dr.enable_csum) {
|
||||
if(do_csum) {
|
||||
csum = opal_csum(hdr, sizeof(mca_pml_dr_rendezvous_hdr_t));
|
||||
|
||||
if(hdr->hdr_common.hdr_csum != csum) {
|
||||
MCA_PML_DR_DEBUG(0, (0, "%s:%d: invalid header checksum: 0x%04x != 0x%04x\n",
|
||||
__FILE__, __LINE__, hdr->hdr_common.hdr_csum, csum));
|
||||
assert(0);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -243,7 +246,7 @@ void mca_pml_dr_recv_frag_callback(
|
||||
}
|
||||
case MCA_PML_DR_HDR_TYPE_RNDV_ACK:
|
||||
{
|
||||
MCA_PML_DR_HDR_VALIDATE_ACK(hdr, mca_pml_dr_ack_hdr_t);
|
||||
MCA_PML_DR_HDR_VALIDATE_ACK(do_csum, hdr, mca_pml_dr_ack_hdr_t);
|
||||
mca_pml_dr_send_request_rndv_ack(btl, &hdr->hdr_ack);
|
||||
break;
|
||||
}
|
||||
@ -251,7 +254,7 @@ void mca_pml_dr_recv_frag_callback(
|
||||
{
|
||||
mca_pml_dr_recv_request_t* recvreq;
|
||||
|
||||
if(mca_pml_dr.enable_csum) {
|
||||
if(do_csum) {
|
||||
csum = opal_csum(hdr, sizeof(mca_pml_dr_frag_hdr_t));
|
||||
if(hdr->hdr_common.hdr_csum != csum) {
|
||||
MCA_PML_DR_DEBUG(0,(0, "%s:%d: invalid header checksum: 0x%04x != 0x%04x\n",
|
||||
@ -294,7 +297,7 @@ void mca_pml_dr_recv_frag_callback(
|
||||
}
|
||||
case MCA_PML_DR_HDR_TYPE_FRAG_ACK:
|
||||
{
|
||||
MCA_PML_DR_HDR_VALIDATE_ACK(hdr, mca_pml_dr_ack_hdr_t);
|
||||
MCA_PML_DR_HDR_VALIDATE_ACK(do_csum, hdr, mca_pml_dr_ack_hdr_t);
|
||||
mca_pml_dr_send_request_frag_ack(btl, &hdr->hdr_ack);
|
||||
break;
|
||||
}
|
||||
@ -608,6 +611,8 @@ bool mca_pml_dr_recv_frag_match(
|
||||
int rc;
|
||||
uint32_t csum;
|
||||
mca_pml_dr_endpoint_t* ep = (mca_pml_dr_endpoint_t*) proc->endpoint;
|
||||
bool do_csum = mca_pml_dr.enable_csum &&
|
||||
(btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
|
||||
/* source sequence number */
|
||||
frag_msg_seq = hdr->hdr_seq;
|
||||
@ -684,7 +689,7 @@ rematch:
|
||||
return rc;
|
||||
}
|
||||
MCA_PML_DR_RECV_FRAG_INIT(frag,ompi_proc,hdr,segments,num_segments,btl,csum);
|
||||
if(mca_pml_dr.enable_csum && csum != hdr->hdr_csum) {
|
||||
if(do_csum && csum != hdr->hdr_csum) {
|
||||
mca_pml_dr_recv_frag_ack((mca_bml_base_endpoint_t*)ompi_proc->proc_pml,
|
||||
&hdr->hdr_common, hdr->hdr_src_ptr.pval, 0, 0);
|
||||
MCA_PML_DR_DEBUG(0,(0, "%s:%d: received corrupted data 0x%08x != 0x%08x (segments %d length %d)\n",
|
||||
@ -720,7 +725,7 @@ rematch:
|
||||
return rc;
|
||||
}
|
||||
MCA_PML_DR_RECV_FRAG_INIT(frag,ompi_proc,hdr,segments,num_segments,btl,csum);
|
||||
if(mca_pml_dr.enable_csum && csum != hdr->hdr_csum) {
|
||||
if(do_csum && csum != hdr->hdr_csum) {
|
||||
mca_pml_dr_recv_frag_ack((mca_bml_base_endpoint_t*)ompi_proc->proc_pml,
|
||||
&hdr->hdr_common, hdr->hdr_src_ptr.pval, 0, 0);
|
||||
MCA_PML_DR_DEBUG(0,(0, "%s:%d: received corrupted data 0x%08x != 0x%08x\n",
|
||||
@ -775,8 +780,11 @@ void mca_pml_dr_recv_frag_ack(
|
||||
mca_pml_dr_recv_frag_t* frag;
|
||||
mca_pml_dr_ack_hdr_t* ack;
|
||||
int rc;
|
||||
bool do_csum;
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
|
||||
|
||||
do_csum = mca_pml_dr.enable_csum &&
|
||||
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
|
||||
/* allocate descriptor */
|
||||
MCA_PML_DR_DES_ALLOC(bml_btl, des, sizeof(mca_pml_dr_ack_hdr_t));
|
||||
if(NULL == des) {
|
||||
@ -796,7 +804,7 @@ void mca_pml_dr_recv_frag_ack(
|
||||
ack->hdr_src_ptr.pval = src_ptr;
|
||||
assert(ack->hdr_src_ptr.pval);
|
||||
ack->hdr_dst_ptr.pval = NULL;
|
||||
ack->hdr_common.hdr_csum = (mca_pml_dr.enable_csum ?
|
||||
ack->hdr_common.hdr_csum = (do_csum ?
|
||||
opal_csum(ack, sizeof(mca_pml_dr_ack_hdr_t)) :
|
||||
OPAL_CSUM_ZERO);
|
||||
|
||||
|
@ -66,7 +66,8 @@ do { \
|
||||
uint32_t ui1 = 0; \
|
||||
uint32_t ui2 = 0; \
|
||||
mca_pml_dr_buffer_t** buffers = frag->buffers; \
|
||||
\
|
||||
bool do_csum = mca_pml_dr.enable_csum && \
|
||||
(btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM); \
|
||||
/* init recv_frag */ \
|
||||
frag->btl = btl; \
|
||||
frag->hdr = *(mca_pml_dr_hdr_t*)hdr; \
|
||||
@ -83,7 +84,7 @@ do { \
|
||||
buffers[i] = buff; \
|
||||
frag->segments[i].seg_addr.pval = buff->addr; \
|
||||
frag->segments[i].seg_len = segs[i].seg_len; \
|
||||
if( mca_pml_dr.enable_csum ) { \
|
||||
if( do_csum ) { \
|
||||
size_t hdr_len = 0; \
|
||||
if( 0 == i ) { \
|
||||
hdr_len = mca_pml_dr_hdr_size(hdr->hdr_common.hdr_type);\
|
||||
|
@ -31,8 +31,13 @@
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
|
||||
#define MCA_PML_DR_RECV_REQUEST_ACK(recvreq,hdr,csum,bytes_received) \
|
||||
if(mca_pml_dr.enable_csum && csum != hdr->hdr_match.hdr_csum) { \
|
||||
/*
|
||||
* this macro is needed for MATCH/RNDV headers,
|
||||
* as we need to put the match back on the list for later matching
|
||||
* if the checksum fails
|
||||
*/
|
||||
#define MCA_PML_DR_RECV_REQUEST_MATCH_ACK(do_csum,recvreq,hdr,csum,bytes_received) \
|
||||
if(do_csum && csum != hdr->hdr_match.hdr_csum) { \
|
||||
/* failed the csum, put the request back on the list for \
|
||||
* matching later on retransmission \
|
||||
*/ \
|
||||
@ -167,9 +172,12 @@ void mca_pml_dr_recv_request_ack(
|
||||
mca_bml_base_btl_t* bml_btl;
|
||||
mca_pml_dr_ack_hdr_t* ack;
|
||||
int rc;
|
||||
bool do_csum;
|
||||
|
||||
/* allocate descriptor */
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&recvreq->req_endpoint->base.btl_eager);
|
||||
do_csum = mca_pml_dr.enable_csum &&
|
||||
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
MCA_PML_DR_DES_ALLOC(bml_btl, des, sizeof(mca_pml_dr_ack_hdr_t));
|
||||
if(NULL == des) {
|
||||
return;
|
||||
@ -187,7 +195,7 @@ void mca_pml_dr_recv_request_ack(
|
||||
ack->hdr_vmask = mask;
|
||||
ack->hdr_src_ptr = src_ptr;
|
||||
ack->hdr_dst_ptr.pval = recvreq;
|
||||
ack->hdr_common.hdr_csum = (mca_pml_dr.enable_csum?
|
||||
ack->hdr_common.hdr_csum = (do_csum?
|
||||
opal_csum(ack, sizeof(mca_pml_dr_ack_hdr_t)) :
|
||||
OPAL_CSUM_ZERO);
|
||||
|
||||
@ -223,7 +231,10 @@ void mca_pml_dr_recv_request_progress(
|
||||
uint32_t csum = OPAL_CSUM_ZERO;
|
||||
uint64_t bit;
|
||||
mca_pml_dr_vfrag_t* vfrag;
|
||||
bool do_csum = mca_pml_dr.enable_csum &&
|
||||
(btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
|
||||
|
||||
for(i=0; i<num_segments; i++)
|
||||
bytes_received += segments[i].seg_len;
|
||||
|
||||
@ -233,6 +244,7 @@ void mca_pml_dr_recv_request_progress(
|
||||
bytes_received -= sizeof(mca_pml_dr_match_hdr_t);
|
||||
recvreq->req_vfrag0.vf_send = hdr->hdr_match.hdr_src_ptr;
|
||||
MCA_PML_DR_RECV_REQUEST_BYTES_PACKED(recvreq, bytes_received);
|
||||
|
||||
MCA_PML_DR_RECV_REQUEST_UNPACK(
|
||||
recvreq,
|
||||
segments,
|
||||
@ -242,7 +254,7 @@ void mca_pml_dr_recv_request_progress(
|
||||
bytes_received,
|
||||
bytes_delivered,
|
||||
csum);
|
||||
MCA_PML_DR_RECV_REQUEST_ACK(recvreq,hdr,csum,bytes_received);
|
||||
MCA_PML_DR_RECV_REQUEST_MATCH_ACK(do_csum, recvreq,hdr,csum,bytes_received);
|
||||
|
||||
break;
|
||||
|
||||
@ -260,7 +272,7 @@ void mca_pml_dr_recv_request_progress(
|
||||
bytes_received,
|
||||
bytes_delivered,
|
||||
csum);
|
||||
MCA_PML_DR_RECV_REQUEST_ACK(recvreq,hdr,csum,bytes_received);
|
||||
MCA_PML_DR_RECV_REQUEST_MATCH_ACK(do_csum, recvreq,hdr,csum,bytes_received);
|
||||
|
||||
break;
|
||||
|
||||
@ -295,7 +307,7 @@ void mca_pml_dr_recv_request_progress(
|
||||
* note that it might still fail the checksum though
|
||||
*/
|
||||
vfrag->vf_pending |= bit;
|
||||
if(!mca_pml_dr.enable_csum || csum == hdr->hdr_frag.hdr_frag_csum) {
|
||||
if(!do_csum || csum == hdr->hdr_frag.hdr_frag_csum) {
|
||||
/* this part of the vfrag passed the checksum,
|
||||
mark it so that we ack it after receiving the
|
||||
entire vfrag */
|
||||
|
@ -240,6 +240,8 @@ do {
|
||||
|
||||
#define MCA_PML_DR_RECV_REQUEST_BYTES_PACKED(request, bytes_packed) \
|
||||
do { \
|
||||
bool do_csum = mca_pml_dr.enable_csum && \
|
||||
(request->req_endpoint->base.btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM); \
|
||||
(request)->req_recv.req_bytes_packed = bytes_packed; \
|
||||
if((request)->req_recv.req_bytes_packed != 0) { \
|
||||
ompi_proc_t *proc = (request)->req_proc->ompi_proc; \
|
||||
@ -247,7 +249,7 @@ do {
|
||||
(request)->req_recv.req_base.req_datatype, \
|
||||
(request)->req_recv.req_base.req_count, \
|
||||
(request)->req_recv.req_base.req_addr, \
|
||||
(mca_pml_dr.enable_csum ? CONVERTOR_WITH_CHECKSUM: 0), \
|
||||
(do_csum ? CONVERTOR_WITH_CHECKSUM: 0), \
|
||||
&(request)->req_recv.req_convertor ); \
|
||||
} \
|
||||
} while (0)
|
||||
@ -273,6 +275,8 @@ do {
|
||||
size_t max_data = bytes_received; \
|
||||
int32_t free_after = 0; \
|
||||
size_t n, offset = seg_offset; \
|
||||
bool do_csum = mca_pml_dr.enable_csum && \
|
||||
(request->req_endpoint->base.btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM); \
|
||||
\
|
||||
for(n=0; n<num_segments; n++) { \
|
||||
mca_btl_base_segment_t* segment = segments+n; \
|
||||
@ -297,7 +301,7 @@ do {
|
||||
&free_after); \
|
||||
bytes_delivered = max_data; \
|
||||
if(bytes_received && !bytes_delivered) assert(0); \
|
||||
csum = (mca_pml_dr.enable_csum ? \
|
||||
csum = (do_csum ? \
|
||||
request->req_recv.req_convertor.checksum : OPAL_CSUM_ZERO); \
|
||||
} else { \
|
||||
bytes_delivered = 0; \
|
||||
|
@ -374,6 +374,8 @@ int mca_pml_dr_send_request_start_buffered(
|
||||
int32_t free_after;
|
||||
int rc;
|
||||
uint32_t csum;
|
||||
bool do_csum = mca_pml_dr.enable_csum &&
|
||||
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
|
||||
/* allocate descriptor */
|
||||
mca_bml_base_alloc(bml_btl, &descriptor, sizeof(mca_pml_dr_rendezvous_hdr_t) + size);
|
||||
@ -446,7 +448,7 @@ int mca_pml_dr_send_request_start_buffered(
|
||||
hdr->hdr_match.hdr_csum = csum;
|
||||
hdr->hdr_match.hdr_src_ptr.pval = &sendreq->req_vfrag0;
|
||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||
hdr->hdr_common.hdr_csum = (mca_pml_dr.enable_csum ?
|
||||
hdr->hdr_common.hdr_csum = (do_csum ?
|
||||
opal_csum(hdr, sizeof(mca_pml_dr_rendezvous_hdr_t)) :
|
||||
OPAL_CSUM_ZERO);
|
||||
|
||||
@ -489,7 +491,9 @@ int mca_pml_dr_send_request_start_copy(
|
||||
size_t max_data;
|
||||
int32_t free_after;
|
||||
int rc;
|
||||
|
||||
bool do_csum = mca_pml_dr.enable_csum &&
|
||||
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
|
||||
/* allocate descriptor */
|
||||
mca_bml_base_alloc(bml_btl, &descriptor, sizeof(mca_pml_dr_match_hdr_t) + size);
|
||||
if(NULL == descriptor) {
|
||||
@ -525,11 +529,11 @@ int mca_pml_dr_send_request_start_copy(
|
||||
hdr->hdr_common.hdr_src = sendreq->req_endpoint->src;
|
||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
|
||||
hdr->hdr_match.hdr_csum = (size > 0 && mca_pml_dr.enable_csum ?
|
||||
hdr->hdr_match.hdr_csum = (size > 0 && do_csum ?
|
||||
sendreq->req_send.req_convertor.checksum : OPAL_CSUM_ZERO);
|
||||
hdr->hdr_match.hdr_src_ptr.pval = &sendreq->req_vfrag0;
|
||||
hdr->hdr_common.hdr_vid = sendreq->req_vfrag0.vf_id;
|
||||
hdr->hdr_common.hdr_csum = (mca_pml_dr.enable_csum ?
|
||||
hdr->hdr_common.hdr_csum = (do_csum ?
|
||||
opal_csum(hdr, sizeof(mca_pml_dr_match_hdr_t)) :
|
||||
OPAL_CSUM_ZERO);
|
||||
|
||||
@ -572,6 +576,8 @@ int mca_pml_dr_send_request_start_prepare(
|
||||
mca_btl_base_segment_t* segment;
|
||||
mca_pml_dr_hdr_t* hdr;
|
||||
int rc;
|
||||
bool do_csum = mca_pml_dr.enable_csum &&
|
||||
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
|
||||
/* prepare descriptor */
|
||||
mca_bml_base_prepare_src(
|
||||
@ -597,11 +603,11 @@ int mca_pml_dr_send_request_start_prepare(
|
||||
hdr->hdr_common.hdr_src = sendreq->req_endpoint->src;
|
||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
|
||||
hdr->hdr_match.hdr_csum = (size > 0 && mca_pml_dr.enable_csum ?
|
||||
hdr->hdr_match.hdr_csum = (size > 0 && do_csum ?
|
||||
sendreq->req_send.req_convertor.checksum : OPAL_CSUM_ZERO);
|
||||
hdr->hdr_match.hdr_src_ptr.pval = &sendreq->req_vfrag0;
|
||||
hdr->hdr_common.hdr_vid = sendreq->req_vfrag0.vf_id;
|
||||
hdr->hdr_common.hdr_csum = (mca_pml_dr.enable_csum ?
|
||||
hdr->hdr_common.hdr_csum = (do_csum ?
|
||||
opal_csum(hdr, sizeof(mca_pml_dr_match_hdr_t)) :
|
||||
OPAL_CSUM_ZERO);
|
||||
|
||||
@ -640,7 +646,9 @@ int mca_pml_dr_send_request_start_rndv(
|
||||
mca_btl_base_segment_t* segment;
|
||||
mca_pml_dr_hdr_t* hdr;
|
||||
int rc;
|
||||
|
||||
bool do_csum = mca_pml_dr.enable_csum &&
|
||||
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
|
||||
|
||||
/* prepare descriptor */
|
||||
if(size == 0) {
|
||||
@ -673,12 +681,13 @@ int mca_pml_dr_send_request_start_rndv(
|
||||
hdr->hdr_common.hdr_src = sendreq->req_endpoint->src;
|
||||
hdr->hdr_common.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||
hdr->hdr_common.hdr_vid = sendreq->req_vfrag0.vf_id;
|
||||
hdr->hdr_common.hdr_csum = 0;
|
||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
|
||||
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
|
||||
hdr->hdr_match.hdr_src_ptr.pval = &sendreq->req_vfrag0;
|
||||
hdr->hdr_match.hdr_csum = size > 0 ? sendreq->req_send.req_convertor.checksum : OPAL_CSUM_ZERO;
|
||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
|
||||
hdr->hdr_common.hdr_csum = (mca_pml_dr.enable_csum ?
|
||||
hdr->hdr_common.hdr_csum = (do_csum ?
|
||||
opal_csum(hdr, sizeof(mca_pml_dr_rendezvous_hdr_t)) :
|
||||
OPAL_CSUM_ZERO);
|
||||
|
||||
@ -749,7 +758,9 @@ int mca_pml_dr_send_request_schedule(mca_pml_dr_send_request_t* sendreq)
|
||||
size_t offset_in_msg = vfrag->vf_offset + offset_in_vfrag;
|
||||
size_t size;
|
||||
int rc;
|
||||
|
||||
bool do_csum = mca_pml_dr.enable_csum &&
|
||||
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
|
||||
if(vfrag->vf_idx == vfrag->vf_len - 1) {
|
||||
size = vfrag->vf_size - offset_in_vfrag;
|
||||
} else {
|
||||
@ -790,7 +801,7 @@ int mca_pml_dr_send_request_schedule(mca_pml_dr_send_request_t* sendreq)
|
||||
hdr->hdr_frag_offset = offset_in_msg;
|
||||
hdr->hdr_src_ptr.pval = vfrag;
|
||||
hdr->hdr_dst_ptr = sendreq->req_vfrag0.vf_recv;
|
||||
hdr->hdr_common.hdr_csum = (mca_pml_dr.enable_csum ?
|
||||
hdr->hdr_common.hdr_csum = (do_csum ?
|
||||
opal_csum(hdr, sizeof(mca_pml_dr_frag_hdr_t)) :
|
||||
OPAL_CSUM_ZERO);
|
||||
|
||||
@ -841,11 +852,12 @@ int mca_pml_dr_send_request_schedule(mca_pml_dr_send_request_t* sendreq)
|
||||
mca_bml_base_btl_t* bml_btl = NULL;
|
||||
mca_pml_dr_vfrag_t* vfrag = sendreq->req_vfrag;
|
||||
size_t size = bytes_remaining;
|
||||
|
||||
|
||||
/* offset tells us how much of the vfrag has been scheduled */
|
||||
size_t bytes_sent = sendreq->req_send_offset - vfrag->vf_offset;
|
||||
int rc;
|
||||
|
||||
bool do_csum;
|
||||
|
||||
/* do we need to allocate a new vfrag
|
||||
(we scheduled all the vfrag already) */
|
||||
if(vfrag->vf_size == bytes_sent) {
|
||||
@ -865,6 +877,9 @@ int mca_pml_dr_send_request_schedule(mca_pml_dr_send_request_t* sendreq)
|
||||
bml_btl = vfrag->bml_btl;
|
||||
}
|
||||
|
||||
do_csum = mca_pml_dr.enable_csum &&
|
||||
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
|
||||
|
||||
/* makes sure that we don't exceed vfrag size */
|
||||
if (size > vfrag->vf_max_send_size) {
|
||||
size = vfrag->vf_max_send_size;
|
||||
@ -903,12 +918,12 @@ int mca_pml_dr_send_request_schedule(mca_pml_dr_send_request_t* sendreq)
|
||||
hdr->hdr_common.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
|
||||
hdr->hdr_vlen = vfrag->vf_len;
|
||||
hdr->hdr_frag_idx = vfrag->vf_idx;
|
||||
hdr->hdr_frag_csum = (mca_pml_dr.enable_csum ?
|
||||
hdr->hdr_frag_csum = (do_csum ?
|
||||
sendreq->req_send.req_convertor.checksum : OPAL_CSUM_ZERO);
|
||||
hdr->hdr_frag_offset = sendreq->req_send_offset;
|
||||
hdr->hdr_src_ptr.pval = vfrag;
|
||||
hdr->hdr_dst_ptr = sendreq->req_vfrag0.vf_recv;
|
||||
hdr->hdr_common.hdr_csum = (mca_pml_dr.enable_csum ?
|
||||
hdr->hdr_common.hdr_csum = (do_csum ?
|
||||
opal_csum(hdr, sizeof(mca_pml_dr_frag_hdr_t)): OPAL_CSUM_ZERO);
|
||||
|
||||
assert(hdr->hdr_frag_offset < sendreq->req_send.req_bytes_packed);
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "ompi/mca/pml/base/pml_base_sendreq.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/bml/bml.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
|
||||
#include "pml_dr_proc.h"
|
||||
#include "pml_dr_comm.h"
|
||||
@ -99,6 +100,10 @@ OBJ_CLASS_DECLARATION(mca_pml_dr_send_request_t);
|
||||
sendmode, \
|
||||
persistent) \
|
||||
do { \
|
||||
mca_pml_dr_endpoint_t* endpoint = \
|
||||
(mca_pml_dr_endpoint_t*)sendreq->req_send.req_base.req_proc->proc_pml; \
|
||||
bool do_csum = mca_pml_dr.enable_csum && \
|
||||
(endpoint->base.btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM); \
|
||||
/* increment reference counts */ \
|
||||
OBJ_RETAIN(comm); \
|
||||
OBJ_RETAIN(datatype); \
|
||||
@ -127,7 +132,7 @@ do {
|
||||
(sendreq)->req_send.req_base.req_datatype, \
|
||||
(sendreq)->req_send.req_base.req_count, \
|
||||
(sendreq)->req_send.req_base.req_addr, \
|
||||
(mca_pml_dr.enable_csum ? CONVERTOR_WITH_CHECKSUM: 0), \
|
||||
(do_csum ? CONVERTOR_WITH_CHECKSUM: 0), \
|
||||
&(sendreq)->req_send.req_convertor ); \
|
||||
ompi_convertor_get_packed_size(&(sendreq)->req_send.req_convertor, \
|
||||
&((sendreq)->req_send.req_bytes_packed) ); \
|
||||
@ -394,7 +399,8 @@ do { \
|
||||
mca_bml_base_btl_array_get_next(&endpoint->base.btl_eager); \
|
||||
mca_btl_base_descriptor_t *des_old, *des_new; \
|
||||
mca_pml_dr_hdr_t *hdr; \
|
||||
\
|
||||
bool do_csum = mca_pml_dr.enable_csum && \
|
||||
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM); \
|
||||
MCA_PML_DR_DEBUG(0,(0, "%s:%d:%s: (re)transmitting rndv probe\n", \
|
||||
__FILE__, __LINE__, __func__)); \
|
||||
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,1); \
|
||||
@ -418,7 +424,7 @@ do { \
|
||||
hdr->hdr_match.hdr_csum = OPAL_CSUM_ZERO; \
|
||||
hdr->hdr_common.hdr_vid = sendreq->req_vfrag0.vf_id; \
|
||||
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; \
|
||||
hdr->hdr_common.hdr_csum = (mca_pml_dr.enable_csum ? \
|
||||
hdr->hdr_common.hdr_csum = (do_csum ? \
|
||||
opal_csum(hdr, sizeof(mca_pml_dr_rendezvous_hdr_t)): OPAL_CSUM_ZERO); \
|
||||
des_new->des_flags = des_old->des_flags; \
|
||||
des_new->des_cbdata = des_old->des_cbdata; \
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user