1
1

DR will now checksum on a per-BTL basis (see MCA_BTL_FLAGS_NEED_CSUM). We

still always send ACKs; teasing apart completion for the ACK/no-ACK cases looks like a
pain in the ..

This commit was SVN r10530.
Этот коммит содержится в:
Galen Shipman 2006-06-27 20:23:47 +00:00
родитель 2cf73912e2
Коммит e6cd8db0e5
9 изменённых файлов: 99 добавлений и 44 удалений

Просмотреть файл

@ -252,6 +252,8 @@ struct mca_bml_base_endpoint_t {
mca_bml_base_btl_array_t btl_send; /**< array of btls to use for remaining fragments */
mca_bml_base_btl_array_t btl_rdma; /**< array of btls that support (prefer) rdma */
mca_bml_base_endpoint_copy_fn_t copy;
uint32_t btl_flags_or; /**< the bitwise OR of the btl flags */
uint32_t btl_flags_and; /**< the bitwise AND of the btl flags */
};
typedef struct mca_bml_base_endpoint_t mca_bml_base_endpoint_t;

Просмотреть файл

@ -265,7 +265,9 @@ int mca_bml_r2_add_procs(
bml_endpoint->btl_rdma_size = -1;
bml_endpoint->super.proc_ompi = proc;
proc->proc_pml = (struct mca_pml_proc_t*) bml_endpoint;
bml_endpoint->btl_flags_and = 0;
bml_endpoint->btl_flags_or = 0;
}
bml_endpoints[p] =(mca_bml_base_endpoint_t*) proc->proc_pml;
@ -324,6 +326,11 @@ int mca_bml_r2_add_procs(
*/
bml_btl->btl_flags |= MCA_BTL_FLAGS_SEND;
}
/**
* calculate the bitwise OR and AND of the btl flags
*/
bml_endpoint->btl_flags_or |= bml_btl->btl_flags;
bml_endpoint->btl_flags_and &= bml_btl->btl_flags;
}
}
if(btl_inuse > 0 && NULL != btl->btl_component->btl_progress) {
@ -372,7 +379,7 @@ int mca_bml_r2_add_procs(
latency = btl->btl_latency;
}
}
/* (1) set the weight of each btl as a percentage of overall bandwidth
* (2) copy all btl instances at the highest priority ranking into the
* list of btls used for first fragments

Просмотреть файл

@ -146,7 +146,7 @@ typedef uint8_t mca_btl_base_tag_t;
/* btl transport is reliable */
#define MCA_BTL_FLAGS_NEED_ACK 0x10
#define MCA_BTL_FLAGS_NEED_CSUM 0xB
#define MCA_BTL_FLAGS_NEED_CSUM 0x20
/* Default exclusivity levels */
#define MCA_BTL_EXCLUSIVITY_HIGH 64*1024 /* internal loopback */

Просмотреть файл

@ -40,14 +40,14 @@
#define MCA_PML_DR_HDR_VALIDATE_ACK(hdr, type) \
#define MCA_PML_DR_HDR_VALIDATE_ACK(do_csum, hdr, type) \
do { \
mca_pml_dr_endpoint_t* ep; \
if(mca_pml_dr.enable_csum) { \
if(do_csum) { \
uint16_t csum = opal_csum(hdr, sizeof(type)); \
if(hdr->hdr_common.hdr_csum != csum) { \
MCA_PML_DR_DEBUG(0, (0, "%s:%d: invalid header checksum: 0x%04x != 0x%04x\n", \
__FILE__, __LINE__, hdr->hdr_common.hdr_csum, csum)); \
MCA_PML_DR_DEBUG(0, (0, "%s:%d: invalid header checksum: 0x%04x != 0x%04x\n", \
__FILE__, __LINE__, hdr->hdr_common.hdr_csum, csum)); \
return; \
} \
} \
@ -109,6 +109,8 @@ void mca_pml_dr_recv_frag_callback(
mca_pml_dr_endpoint_t *ep;
ompi_communicator_t* ompi_comm;
uint16_t csum;
bool do_csum = mca_pml_dr.enable_csum &&
(btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
if(segments->seg_len < sizeof(mca_pml_dr_common_hdr_t)) {
return;
@ -117,7 +119,7 @@ void mca_pml_dr_recv_frag_callback(
switch(hdr->hdr_common.hdr_type) {
case MCA_PML_DR_HDR_TYPE_MATCH:
{
if(mca_pml_dr.enable_csum) {
if(do_csum) {
csum = opal_csum(hdr, sizeof(mca_pml_dr_match_hdr_t));
if(hdr->hdr_common.hdr_csum != csum) {
MCA_PML_DR_DEBUG(0,(0, "%s:%d: invalid header checksum: 0x%04x != 0x%04x\n",
@ -160,18 +162,19 @@ void mca_pml_dr_recv_frag_callback(
}
case MCA_PML_DR_HDR_TYPE_MATCH_ACK:
{
MCA_PML_DR_HDR_VALIDATE_ACK(hdr, mca_pml_dr_ack_hdr_t);
MCA_PML_DR_HDR_VALIDATE_ACK(do_csum, hdr, mca_pml_dr_ack_hdr_t);
mca_pml_dr_send_request_match_ack(btl, &hdr->hdr_ack);
break;
}
case MCA_PML_DR_HDR_TYPE_RNDV:
{
if(mca_pml_dr.enable_csum) {
if(do_csum) {
csum = opal_csum(hdr, sizeof(mca_pml_dr_rendezvous_hdr_t));
if(hdr->hdr_common.hdr_csum != csum) {
MCA_PML_DR_DEBUG(0, (0, "%s:%d: invalid header checksum: 0x%04x != 0x%04x\n",
__FILE__, __LINE__, hdr->hdr_common.hdr_csum, csum));
assert(0);
return;
}
}
@ -243,7 +246,7 @@ void mca_pml_dr_recv_frag_callback(
}
case MCA_PML_DR_HDR_TYPE_RNDV_ACK:
{
MCA_PML_DR_HDR_VALIDATE_ACK(hdr, mca_pml_dr_ack_hdr_t);
MCA_PML_DR_HDR_VALIDATE_ACK(do_csum, hdr, mca_pml_dr_ack_hdr_t);
mca_pml_dr_send_request_rndv_ack(btl, &hdr->hdr_ack);
break;
}
@ -251,7 +254,7 @@ void mca_pml_dr_recv_frag_callback(
{
mca_pml_dr_recv_request_t* recvreq;
if(mca_pml_dr.enable_csum) {
if(do_csum) {
csum = opal_csum(hdr, sizeof(mca_pml_dr_frag_hdr_t));
if(hdr->hdr_common.hdr_csum != csum) {
MCA_PML_DR_DEBUG(0,(0, "%s:%d: invalid header checksum: 0x%04x != 0x%04x\n",
@ -294,7 +297,7 @@ void mca_pml_dr_recv_frag_callback(
}
case MCA_PML_DR_HDR_TYPE_FRAG_ACK:
{
MCA_PML_DR_HDR_VALIDATE_ACK(hdr, mca_pml_dr_ack_hdr_t);
MCA_PML_DR_HDR_VALIDATE_ACK(do_csum, hdr, mca_pml_dr_ack_hdr_t);
mca_pml_dr_send_request_frag_ack(btl, &hdr->hdr_ack);
break;
}
@ -608,6 +611,8 @@ bool mca_pml_dr_recv_frag_match(
int rc;
uint32_t csum;
mca_pml_dr_endpoint_t* ep = (mca_pml_dr_endpoint_t*) proc->endpoint;
bool do_csum = mca_pml_dr.enable_csum &&
(btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
/* source sequence number */
frag_msg_seq = hdr->hdr_seq;
@ -684,7 +689,7 @@ rematch:
return rc;
}
MCA_PML_DR_RECV_FRAG_INIT(frag,ompi_proc,hdr,segments,num_segments,btl,csum);
if(mca_pml_dr.enable_csum && csum != hdr->hdr_csum) {
if(do_csum && csum != hdr->hdr_csum) {
mca_pml_dr_recv_frag_ack((mca_bml_base_endpoint_t*)ompi_proc->proc_pml,
&hdr->hdr_common, hdr->hdr_src_ptr.pval, 0, 0);
MCA_PML_DR_DEBUG(0,(0, "%s:%d: received corrupted data 0x%08x != 0x%08x (segments %d length %d)\n",
@ -720,7 +725,7 @@ rematch:
return rc;
}
MCA_PML_DR_RECV_FRAG_INIT(frag,ompi_proc,hdr,segments,num_segments,btl,csum);
if(mca_pml_dr.enable_csum && csum != hdr->hdr_csum) {
if(do_csum && csum != hdr->hdr_csum) {
mca_pml_dr_recv_frag_ack((mca_bml_base_endpoint_t*)ompi_proc->proc_pml,
&hdr->hdr_common, hdr->hdr_src_ptr.pval, 0, 0);
MCA_PML_DR_DEBUG(0,(0, "%s:%d: received corrupted data 0x%08x != 0x%08x\n",
@ -775,8 +780,11 @@ void mca_pml_dr_recv_frag_ack(
mca_pml_dr_recv_frag_t* frag;
mca_pml_dr_ack_hdr_t* ack;
int rc;
bool do_csum;
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
do_csum = mca_pml_dr.enable_csum &&
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
/* allocate descriptor */
MCA_PML_DR_DES_ALLOC(bml_btl, des, sizeof(mca_pml_dr_ack_hdr_t));
if(NULL == des) {
@ -796,7 +804,7 @@ void mca_pml_dr_recv_frag_ack(
ack->hdr_src_ptr.pval = src_ptr;
assert(ack->hdr_src_ptr.pval);
ack->hdr_dst_ptr.pval = NULL;
ack->hdr_common.hdr_csum = (mca_pml_dr.enable_csum ?
ack->hdr_common.hdr_csum = (do_csum ?
opal_csum(ack, sizeof(mca_pml_dr_ack_hdr_t)) :
OPAL_CSUM_ZERO);

Просмотреть файл

@ -66,7 +66,8 @@ do { \
uint32_t ui1 = 0; \
uint32_t ui2 = 0; \
mca_pml_dr_buffer_t** buffers = frag->buffers; \
\
bool do_csum = mca_pml_dr.enable_csum && \
(btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM); \
/* init recv_frag */ \
frag->btl = btl; \
frag->hdr = *(mca_pml_dr_hdr_t*)hdr; \
@ -83,7 +84,7 @@ do { \
buffers[i] = buff; \
frag->segments[i].seg_addr.pval = buff->addr; \
frag->segments[i].seg_len = segs[i].seg_len; \
if( mca_pml_dr.enable_csum ) { \
if( do_csum ) { \
size_t hdr_len = 0; \
if( 0 == i ) { \
hdr_len = mca_pml_dr_hdr_size(hdr->hdr_common.hdr_type);\

Просмотреть файл

@ -31,8 +31,13 @@
#include "orte/mca/errmgr/errmgr.h"
#define MCA_PML_DR_RECV_REQUEST_ACK(recvreq,hdr,csum,bytes_received) \
if(mca_pml_dr.enable_csum && csum != hdr->hdr_match.hdr_csum) { \
/*
* this macro is needed for MATCH/RNDV headers,
* as we need to put the match back on the list if the checksum
* fails for later matching
*/
#define MCA_PML_DR_RECV_REQUEST_MATCH_ACK(do_csum,recvreq,hdr,csum,bytes_received) \
if(do_csum && csum != hdr->hdr_match.hdr_csum) { \
/* failed the csum, put the request back on the list for \
* matching later on retransmission \
*/ \
@ -167,9 +172,12 @@ void mca_pml_dr_recv_request_ack(
mca_bml_base_btl_t* bml_btl;
mca_pml_dr_ack_hdr_t* ack;
int rc;
bool do_csum;
/* allocate descriptor */
bml_btl = mca_bml_base_btl_array_get_next(&recvreq->req_endpoint->base.btl_eager);
do_csum = mca_pml_dr.enable_csum &&
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
MCA_PML_DR_DES_ALLOC(bml_btl, des, sizeof(mca_pml_dr_ack_hdr_t));
if(NULL == des) {
return;
@ -187,7 +195,7 @@ void mca_pml_dr_recv_request_ack(
ack->hdr_vmask = mask;
ack->hdr_src_ptr = src_ptr;
ack->hdr_dst_ptr.pval = recvreq;
ack->hdr_common.hdr_csum = (mca_pml_dr.enable_csum?
ack->hdr_common.hdr_csum = (do_csum?
opal_csum(ack, sizeof(mca_pml_dr_ack_hdr_t)) :
OPAL_CSUM_ZERO);
@ -223,7 +231,10 @@ void mca_pml_dr_recv_request_progress(
uint32_t csum = OPAL_CSUM_ZERO;
uint64_t bit;
mca_pml_dr_vfrag_t* vfrag;
bool do_csum = mca_pml_dr.enable_csum &&
(btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
for(i=0; i<num_segments; i++)
bytes_received += segments[i].seg_len;
@ -233,6 +244,7 @@ void mca_pml_dr_recv_request_progress(
bytes_received -= sizeof(mca_pml_dr_match_hdr_t);
recvreq->req_vfrag0.vf_send = hdr->hdr_match.hdr_src_ptr;
MCA_PML_DR_RECV_REQUEST_BYTES_PACKED(recvreq, bytes_received);
MCA_PML_DR_RECV_REQUEST_UNPACK(
recvreq,
segments,
@ -242,7 +254,7 @@ void mca_pml_dr_recv_request_progress(
bytes_received,
bytes_delivered,
csum);
MCA_PML_DR_RECV_REQUEST_ACK(recvreq,hdr,csum,bytes_received);
MCA_PML_DR_RECV_REQUEST_MATCH_ACK(do_csum, recvreq,hdr,csum,bytes_received);
break;
@ -260,7 +272,7 @@ void mca_pml_dr_recv_request_progress(
bytes_received,
bytes_delivered,
csum);
MCA_PML_DR_RECV_REQUEST_ACK(recvreq,hdr,csum,bytes_received);
MCA_PML_DR_RECV_REQUEST_MATCH_ACK(do_csum, recvreq,hdr,csum,bytes_received);
break;
@ -295,7 +307,7 @@ void mca_pml_dr_recv_request_progress(
* note that it might still fail the checksum though
*/
vfrag->vf_pending |= bit;
if(!mca_pml_dr.enable_csum || csum == hdr->hdr_frag.hdr_frag_csum) {
if(!do_csum || csum == hdr->hdr_frag.hdr_frag_csum) {
/* this part of the vfrag passed the checksum,
mark it so that we ack it after receiving the
entire vfrag */

Просмотреть файл

@ -240,6 +240,8 @@ do {
#define MCA_PML_DR_RECV_REQUEST_BYTES_PACKED(request, bytes_packed) \
do { \
bool do_csum = mca_pml_dr.enable_csum && \
(request->req_endpoint->base.btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM); \
(request)->req_recv.req_bytes_packed = bytes_packed; \
if((request)->req_recv.req_bytes_packed != 0) { \
ompi_proc_t *proc = (request)->req_proc->ompi_proc; \
@ -247,7 +249,7 @@ do {
(request)->req_recv.req_base.req_datatype, \
(request)->req_recv.req_base.req_count, \
(request)->req_recv.req_base.req_addr, \
(mca_pml_dr.enable_csum ? CONVERTOR_WITH_CHECKSUM: 0), \
(do_csum ? CONVERTOR_WITH_CHECKSUM: 0), \
&(request)->req_recv.req_convertor ); \
} \
} while (0)
@ -273,6 +275,8 @@ do {
size_t max_data = bytes_received; \
int32_t free_after = 0; \
size_t n, offset = seg_offset; \
bool do_csum = mca_pml_dr.enable_csum && \
(request->req_endpoint->base.btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM); \
\
for(n=0; n<num_segments; n++) { \
mca_btl_base_segment_t* segment = segments+n; \
@ -297,7 +301,7 @@ do {
&free_after); \
bytes_delivered = max_data; \
if(bytes_received && !bytes_delivered) assert(0); \
csum = (mca_pml_dr.enable_csum ? \
csum = (do_csum ? \
request->req_recv.req_convertor.checksum : OPAL_CSUM_ZERO); \
} else { \
bytes_delivered = 0; \

Просмотреть файл

@ -374,6 +374,8 @@ int mca_pml_dr_send_request_start_buffered(
int32_t free_after;
int rc;
uint32_t csum;
bool do_csum = mca_pml_dr.enable_csum &&
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
/* allocate descriptor */
mca_bml_base_alloc(bml_btl, &descriptor, sizeof(mca_pml_dr_rendezvous_hdr_t) + size);
@ -446,7 +448,7 @@ int mca_pml_dr_send_request_start_buffered(
hdr->hdr_match.hdr_csum = csum;
hdr->hdr_match.hdr_src_ptr.pval = &sendreq->req_vfrag0;
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
hdr->hdr_common.hdr_csum = (mca_pml_dr.enable_csum ?
hdr->hdr_common.hdr_csum = (do_csum ?
opal_csum(hdr, sizeof(mca_pml_dr_rendezvous_hdr_t)) :
OPAL_CSUM_ZERO);
@ -489,7 +491,9 @@ int mca_pml_dr_send_request_start_copy(
size_t max_data;
int32_t free_after;
int rc;
bool do_csum = mca_pml_dr.enable_csum &&
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
/* allocate descriptor */
mca_bml_base_alloc(bml_btl, &descriptor, sizeof(mca_pml_dr_match_hdr_t) + size);
if(NULL == descriptor) {
@ -525,11 +529,11 @@ int mca_pml_dr_send_request_start_copy(
hdr->hdr_common.hdr_src = sendreq->req_endpoint->src;
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
hdr->hdr_match.hdr_csum = (size > 0 && mca_pml_dr.enable_csum ?
hdr->hdr_match.hdr_csum = (size > 0 && do_csum ?
sendreq->req_send.req_convertor.checksum : OPAL_CSUM_ZERO);
hdr->hdr_match.hdr_src_ptr.pval = &sendreq->req_vfrag0;
hdr->hdr_common.hdr_vid = sendreq->req_vfrag0.vf_id;
hdr->hdr_common.hdr_csum = (mca_pml_dr.enable_csum ?
hdr->hdr_common.hdr_csum = (do_csum ?
opal_csum(hdr, sizeof(mca_pml_dr_match_hdr_t)) :
OPAL_CSUM_ZERO);
@ -572,6 +576,8 @@ int mca_pml_dr_send_request_start_prepare(
mca_btl_base_segment_t* segment;
mca_pml_dr_hdr_t* hdr;
int rc;
bool do_csum = mca_pml_dr.enable_csum &&
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
/* prepare descriptor */
mca_bml_base_prepare_src(
@ -597,11 +603,11 @@ int mca_pml_dr_send_request_start_prepare(
hdr->hdr_common.hdr_src = sendreq->req_endpoint->src;
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
hdr->hdr_match.hdr_csum = (size > 0 && mca_pml_dr.enable_csum ?
hdr->hdr_match.hdr_csum = (size > 0 && do_csum ?
sendreq->req_send.req_convertor.checksum : OPAL_CSUM_ZERO);
hdr->hdr_match.hdr_src_ptr.pval = &sendreq->req_vfrag0;
hdr->hdr_common.hdr_vid = sendreq->req_vfrag0.vf_id;
hdr->hdr_common.hdr_csum = (mca_pml_dr.enable_csum ?
hdr->hdr_common.hdr_csum = (do_csum ?
opal_csum(hdr, sizeof(mca_pml_dr_match_hdr_t)) :
OPAL_CSUM_ZERO);
@ -640,7 +646,9 @@ int mca_pml_dr_send_request_start_rndv(
mca_btl_base_segment_t* segment;
mca_pml_dr_hdr_t* hdr;
int rc;
bool do_csum = mca_pml_dr.enable_csum &&
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
/* prepare descriptor */
if(size == 0) {
@ -673,12 +681,13 @@ int mca_pml_dr_send_request_start_rndv(
hdr->hdr_common.hdr_src = sendreq->req_endpoint->src;
hdr->hdr_common.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_common.hdr_vid = sendreq->req_vfrag0.vf_id;
hdr->hdr_common.hdr_csum = 0;
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag;
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence;
hdr->hdr_match.hdr_src_ptr.pval = &sendreq->req_vfrag0;
hdr->hdr_match.hdr_csum = size > 0 ? sendreq->req_send.req_convertor.checksum : OPAL_CSUM_ZERO;
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed;
hdr->hdr_common.hdr_csum = (mca_pml_dr.enable_csum ?
hdr->hdr_common.hdr_csum = (do_csum ?
opal_csum(hdr, sizeof(mca_pml_dr_rendezvous_hdr_t)) :
OPAL_CSUM_ZERO);
@ -749,7 +758,9 @@ int mca_pml_dr_send_request_schedule(mca_pml_dr_send_request_t* sendreq)
size_t offset_in_msg = vfrag->vf_offset + offset_in_vfrag;
size_t size;
int rc;
bool do_csum = mca_pml_dr.enable_csum &&
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
if(vfrag->vf_idx == vfrag->vf_len - 1) {
size = vfrag->vf_size - offset_in_vfrag;
} else {
@ -790,7 +801,7 @@ int mca_pml_dr_send_request_schedule(mca_pml_dr_send_request_t* sendreq)
hdr->hdr_frag_offset = offset_in_msg;
hdr->hdr_src_ptr.pval = vfrag;
hdr->hdr_dst_ptr = sendreq->req_vfrag0.vf_recv;
hdr->hdr_common.hdr_csum = (mca_pml_dr.enable_csum ?
hdr->hdr_common.hdr_csum = (do_csum ?
opal_csum(hdr, sizeof(mca_pml_dr_frag_hdr_t)) :
OPAL_CSUM_ZERO);
@ -841,11 +852,12 @@ int mca_pml_dr_send_request_schedule(mca_pml_dr_send_request_t* sendreq)
mca_bml_base_btl_t* bml_btl = NULL;
mca_pml_dr_vfrag_t* vfrag = sendreq->req_vfrag;
size_t size = bytes_remaining;
/* offset tells us how much of the vfrag has been scheduled */
size_t bytes_sent = sendreq->req_send_offset - vfrag->vf_offset;
int rc;
bool do_csum;
/* do we need to allocate a new vfrag
(we scheduled all the vfrag already) */
if(vfrag->vf_size == bytes_sent) {
@ -865,6 +877,9 @@ int mca_pml_dr_send_request_schedule(mca_pml_dr_send_request_t* sendreq)
bml_btl = vfrag->bml_btl;
}
do_csum = mca_pml_dr.enable_csum &&
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM);
/* makes sure that we don't exceed vfrag size */
if (size > vfrag->vf_max_send_size) {
size = vfrag->vf_max_send_size;
@ -903,12 +918,12 @@ int mca_pml_dr_send_request_schedule(mca_pml_dr_send_request_t* sendreq)
hdr->hdr_common.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_vlen = vfrag->vf_len;
hdr->hdr_frag_idx = vfrag->vf_idx;
hdr->hdr_frag_csum = (mca_pml_dr.enable_csum ?
hdr->hdr_frag_csum = (do_csum ?
sendreq->req_send.req_convertor.checksum : OPAL_CSUM_ZERO);
hdr->hdr_frag_offset = sendreq->req_send_offset;
hdr->hdr_src_ptr.pval = vfrag;
hdr->hdr_dst_ptr = sendreq->req_vfrag0.vf_recv;
hdr->hdr_common.hdr_csum = (mca_pml_dr.enable_csum ?
hdr->hdr_common.hdr_csum = (do_csum ?
opal_csum(hdr, sizeof(mca_pml_dr_frag_hdr_t)): OPAL_CSUM_ZERO);
assert(hdr->hdr_frag_offset < sendreq->req_send.req_bytes_packed);

Просмотреть файл

@ -28,6 +28,7 @@
#include "ompi/mca/pml/base/pml_base_sendreq.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/bml/bml.h"
#include "ompi/mca/btl/btl.h"
#include "pml_dr_proc.h"
#include "pml_dr_comm.h"
@ -99,6 +100,10 @@ OBJ_CLASS_DECLARATION(mca_pml_dr_send_request_t);
sendmode, \
persistent) \
do { \
mca_pml_dr_endpoint_t* endpoint = \
(mca_pml_dr_endpoint_t*)sendreq->req_send.req_base.req_proc->proc_pml; \
bool do_csum = mca_pml_dr.enable_csum && \
(endpoint->base.btl_flags_or & MCA_BTL_FLAGS_NEED_CSUM); \
/* increment reference counts */ \
OBJ_RETAIN(comm); \
OBJ_RETAIN(datatype); \
@ -127,7 +132,7 @@ do {
(sendreq)->req_send.req_base.req_datatype, \
(sendreq)->req_send.req_base.req_count, \
(sendreq)->req_send.req_base.req_addr, \
(mca_pml_dr.enable_csum ? CONVERTOR_WITH_CHECKSUM: 0), \
(do_csum ? CONVERTOR_WITH_CHECKSUM: 0), \
&(sendreq)->req_send.req_convertor ); \
ompi_convertor_get_packed_size(&(sendreq)->req_send.req_convertor, \
&((sendreq)->req_send.req_bytes_packed) ); \
@ -394,7 +399,8 @@ do { \
mca_bml_base_btl_array_get_next(&endpoint->base.btl_eager); \
mca_btl_base_descriptor_t *des_old, *des_new; \
mca_pml_dr_hdr_t *hdr; \
\
bool do_csum = mca_pml_dr.enable_csum && \
(bml_btl->btl_flags & MCA_BTL_FLAGS_NEED_CSUM); \
MCA_PML_DR_DEBUG(0,(0, "%s:%d:%s: (re)transmitting rndv probe\n", \
__FILE__, __LINE__, __func__)); \
OPAL_THREAD_ADD_SIZE_T(&sendreq->req_pipeline_depth,1); \
@ -418,7 +424,7 @@ do { \
hdr->hdr_match.hdr_csum = OPAL_CSUM_ZERO; \
hdr->hdr_common.hdr_vid = sendreq->req_vfrag0.vf_id; \
hdr->hdr_rndv.hdr_msg_length = sendreq->req_send.req_bytes_packed; \
hdr->hdr_common.hdr_csum = (mca_pml_dr.enable_csum ? \
hdr->hdr_common.hdr_csum = (do_csum ? \
opal_csum(hdr, sizeof(mca_pml_dr_rendezvous_hdr_t)): OPAL_CSUM_ZERO); \
des_new->des_flags = des_old->des_flags; \
des_new->des_cbdata = des_old->des_cbdata; \