1
1

- pass a flag to the peer indicating whether data is contiguous at the source

- only attempt to schedule RDMA if the data is contiguous at both src/dst
- need to review this for next release 

This commit was SVN r8119.
Этот коммит содержится в:
Tim Woodall 2005-11-11 15:33:25 +00:00
родитель 5d4091d485
Коммит 607f62accd
4 изменённых файлов: 22 добавлений и 15 удалений

Просмотреть файл

@ -43,6 +43,7 @@
#define MCA_PML_OB1_HDR_FLAGS_ACK 1 /* is an ack required */ #define MCA_PML_OB1_HDR_FLAGS_ACK 1 /* is an ack required */
#define MCA_PML_OB1_HDR_FLAGS_NBO 2 /* is the hdr in network byte order */ #define MCA_PML_OB1_HDR_FLAGS_NBO 2 /* is the hdr in network byte order */
#define MCA_PML_OB1_HDR_FLAGS_PIN 4 /* is user buffer pinned */ #define MCA_PML_OB1_HDR_FLAGS_PIN 4 /* is user buffer pinned */
#define MCA_PML_OB1_HDR_FLAGS_CONTIG 8 /* is user buffer contiguous */
/* /*

Просмотреть файл

@ -207,7 +207,8 @@ static void mca_pml_ob1_recv_request_ack(
* registered. * registered.
*/ */
if(ompi_convertor_need_buffers(&recvreq->req_recv.req_convertor) == 0) { if(ompi_convertor_need_buffers(&recvreq->req_recv.req_convertor) == 0 &&
hdr->hdr_match.hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_CONTIG) {
recvreq->req_rdma_cnt = mca_pml_ob1_rdma_btls( recvreq->req_rdma_cnt = mca_pml_ob1_rdma_btls(
bml_endpoint, bml_endpoint,
recvreq->req_recv.req_base.req_addr, recvreq->req_recv.req_base.req_addr,

Просмотреть файл

@ -556,7 +556,7 @@ int mca_pml_ob1_send_request_start_rdma(
/* build match header */ /* build match header */
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
hdr->hdr_common.hdr_flags = MCA_PML_OB1_HDR_FLAGS_PIN; hdr->hdr_common.hdr_flags = MCA_PML_OB1_HDR_FLAGS_CONTIG|MCA_PML_OB1_HDR_FLAGS_PIN;
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RGET; hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RGET;
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
@ -584,7 +584,7 @@ int mca_pml_ob1_send_request_start_rdma(
/* build hdr */ /* build hdr */
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
hdr->hdr_common.hdr_flags = MCA_PML_OB1_HDR_FLAGS_PIN; hdr->hdr_common.hdr_flags = MCA_PML_OB1_HDR_FLAGS_CONTIG|MCA_PML_OB1_HDR_FLAGS_PIN;
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV; hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV;
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;
@ -621,7 +621,8 @@ int mca_pml_ob1_send_request_start_rdma(
int mca_pml_ob1_send_request_start_rndv( int mca_pml_ob1_send_request_start_rndv(
mca_pml_ob1_send_request_t* sendreq, mca_pml_ob1_send_request_t* sendreq,
mca_bml_base_btl_t* bml_btl, mca_bml_base_btl_t* bml_btl,
size_t size) size_t size,
int flags)
{ {
mca_btl_base_descriptor_t* des; mca_btl_base_descriptor_t* des;
mca_btl_base_segment_t* segment; mca_btl_base_segment_t* segment;
@ -652,7 +653,7 @@ int mca_pml_ob1_send_request_start_rndv(
/* build hdr */ /* build hdr */
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval;
hdr->hdr_common.hdr_flags = 0; hdr->hdr_common.hdr_flags = flags;
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV; hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_RNDV;
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid;
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank;

Просмотреть файл

@ -186,7 +186,7 @@ do {
if(size <= eager_limit) { \ if(size <= eager_limit) { \
switch(sendreq->req_send.req_send_mode) { \ switch(sendreq->req_send.req_send_mode) { \
case MCA_PML_BASE_SEND_SYNCHRONOUS: \ case MCA_PML_BASE_SEND_SYNCHRONOUS: \
rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size); \ rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, 0); \
break; \ break; \
case MCA_PML_BASE_SEND_BUFFERED: \ case MCA_PML_BASE_SEND_BUFFERED: \
rc = mca_pml_ob1_send_request_start_copy(sendreq, bml_btl, size); \ rc = mca_pml_ob1_send_request_start_copy(sendreq, bml_btl, size); \
@ -204,15 +204,18 @@ do {
if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { \ if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { \
rc = mca_pml_ob1_send_request_start_buffered(sendreq, bml_btl, size); \ rc = mca_pml_ob1_send_request_start_buffered(sendreq, bml_btl, size); \
} else if \ } else if \
(ompi_convertor_need_buffers(&sendreq->req_send.req_convertor) == false && \ (ompi_convertor_need_buffers(&sendreq->req_send.req_convertor) == false) { \
0 != (sendreq->req_rdma_cnt = mca_pml_ob1_rdma_btls( \ if( 0 != (sendreq->req_rdma_cnt = mca_pml_ob1_rdma_btls( \
sendreq->req_endpoint, \ sendreq->req_endpoint, \
sendreq->req_send.req_addr, \ sendreq->req_send.req_addr, \
sendreq->req_send.req_bytes_packed, \ sendreq->req_send.req_bytes_packed, \
sendreq->req_rdma))) { \ sendreq->req_rdma))) { \
rc = mca_pml_ob1_send_request_start_rdma(sendreq, bml_btl, size); \ rc = mca_pml_ob1_send_request_start_rdma(sendreq, bml_btl, size); \
} else { \
rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, MCA_PML_OB1_HDR_FLAGS_CONTIG); \
} \
} else { \ } else { \
rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size); \ rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, 0); \
} \ } \
} \ } \
} \ } \
@ -381,7 +384,8 @@ int mca_pml_ob1_send_request_start_rdma(
int mca_pml_ob1_send_request_start_rndv( int mca_pml_ob1_send_request_start_rndv(
mca_pml_ob1_send_request_t* sendreq, mca_pml_ob1_send_request_t* sendreq,
mca_bml_base_btl_t* bml_btl, mca_bml_base_btl_t* bml_btl,
size_t size); size_t size,
int flags);
/** /**
* Schedule additional fragments * Schedule additional fragments