- add new send flag (MCA_PML_OB1_SEND_COMPLETE) to force local completion prior to signalling mpi completion
This commit was SVN r8907.
Этот коммит содержится в:
родитель
c0d18c9579
Коммит
e920ec67b9
@ -104,121 +104,118 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_request_t);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Diagnostic output to trace rdma protocol timing
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Start a send request.
|
||||
*/
|
||||
|
||||
#define MCA_PML_OB1_SEND_REQUEST_START(sendreq, rc) \
|
||||
do { \
|
||||
mca_pml_ob1_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; \
|
||||
#define MCA_PML_OB1_SEND_REQUEST_START(sendreq, rc) \
|
||||
do { \
|
||||
mca_pml_ob1_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; \
|
||||
mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*) \
|
||||
sendreq->req_send.req_base.req_proc->proc_pml; \
|
||||
mca_bml_base_btl_t* bml_btl; \
|
||||
size_t size = sendreq->req_send.req_bytes_packed; \
|
||||
\
|
||||
if(endpoint == NULL) { \
|
||||
rc = OMPI_ERR_UNREACH; \
|
||||
break; \
|
||||
} \
|
||||
\
|
||||
sendreq->req_lock = 0; \
|
||||
sendreq->req_pipeline_depth = 0; \
|
||||
sendreq->req_bytes_delivered = 0; \
|
||||
sendreq->req_rdma_cnt = 0; \
|
||||
sendreq->req_state = 0; \
|
||||
sendreq->req_send_offset = 0; \
|
||||
sendreq->req_send.req_base.req_pml_complete = false; \
|
||||
sendreq->req_send.req_base.req_ompi.req_complete = false; \
|
||||
sendreq->req_send.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \
|
||||
sendreq->req_send.req_base.req_ompi.req_status._cancelled = 0; \
|
||||
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32( \
|
||||
&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1); \
|
||||
sendreq->req_endpoint = endpoint; \
|
||||
\
|
||||
/* select a btl */ \
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); \
|
||||
\
|
||||
/* shortcut for zero byte */ \
|
||||
if(size == 0 && sendreq->req_send.req_send_mode != MCA_PML_BASE_SEND_SYNCHRONOUS) { \
|
||||
mca_btl_base_descriptor_t* descriptor; \
|
||||
mca_btl_base_segment_t* segment; \
|
||||
mca_pml_ob1_hdr_t* hdr; \
|
||||
\
|
||||
/* allocate a descriptor */ \
|
||||
MCA_PML_OB1_DES_ALLOC(bml_btl, descriptor, sizeof(mca_pml_ob1_match_hdr_t)); \
|
||||
if(NULL == descriptor) { \
|
||||
return OMPI_ERR_OUT_OF_RESOURCE; \
|
||||
} \
|
||||
segment = descriptor->des_src; \
|
||||
\
|
||||
/* build hdr */ \
|
||||
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; \
|
||||
hdr->hdr_common.hdr_flags = 0; \
|
||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH; \
|
||||
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; \
|
||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; \
|
||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; \
|
||||
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence; \
|
||||
\
|
||||
/* short message */ \
|
||||
descriptor->des_cbfunc = mca_pml_ob1_match_completion_cache; \
|
||||
descriptor->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY; \
|
||||
descriptor->des_cbdata = sendreq; \
|
||||
\
|
||||
/* request is complete at mpi level */ \
|
||||
OPAL_THREAD_LOCK(&ompi_request_lock); \
|
||||
MCA_PML_OB1_SEND_REQUEST_MPI_COMPLETE(sendreq); \
|
||||
OPAL_THREAD_UNLOCK(&ompi_request_lock); \
|
||||
\
|
||||
/* send */ \
|
||||
rc = mca_bml_base_send(bml_btl, descriptor, MCA_BTL_TAG_PML); \
|
||||
if(OMPI_SUCCESS != rc) { \
|
||||
mca_bml_base_free(bml_btl, descriptor ); \
|
||||
} \
|
||||
\
|
||||
} else { \
|
||||
size_t eager_limit = bml_btl->btl_eager_limit - sizeof(mca_pml_ob1_hdr_t); \
|
||||
if(size <= eager_limit) { \
|
||||
switch(sendreq->req_send.req_send_mode) { \
|
||||
case MCA_PML_BASE_SEND_SYNCHRONOUS: \
|
||||
rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, 0); \
|
||||
break; \
|
||||
case MCA_PML_BASE_SEND_BUFFERED: \
|
||||
rc = mca_pml_ob1_send_request_start_copy(sendreq, bml_btl, size); \
|
||||
break; \
|
||||
default: \
|
||||
if (bml_btl->btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) { \
|
||||
rc = mca_pml_ob1_send_request_start_prepare(sendreq, bml_btl, size); \
|
||||
} else { \
|
||||
rc = mca_pml_ob1_send_request_start_copy(sendreq, bml_btl, size); \
|
||||
} \
|
||||
break; \
|
||||
} \
|
||||
} else { \
|
||||
size = eager_limit; \
|
||||
if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { \
|
||||
rc = mca_pml_ob1_send_request_start_buffered(sendreq, bml_btl, size); \
|
||||
} else if \
|
||||
(ompi_convertor_need_buffers(&sendreq->req_send.req_convertor) == false) { \
|
||||
if( 0 != (sendreq->req_rdma_cnt = mca_pml_ob1_rdma_btls( \
|
||||
sendreq->req_endpoint, \
|
||||
sendreq->req_send.req_addr, \
|
||||
sendreq->req_send.req_bytes_packed, \
|
||||
sendreq->req_rdma))) { \
|
||||
rc = mca_pml_ob1_send_request_start_rdma(sendreq, bml_btl, size); \
|
||||
} else { \
|
||||
rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, MCA_PML_OB1_HDR_FLAGS_CONTIG); \
|
||||
} \
|
||||
} else { \
|
||||
rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, 0); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
mca_bml_base_btl_t* bml_btl; \
|
||||
size_t size = sendreq->req_send.req_bytes_packed; \
|
||||
size_t eager_limit; \
|
||||
\
|
||||
if(endpoint == NULL) { \
|
||||
rc = OMPI_ERR_UNREACH; \
|
||||
break; \
|
||||
} \
|
||||
\
|
||||
sendreq->req_lock = 0; \
|
||||
sendreq->req_pipeline_depth = 0; \
|
||||
sendreq->req_bytes_delivered = 0; \
|
||||
sendreq->req_rdma_cnt = 0; \
|
||||
sendreq->req_state = 0; \
|
||||
sendreq->req_send_offset = 0; \
|
||||
sendreq->req_send.req_base.req_pml_complete = false; \
|
||||
sendreq->req_send.req_base.req_ompi.req_complete = false; \
|
||||
sendreq->req_send.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \
|
||||
sendreq->req_send.req_base.req_ompi.req_status._cancelled = 0; \
|
||||
sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32( \
|
||||
&comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1); \
|
||||
sendreq->req_endpoint = endpoint; \
|
||||
\
|
||||
/* select a btl */ \
|
||||
bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); \
|
||||
eager_limit = bml_btl->btl_eager_limit - sizeof(mca_pml_ob1_hdr_t); \
|
||||
\
|
||||
/* shortcut for zero byte */ \
|
||||
if(size <= eager_limit) { \
|
||||
switch(sendreq->req_send.req_send_mode) { \
|
||||
case MCA_PML_BASE_SEND_SYNCHRONOUS: \
|
||||
rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, 0); \
|
||||
break; \
|
||||
case MCA_PML_BASE_SEND_BUFFERED: \
|
||||
rc = mca_pml_ob1_send_request_start_copy(sendreq, bml_btl, size); \
|
||||
break; \
|
||||
case MCA_PML_BASE_SEND_COMPLETE: \
|
||||
rc = mca_pml_ob1_send_request_start_prepare(sendreq, bml_btl, size); \
|
||||
break; \
|
||||
default: \
|
||||
if (size == 0) { \
|
||||
mca_btl_base_descriptor_t* descriptor; \
|
||||
mca_btl_base_segment_t* segment; \
|
||||
mca_pml_ob1_hdr_t* hdr; \
|
||||
\
|
||||
/* allocate a descriptor */ \
|
||||
MCA_PML_OB1_DES_ALLOC(bml_btl, descriptor, sizeof(mca_pml_ob1_match_hdr_t)); \
|
||||
if(NULL == descriptor) { \
|
||||
return OMPI_ERR_OUT_OF_RESOURCE; \
|
||||
} \
|
||||
segment = descriptor->des_src; \
|
||||
\
|
||||
/* build hdr */ \
|
||||
hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; \
|
||||
hdr->hdr_common.hdr_flags = 0; \
|
||||
hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH; \
|
||||
hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; \
|
||||
hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; \
|
||||
hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; \
|
||||
hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence; \
|
||||
\
|
||||
/* short message */ \
|
||||
descriptor->des_cbfunc = mca_pml_ob1_match_completion_cache; \
|
||||
descriptor->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY; \
|
||||
descriptor->des_cbdata = sendreq; \
|
||||
\
|
||||
/* request is complete at mpi level */ \
|
||||
OPAL_THREAD_LOCK(&ompi_request_lock); \
|
||||
MCA_PML_OB1_SEND_REQUEST_MPI_COMPLETE(sendreq); \
|
||||
OPAL_THREAD_UNLOCK(&ompi_request_lock); \
|
||||
\
|
||||
/* send */ \
|
||||
rc = mca_bml_base_send(bml_btl, descriptor, MCA_BTL_TAG_PML); \
|
||||
if(OMPI_SUCCESS != rc) { \
|
||||
mca_bml_base_free(bml_btl, descriptor ); \
|
||||
} \
|
||||
} else if (bml_btl->btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) { \
|
||||
rc = mca_pml_ob1_send_request_start_prepare(sendreq, bml_btl, size); \
|
||||
} else { \
|
||||
rc = mca_pml_ob1_send_request_start_copy(sendreq, bml_btl, size); \
|
||||
} \
|
||||
break; \
|
||||
} \
|
||||
} else { \
|
||||
size = eager_limit; \
|
||||
if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { \
|
||||
rc = mca_pml_ob1_send_request_start_buffered(sendreq, bml_btl, size); \
|
||||
} else if \
|
||||
(ompi_convertor_need_buffers(&sendreq->req_send.req_convertor) == false) { \
|
||||
if( 0 != (sendreq->req_rdma_cnt = mca_pml_ob1_rdma_btls( \
|
||||
sendreq->req_endpoint, \
|
||||
sendreq->req_send.req_addr, \
|
||||
sendreq->req_send.req_bytes_packed, \
|
||||
sendreq->req_rdma))) { \
|
||||
rc = mca_pml_ob1_send_request_start_rdma(sendreq, bml_btl, size); \
|
||||
} else { \
|
||||
rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, \
|
||||
MCA_PML_OB1_HDR_FLAGS_CONTIG); \
|
||||
} \
|
||||
} else { \
|
||||
rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, 0); \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
|
||||
|
@ -88,7 +88,8 @@ typedef enum {
|
||||
MCA_PML_BASE_SEND_STANDARD,
|
||||
MCA_PML_BASE_SEND_BUFFERED,
|
||||
MCA_PML_BASE_SEND_SYNCHRONOUS,
|
||||
MCA_PML_BASE_SEND_READY
|
||||
MCA_PML_BASE_SEND_READY,
|
||||
MCA_PML_BASE_SEND_COMPLETE
|
||||
} mca_pml_base_send_mode_t;
|
||||
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user