diff --git a/ompi/mca/pml/ob1/pml_ob1_sendreq.h b/ompi/mca/pml/ob1/pml_ob1_sendreq.h index 27927f8686..f401b9076b 100644 --- a/ompi/mca/pml/ob1/pml_ob1_sendreq.h +++ b/ompi/mca/pml/ob1/pml_ob1_sendreq.h @@ -104,121 +104,118 @@ OBJ_CLASS_DECLARATION(mca_pml_ob1_send_request_t); } -/** - * Diagnostic output to trace rdma protocol timing - */ - - /** * Start a send request. */ -#define MCA_PML_OB1_SEND_REQUEST_START(sendreq, rc) \ -do { \ - mca_pml_ob1_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; \ +#define MCA_PML_OB1_SEND_REQUEST_START(sendreq, rc) \ +do { \ + mca_pml_ob1_comm_t* comm = sendreq->req_send.req_base.req_comm->c_pml_comm; \ mca_bml_base_endpoint_t* endpoint = (mca_bml_base_endpoint_t*) \ sendreq->req_send.req_base.req_proc->proc_pml; \ - mca_bml_base_btl_t* bml_btl; \ - size_t size = sendreq->req_send.req_bytes_packed; \ - \ - if(endpoint == NULL) { \ - rc = OMPI_ERR_UNREACH; \ - break; \ - } \ - \ - sendreq->req_lock = 0; \ - sendreq->req_pipeline_depth = 0; \ - sendreq->req_bytes_delivered = 0; \ - sendreq->req_rdma_cnt = 0; \ - sendreq->req_state = 0; \ - sendreq->req_send_offset = 0; \ - sendreq->req_send.req_base.req_pml_complete = false; \ - sendreq->req_send.req_base.req_ompi.req_complete = false; \ - sendreq->req_send.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \ - sendreq->req_send.req_base.req_ompi.req_status._cancelled = 0; \ - sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32( \ - &comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1); \ - sendreq->req_endpoint = endpoint; \ - \ - /* select a btl */ \ - bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); \ - \ - /* shortcut for zero byte */ \ - if(size == 0 && sendreq->req_send.req_send_mode != MCA_PML_BASE_SEND_SYNCHRONOUS) { \ - mca_btl_base_descriptor_t* descriptor; \ - mca_btl_base_segment_t* segment; \ - mca_pml_ob1_hdr_t* hdr; \ - \ - /* allocate a descriptor */ \ - MCA_PML_OB1_DES_ALLOC(bml_btl, descriptor, sizeof(mca_pml_ob1_match_hdr_t)); \ - if(NULL == descriptor) { \ - return OMPI_ERR_OUT_OF_RESOURCE; \ - } \ - segment = descriptor->des_src; \ - \ - /* build hdr */ \ - hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; \ - hdr->hdr_common.hdr_flags = 0; \ - hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH; \ - hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; \ - hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; \ - hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; \ - hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence; \ - \ - /* short message */ \ - descriptor->des_cbfunc = mca_pml_ob1_match_completion_cache; \ - descriptor->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY; \ - descriptor->des_cbdata = sendreq; \ - \ - /* request is complete at mpi level */ \ - OPAL_THREAD_LOCK(&ompi_request_lock); \ - MCA_PML_OB1_SEND_REQUEST_MPI_COMPLETE(sendreq); \ - OPAL_THREAD_UNLOCK(&ompi_request_lock); \ - \ - /* send */ \ - rc = mca_bml_base_send(bml_btl, descriptor, MCA_BTL_TAG_PML); \ - if(OMPI_SUCCESS != rc) { \ - mca_bml_base_free(bml_btl, descriptor ); \ - } \ - \ - } else { \ - size_t eager_limit = bml_btl->btl_eager_limit - sizeof(mca_pml_ob1_hdr_t); \ - if(size <= eager_limit) { \ - switch(sendreq->req_send.req_send_mode) { \ - case MCA_PML_BASE_SEND_SYNCHRONOUS: \ - rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, 0); \ - break; \ - case MCA_PML_BASE_SEND_BUFFERED: \ - rc = mca_pml_ob1_send_request_start_copy(sendreq, bml_btl, size); \ - break; \ - default: \ - if (bml_btl->btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) { \ - rc = mca_pml_ob1_send_request_start_prepare(sendreq, bml_btl, size); \ - } else { \ - rc = mca_pml_ob1_send_request_start_copy(sendreq, bml_btl, size); \ - } \ - break; \ - } \ - } else { \ - size = eager_limit; \ - if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { \ - rc = mca_pml_ob1_send_request_start_buffered(sendreq, bml_btl, size); \ - } else if \ - (ompi_convertor_need_buffers(&sendreq->req_send.req_convertor) == false) { \ - if( 0 != (sendreq->req_rdma_cnt = mca_pml_ob1_rdma_btls( \ - sendreq->req_endpoint, \ - sendreq->req_send.req_addr, \ - sendreq->req_send.req_bytes_packed, \ - sendreq->req_rdma))) { \ - rc = mca_pml_ob1_send_request_start_rdma(sendreq, bml_btl, size); \ - } else { \ - rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, MCA_PML_OB1_HDR_FLAGS_CONTIG); \ - } \ - } else { \ - rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, 0); \ - } \ - } \ - } \ + mca_bml_base_btl_t* bml_btl; \ + size_t size = sendreq->req_send.req_bytes_packed; \ + size_t eager_limit; \ + \ + if(endpoint == NULL) { \ + rc = OMPI_ERR_UNREACH; \ + break; \ + } \ + \ + sendreq->req_lock = 0; \ + sendreq->req_pipeline_depth = 0; \ + sendreq->req_bytes_delivered = 0; \ + sendreq->req_rdma_cnt = 0; \ + sendreq->req_state = 0; \ + sendreq->req_send_offset = 0; \ + sendreq->req_send.req_base.req_pml_complete = false; \ + sendreq->req_send.req_base.req_ompi.req_complete = false; \ + sendreq->req_send.req_base.req_ompi.req_state = OMPI_REQUEST_ACTIVE; \ + sendreq->req_send.req_base.req_ompi.req_status._cancelled = 0; \ + sendreq->req_send.req_base.req_sequence = OPAL_THREAD_ADD32( \ + &comm->procs[sendreq->req_send.req_base.req_peer].send_sequence,1); \ + sendreq->req_endpoint = endpoint; \ + \ + /* select a btl */ \ + bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager); \ + eager_limit = bml_btl->btl_eager_limit - sizeof(mca_pml_ob1_hdr_t); \ + \ + /* shortcut for zero byte */ \ + if(size <= eager_limit) { \ + switch(sendreq->req_send.req_send_mode) { \ + case MCA_PML_BASE_SEND_SYNCHRONOUS: \ + rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, 0); \ + break; \ + case MCA_PML_BASE_SEND_BUFFERED: \ + rc = mca_pml_ob1_send_request_start_copy(sendreq, bml_btl, size); \ + break; \ + case MCA_PML_BASE_SEND_COMPLETE: \ + rc = mca_pml_ob1_send_request_start_prepare(sendreq, bml_btl, size); \ + break; \ + default: \ + if (size == 0) { \ + mca_btl_base_descriptor_t* descriptor; \ + mca_btl_base_segment_t* segment; \ + mca_pml_ob1_hdr_t* hdr; \ + \ + /* allocate a descriptor */ \ + MCA_PML_OB1_DES_ALLOC(bml_btl, descriptor, sizeof(mca_pml_ob1_match_hdr_t)); \ + if(NULL == descriptor) { \ + return OMPI_ERR_OUT_OF_RESOURCE; \ + } \ + segment = descriptor->des_src; \ + \ + /* build hdr */ \ + hdr = (mca_pml_ob1_hdr_t*)segment->seg_addr.pval; \ + hdr->hdr_common.hdr_flags = 0; \ + hdr->hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_MATCH; \ + hdr->hdr_match.hdr_ctx = sendreq->req_send.req_base.req_comm->c_contextid; \ + hdr->hdr_match.hdr_src = sendreq->req_send.req_base.req_comm->c_my_rank; \ + hdr->hdr_match.hdr_tag = sendreq->req_send.req_base.req_tag; \ + hdr->hdr_match.hdr_seq = sendreq->req_send.req_base.req_sequence; \ + \ + /* short message */ \ + descriptor->des_cbfunc = mca_pml_ob1_match_completion_cache; \ + descriptor->des_flags |= MCA_BTL_DES_FLAGS_PRIORITY; \ + descriptor->des_cbdata = sendreq; \ + \ + /* request is complete at mpi level */ \ + OPAL_THREAD_LOCK(&ompi_request_lock); \ + MCA_PML_OB1_SEND_REQUEST_MPI_COMPLETE(sendreq); \ + OPAL_THREAD_UNLOCK(&ompi_request_lock); \ + \ + /* send */ \ + rc = mca_bml_base_send(bml_btl, descriptor, MCA_BTL_TAG_PML); \ + if(OMPI_SUCCESS != rc) { \ + mca_bml_base_free(bml_btl, descriptor ); \ + } \ + } else if (bml_btl->btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) { \ + rc = mca_pml_ob1_send_request_start_prepare(sendreq, bml_btl, size); \ + } else { \ + rc = mca_pml_ob1_send_request_start_copy(sendreq, bml_btl, size); \ + } \ + break; \ + } \ + } else { \ + size = eager_limit; \ + if(sendreq->req_send.req_send_mode == MCA_PML_BASE_SEND_BUFFERED) { \ + rc = mca_pml_ob1_send_request_start_buffered(sendreq, bml_btl, size); \ + } else if \ + (ompi_convertor_need_buffers(&sendreq->req_send.req_convertor) == false) { \ + if( 0 != (sendreq->req_rdma_cnt = mca_pml_ob1_rdma_btls( \ + sendreq->req_endpoint, \ + sendreq->req_send.req_addr, \ + sendreq->req_send.req_bytes_packed, \ + sendreq->req_rdma))) { \ + rc = mca_pml_ob1_send_request_start_rdma(sendreq, bml_btl, size); \ + } else { \ + rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, \ + MCA_PML_OB1_HDR_FLAGS_CONTIG); \ + } \ + } else { \ + rc = mca_pml_ob1_send_request_start_rndv(sendreq, bml_btl, size, 0); \ + } \ + } \ } while (0) diff --git a/ompi/mca/pml/pml.h b/ompi/mca/pml/pml.h index fbf41f046a..bfcea342d2 100644 --- a/ompi/mca/pml/pml.h +++ b/ompi/mca/pml/pml.h @@ -88,7 +88,8 @@ typedef enum { MCA_PML_BASE_SEND_STANDARD, MCA_PML_BASE_SEND_BUFFERED, MCA_PML_BASE_SEND_SYNCHRONOUS, - MCA_PML_BASE_SEND_READY + MCA_PML_BASE_SEND_READY, + MCA_PML_BASE_SEND_COMPLETE } mca_pml_base_send_mode_t;