diff --git a/src/mca/bmi/ib/bmi_ib.c b/src/mca/bmi/ib/bmi_ib.c index 5ab5273e3b..59de514afd 100644 --- a/src/mca/bmi/ib/bmi_ib.c +++ b/src/mca/bmi/ib/bmi_ib.c @@ -173,17 +173,24 @@ int mca_bmi_ib_free( { mca_bmi_ib_frag_t* frag = (mca_bmi_ib_frag_t*)des; mca_bmi_ib_module_t * ib_bmi = (mca_bmi_ib_module_t*) bmi; + mca_mpool_base_chunk_t * mpool_chunk; if(frag->size == 0) { MCA_BMI_IB_FRAG_RETURN_FRAG(bmi, frag); - /* we also need to unregister the associated memory */ - frag->ret = VAPI_deregister_mr( - ib_bmi->nic, - frag->mem_hndl - ); - if(frag->ret!=VAPI_OK){ - ompi_output(0, "%s:error deregistering memory region", __func__); - return OMPI_ERROR; + + /* we also need to unregister the associated memory iff + the memory wasn't allocated via MPI_Alloc_mem */ + + mpool_chunk = mca_mpool_base_find((void*) frag->segment.seg_addr.pval); + if(NULL == mpool_chunk){ + frag->ret = VAPI_deregister_mr( + ib_bmi->nic, + frag->mem_hndl + ); + if(frag->ret!=VAPI_OK){ + ompi_output(0, "%s:error deregistering memory region", __func__); + return OMPI_ERROR; + } } } else if(frag->size == mca_bmi_ib_component.max_send_size){ @@ -240,8 +247,8 @@ mca_bmi_base_descriptor_t* mca_bmi_ib_prepare_src( frag->segment.seg_len = max_data + reserve; *size = max_data; return &frag->base; - - }else if( max_data + reserve < ib_bmi->super.bmi_min_rdma_size || 1 == ompi_convertor_need_buffers( convertor) ){ + + }else if( max_data + reserve <= ib_bmi->super.bmi_max_send_size || 1 == ompi_convertor_need_buffers( convertor) ){ MCA_BMI_IB_FRAG_ALLOC_MAX(bmi, frag, rc); if(NULL == frag) { return NULL; @@ -271,6 +278,7 @@ mca_bmi_base_descriptor_t* mca_bmi_ib_prepare_src( return &frag->base; } else { + VAPI_mrw_t mr_in, mr_out; VAPI_ret_t ret; mca_common_vapi_memhandle_t mem_hndl; @@ -303,13 +311,11 @@ mca_bmi_base_descriptor_t* mca_bmi_ib_prepare_src( /* first we will try to find this address in the memory tree (from MPI_Alloc_mem) */ mpool_chunk = mca_mpool_base_find((void*) iov.iov_base); - + frag->segment.seg_len = max_data; + frag->segment.seg_addr.pval = iov.iov_base; + if(NULL == mpool_chunk) { - frag->segment.seg_len = max_data; - frag->segment.seg_addr.pval = iov.iov_base; - - mr_in.size = max_data; mr_in.start = (VAPI_virt_addr_t) (MT_virt_addr_t) iov.iov_base; @@ -337,25 +343,26 @@ mca_bmi_base_descriptor_t* mca_bmi_ib_prepare_src( } } - frag->mem_hndl = mem_hndl.hndl; - frag->sg_entry.len = max_data; - frag->sg_entry.lkey = mem_hndl.l_key; - frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) iov.iov_base; - - frag->segment.seg_key.key32[0] = (uint32_t) mem_hndl.l_key; - - frag->base.des_src = &frag->segment; - frag->base.des_src_cnt = 1; - frag->base.des_dst = NULL; - frag->base.des_dst_cnt = 0; - - return &frag->base; - - } + frag->mem_hndl = mem_hndl.hndl; + frag->sg_entry.len = max_data; + frag->sg_entry.lkey = mem_hndl.l_key; + frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) iov.iov_base; + frag->segment.seg_key.key32[0] = (uint32_t) mem_hndl.l_key; + + frag->base.des_src = &frag->segment; + frag->base.des_src_cnt = 1; + frag->base.des_dst = NULL; + frag->base.des_dst_cnt = 0; + + + return &frag->base; + + } - return NULL; + + return NULL; } /** diff --git a/src/mca/bmi/ib/bmi_ib_component.c b/src/mca/bmi/ib/bmi_ib_component.c index fe9a7c4ac1..df6ace1e2f 100644 --- a/src/mca/bmi/ib/bmi_ib_component.c +++ b/src/mca/bmi/ib/bmi_ib_component.c @@ -186,15 +186,12 @@ int mca_bmi_ib_component_open(void) mca_bmi_ib_module.ib_src_path_bits = mca_bmi_ib_param_register_int("ib_src_path_bits", 0); - mca_bmi_ib_module.super.bmi_min_rdma_size = mca_bmi_ib_param_register_int("min_rdma_size", 256*1024); mca_bmi_ib_module.super.bmi_max_rdma_size = mca_bmi_ib_param_register_int("max_rdma_size", 512*1024); - - mca_bmi_ib_module.super.bmi_flags = mca_bmi_ib_param_register_int("flags", MCA_BMI_FLAGS_RDMA); @@ -516,9 +513,9 @@ int mca_bmi_ib_component_progress() ompi_output(0, "Got error : %s, Vendor code : %d Frag : %p", VAPI_wc_status_sym(comp.status), comp.vendor_err_syndrome, comp.id); - frag->rc = OMPI_ERROR; + return OMPI_ERROR; } - + /* Handle n/w completions */ switch(comp.opcode) { case VAPI_CQE_SQ_RDMA_WRITE: @@ -526,6 +523,7 @@ int mca_bmi_ib_component_progress() /* Process a completed send */ frag = (mca_bmi_ib_frag_t*) comp.id; + frag->rc = OMPI_SUCCESS; frag->base.des_cbfunc(&ib_bmi->super, frag->endpoint, &frag->base, frag->rc); count++; break; @@ -533,7 +531,8 @@ int mca_bmi_ib_component_progress() case VAPI_CQE_RQ_SEND_DATA: DEBUG_OUT(0, "%s:%d ib recv under redesign\n", __FILE__, __LINE__); - frag = (mca_bmi_ib_frag_t*) comp.id; + frag = (mca_bmi_ib_frag_t*) comp.id; + frag->rc=OMPI_SUCCESS; frag->segment.seg_len = comp.byte_len-((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr); /* advance the segment address past the header and subtract from the length..*/ ib_bmi->ib_reg[frag->hdr->tag].cbfunc(&ib_bmi->super, frag->hdr->tag, &frag->base, ib_bmi->ib_reg[frag->hdr->tag].cbdata); diff --git a/src/mca/pml/ob1/pml_ob1_recvreq.c b/src/mca/pml/ob1/pml_ob1_recvreq.c index 12399c386c..cba902ce19 100644 --- a/src/mca/pml/ob1/pml_ob1_recvreq.c +++ b/src/mca/pml/ob1/pml_ob1_recvreq.c @@ -297,7 +297,7 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq) do { size_t bytes_remaining = recvreq->req_recv.req_bytes_packed - recvreq->req_rdma_offset; while(bytes_remaining > 0 && recvreq->req_pipeline_depth < mca_pml_ob1.recv_pipeline_depth) { - mca_pml_ob1_endpoint_t* ep = mca_pml_ob1_ep_array_get_next(&proc->bmi_send); + mca_pml_ob1_endpoint_t* ep = mca_pml_ob1_ep_array_get_next(&proc->bmi_rdma); size_t hdr_size; mca_pml_ob1_rdma_hdr_t* hdr; mca_bmi_base_descriptor_t* dst; @@ -386,7 +386,9 @@ void mca_pml_ob1_recv_request_schedule(mca_pml_ob1_recv_request_t* recvreq) OMPI_THREAD_UNLOCK(&mca_pml_ob1.lock); break; } - } + rc = ep->bmi->bmi_component->bmi_progress(); + + } } while(OMPI_THREAD_ADD32(&recvreq->req_lock,-1) > 0); } }