diff --git a/src/mca/bmi/ib/bmi_ib.c b/src/mca/bmi/ib/bmi_ib.c index 936ad16bd8..790c5af2e5 100644 --- a/src/mca/bmi/ib/bmi_ib.c +++ b/src/mca/bmi/ib/bmi_ib.c @@ -151,7 +151,9 @@ mca_bmi_base_descriptor_t* mca_bmi_ib_alloc( /* if(size <= ib_bmi->super.bmi_eager_limit){ */ - + + + if(size <= mca_bmi_ib_component.eager_limit){ MCA_BMI_IB_FRAG_ALLOC_EAGER(bmi, frag, rc); frag->segment.seg_len = @@ -167,7 +169,8 @@ mca_bmi_base_descriptor_t* mca_bmi_ib_alloc( /* } else { */ - /* frag = (mca_bmi_ib_frag_t*) ib_bmi->ib_pool->mpool_alloc(ib_bmi->ib_pool, sizeof(frag) + sizeof(mca_bmi_ib_header_t) + size ,0, &user_out); */ + + /* frag = (mca_bmi_ib_frag_t*) ib_bmi->ib_pool->mpool_alloc(ib_bmi->ib_pool, sizeof(frag) + sizeof(mca_bmi_ib_header_t) + size ,0, &user_out); */ /* frag->base.super.user_data = user_out; */ /* OBJ_CONSTRUCT(frag, mca_bmi_ib_frag_t); */ @@ -308,8 +311,12 @@ mca_bmi_base_descriptor_t* mca_bmi_ib_prepare_src( mem_hndl.r_key = mr_out.r_key; frag->mem_hndl = mem_hndl.hndl; - frag->segment.seg_key.key32[1] = (uint32_t) mem_hndl.l_key; - + frag->sg_entry.len = max_data; + frag->sg_entry.lkey = mem_hndl.l_key; + frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) iov.iov_base; + + frag->segment.seg_key.key32[0] = (uint32_t) mem_hndl.l_key; + return &frag->base; } @@ -331,11 +338,103 @@ mca_bmi_base_descriptor_t* mca_bmi_ib_prepare_dst( size_t reserve, size_t* size) { - return NULL; + mca_bmi_ib_module_t* ib_bmi; + mca_bmi_ib_frag_t* frag; + struct iovec iov; + int32_t iov_count = 1; + size_t max_data = *size; + int32_t free_after; + int rc; + void* user_out; + + ib_bmi = (mca_bmi_ib_module_t*) bmi; + VAPI_mrw_t mr_in, mr_out; + VAPI_ret_t ret; + mca_common_vapi_memhandle_t mem_hndl; + + memset(&mr_in, 0, sizeof(VAPI_mrw_t)); + memset(&mr_out, 0, sizeof(VAPI_mrw_t)); + memset(&mem_hndl, 0, sizeof(mca_common_vapi_memhandle_t)); + + mem_hndl.hndl = VAPI_INVAL_HNDL; + + + mr_in.acl = VAPI_EN_LOCAL_WRITE | VAPI_EN_REMOTE_WRITE; + mr_in.l_key = 0; + mr_in.r_key = 0; + mr_in.pd_hndl = ib_bmi->ptag; + mr_in.type = VAPI_MR; + MCA_BMI_IB_FRAG_ALLOC_FRAG(bmi, frag, rc); + if(NULL == frag){ + return NULL; + } + + iov.iov_len = max_data; + iov.iov_base = NULL; + + ompi_convertor_pack(convertor, &iov, &iov_count, &max_data, &free_after); + frag->segment.seg_len = max_data; + frag->segment.seg_addr.pval = iov.iov_base; + + mr_in.size = max_data; + mr_in.start = (VAPI_virt_addr_t) (MT_virt_addr_t) iov.iov_base; + + ret = VAPI_register_mr( + ib_bmi->nic, + &mr_in, + &mem_hndl.hndl, + &mr_out + ); + + if(VAPI_OK != ret){ + ompi_output(0, "error pinning vapi memory\n"); + return NULL; + } + + mem_hndl.l_key = mr_out.l_key; + mem_hndl.r_key = mr_out.r_key; + + frag->mem_hndl = mem_hndl.hndl; + frag->sg_entry.len = max_data; + frag->sg_entry.lkey = mem_hndl.l_key; + frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) iov.iov_base; + + frag->segment.seg_key.key32[0] = (uint32_t) mem_hndl.l_key; + + return &frag->base; + } int mca_bmi_ib_finalize(struct mca_bmi_base_module_t* bmi) { + mca_bmi_ib_module_t* ib_bmi; + ib_bmi = (mca_bmi_ib_module_t*) bmi; + + if(ib_bmi->send_free_eager.fl_num_allocated != + ib_bmi->send_free_eager.super.ompi_list_length){ + ompi_output(0, "bmi ib send_free_eager frags: %d allocated %d returned \n", + ib_bmi->send_free_eager.fl_num_allocated, + ib_bmi->send_free_eager.super.ompi_list_length); + } + if(ib_bmi->send_free_max.fl_num_allocated != + ib_bmi->send_free_max.super.ompi_list_length){ + ompi_output(0, "bmi ib send_free_max frags: %d allocated %d returned \n", + ib_bmi->send_free_max.fl_num_allocated, + ib_bmi->send_free_max.super.ompi_list_length); + } + if(ib_bmi->send_free_frag.fl_num_allocated != + ib_bmi->send_free_frag.super.ompi_list_length){ + ompi_output(0, "bmi ib send_free_frag frags: %d allocated %d returned \n", + ib_bmi->send_free_frag.fl_num_allocated, + ib_bmi->send_free_frag.super.ompi_list_length); + } + if(ib_bmi->recv_free.fl_num_allocated != + ib_bmi->recv_free.super.ompi_list_length){ + ompi_output(0, "bmi ib recv_free frags: %d allocated %d returned \n", + ib_bmi->recv_free.fl_num_allocated, + ib_bmi->recv_free.super.ompi_list_length); + } + return OMPI_SUCCESS; } @@ -369,10 +468,24 @@ int mca_bmi_ib_send( */ int mca_bmi_ib_put( mca_bmi_base_module_t* bmi, - mca_bmi_base_endpoint_t* bmi_peer, + mca_bmi_base_endpoint_t* endpoint, mca_bmi_base_descriptor_t* descriptor) { - return OMPI_ERR_NOT_IMPLEMENTED; + mca_bmi_ib_module_t* ib_bmi = (mca_bmi_ib_module_t*) bmi; + mca_bmi_ib_frag_t* frag = (mca_bmi_ib_frag_t*) descriptor; + frag->sr_desc.opcode = VAPI_RDMA_WRITE; + frag->sr_desc.remote_qp = endpoint->rem_qp_num; + frag->sr_desc.remote_addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_dst->seg_addr.pval; + + frag->ret = VAPI_post_sr(ib_bmi->nic, + endpoint->lcl_qp_hndl, + &frag->sr_desc); + if(VAPI_OK != frag->ret){ + return OMPI_ERROR; + } + + return OMPI_SUCCESS; + } diff --git a/src/mca/bmi/ib/bmi_ib_component.c b/src/mca/bmi/ib/bmi_ib_component.c index 8b1a261e3f..6a47489afb 100644 --- a/src/mca/bmi/ib/bmi_ib_component.c +++ b/src/mca/bmi/ib/bmi_ib_component.c @@ -371,7 +371,8 @@ mca_bmi_base_module_t** mca_bmi_ib_component_init(int *num_bmi_modules, length = sizeof(mca_bmi_ib_frag_t) + sizeof(mca_bmi_ib_header_t) + - ib_bmi->super.bmi_eager_limit; + ib_bmi->super.bmi_eager_limit+ + 2*MCA_BMI_IB_FRAG_ALIGN; ompi_free_list_init(&ib_bmi->send_free_eager, length, @@ -385,7 +386,8 @@ mca_bmi_base_module_t** mca_bmi_ib_component_init(int *num_bmi_modules, length = sizeof(mca_bmi_ib_frag_t) + sizeof(mca_bmi_ib_header_t) + - ib_bmi->super.bmi_max_send_size; + ib_bmi->super.bmi_max_send_size+ + 2*MCA_BMI_IB_FRAG_ALIGN; ompi_free_list_init(&ib_bmi->send_free_max, @@ -408,7 +410,8 @@ mca_bmi_base_module_t** mca_bmi_ib_component_init(int *num_bmi_modules, length = sizeof(mca_bmi_ib_frag_t) + - sizeof(mca_bmi_ib_header_t); + sizeof(mca_bmi_ib_header_t)+ + 2*MCA_BMI_IB_FRAG_ALIGN; diff --git a/src/mca/bmi/ib/bmi_ib_frag.c b/src/mca/bmi/ib/bmi_ib_frag.c index ae6a3c7b55..9b0fa1bb30 100644 --- a/src/mca/bmi/ib/bmi_ib_frag.c +++ b/src/mca/bmi/ib/bmi_ib_frag.c @@ -6,16 +6,33 @@ static void mca_bmi_ib_frag_common_constructor( mca_bmi_ib_frag_t* frag) { + size_t mod; mca_common_vapi_memhandle_t* mem_hndl = frag->base.super.user_data; frag->hdr = (mca_bmi_ib_header_t*) (frag+1); /* initialize the bmi header to point to start at end of frag */ - frag->segment.seg_addr.pval = frag->hdr+1; /* init the segment address to start after the bmi header */ +#if 0 + mod = (unsigned long) frag->hdr % MCA_BMI_IB_FRAG_ALIGN; + + if(mod != 0) { + frag->hdr = (unsigned char*) frag->hdr + (MCA_BMI_IB_FRAG_ALIGN - mod); + } +#endif + + frag->segment.seg_addr.pval = ((unsigned char* )frag->hdr) + sizeof(mca_bmi_ib_header_t); /* init the segment address to start after the bmi header */ + +#if 0 + mod = (frag->segment.seg_addr.lval) % MCA_BMI_IB_FRAG_ALIGN; + if(mod != 0) { + frag->segment.seg_addr.lval += (MCA_BMI_IB_FRAG_ALIGN - mod); + } +#endif + frag->segment.seg_len = frag->size; frag->base.des_src = &frag->segment; frag->base.des_src_cnt = 1; frag->base.des_dst = NULL; frag->base.des_dst_cnt = 0; frag->base.des_flags = 0; - frag->base.des_src->seg_key.key64 = (uint64_t) mem_hndl->l_key; + frag->base.des_src->seg_key.key32[0] = (uint64_t) mem_hndl->l_key; frag->sg_entry.lkey = mem_hndl->l_key; frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->hdr; } diff --git a/src/mca/bmi/ib/bmi_ib_frag.h b/src/mca/bmi/ib/bmi_ib_frag.h index 4f87080910..fa89ae2f97 100644 --- a/src/mca/bmi/ib/bmi_ib_frag.h +++ b/src/mca/bmi/ib/bmi_ib_frag.h @@ -17,6 +17,8 @@ #ifndef MCA_BMI_IB_FRAG_H #define MCA_BMI_IB_FRAG_H + +#define MCA_BMI_IB_FRAG_ALIGN (8) #include "ompi_config.h" #include "bmi_ib.h"