diff --git a/ompi/mca/btl/openib/btl_openib.c b/ompi/mca/btl/openib/btl_openib.c index e6b903c00d..77d685ae26 100644 --- a/ompi/mca/btl/openib/btl_openib.c +++ b/ompi/mca/btl/openib/btl_openib.c @@ -16,6 +16,7 @@ #include "ompi_config.h" #include +#include #include "opal/util/output.h" #include "opal/util/if.h" #include "mca/pml/pml.h" @@ -187,7 +188,7 @@ int mca_btl_openib_free( if(frag->size == 0) { MCA_BTL_IB_FRAG_RETURN_FRAG(btl, frag); - OBJ_RELEASE(frag->vapi_reg); + OBJ_RELEASE(frag->openib_reg); } @@ -394,10 +395,10 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src( } frag->mr = openib_reg->mr; frag->sg_entry.length = max_data; - frag->sg_entry.lkey = openib_reg->l_key; + frag->sg_entry.lkey = openib_reg->mr->lkey; frag->sg_entry.addr = (uintptr_t) iov.iov_base; - frag->segment.seg_key.key32[0] = (uint32_t) frag->mr->l_key; + frag->segment.seg_key.key32[0] = (uint32_t) frag->mr->lkey; frag->base.des_src = &frag->segment; frag->base.des_src_cnt = 1; @@ -617,7 +618,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst( frag->mr = openib_reg->mr; - frag->sg_entry.len = *size; + frag->sg_entry.length = *size; frag->sg_entry.lkey = openib_reg->mr->lkey; frag->sg_entry.addr = (uintptr_t) frag->segment.seg_addr.pval; @@ -708,17 +709,16 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl, mca_btl_base_descriptor_t* descriptor) { struct ibv_send_wr* bad_wr; - mca_btl_openib_module_t* openib_btl = (mca_btl_openib_module_t*) btl; mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*) descriptor; frag->endpoint = endpoint; frag->sr_desc.opcode = IBV_WR_RDMA_WRITE; - frag->sr_desc.rdma.remote_addr = (uintptr_t) frag->base.des_src->seg_addr.pval; - frag->sr_desc.rdma.rkey = frag->base.des_dst->seg_key.key32[0]; + frag->sr_desc.wr.rdma.remote_addr = (uintptr_t) frag->base.des_src->seg_addr.pval; + frag->sr_desc.wr.rdma.rkey = frag->base.des_dst->seg_key.key32[0]; frag->sg_entry.addr = (uintptr_t) frag->base.des_src->seg_addr.pval; frag->sg_entry.length = frag->base.des_src->seg_len; if(ibv_post_send(endpoint->lcl_qp_low, - frag->sr_desc, + &frag->sr_desc, &bad_wr)){ opal_output(0, "%s: error posting send request\n", __func__); return OMPI_ERROR; @@ -792,14 +792,21 @@ int mca_btl_openib_module_init(mca_btl_openib_module_t *openib_btl) if(NULL == openib_btl->ib_pd) { - ompi_output(0, "%s: error allocating pd for %s\n", __func__, ibv_get_device_name(openib_btl->ib_dev)); + opal_output(0, "%s: error allocating pd for %s\n", __func__, ibv_get_device_name(openib_btl->ib_dev)); return OMPI_ERROR; } - openib_btl->ib_cq = ibv_create_cq(ctx, openib_btl->ib_cq_size, NULL); + openib_btl->ib_cq_low = ibv_create_cq(ctx, openib_btl->ib_cq_size, NULL); - if(NULL == openib_btl->ib_cq) { - ompi_output(0, "%s: error creating cq for %s\n", __func__, ibv_get_device_name(openib_btl->ib_dev)); + if(NULL == openib_btl->ib_cq_low) { + opal_output(0, "%s: error creating low priority cq for %s\n", __func__, ibv_get_device_name(openib_btl->ib_dev)); + return OMPI_ERROR; + } + + openib_btl->ib_cq_high = ibv_create_cq(ctx, openib_btl->ib_cq_size, NULL); + + if(NULL == openib_btl->ib_cq_high) { + opal_output(0, "%s: error creating high priority cq for %s\n", __func__, ibv_get_device_name(openib_btl->ib_dev)); return OMPI_ERROR; } diff --git a/ompi/mca/btl/openib/btl_openib_component.c b/ompi/mca/btl/openib/btl_openib_component.c index 9e35440c6a..50d28c961e 100644 --- a/ompi/mca/btl/openib/btl_openib_component.c +++ b/ompi/mca/btl/openib/btl_openib_component.c @@ -26,7 +26,6 @@ #include "mca/base/mca_base_param.h" #include "mca/errmgr/errmgr.h" -#include "mca/common/vapi/vapi_mem_reg.h" #include "mca/mpool/base/base.h" #include "btl_openib.h" #include "btl_openib_frag.h" @@ -109,7 +108,7 @@ int mca_btl_openib_component_open(void) /* initialize state */ mca_btl_openib_component.ib_num_btls=0; - mca_btl_openib_component.mvapi_btls=NULL; + mca_btl_openib_component.openib_btls=NULL; /* initialize objects */ OBJ_CONSTRUCT(&mca_btl_openib_component.ib_procs, opal_list_t); @@ -169,7 +168,7 @@ int mca_btl_openib_component_open(void) 1); mca_btl_openib_module.ib_mtu = mca_btl_openib_param_register_int("ib_mtu", - MTU1024); + IBV_MTU_1024); mca_btl_openib_module.ib_min_rnr_timer = mca_btl_openib_param_register_int("ib_min_rnr_timer", 5); @@ -249,7 +248,7 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules, uint32_t i,j, length; struct mca_mpool_base_resources_t mpool_resources; opal_list_t btl_list; - mca_btl_openib_module_t * mvapi_btl; + mca_btl_openib_module_t * openib_btl; mca_btl_base_selected_module_t* ib_selected; opal_list_item_t* item; /* initialization */ @@ -273,7 +272,7 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules, /* Allocate space for the ib devices */ - ib_devs = (struct ibv_dev**) malloc(num_devs * sizeof(struct ibv_dev*)); + ib_devs = (struct ibv_device**) malloc(num_devs * sizeof(struct ibv_dev*)); if(NULL == ib_devs) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return NULL; @@ -281,7 +280,7 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules, dlist_start(dev_list); - int i = 0; + i = 0; dlist_for_each_data(dev_list, ib_dev, struct ibv_device) ib_devs[i++] = ib_dev; @@ -295,7 +294,7 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules, for(i = 0; i < num_devs; i++){ - struct ibv_device_attr* ib_attr; + struct ibv_device_attr ib_dev_attr; struct ibv_context* ib_dev_context; ib_dev = ib_devs[i]; @@ -305,7 +304,7 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules, return NULL; } - if(ibv_query_device(context, &ib_attr)){ + if(ibv_query_device(ib_dev_context, &ib_dev_attr)){ opal_output(0, "%s: error obtaining device attributes for %s\n", __func__, ibv_get_device_name(ib_dev)); return NULL; } @@ -315,7 +314,8 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules, for(j = 1; j <= ib_dev_attr.phys_port_cnt; j++){ struct ibv_port_attr* ib_port_attr; - if(ibv_query_port(ib_dev_context, (uint8_t) j, &ib_port_attr)){ + ib_port_attr = (struct ibv_port_attr*) malloc(sizeof(struct ibv_port_attr)); + if(ibv_query_port(ib_dev_context, (uint8_t) j, ib_port_attr)){ opal_output(0, "%s: error getting port attributes for device %s port number %d", __func__, ibv_get_device_name(ib_dev), j); return NULL; @@ -323,8 +323,8 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules, if( IBV_PORT_ACTIVE == ib_port_attr->state ){ - openib_btl = (mca_btl_openib_module_t*) malloc(sizeof(mca_btl_openib_module_t)); - memcpy(openib_btl, &mca_btl_openib_module, sizeof(mca_btl_openib_module)); + openib_btl = (mca_btl_openib_module_t*) malloc(sizeof(mca_btl_openib_module_t)); + memcpy(openib_btl, &mca_btl_openib_module, sizeof(mca_btl_openib_module)); ib_selected = OBJ_NEW(mca_btl_base_selected_module_t); ib_selected->btl_module = (mca_btl_base_module_t*) openib_btl; @@ -468,8 +468,8 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules, /* Initialize the rr_desc_post array for posting of rr*/ - openib_btl->rr_desc_post = (struct ibv_recv_wr **) - malloc((mca_btl_openib_component.ib_rr_buf_max * sizeof(struct ibv_recv_wr*))); + openib_btl->rr_desc_post = (struct ibv_recv_wr *) + malloc((mca_btl_openib_component.ib_rr_buf_max * sizeof(struct ibv_recv_wr))); btls[i] = &openib_btl->super; } @@ -478,7 +478,7 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules, mca_btl_openib_post_recv(); *num_btl_modules = mca_btl_openib_component.ib_num_btls; - free(hca_ids); + free(ib_devs); return btls; } @@ -489,105 +489,128 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules, int mca_btl_openib_component_progress() { - uint32_t i; + uint32_t i, ne; int count = 0; mca_btl_openib_frag_t* frag; /* Poll for completions */ for(i = 0; i < mca_btl_openib_component.ib_num_btls; i++) { - VAPI_ret_t ret; - VAPI_wc_desc_t comp; + + struct ibv_wc wc; mca_btl_openib_module_t* openib_btl = &mca_btl_openib_component.openib_btls[i]; - do{ - ret = VAPI_poll_cq(openib_btl->nic, openib_btl->cq_hndl_high, &comp); - if(VAPI_OK == ret) { - if(comp.status != VAPI_SUCCESS) { - opal_output(0, "Got error : %s, Vendor code : %d Frag : %p", - VAPI_wc_status_sym(comp.status), - comp.vendor_err_syndrome, comp.id); + do{ + ne=ibv_poll_cq(openib_btl->ib_cq_high, 1, &wc ); + if(ne < 0 ){ + opal_output(0, "%s: error polling CQ with %d \n", __func__, ne); + return OMPI_ERROR; + } + else if(wc.status != IBV_WC_SUCCESS) { + opal_output(0, "%s: error polling CQ with status %d for wr_id %d\n", + __func__, + wc.status, wc.wr_id); return OMPI_ERROR; } - - /* Handle n/w completions */ - switch(comp.opcode) { - case VAPI_CQE_RQ_RDMA_WITH_IMM: - if(comp.imm_data_valid){ - opal_output(0, "Got an RQ_RDMA_WITH_IMM!\n"); - + else if(1 == ne) { + /* Handle n/w completions */ + switch(wc.opcode) { + case IBV_WC_RECV_RDMA_WITH_IMM: + opal_output(0, "Got an RDMA with Immediate data Not supported!\n"); + return OMPI_ERROR; + + case IBV_WC_RDMA_WRITE: + case IBV_WC_SEND : + if(wc.opcode & IBV_WC_RECV){ + /* process a recv completion (this should only occur for a send not an rdma) */ + DEBUG_OUT(0, "%s:%d ib recv under redesign\n", __FILE__, __LINE__); + frag = (mca_btl_openib_frag_t*) wc.wr_id; + frag->rc=OMPI_SUCCESS; + frag->segment.seg_len = + wc.byte_len- + ((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr); + + /* advance the segment address past the header and subtract from the length..*/ + openib_btl->ib_reg[frag->hdr->tag].cbfunc(&openib_btl->super, + frag->hdr->tag, + &frag->base, + openib_btl->ib_reg[frag->hdr->tag].cbdata); + + OMPI_FREE_LIST_RETURN(&(openib_btl->recv_free_eager), (opal_list_item_t*) frag); + OPAL_THREAD_ADD32(&openib_btl->rr_posted_high, -1); + + mca_btl_openib_endpoint_post_rr(((mca_btl_openib_frag_t*)wc.wr_id)->endpoint, 0); + + count++; + } + else { + /* Process a completed send */ + frag = (mca_btl_openib_frag_t*) wc.wr_id; + frag->rc = OMPI_SUCCESS; + frag->base.des_cbfunc(&openib_btl->super, frag->endpoint, &frag->base, frag->rc); + count++; + + } + + break; + + default: + opal_output(0, "Errorneous network completion"); + break; } - break; - case VAPI_CQE_SQ_RDMA_WRITE: - case VAPI_CQE_SQ_SEND_DATA : - - /* Process a completed send */ - frag = (mca_btl_openib_frag_t*) comp.id; - frag->rc = OMPI_SUCCESS; - frag->base.des_cbfunc(&openib_btl->super, frag->endpoint, &frag->base, frag->rc); - count++; - break; - - case VAPI_CQE_RQ_SEND_DATA: - - DEBUG_OUT(0, "%s:%d ib recv under redesign\n", __FILE__, __LINE__); - frag = (mca_btl_openib_frag_t*) comp.id; - frag->rc=OMPI_SUCCESS; - frag->segment.seg_len = comp.byte_len-((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr); - /* advance the segment address past the header and subtract from the length..*/ - openib_btl->ib_reg[frag->hdr->tag].cbfunc(&openib_btl->super, frag->hdr->tag, &frag->base, openib_btl->ib_reg[frag->hdr->tag].cbdata); - - OMPI_FREE_LIST_RETURN(&(openib_btl->recv_free_eager), (opal_list_item_t*) frag); - OPAL_THREAD_ADD32(&openib_btl->rr_posted_high, -1); - - mca_btl_openib_endpoint_post_rr(((mca_btl_openib_frag_t*)comp.id)->endpoint, 0); - - count++; - break; - - default: - opal_output(0, "Errorneous network completion"); - break; } } + while(ne > 0); + + ne=ibv_poll_cq(openib_btl->ib_cq_low, 1, &wc ); + if(ne < 0){ + opal_output(0, "%s: error polling CQ with %d \n", __func__, ne); + return OMPI_ERROR; + } + else if(wc.status != IBV_WC_SUCCESS) { + opal_output(0, "%s: error polling CQ with status %d for wr_id %d\n", + __func__, + wc.status, wc.wr_id); + return OMPI_ERROR; } - while(VAPI_OK == ret); - - ret = VAPI_poll_cq(openib_btl->nic, openib_btl->cq_hndl_low, &comp); - if(VAPI_OK == ret) { - if(comp.status != VAPI_SUCCESS) { - opal_output(0, "Got error : %s, Vendor code : %d Frag : %p", - VAPI_wc_status_sym(comp.status), - comp.vendor_err_syndrome, comp.id); - return OMPI_ERROR; - } - + else if(1 == ne) { /* Handle n/w completions */ - switch(comp.opcode) { - case VAPI_CQE_SQ_RDMA_WRITE: - case VAPI_CQE_SQ_SEND_DATA : + switch(wc.opcode) { + case IBV_WC_RECV_RDMA_WITH_IMM: + opal_output(0, "Got an RDMA with Immediate data Not supported!\n"); + return OMPI_ERROR; - /* Process a completed send */ - frag = (mca_btl_openib_frag_t*) comp.id; - frag->rc = OMPI_SUCCESS; - frag->base.des_cbfunc(&openib_btl->super, frag->endpoint, &frag->base, frag->rc); - count++; - break; + case IBV_WC_RDMA_WRITE: + case IBV_WC_SEND : + if(wc.opcode & IBV_WC_RECV){ + /* process a recv completion (this should only occur for a send not an rdma) */ + DEBUG_OUT(0, "%s:%d ib recv under redesign\n", __FILE__, __LINE__); + frag = (mca_btl_openib_frag_t*) wc.wr_id; + frag->rc=OMPI_SUCCESS; + frag->segment.seg_len = + wc.byte_len- + ((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr); - case VAPI_CQE_RQ_SEND_DATA: + /* advance the segment address past the header and subtract from the length..*/ + openib_btl->ib_reg[frag->hdr->tag].cbfunc(&openib_btl->super, + frag->hdr->tag, + &frag->base, + openib_btl->ib_reg[frag->hdr->tag].cbdata); - DEBUG_OUT(0, "%s:%d ib recv under redesign\n", __FILE__, __LINE__); - frag = (mca_btl_openib_frag_t*) comp.id; - frag->rc=OMPI_SUCCESS; - frag->segment.seg_len = comp.byte_len-((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr); - /* advance the segment address past the header and subtract from the length..*/ - openib_btl->ib_reg[frag->hdr->tag].cbfunc(&openib_btl->super, frag->hdr->tag, &frag->base, openib_btl->ib_reg[frag->hdr->tag].cbdata); - - OMPI_FREE_LIST_RETURN(&(openib_btl->recv_free_max), (opal_list_item_t*) frag); - OPAL_THREAD_ADD32(&openib_btl->rr_posted_low, -1); + OMPI_FREE_LIST_RETURN(&(openib_btl->recv_free_eager), (opal_list_item_t*) frag); + OPAL_THREAD_ADD32(&openib_btl->rr_posted_high, -1); + mca_btl_openib_endpoint_post_rr(((mca_btl_openib_frag_t*)wc.wr_id)->endpoint, 0); + + count++; + } + else { + /* Process a completed send */ + frag = (mca_btl_openib_frag_t*) wc.wr_id; + frag->rc = OMPI_SUCCESS; + frag->base.des_cbfunc(&openib_btl->super, frag->endpoint, &frag->base, frag->rc); + count++; + + } - mca_btl_openib_endpoint_post_rr(((mca_btl_openib_frag_t*)comp.id)->endpoint, 0); - - count++; break; default: diff --git a/ompi/mca/btl/openib/btl_openib_endpoint.c b/ompi/mca/btl/openib/btl_openib_endpoint.c index ac60d30f56..91a54464fa 100644 --- a/ompi/mca/btl/openib/btl_openib_endpoint.c +++ b/ompi/mca/btl/openib/btl_openib_endpoint.c @@ -61,10 +61,8 @@ static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* ope { struct ibv_qp* ib_qp; - struct ibv_send_wr *bad_wr; - - frag->sr_desc.remote_qkey = 0; - frag->sg_entry.addr = (uintprt_t) frag->hdr; + struct ibv_send_wr* bad_wr; + frag->sg_entry.addr = (uintptr_t) frag->hdr; if(frag->base.des_flags && MCA_BTL_DES_FLAGS_PRIORITY && frag->size <= openib_btl->super.btl_eager_limit){ ib_qp = endpoint->lcl_qp_high; @@ -82,7 +80,7 @@ static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* ope * if(frag->sg_entry.len <= openib_btl->ib_inline_max) { */ if(ibv_post_send(ib_qp, - frag->sr_desc, + &frag->sr_desc, &bad_wr)) { opal_output(0, "%s: error posting send request\n", __func__); return OMPI_ERROR; @@ -151,13 +149,13 @@ static int mca_btl_openib_endpoint_send_connect_req(mca_btl_base_endpoint_t* end /* pack the info in the send buffer */ - rc = orte_dps.pack(buffer, &endpoint->lcl_qp_prop_high.qp_num, 1, ORTE_UINT32); + rc = orte_dps.pack(buffer, &endpoint->lcl_qp_high->qp_num, 1, ORTE_UINT32); if(rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); return rc; } - rc = orte_dps.pack(buffer, &endpoint->lcl_qp_prop_low.qp_num, 1, ORTE_UINT32); + rc = orte_dps.pack(buffer, &endpoint->lcl_qp_low->qp_num, 1, ORTE_UINT32); if(rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); return rc; @@ -175,7 +173,7 @@ static int mca_btl_openib_endpoint_send_connect_req(mca_btl_base_endpoint_t* end return rc; } - rc = orte_dps.pack(buffer, &endpoint->endpoint_btl->port.lid, 1, ORTE_UINT16); + rc = orte_dps.pack(buffer, &endpoint->endpoint_btl->ib_port_attr->lid, 1, ORTE_UINT16); if(rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); return rc; @@ -359,7 +357,7 @@ static int mca_btl_openib_endpoint_reply_start_connect(mca_btl_openib_endpoint_t /* Create the High Priority Queue Pair */ if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_create_qp(openib_btl, openib_btl->ib_pd, - openib_btl->ib_cq, + openib_btl->ib_cq_high, endpoint->lcl_qp_attr_high, &endpoint->lcl_qp_high))) { opal_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n", @@ -371,7 +369,7 @@ static int mca_btl_openib_endpoint_reply_start_connect(mca_btl_openib_endpoint_t /* Create the Low Priority Queue Pair */ if(OMPI_SUCCESS != (rc = mca_btl_openib_endpoint_create_qp(openib_btl, openib_btl->ib_pd, - openib_btl->ib_cq, + openib_btl->ib_cq_low, endpoint->lcl_qp_attr_low, &endpoint->lcl_qp_low))) { opal_output(0, "[%lu,%lu,%lu] %s:%d errcode %d\n", @@ -512,8 +510,7 @@ static void mca_btl_openib_endpoint_recv( void mca_btl_openib_post_recv() { - DEBUG_OUT(""); - + orte_rml.recv_buffer_nb( ORTE_RML_NAME_ANY, ORTE_RML_TAG_DYNAMIC-1, @@ -542,7 +539,7 @@ int mca_btl_openib_endpoint_send( case MCA_BTL_IB_CONNECTING: DEBUG_OUT("Queing because state is connecting"); - + opal_list_append(&endpoint->pending_send_frags, (opal_list_item_t *)frag); @@ -686,17 +683,15 @@ int mca_btl_openib_endpoint_create_qp( ) { { - struct ibv_qp_init_attr qp_init_attr = { - .send_cq = cq, - .recv_cq = cq, - .cap = { - .max_send_wr = openib_btl->ib_wq_size, - .max_recv_wr = openib_btl->ib_wq_size, - .max_send_sge = openib_btl->ib_sg_list_size, - .max_recv_sge = openib_btl->ib_sg_list_size, - }, - .qp_type = IBV_QPT_RC - }; + struct ibv_qp_init_attr qp_init_attr; + qp_init_attr.send_cq = cq; + qp_init_attr.recv_cq = cq; + qp_init_attr.cap.max_send_wr = openib_btl->ib_wq_size; + qp_init_attr.cap.max_recv_wr = openib_btl->ib_wq_size; + qp_init_attr.cap.max_send_sge = openib_btl->ib_sg_list_size; + qp_init_attr.cap.max_recv_sge = openib_btl->ib_sg_list_size; + qp_init_attr.qp_type = IBV_QPT_RC; + (*qp) = ibv_create_qp(pd, &qp_init_attr); @@ -712,7 +707,7 @@ int mca_btl_openib_endpoint_create_qp( { qp_attr->qp_state = IBV_QPS_INIT; qp_attr->pkey_index = openib_btl->ib_pkey_ix; - qp_attr->qp_port_num = openib_btl->port_num; + qp_attr->port_num = openib_btl->port_num; qp_attr->qp_access_flags = 0; if(ibv_modify_qp((*qp), qp_attr, @@ -720,7 +715,7 @@ int mca_btl_openib_endpoint_create_qp( IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS )) { - opal_output("%s: error modifying qp to INIT\n"); + opal_output(0, "%s: error modifying qp to INIT\n"); return OMPI_ERROR; } } @@ -742,10 +737,10 @@ int mca_btl_openib_endpoint_qp_init_query( { attr->qp_state = IBV_QPS_RTR; - attr->path_mth = openib_btl->ib_mtu; + attr->path_mtu = openib_btl->ib_mtu; attr->dest_qp_num = rem_qp_num; attr->rq_psn = rem_psn; - attr->max_des_rd_atomic = openib_btl->ib_max_rdma_dst_ops; + attr->max_dest_rd_atomic = openib_btl->ib_max_rdma_dst_ops; attr->min_rnr_timer = openib_btl->ib_min_rnr_timer; attr->ah_attr.is_global = 0; attr->ah_attr.dlid = rem_lid; @@ -761,14 +756,14 @@ int mca_btl_openib_endpoint_qp_init_query( IBV_QP_RQ_PSN | IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER)) { - opal_out(0, "%s: error modifing QP to RTR\n", __func__); + opal_output(0, "%s: error modifing QP to RTR\n", __func__); return OMPI_ERROR; } attr->qp_state = IBV_QPS_RTS; attr->timeout = openib_btl->ib_timeout; attr->retry_cnt = openib_btl->ib_retry_count; attr->rnr_retry = openib_btl->ib_rnr_retry; - attr->sq_psn = my_psn; + attr->sq_psn = lcl_psn; attr->max_rd_atomic = openib_btl->ib_max_rdma_dst_ops; if (ibv_modify_qp(qp, attr, IBV_QP_STATE | @@ -778,7 +773,7 @@ int mca_btl_openib_endpoint_qp_init_query( IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC)) { opal_output(0, "%s: error modifying QP to RTS\n", __func__); - return OMPI_FAILURE; + return OMPI_ERROR; } return OMPI_SUCCESS; } diff --git a/ompi/mca/btl/openib/btl_openib_endpoint.h b/ompi/mca/btl/openib/btl_openib_endpoint.h index 9a856033c0..67199e3b2f 100644 --- a/ompi/mca/btl/openib/btl_openib_endpoint.h +++ b/ompi/mca/btl/openib/btl_openib_endpoint.h @@ -136,7 +136,7 @@ static inline int mca_btl_openib_endpoint_post_rr_sub(int cnt, mca_btl_openib_frag_t* frag; mca_btl_openib_module_t *openib_btl = endpoint->endpoint_btl; struct ibv_recv_wr* bad_wr; - struct ibv_recv_wr** rr_desc_post = openib_btl->rr_desc_post; + struct ibv_recv_wr* rr_desc_post = openib_btl->rr_desc_post; /* prepare frags and post receive requests, given, this is ugly, * if openib doesn't plan on supporting a post_list method than @@ -157,7 +157,7 @@ static inline int mca_btl_openib_endpoint_post_rr_sub(int cnt, for(i=0; i< cnt; i++){ if(ibv_post_recv(qp, - rr_desc_post[i], + &rr_desc_post[i], &bad_wr)) { opal_output(0, "%s: error posting receive\n", __func__); return OMPI_ERROR; diff --git a/ompi/mca/btl/openib/configure.stub b/ompi/mca/btl/openib/configure.stub index c4136be314..6a4e629711 100644 --- a/ompi/mca/btl/openib/configure.stub +++ b/ompi/mca/btl/openib/configure.stub @@ -67,11 +67,11 @@ AC_DEFUN([MCA_CONFIGURE_STUB],[ AC_CHECK_HEADERS(infiniband/verbs.h,, [CPPFLAGS="$CPPFLAGS $EXTRA_CPPFLAGS" - eval "unset ac_cv_header_vapi_h" + eval "unset ac_cv_header_verbs_h" AC_CHECK_HEADERS(infiniband/verbs.h,, AC_MSG_ERROR([*** Cannot find working infiniband/verbs.h]))]) - # Many vapi.h's have horrid semantics and don't obey ISOC99 + # Many verbs.h's have horrid semantics and don't obey ISOC99 # standards. So we have to turn off flags like -pedantic. Sigh. # Galen: is this true for Open IB? @@ -86,17 +86,17 @@ AC_DEFUN([MCA_CONFIGURE_STUB],[ LD_LIBRARY_PATH_save="$LD_LIBRARY_PATH" unset LD_LIBRARY_PATH - # Helper function to try to find libvapi (called from below). + # Helper function to try to find libibverbs (called from below). mca_btl_openib_try_find_lib() { func1=[$]1 func2=[$]2 LDFLAGS="$LDFLAGS $EXTRA_LDFLAGS" - vapi_badness= - AC_CHECK_LIB([vapi], [$func1], [], [vapi_badness=true], - [-lmthca -libcm]) - if test "$vapi_badness" != ""; then + ibverbs_badness= + AC_CHECK_LIB([ibverbs], [$func1], [], [ibverbs_badness=true], + [-libcm]) + if test "$ibverbs_badness" != ""; then AC_TRY_LINK([#include ], [pthread_create(0,0,0,0);], [pthreads="yes"], [pthreads="no"]) @@ -104,8 +104,8 @@ mca_btl_openib_try_find_lib() { [sysfs=yes LIBS="$LIBS -lsysfs"], [sysfs=no]) if test "$pthread" = "yes" && test "$sysfs" = "yes"; then - AC_CHECK_LIB([vapi], [$func2], [], [], - [-lmthca -libcm]) + AC_CHECK_LIB([ibverbs], [$func2], [], [], + [-libcm]) fi @@ -118,28 +118,28 @@ mca_btl_openib_try_find_lib() { LIBS_save="$LIBS" LDFLAGS_save="$LDFLAGS" # Galen: Are these the right extra libs? - LIBS="$LIBS -lmthca -libcm" + LIBS="$LIBS -libcm" LIBS_orig="$LIBS" EXTRA_LDFLAGS= if test -d "$IBLIBDIR/lib/infiniband"; then - EXTRA_LDFLAGS="-L$IBLIBDIR/lib/infiniband" + EXTRA_LDFLAGS="-L$IBLIBDIR/lib/infiniband -L$IBLIBDIR/lib -L$IBLIBDIR/lib/sysfs" LDFLAGS="$LDFLAGS $EXTRA_LDFLAGS" # Galen: are these the right symbol names? mca_btl_openib_try_find_lib ibv_get_devices ibv_open_device if test "$LIBS" != "$LIBS_orig"; then -# Galen: Are we looking for "libvapi"? +# Galen: Are we looking for "libibverbs"? echo "--> found libibverbs libs in $IBLIBDIR/lib" fi fi if test "$LIBS" = "$LIBS_orig"; then -# Galen: Are we looking for "libvapi"? +# Galen: Are we looking for "libibverbs"? AC_MSG_ERROR([*** Cannot find working libibverbs.]) fi LD_LIBRARY_PATH="$LD_LIBRARY_PATH_save" # Galen: Are these the right extra libs? - LIBS="$LIBS -lmthca -libcm" + LIBS="$LIBS -libcm" # # Save extra compiler/linker flags so that they can be added in @@ -148,5 +148,5 @@ mca_btl_openib_try_find_lib() { WRAPPER_EXTRA_LDFLAGS="$EXTRA_LDFLAGS" # Galen: Are these the right extra libs? - WRAPPER_EXTRA_LIBS="-libibverbs -lmmthca -libcm" + WRAPPER_EXTRA_LIBS="-libverbs -libcm" ])dnl diff --git a/ompi/mca/mpool/openib/configure.stub b/ompi/mca/mpool/openib/configure.stub index 25823443a2..6a4e629711 100644 --- a/ompi/mca/mpool/openib/configure.stub +++ b/ompi/mca/mpool/openib/configure.stub @@ -60,18 +60,18 @@ AC_DEFUN([MCA_CONFIGURE_STUB],[ fi fi - # See if we can find vapi.h. First try without any additional + # See if we can find verbs.h. First try without any additional # -I's to see if we can find it easily. If we don't find it, then # try again with the EXTRA_CPPFLAGS. This prevents us from adding # things like -I/usr/local if we don't need to. - AC_CHECK_HEADERS(infiniband/vapi.h,, + AC_CHECK_HEADERS(infiniband/verbs.h,, [CPPFLAGS="$CPPFLAGS $EXTRA_CPPFLAGS" - eval "unset ac_cv_header_vapi_h" - AC_CHECK_HEADERS(infiniband/vapi.h,, - AC_MSG_ERROR([*** Cannot find working infiniband/vapi.h]))]) + eval "unset ac_cv_header_verbs_h" + AC_CHECK_HEADERS(infiniband/verbs.h,, + AC_MSG_ERROR([*** Cannot find working infiniband/verbs.h]))]) - # Many vapi.h's have horrid semantics and don't obey ISOC99 + # Many verbs.h's have horrid semantics and don't obey ISOC99 # standards. So we have to turn off flags like -pedantic. Sigh. # Galen: is this true for Open IB? @@ -86,17 +86,17 @@ AC_DEFUN([MCA_CONFIGURE_STUB],[ LD_LIBRARY_PATH_save="$LD_LIBRARY_PATH" unset LD_LIBRARY_PATH - # Helper function to try to find libvapi (called from below). + # Helper function to try to find libibverbs (called from below). mca_btl_openib_try_find_lib() { func1=[$]1 func2=[$]2 LDFLAGS="$LDFLAGS $EXTRA_LDFLAGS" - vapi_badness= - AC_CHECK_LIB([vapi], [$func1], [], [vapi_badness=true], - [-lmthca -libcm]) - if test "$vapi_badness" != ""; then + ibverbs_badness= + AC_CHECK_LIB([ibverbs], [$func1], [], [ibverbs_badness=true], + [-libcm]) + if test "$ibverbs_badness" != ""; then AC_TRY_LINK([#include ], [pthread_create(0,0,0,0);], [pthreads="yes"], [pthreads="no"]) @@ -104,8 +104,8 @@ mca_btl_openib_try_find_lib() { [sysfs=yes LIBS="$LIBS -lsysfs"], [sysfs=no]) if test "$pthread" = "yes" && test "$sysfs" = "yes"; then - AC_CHECK_LIB([vapi], [$func2], [], [], - [-lmthca -libcm]) + AC_CHECK_LIB([ibverbs], [$func2], [], [], + [-libcm]) fi @@ -118,28 +118,28 @@ mca_btl_openib_try_find_lib() { LIBS_save="$LIBS" LDFLAGS_save="$LDFLAGS" # Galen: Are these the right extra libs? - LIBS="$LIBS -lmthca -libcm" + LIBS="$LIBS -libcm" LIBS_orig="$LIBS" EXTRA_LDFLAGS= if test -d "$IBLIBDIR/lib/infiniband"; then - EXTRA_LDFLAGS="-L$IBLIBDIR/lib/infiniband" + EXTRA_LDFLAGS="-L$IBLIBDIR/lib/infiniband -L$IBLIBDIR/lib -L$IBLIBDIR/lib/sysfs" LDFLAGS="$LDFLAGS $EXTRA_LDFLAGS" # Galen: are these the right symbol names? mca_btl_openib_try_find_lib ibv_get_devices ibv_open_device if test "$LIBS" != "$LIBS_orig"; then -# Galen: Are we looking for "libvapi"? +# Galen: Are we looking for "libibverbs"? echo "--> found libibverbs libs in $IBLIBDIR/lib" fi fi if test "$LIBS" = "$LIBS_orig"; then -# Galen: Are we looking for "libvapi"? +# Galen: Are we looking for "libibverbs"? AC_MSG_ERROR([*** Cannot find working libibverbs.]) fi LD_LIBRARY_PATH="$LD_LIBRARY_PATH_save" # Galen: Are these the right extra libs? - LIBS="$LIBS -lmthca -libcm" + LIBS="$LIBS -libcm" # # Save extra compiler/linker flags so that they can be added in @@ -148,5 +148,5 @@ mca_btl_openib_try_find_lib() { WRAPPER_EXTRA_LDFLAGS="$EXTRA_LDFLAGS" # Galen: Are these the right extra libs? - WRAPPER_EXTRA_LIBS="-libibverbs -lmmthca -libcm" + WRAPPER_EXTRA_LIBS="-libverbs -libcm" ])dnl diff --git a/ompi/mca/mpool/openib/mpool_openib_module.c b/ompi/mca/mpool/openib/mpool_openib_module.c index 7e8da2eec8..bff2d69ce7 100644 --- a/ompi/mca/mpool/openib/mpool_openib_module.c +++ b/ompi/mca/mpool/openib/mpool_openib_module.c @@ -66,7 +66,7 @@ int mca_mpool_openib_register(mca_mpool_base_module_t* mpool, vapi_reg->mr = ibv_reg_mr( - mpool_module->resources->ib_pd, + mpool_module->resources.ib_pd, addr, size, IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE @@ -91,17 +91,11 @@ int mca_mpool_openib_register(mca_mpool_base_module_t* mpool, int mca_mpool_openib_deregister(mca_mpool_base_module_t* mpool, void *addr, size_t size, mca_mpool_base_registration_t* registration){ - VAPI_ret_t ret; mca_mpool_openib_module_t * mpool_openib = (mca_mpool_openib_module_t*) mpool; - mca_mpool_openib_registration_t * vapi_reg; - vapi_reg = (mca_mpool_openib_registration_t*) registration; - ret = VAPI_deregister_mr( - mpool_openib->hca_pd.hca, - vapi_reg->hndl - ); - - if(VAPI_OK != ret){ - opal_output(0, "%s: error unpinning vapi memory\n", __func__); + mca_mpool_openib_registration_t * openib_reg; + openib_reg = (mca_mpool_openib_registration_t*) registration; + if(! ibv_dereg_mr(openib_reg->mr)){ + opal_output(0, "%s: error unpinning openib memory\n", __func__); return OMPI_ERROR; } free(registration);