diff --git a/ompi/mca/btl/openib/btl_openib.c b/ompi/mca/btl/openib/btl_openib.c index 0559513ffa..e7c6f3497a 100644 --- a/ompi/mca/btl/openib/btl_openib.c +++ b/ompi/mca/btl/openib/btl_openib.c @@ -218,10 +218,6 @@ int mca_btl_openib_add_procs( return OMPI_ERR_OUT_OF_RESOURCE; } - endpoint->endpoint_btl = openib_btl; - endpoint->use_eager_rdma = openib_btl->hca->use_eager_rdma & - mca_btl_openib_component.use_eager_rdma; - endpoint->subnet_id = openib_btl->port_info.subnet_id; #if HAVE_XRC if (MCA_BTL_XRC_ENABLED) { /* Pasha: now we need to push the subnet and lid to some global table in the component */ @@ -237,6 +233,7 @@ int mca_btl_openib_add_procs( ib_proc->port_touse++; } #endif + mca_btl_openib_endpoint_init(openib_btl, endpoint); rc = mca_btl_openib_proc_insert(ib_proc, endpoint); if(rc != OMPI_SUCCESS) { OBJ_RELEASE(endpoint); diff --git a/ompi/mca/btl/openib/btl_openib_endpoint.c b/ompi/mca/btl/openib/btl_openib_endpoint.c index cde310f2cd..f6e92d53ec 100644 --- a/ompi/mca/btl/openib/btl_openib_endpoint.c +++ b/ompi/mca/btl/openib/btl_openib_endpoint.c @@ -324,16 +324,8 @@ static void endpoint_init_qp_xrc(mca_btl_openib_endpoint_qp_t *ep_qp, const int qp, mca_btl_openib_qp_t *xrc_qp) { - /* In XRC mode the we the qps used as send qp only. We need only one send - * qp, and other qps points to the first one */ - if (0 == qp) { - ep_qp->qp = endpoint_alloc_qp(); - /* number of available send WQEs */ - ep_qp->qp->sd_wqe = - mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max; - } else { - ep_qp->qp = xrc_qp; - } + ep_qp->qp = xrc_qp; + ep_qp->qp->sd_wqe += mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max; ep_qp->qp->users++; } @@ -354,7 +346,9 @@ static void endpoint_init_qp(mca_btl_base_endpoint_t *ep, const int qp) endpoint_init_qp_srq(ep_qp, qp); break; case MCA_BTL_OPENIB_XRC_QP: - endpoint_init_qp_xrc(ep_qp, qp, ep->qps[0].qp); + if(NULL == ep->ib_addr->qp) + ep->ib_addr->qp = endpoint_alloc_qp(); + endpoint_init_qp_xrc(ep_qp, qp, ep->ib_addr->qp); break; default: BTL_ERROR(("Wrong QP type")); @@ -362,11 +356,22 @@ static void endpoint_init_qp(mca_btl_base_endpoint_t *ep, const int qp) } } +void mca_btl_openib_endpoint_init(mca_btl_openib_module_t *btl, + mca_btl_base_endpoint_t *ep) +{ + int qp; + + ep->endpoint_btl = btl; + ep->use_eager_rdma = btl->hca->use_eager_rdma & + mca_btl_openib_component.use_eager_rdma; + ep->subnet_id = btl->port_info.subnet_id; + + for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) + endpoint_init_qp(ep, qp); +} + static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint) { - - int qp; - /* setup qp structures */ endpoint->qps = (mca_btl_openib_endpoint_qp_t*) calloc(mca_btl_openib_component.num_qps, @@ -413,9 +418,6 @@ static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint) endpoint->use_eager_rdma = false; endpoint->eager_rdma_remote.tokens = 0; endpoint->eager_rdma_local.credits = 0; - - for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) - endpoint_init_qp(endpoint, qp); } /* @@ -450,12 +452,6 @@ static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint) /* Close opened QPs if we have them*/ for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) { - if (BTL_OPENIB_QP_TYPE_XRC(qp) && - endpoint != endpoint->ib_addr->ep_xrc_master) { - /* in XRC case we need to release only first one on master - * endpoint */ - goto clean_endpoint; - } MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(&endpoint->qps[qp].pending_frags[0]); MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(&endpoint->qps[qp].pending_frags[1]); OBJ_DESTRUCT(&endpoint->qps[qp].pending_frags[0]); @@ -481,7 +477,6 @@ static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint) /* free the qps */ free(endpoint->qps); -clean_endpoint: /* destroy recv qp */ if (NULL != endpoint->xrc_recv_qp) { if(ibv_destroy_qp(endpoint->xrc_recv_qp)) { @@ -537,12 +532,10 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint) if (MCA_BTL_IB_ADDR_CONNECTED == endpoint->ib_addr->status) { /* We are not xrc master */ /* set our qp pointer to master qp */ - endpoint->qps = endpoint->ib_addr->ep_xrc_master->qps; master = false; } else { /* I'm master of XRC */ endpoint->ib_addr->status = MCA_BTL_IB_ADDR_CONNECTED; - endpoint->ib_addr->ep_xrc_master = endpoint; master = true; } } diff --git a/ompi/mca/btl/openib/btl_openib_endpoint.h b/ompi/mca/btl/openib/btl_openib_endpoint.h index d78ff2a716..07590d444d 100644 --- a/ompi/mca/btl/openib/btl_openib_endpoint.h +++ b/ompi/mca/btl/openib/btl_openib_endpoint.h @@ -235,6 +235,8 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_base_endpoint_t*, const int); void mca_btl_openib_endpoint_connect_eager_rdma(mca_btl_openib_endpoint_t*); int mca_btl_openib_endpoint_post_recvs(mca_btl_openib_endpoint_t*); void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t*); +void mca_btl_openib_endpoint_init(mca_btl_openib_module_t*, + mca_btl_base_endpoint_t*); static inline int post_recvs(mca_btl_base_endpoint_t *ep, const int qp, const int num_post) diff --git a/ompi/mca/btl/openib/btl_openib_xrc.c b/ompi/mca/btl/openib/btl_openib_xrc.c index 81969598e4..d5c9a00871 100644 --- a/ompi/mca/btl/openib/btl_openib_xrc.c +++ b/ompi/mca/btl/openib/btl_openib_xrc.c @@ -88,7 +88,7 @@ static void ib_address_constructor(ib_address_t *ib_addr) ib_addr->subnet_id = 0; ib_addr->lid = 0; ib_addr->status = MCA_BTL_IB_ADDR_CLOSED; - ib_addr->ep_xrc_master = NULL; + ib_addr->qp = NULL; OBJ_CONSTRUCT(&ib_addr->addr_lock, opal_mutex_t); OBJ_CONSTRUCT(&ib_addr->pending_ep, opal_list_t); } diff --git a/ompi/mca/btl/openib/btl_openib_xrc.h b/ompi/mca/btl/openib/btl_openib_xrc.h index 4f73c733c4..a859c022bd 100644 --- a/ompi/mca/btl/openib/btl_openib_xrc.h +++ b/ompi/mca/btl/openib/btl_openib_xrc.h @@ -31,8 +31,10 @@ struct ib_address_t { void *key; /* the key with size 80bit - [subnet(64) LID(16bit)] */ uint64_t subnet_id; /* caching subnet_id */ uint16_t lid; /* caching lid */ - opal_list_t pending_ep; /* list of endpoints that use this ib_address */ - mca_btl_openib_endpoint_t *ep_xrc_master; /* pointer to endpoint that keeps the xrc connection */ + opal_list_t pending_ep; /* list of endpoints that use this ib_address */ + mca_btl_openib_qp_t *qp; /* pointer to qp that will be used + for communication with the + destination */ opal_mutex_t addr_lock; /* protection */ mca_btl_openib_ib_addr_state_t status; /* ib port status */ }; diff --git a/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c b/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c index 480b44cbd0..30f38a3cbb 100644 --- a/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c +++ b/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c @@ -247,8 +247,7 @@ static int xoob_qp_create(mca_btl_base_endpoint_t* endpoint, xoob_qp_type type) qp_init_attr.cap.max_recv_wr = mca_btl_openib_component.qp_infos->rd_num; /* reserve additional wr for eager rdma credit management */ - qp_init_attr.cap.max_send_wr = - mca_btl_openib_component.qp_infos->u.srq_qp.sd_max + + qp_init_attr.cap.max_send_wr = endpoint->ib_addr->qp->sd_wqe + (mca_btl_openib_component.use_eager_rdma ? mca_btl_openib_component.max_eager_rdma : 0);