diff --git a/opal/mca/btl/openib/btl_openib_endpoint.c b/opal/mca/btl/openib/btl_openib_endpoint.c index 277ff21dab..484beb56dc 100644 --- a/opal/mca/btl/openib/btl_openib_endpoint.c +++ b/opal/mca/btl/openib/btl_openib_endpoint.c @@ -183,12 +183,42 @@ endpoint_init_qp_xrc(mca_btl_base_endpoint_t *ep, const int qp) (mca_btl_openib_component.use_eager_rdma ? mca_btl_openib_component.max_eager_rdma : 0); mca_btl_openib_endpoint_qp_t *ep_qp = &ep->qps[qp]; + int32_t wqe, incr = mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max; + int rc; + + opal_mutex_lock (&ep->ib_addr->addr_lock); + ep_qp->qp = ep->ib_addr->qp; - ep_qp->qp->sd_wqe += mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max; - /* make sure that we don't overrun maximum supported by device */ - if (ep_qp->qp->sd_wqe > max) - ep_qp->qp->sd_wqe = max; + if (ep->ib_addr->max_wqe + incr > max) { + /* make sure that we don't overrun maximum supported by device */ + incr = max - ep->ib_addr->max_wqe; + } + + wqe = ep->ib_addr->max_wqe + incr + + (mca_btl_openib_component.use_eager_rdma ? 
+ mca_btl_openib_component.max_eager_rdma : 0); + + ep->ib_addr->max_wqe += incr; + + if (NULL != ep_qp->qp->lcl_qp) { + struct ibv_qp_attr qp_attr; + + /* if this is modified the code in udcm_xrc_send_qp_create may + * need to be updated as well */ + qp_attr.cap.max_recv_wr = 0; + qp_attr.cap.max_send_wr = wqe; + qp_attr.cap.max_inline_data = ep->endpoint_btl->device->max_inline_data; + qp_attr.cap.max_send_sge = 1; + qp_attr.cap.max_recv_sge = 1; /* we do not use SG list */ + rc = ibv_modify_qp (ep_qp->qp->lcl_qp, &qp_attr, IBV_QP_CAP); + if (0 == rc) { + opal_atomic_add_32 (&ep_qp->qp->sd_wqe, incr); + } + } else { + ep_qp->qp->sd_wqe = ep->ib_addr->max_wqe; + } ep_qp->qp->users++; + opal_mutex_unlock (&ep->ib_addr->addr_lock); } static void endpoint_init_qp(mca_btl_base_endpoint_t *ep, const int qp) diff --git a/opal/mca/btl/openib/btl_openib_endpoint.h b/opal/mca/btl/openib/btl_openib_endpoint.h index ed80aec639..c74cd5b0a6 100644 --- a/opal/mca/btl/openib/btl_openib_endpoint.h +++ b/opal/mca/btl/openib/btl_openib_endpoint.h @@ -141,7 +141,7 @@ typedef struct mca_btl_openib_endpoint_srq_qp_t { typedef struct mca_btl_openib_qp_t { struct ibv_qp *lcl_qp; uint32_t lcl_psn; - int32_t sd_wqe; /**< number of available send wqe entries */ + volatile int32_t sd_wqe; /**< number of available send wqe entries */ int32_t sd_wqe_inflight; int wqe_count; int users; diff --git a/opal/mca/btl/openib/btl_openib_xrc.c b/opal/mca/btl/openib/btl_openib_xrc.c index 1952c31b12..0b3322ba1d 100644 --- a/opal/mca/btl/openib/btl_openib_xrc.c +++ b/opal/mca/btl/openib/btl_openib_xrc.c @@ -125,6 +125,7 @@ static void ib_address_constructor(ib_address_t *ib_addr) ib_addr->lid = 0; ib_addr->status = MCA_BTL_IB_ADDR_CLOSED; ib_addr->qp = NULL; + ib_addr->max_wqe = 0; /* NTH: make the addr_lock recursive because mca_btl_openib_endpoint_connected can call * into the CPC with the lock held. The alternative would be to drop the lock but the * lock is never obtained in a critical path. 
*/ diff --git a/opal/mca/btl/openib/btl_openib_xrc.h b/opal/mca/btl/openib/btl_openib_xrc.h index 72e1509c1c..30313471ad 100644 --- a/opal/mca/btl/openib/btl_openib_xrc.h +++ b/opal/mca/btl/openib/btl_openib_xrc.h @@ -1,3 +1,4 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science @@ -5,6 +6,8 @@ * Copyright (c) 2014 Bull SAS. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -43,6 +46,7 @@ struct ib_address_t { uint32_t remote_xrc_rcv_qp_num; /* remote xrc qp number */ opal_mutex_t addr_lock; /* protection */ mca_btl_openib_ib_addr_state_t status; /* ib port status */ + int32_t max_wqe; }; typedef struct ib_address_t ib_address_t; diff --git a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c index 7920fd7aa3..29b7de3554 100644 --- a/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c +++ b/opal/mca/btl/openib/connect/btl_openib_connect_udcm.c @@ -2542,7 +2542,7 @@ static int udcm_xrc_send_qp_create (mca_btl_base_endpoint_t *lcl_ep) psn = &lcl_ep->qps[0].qp->lcl_psn; /* reserve additional wr for eager rdma credit management */ - send_wr = lcl_ep->ib_addr->qp->sd_wqe + + send_wr = lcl_ep->ib_addr->max_wqe + (mca_btl_openib_component.use_eager_rdma ? 
mca_btl_openib_component.max_eager_rdma : 0); #if OPAL_HAVE_CONNECTX_XRC_DOMAINS @@ -2554,6 +2554,8 @@ static int udcm_xrc_send_qp_create (mca_btl_base_endpoint_t *lcl_ep) qp_init_attr.send_cq = qp_init_attr.recv_cq = openib_btl->device->ib_cq[prio]; + /* if this code is updated the code in endpoint_init_qp_xrc may need to + * be updated as well */ /* no need recv queue; receives are posted to srq */ qp_init_attr.cap.max_recv_wr = 0; qp_init_attr.cap.max_send_wr = send_wr;