btl/openib: fix XRC WQE calculation
Before dynamic add_procs support was committed to master, add_procs was called with every proc in the job. The XRC code in the openib btl took advantage of this and sized the number of work queue entries (WQEs) based on all the procs on a remote node. Since that is no longer the case, we cannot simply increment the sd_wqe field on the queue pair.

To fix the issue, a new field has been added to the XRC queue pair structure to track the total number of WQEs on the queue pair. If adding a new endpoint increases the number of WQEs and the XRC queue pair is already connected, the code attempts to modify the number of WQEs on the queue pair. A failure is ignored; the only consequence is that the number of active send work requests on the XRC queue pair is more limited.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Parent: f19c647f21
Commit: 56bdcd0888
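The heart of the fix is the attempt to enlarge the send queue of an already-created XRC queue pair with ibv_modify_qp() and the IBV_QP_CAP attribute mask, treating a refusal by the device as harmless. Below is a minimal standalone sketch of that pattern, not the btl code itself: the helper name try_grow_send_queue and its parameters are illustrative assumptions; only struct ibv_qp_attr, IBV_QP_CAP, and ibv_modify_qp() come from libibverbs.

    #include <string.h>
    #include <infiniband/verbs.h>

    /* Try to raise the send-queue capacity of an existing queue pair.
     * Returns 0 on success.  A non-zero return is not fatal for the
     * pattern used here: the queue pair simply keeps its old capacity,
     * which only limits how many sends may be outstanding at once. */
    static int try_grow_send_queue (struct ibv_qp *qp, uint32_t new_max_send_wr,
                                    uint32_t max_inline_data)
    {
        struct ibv_qp_attr qp_attr;

        memset (&qp_attr, 0, sizeof (qp_attr));
        qp_attr.cap.max_send_wr     = new_max_send_wr; /* desired send queue depth */
        qp_attr.cap.max_recv_wr     = 0;               /* receives are posted to the SRQ */
        qp_attr.cap.max_send_sge    = 1;
        qp_attr.cap.max_recv_sge    = 1;               /* no SG list is used */
        qp_attr.cap.max_inline_data = max_inline_data;

        /* Resizing a live queue pair is optional for the provider, so the
         * caller treats failure as "keep the previous limit". */
        return ibv_modify_qp (qp, &qp_attr, IBV_QP_CAP);
    }

In the diff below, the available-WQE counter (sd_wqe) is only incremented when the modify succeeds, which is why an ignored failure merely caps the number of outstanding sends instead of breaking the connection.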
@@ -183,12 +183,42 @@ endpoint_init_qp_xrc(mca_btl_base_endpoint_t *ep, const int qp)
         (mca_btl_openib_component.use_eager_rdma ?
          mca_btl_openib_component.max_eager_rdma : 0);
     mca_btl_openib_endpoint_qp_t *ep_qp = &ep->qps[qp];
+    int32_t wqe, incr = mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max;
+    int rc;

     opal_mutex_lock (&ep->ib_addr->addr_lock);

     ep_qp->qp = ep->ib_addr->qp;
-    ep_qp->qp->sd_wqe += mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max;
-    /* make sure that we don't overrun maximum supported by device */
-    if (ep_qp->qp->sd_wqe > max)
-        ep_qp->qp->sd_wqe = max;
+    if (ep->ib_addr->max_wqe + incr > max) {
+        /* make sure that we don't overrun maximum supported by device */
+        incr = max - ep->ib_addr->max_wqe;
+    }
+
+    wqe = ep->ib_addr->max_wqe + incr +
+        (mca_btl_openib_component.use_eager_rdma ?
+         mca_btl_openib_component.max_eager_rdma : 0);
+
+    ep->ib_addr->max_wqe += incr;
+
+    if (NULL != ep_qp->qp->lcl_qp) {
+        struct ibv_qp_attr qp_attr;
+
+        /* if this is modified the code in udcm_xrc_send_qp_create may
+         * need to be updated as well */
+        qp_attr.cap.max_recv_wr = 0;
+        qp_attr.cap.max_send_wr = wqe;
+        qp_attr.cap.max_inline_data = ep->endpoint_btl->device->max_inline_data;
+        qp_attr.cap.max_send_sge = 1;
+        qp_attr.cap.max_recv_sge = 1; /* we do not use SG list */
+        rc = ibv_modify_qp (ep_qp->qp->lcl_qp, &qp_attr, IBV_QP_CAP);
+        if (0 == rc) {
+            opal_atomic_add_32 (&ep_qp->qp->sd_wqe, incr);
+        }
+    } else {
+        ep_qp->qp->sd_wqe = ep->ib_addr->max_wqe;
+    }
     ep_qp->qp->users++;
     opal_mutex_unlock (&ep->ib_addr->addr_lock);
 }

 static void endpoint_init_qp(mca_btl_base_endpoint_t *ep, const int qp)
@@ -141,7 +141,7 @@ typedef struct mca_btl_openib_endpoint_srq_qp_t {
 typedef struct mca_btl_openib_qp_t {
     struct ibv_qp *lcl_qp;
     uint32_t lcl_psn;
-    int32_t sd_wqe;      /**< number of available send wqe entries */
+    volatile int32_t sd_wqe;      /**< number of available send wqe entries */
     int32_t sd_wqe_inflight;
     int wqe_count;
     int users;
@@ -125,6 +125,7 @@ static void ib_address_constructor(ib_address_t *ib_addr)
     ib_addr->lid = 0;
     ib_addr->status = MCA_BTL_IB_ADDR_CLOSED;
     ib_addr->qp = NULL;
+    ib_addr->max_wqe = 0;
     /* NTH: make the addr_lock recursive because mca_btl_openib_endpoint_connected can call
      * into the CPC with the lock held. The alternative would be to drop the lock but the
      * lock is never obtained in a critical path. */
@@ -1,3 +1,4 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
 /*
  * Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2014      Research Organization for Information Science
@@ -5,6 +6,8 @@
  * Copyright (c) 2014      Bull SAS. All rights reserved.
  * Copyright (c) 2015      Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
+ * Copyright (c) 2016      Los Alamos National Security, LLC. All rights
+ *                         reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -43,6 +46,7 @@ struct ib_address_t {
     uint32_t remote_xrc_rcv_qp_num; /* remote xrc qp number */
     opal_mutex_t addr_lock; /* protection */
     mca_btl_openib_ib_addr_state_t status; /* ib port status */
+    int32_t max_wqe;
 };
 typedef struct ib_address_t ib_address_t;

@@ -2542,7 +2542,7 @@ static int udcm_xrc_send_qp_create (mca_btl_base_endpoint_t *lcl_ep)
     psn = &lcl_ep->qps[0].qp->lcl_psn;

     /* reserve additional wr for eager rdma credit management */
-    send_wr = lcl_ep->ib_addr->qp->sd_wqe +
+    send_wr = lcl_ep->ib_addr->max_wqe +
         (mca_btl_openib_component.use_eager_rdma ?
          mca_btl_openib_component.max_eager_rdma : 0);
 #if OPAL_HAVE_CONNECTX_XRC_DOMAINS
@@ -2554,6 +2554,8 @@ static int udcm_xrc_send_qp_create (mca_btl_base_endpoint_t *lcl_ep)

     qp_init_attr.send_cq = qp_init_attr.recv_cq = openib_btl->device->ib_cq[prio];

+    /* if this code is update the code in endpoint_init_qp_xrc may need to
+     * be updated as well */
     /* no need recv queue; receives are posted to srq */
     qp_init_attr.cap.max_recv_wr = 0;
     qp_init_attr.cap.max_send_wr = send_wr;