Put send completions on the low-priority CQ; receive completions are more important.
This commit was SVN r16817.
Этот коммит содержится в:
родитель
b17f5b7480
Коммит
a774cd98f8
@ -287,12 +287,10 @@ static int create_srq(mca_btl_openib_module_t *openib_btl)
|
||||
openib_btl->qps[qp].u.srq_qp.rd_posted = 0;
|
||||
#if HAVE_XRC
|
||||
if(BTL_OPENIB_QP_TYPE_XRC(qp)) {
|
||||
int prio = qp_cq_prio(qp);
|
||||
openib_btl->qps[qp].u.srq_qp.srq =
|
||||
ibv_create_xrc_srq(openib_btl->hca->ib_pd,
|
||||
openib_btl->hca->xrc_domain,
|
||||
openib_btl->hca->ib_cq[prio], &attr);
|
||||
openib_btl->hca->cq_users[prio]++;
|
||||
openib_btl->hca->ib_cq[qp_cq_prio(qp)], &attr);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
@ -310,8 +308,20 @@ static int create_srq(mca_btl_openib_module_t *openib_btl)
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
static int adjust_cq(mca_btl_openib_hca_t *hca, const int cq_size, const int cq)
|
||||
|
||||
static int adjust_cq(mca_btl_openib_hca_t *hca, const int cq)
|
||||
{
|
||||
uint32_t cq_size = hca->cq_size[cq];
|
||||
|
||||
/* make sure we don't exceed the maximum CQ size and that we
|
||||
* don't size the queue smaller than otherwise requested
|
||||
*/
|
||||
if(cq_size < mca_btl_openib_component.ib_cq_size[cq])
|
||||
cq_size = mca_btl_openib_component.ib_cq_size[cq];
|
||||
|
||||
if(cq_size > (uint32_t)hca->ib_dev_attr.max_cq)
|
||||
cq_size = hca->ib_dev_attr.max_cq;
|
||||
|
||||
if(NULL == hca->ib_cq[cq]) {
|
||||
hca->ib_cq[cq] = ibv_create_cq_compat(hca->ib_dev_context, cq_size,
|
||||
#if OMPI_ENABLE_PROGRESS_THREADS == 1
|
||||
@ -347,7 +357,7 @@ static int adjust_cq(mca_btl_openib_hca_t *hca, const int cq_size, const int cq)
|
||||
#endif
|
||||
}
|
||||
#ifdef HAVE_IBV_RESIZE_CQ
|
||||
else {
|
||||
else if (cq_size > mca_btl_openib_component.ib_cq_size[cq]){
|
||||
int rc;
|
||||
rc = ibv_resize_cq(hca->ib_cq[cq], cq_size);
|
||||
/* For ConnectX the resize CQ is not implemented and verbs returns -ENOSYS
|
||||
@ -358,66 +368,37 @@ static int adjust_cq(mca_btl_openib_hca_t *hca, const int cq_size, const int cq)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
hca->cq_size[cq] = cq_size;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int mca_btl_openib_size_queues( struct mca_btl_openib_module_t* openib_btl, size_t nprocs)
|
||||
static int mca_btl_openib_size_queues(struct mca_btl_openib_module_t* openib_btl, size_t nprocs)
|
||||
{
|
||||
uint32_t min_hp_cq_size = openib_btl->hca->cq_size[BTL_OPENIB_HP_CQ],
|
||||
min_lp_cq_size = openib_btl->hca->cq_size[BTL_OPENIB_HP_CQ],
|
||||
cq_size;
|
||||
uint32_t send_cqes, recv_cqes;
|
||||
int rc = OMPI_SUCCESS, qp;
|
||||
mca_btl_openib_hca_t *hca = openib_btl->hca;
|
||||
|
||||
/* figure out reasonable sizes for completion queues */
|
||||
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
|
||||
if(BTL_OPENIB_QP_TYPE_SRQ(qp)) {
|
||||
cq_size = mca_btl_openib_component.qp_infos[qp].rd_num +
|
||||
mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max;
|
||||
if(mca_btl_openib_component.qp_infos[qp].size <=
|
||||
mca_btl_openib_component.eager_limit) {
|
||||
min_hp_cq_size += cq_size;
|
||||
} else {
|
||||
min_lp_cq_size += cq_size;
|
||||
}
|
||||
send_cqes = mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max;
|
||||
recv_cqes = mca_btl_openib_component.qp_infos[qp].rd_num;
|
||||
} else {
|
||||
cq_size = (mca_btl_openib_component.qp_infos[qp].rd_num +
|
||||
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv) *
|
||||
2 * nprocs;
|
||||
if(mca_btl_openib_component.qp_infos[qp].size <=
|
||||
mca_btl_openib_component.eager_limit) {
|
||||
min_hp_cq_size += cq_size;
|
||||
} else {
|
||||
min_lp_cq_size += cq_size;
|
||||
}
|
||||
send_cqes = (mca_btl_openib_component.qp_infos[qp].rd_num +
|
||||
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv) * nprocs;
|
||||
recv_cqes = send_cqes;
|
||||
}
|
||||
openib_btl->hca->cq_size[qp_cq_prio(qp)] += recv_cqes;
|
||||
openib_btl->hca->cq_size[BTL_OPENIB_LP_CQ] += send_cqes;
|
||||
}
|
||||
|
||||
/* make sure we don't exceed the maximum CQ size and that we
|
||||
* don't size the queue smaller than otherwise requested
|
||||
*/
|
||||
if(min_lp_cq_size < mca_btl_openib_component.ib_lp_cq_size)
|
||||
min_lp_cq_size = mca_btl_openib_component.ib_lp_cq_size;
|
||||
if(min_lp_cq_size > (uint32_t)openib_btl->hca->ib_dev_attr.max_cq)
|
||||
min_lp_cq_size = openib_btl->hca->ib_dev_attr.max_cq;
|
||||
rc = adjust_cq(hca, BTL_OPENIB_HP_CQ);
|
||||
if(rc != OMPI_SUCCESS)
|
||||
goto out;
|
||||
|
||||
if(min_hp_cq_size < mca_btl_openib_component.ib_hp_cq_size)
|
||||
min_hp_cq_size = mca_btl_openib_component.ib_hp_cq_size;
|
||||
if(min_hp_cq_size > (uint32_t)openib_btl->hca->ib_dev_attr.max_cq)
|
||||
min_hp_cq_size = openib_btl->hca->ib_dev_attr.max_cq;
|
||||
|
||||
if(min_hp_cq_size != hca->cq_size[BTL_OPENIB_HP_CQ]) {
|
||||
rc = adjust_cq(hca, min_hp_cq_size, BTL_OPENIB_HP_CQ);
|
||||
if(rc != OMPI_SUCCESS)
|
||||
goto out;
|
||||
}
|
||||
if(min_lp_cq_size != hca->cq_size[BTL_OPENIB_LP_CQ]) {
|
||||
rc = adjust_cq(hca, min_lp_cq_size, BTL_OPENIB_LP_CQ);
|
||||
if(rc != OMPI_SUCCESS)
|
||||
goto out;
|
||||
}
|
||||
rc = adjust_cq(hca, BTL_OPENIB_LP_CQ);
|
||||
if(rc != OMPI_SUCCESS)
|
||||
goto out;
|
||||
|
||||
if(0 == openib_btl->num_peers)
|
||||
rc = create_srq(openib_btl);
|
||||
|
@ -144,8 +144,7 @@ struct mca_btl_openib_component_t {
|
||||
uint32_t reg_mru_len; /**< Length of the registration cache most recently used list */
|
||||
uint32_t use_srq; /**< Use the Shared Receive Queue (SRQ mode) */
|
||||
|
||||
uint32_t ib_lp_cq_size; /**< Max outstanding CQE on the CQ */
|
||||
uint32_t ib_hp_cq_size; /**< Max outstanding CQE on the CQ */
|
||||
uint32_t ib_cq_size[2]; /**< Max outstanding CQE on the CQ */
|
||||
|
||||
uint32_t ib_sg_list_size; /**< Max scatter/gather descriptor entries on the WQ */
|
||||
uint32_t ib_pkey_ix; /**< InfiniBand pkey index */
|
||||
@ -253,7 +252,6 @@ struct mca_btl_openib_hca_t {
|
||||
struct ibv_device_attr ib_dev_attr;
|
||||
struct ibv_pd *ib_pd;
|
||||
struct ibv_cq *ib_cq[2];
|
||||
uint32_t cq_users[2];
|
||||
uint32_t cq_size[2];
|
||||
mca_mpool_base_module_t *mpool;
|
||||
/* MTU for this HCA */
|
||||
|
@ -495,8 +495,6 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
|
||||
hca->btls = 0;
|
||||
hca->ib_cq[BTL_OPENIB_HP_CQ] = NULL;
|
||||
hca->ib_cq[BTL_OPENIB_LP_CQ] = NULL;
|
||||
hca->cq_users[BTL_OPENIB_HP_CQ] = 0;
|
||||
hca->cq_users[BTL_OPENIB_LP_CQ] = 0;
|
||||
hca->cq_size[BTL_OPENIB_HP_CQ] = 0;
|
||||
hca->cq_size[BTL_OPENIB_LP_CQ] = 0;
|
||||
OBJ_CONSTRUCT(&hca->hca_lock, opal_mutex_t);
|
||||
@ -1550,21 +1548,26 @@ static int btl_openib_module_progress(mca_btl_openib_hca_t* hca)
|
||||
{
|
||||
static char *cq_name[] = {"HP CQ", "LP CQ"};
|
||||
int cq, qp;
|
||||
int count = 0,ne = 0;
|
||||
int count = 0, ne = 0;
|
||||
mca_btl_openib_com_frag_t* frag;
|
||||
mca_btl_base_descriptor_t *des;
|
||||
mca_btl_openib_endpoint_t* endpoint;
|
||||
mca_btl_openib_module_t *openib_btl = NULL;
|
||||
struct ibv_wc wc;
|
||||
|
||||
for(cq = 0; cq < 2; cq++) {
|
||||
if(0 == hca->cq_users[cq])
|
||||
continue;
|
||||
for(cq = 0; cq < 2;) {
|
||||
ne = ibv_poll_cq(hca->ib_cq[cq], 1, &wc);
|
||||
if(0 == ne)
|
||||
if(0 == ne) {
|
||||
/* don't check low prio cq if there was something in high prio cq */
|
||||
if(count)
|
||||
break;
|
||||
cq++;
|
||||
continue;
|
||||
}
|
||||
if(ne < 0)
|
||||
goto error;
|
||||
|
||||
count++;
|
||||
|
||||
des = (mca_btl_base_descriptor_t*)(uintptr_t)wc.wr_id;
|
||||
frag = to_com_frag(des);
|
||||
@ -1604,8 +1607,6 @@ static int btl_openib_module_progress(mca_btl_openib_hca_t* hca)
|
||||
/* new wqe or/and get token available. Try to progress pending frags */
|
||||
progress_pending_frags_wqe(endpoint->qps[qp].qp);
|
||||
mca_btl_openib_frag_progress_pending_put_get(endpoint, qp);
|
||||
|
||||
count++;
|
||||
break;
|
||||
case IBV_WC_RECV:
|
||||
if(wc.wc_flags & IBV_WC_WITH_IMM) {
|
||||
@ -1623,8 +1624,6 @@ static int btl_openib_module_progress(mca_btl_openib_hca_t* hca)
|
||||
return 0;
|
||||
}
|
||||
|
||||
count++;
|
||||
|
||||
/* decide if it is time to setup an eager rdma channel */
|
||||
if (!endpoint->eager_rdma_local.base.pval &&
|
||||
endpoint->use_eager_rdma &&
|
||||
@ -1640,7 +1639,8 @@ static int btl_openib_module_progress(mca_btl_openib_hca_t* hca)
|
||||
BTL_ERROR(("Unhandled work completion opcode is %d",
|
||||
wc.opcode));
|
||||
if(openib_btl)
|
||||
openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_FATAL);
|
||||
openib_btl->error_cb(&openib_btl->super,
|
||||
MCA_BTL_ERROR_FLAGS_FATAL);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -214,8 +214,8 @@ int btl_openib_register_mca_params(void)
|
||||
"queue (will automatically be set to a minimum of "
|
||||
"(2 * number_of_peers * btl_openib_rd_num))",
|
||||
1000, &ival, REGINT_GE_ONE));
|
||||
mca_btl_openib_component.ib_lp_cq_size =
|
||||
mca_btl_openib_component.ib_hp_cq_size = (uint32_t) ival;
|
||||
mca_btl_openib_component.ib_cq_size[BTL_OPENIB_LP_CQ] =
|
||||
mca_btl_openib_component.ib_cq_size[BTL_OPENIB_HP_CQ] = (uint32_t) ival;
|
||||
|
||||
CHECK(reg_int("ib_sg_list_size", "Size of IB segment list "
|
||||
"(must be >= 1)",
|
||||
|
@ -330,14 +330,13 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
|
||||
struct ibv_qp *my_qp;
|
||||
struct ibv_qp_init_attr init_attr;
|
||||
struct ibv_qp_attr attr;
|
||||
int prio = qp_cq_prio(qp);
|
||||
|
||||
memset(&init_attr, 0, sizeof(init_attr));
|
||||
memset(&attr, 0, sizeof(attr));
|
||||
|
||||
init_attr.qp_type = IBV_QPT_RC;
|
||||
init_attr.send_cq = openib_btl->hca->ib_cq[prio];
|
||||
init_attr.recv_cq = openib_btl->hca->ib_cq[prio];
|
||||
init_attr.send_cq = openib_btl->hca->ib_cq[BTL_OPENIB_LP_CQ];
|
||||
init_attr.recv_cq = openib_btl->hca->ib_cq[qp_cq_prio(qp)];
|
||||
init_attr.srq = srq;
|
||||
init_attr.cap.max_send_sge = mca_btl_openib_component.ib_sg_list_size;
|
||||
init_attr.cap.max_recv_sge = mca_btl_openib_component.ib_sg_list_size;
|
||||
@ -371,7 +370,6 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
|
||||
/* Setup meta data on the endpoint */
|
||||
endpoint->qps[qp].qp->lcl_psn = lrand48() & 0xffffff;
|
||||
endpoint->qps[qp].credit_frag = NULL;
|
||||
openib_btl->hca->cq_users[prio]++;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -250,8 +250,6 @@ static int xoob_qp_create(mca_btl_base_endpoint_t* endpoint, xoob_qp_type type)
|
||||
|
||||
qp_init_attr.send_cq = qp_init_attr.recv_cq = openib_btl->hca->ib_cq[prio];
|
||||
|
||||
openib_btl->hca->cq_users[prio]++;
|
||||
|
||||
/* no need recv queue; receives are posted to srq */
|
||||
qp_init_attr.cap.max_recv_wr = 0;
|
||||
qp_init_attr.cap.max_send_wr = send_wr;
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user