1
1

Put send completions to low prio CQ. Receive is more important.

This commit was SVN r16817.
Этот коммит содержится в:
Gleb Natapov 2007-12-02 14:46:37 +00:00
родитель b17f5b7480
Коммит a774cd98f8
6 изменённых файлов: 47 добавлений и 72 удалений

Просмотреть файл

@@ -287,12 +287,10 @@ static int create_srq(mca_btl_openib_module_t *openib_btl)
openib_btl->qps[qp].u.srq_qp.rd_posted = 0;
#if HAVE_XRC
if(BTL_OPENIB_QP_TYPE_XRC(qp)) {
int prio = qp_cq_prio(qp);
openib_btl->qps[qp].u.srq_qp.srq =
ibv_create_xrc_srq(openib_btl->hca->ib_pd,
openib_btl->hca->xrc_domain,
openib_btl->hca->ib_cq[prio], &attr);
openib_btl->hca->cq_users[prio]++;
openib_btl->hca->ib_cq[qp_cq_prio(qp)], &attr);
} else
#endif
{
@@ -310,8 +308,20 @@ static int create_srq(mca_btl_openib_module_t *openib_btl)
return OMPI_SUCCESS;
}
static int adjust_cq(mca_btl_openib_hca_t *hca, const int cq_size, const int cq)
static int adjust_cq(mca_btl_openib_hca_t *hca, const int cq)
{
uint32_t cq_size = hca->cq_size[cq];
/* make sure we don't exceed the maximum CQ size and that we
* don't size the queue smaller than otherwise requested
*/
if(cq_size < mca_btl_openib_component.ib_cq_size[cq])
cq_size = mca_btl_openib_component.ib_cq_size[cq];
if(cq_size > (uint32_t)hca->ib_dev_attr.max_cq)
cq_size = hca->ib_dev_attr.max_cq;
if(NULL == hca->ib_cq[cq]) {
hca->ib_cq[cq] = ibv_create_cq_compat(hca->ib_dev_context, cq_size,
#if OMPI_ENABLE_PROGRESS_THREADS == 1
@@ -347,7 +357,7 @@ static int adjust_cq(mca_btl_openib_hca_t *hca, const int cq_size, const int cq)
#endif
}
#ifdef HAVE_IBV_RESIZE_CQ
else {
else if (cq_size > mca_btl_openib_component.ib_cq_size[cq]){
int rc;
rc = ibv_resize_cq(hca->ib_cq[cq], cq_size);
/* For ConnectX the resize CQ is not implemented and verbs returns -ENOSYS
@@ -358,66 +368,37 @@ static int adjust_cq(mca_btl_openib_hca_t *hca, const int cq_size, const int cq)
}
}
#endif
hca->cq_size[cq] = cq_size;
return OMPI_SUCCESS;
}
static int mca_btl_openib_size_queues( struct mca_btl_openib_module_t* openib_btl, size_t nprocs)
static int mca_btl_openib_size_queues(struct mca_btl_openib_module_t* openib_btl, size_t nprocs)
{
uint32_t min_hp_cq_size = openib_btl->hca->cq_size[BTL_OPENIB_HP_CQ],
min_lp_cq_size = openib_btl->hca->cq_size[BTL_OPENIB_HP_CQ],
cq_size;
uint32_t send_cqes, recv_cqes;
int rc = OMPI_SUCCESS, qp;
mca_btl_openib_hca_t *hca = openib_btl->hca;
/* figure out reasonable sizes for completion queues */
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
if(BTL_OPENIB_QP_TYPE_SRQ(qp)) {
cq_size = mca_btl_openib_component.qp_infos[qp].rd_num +
mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max;
if(mca_btl_openib_component.qp_infos[qp].size <=
mca_btl_openib_component.eager_limit) {
min_hp_cq_size += cq_size;
} else {
min_lp_cq_size += cq_size;
}
send_cqes = mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max;
recv_cqes = mca_btl_openib_component.qp_infos[qp].rd_num;
} else {
cq_size = (mca_btl_openib_component.qp_infos[qp].rd_num +
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv) *
2 * nprocs;
if(mca_btl_openib_component.qp_infos[qp].size <=
mca_btl_openib_component.eager_limit) {
min_hp_cq_size += cq_size;
} else {
min_lp_cq_size += cq_size;
}
send_cqes = (mca_btl_openib_component.qp_infos[qp].rd_num +
mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv) * nprocs;
recv_cqes = send_cqes;
}
openib_btl->hca->cq_size[qp_cq_prio(qp)] += recv_cqes;
openib_btl->hca->cq_size[BTL_OPENIB_LP_CQ] += send_cqes;
}
/* make sure we don't exceed the maximum CQ size and that we
* don't size the queue smaller than otherwise requested
*/
if(min_lp_cq_size < mca_btl_openib_component.ib_lp_cq_size)
min_lp_cq_size = mca_btl_openib_component.ib_lp_cq_size;
if(min_lp_cq_size > (uint32_t)openib_btl->hca->ib_dev_attr.max_cq)
min_lp_cq_size = openib_btl->hca->ib_dev_attr.max_cq;
rc = adjust_cq(hca, BTL_OPENIB_HP_CQ);
if(rc != OMPI_SUCCESS)
goto out;
if(min_hp_cq_size < mca_btl_openib_component.ib_hp_cq_size)
min_hp_cq_size = mca_btl_openib_component.ib_hp_cq_size;
if(min_hp_cq_size > (uint32_t)openib_btl->hca->ib_dev_attr.max_cq)
min_hp_cq_size = openib_btl->hca->ib_dev_attr.max_cq;
if(min_hp_cq_size != hca->cq_size[BTL_OPENIB_HP_CQ]) {
rc = adjust_cq(hca, min_hp_cq_size, BTL_OPENIB_HP_CQ);
if(rc != OMPI_SUCCESS)
goto out;
}
if(min_lp_cq_size != hca->cq_size[BTL_OPENIB_LP_CQ]) {
rc = adjust_cq(hca, min_lp_cq_size, BTL_OPENIB_LP_CQ);
if(rc != OMPI_SUCCESS)
goto out;
}
rc = adjust_cq(hca, BTL_OPENIB_LP_CQ);
if(rc != OMPI_SUCCESS)
goto out;
if(0 == openib_btl->num_peers)
rc = create_srq(openib_btl);

Просмотреть файл

@@ -144,8 +144,7 @@ struct mca_btl_openib_component_t {
uint32_t reg_mru_len; /**< Length of the registration cache most recently used list */
uint32_t use_srq; /**< Use the Shared Receive Queue (SRQ mode) */
uint32_t ib_lp_cq_size; /**< Max outstanding CQE on the CQ */
uint32_t ib_hp_cq_size; /**< Max outstanding CQE on the CQ */
uint32_t ib_cq_size[2]; /**< Max outstanding CQE on the CQ */
uint32_t ib_sg_list_size; /**< Max scatter/gather descriptor entries on the WQ */
uint32_t ib_pkey_ix; /**< InfiniBand pkey index */
@@ -253,7 +252,6 @@ struct mca_btl_openib_hca_t {
struct ibv_device_attr ib_dev_attr;
struct ibv_pd *ib_pd;
struct ibv_cq *ib_cq[2];
uint32_t cq_users[2];
uint32_t cq_size[2];
mca_mpool_base_module_t *mpool;
/* MTU for this HCA */

Просмотреть файл

@@ -495,8 +495,6 @@ static int init_one_hca(opal_list_t *btl_list, struct ibv_device* ib_dev)
hca->btls = 0;
hca->ib_cq[BTL_OPENIB_HP_CQ] = NULL;
hca->ib_cq[BTL_OPENIB_LP_CQ] = NULL;
hca->cq_users[BTL_OPENIB_HP_CQ] = 0;
hca->cq_users[BTL_OPENIB_LP_CQ] = 0;
hca->cq_size[BTL_OPENIB_HP_CQ] = 0;
hca->cq_size[BTL_OPENIB_LP_CQ] = 0;
OBJ_CONSTRUCT(&hca->hca_lock, opal_mutex_t);
@@ -1550,21 +1548,26 @@ static int btl_openib_module_progress(mca_btl_openib_hca_t* hca)
{
static char *cq_name[] = {"HP CQ", "LP CQ"};
int cq, qp;
int count = 0,ne = 0;
int count = 0, ne = 0;
mca_btl_openib_com_frag_t* frag;
mca_btl_base_descriptor_t *des;
mca_btl_openib_endpoint_t* endpoint;
mca_btl_openib_module_t *openib_btl = NULL;
struct ibv_wc wc;
for(cq = 0; cq < 2; cq++) {
if(0 == hca->cq_users[cq])
continue;
for(cq = 0; cq < 2;) {
ne = ibv_poll_cq(hca->ib_cq[cq], 1, &wc);
if(0 == ne)
if(0 == ne) {
/* don't check low prio cq if there was something in high prio cq */
if(count)
break;
cq++;
continue;
}
if(ne < 0)
goto error;
count++;
des = (mca_btl_base_descriptor_t*)(uintptr_t)wc.wr_id;
frag = to_com_frag(des);
@@ -1604,8 +1607,6 @@ static int btl_openib_module_progress(mca_btl_openib_hca_t* hca)
/* new wqe or/and get token available. Try to progress pending frags */
progress_pending_frags_wqe(endpoint->qps[qp].qp);
mca_btl_openib_frag_progress_pending_put_get(endpoint, qp);
count++;
break;
case IBV_WC_RECV:
if(wc.wc_flags & IBV_WC_WITH_IMM) {
@@ -1623,8 +1624,6 @@ static int btl_openib_module_progress(mca_btl_openib_hca_t* hca)
return 0;
}
count++;
/* decide if it is time to setup an eager rdma channel */
if (!endpoint->eager_rdma_local.base.pval &&
endpoint->use_eager_rdma &&
@@ -1640,7 +1639,8 @@ static int btl_openib_module_progress(mca_btl_openib_hca_t* hca)
BTL_ERROR(("Unhandled work completion opcode is %d",
wc.opcode));
if(openib_btl)
openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_FATAL);
openib_btl->error_cb(&openib_btl->super,
MCA_BTL_ERROR_FLAGS_FATAL);
break;
}
}

Просмотреть файл

@@ -214,8 +214,8 @@ int btl_openib_register_mca_params(void)
"queue (will automatically be set to a minimum of "
"(2 * number_of_peers * btl_openib_rd_num))",
1000, &ival, REGINT_GE_ONE));
mca_btl_openib_component.ib_lp_cq_size =
mca_btl_openib_component.ib_hp_cq_size = (uint32_t) ival;
mca_btl_openib_component.ib_cq_size[BTL_OPENIB_LP_CQ] =
mca_btl_openib_component.ib_cq_size[BTL_OPENIB_HP_CQ] = (uint32_t) ival;
CHECK(reg_int("ib_sg_list_size", "Size of IB segment list "
"(must be >= 1)",

Просмотреть файл

@@ -330,14 +330,13 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
struct ibv_qp *my_qp;
struct ibv_qp_init_attr init_attr;
struct ibv_qp_attr attr;
int prio = qp_cq_prio(qp);
memset(&init_attr, 0, sizeof(init_attr));
memset(&attr, 0, sizeof(attr));
init_attr.qp_type = IBV_QPT_RC;
init_attr.send_cq = openib_btl->hca->ib_cq[prio];
init_attr.recv_cq = openib_btl->hca->ib_cq[prio];
init_attr.send_cq = openib_btl->hca->ib_cq[BTL_OPENIB_LP_CQ];
init_attr.recv_cq = openib_btl->hca->ib_cq[qp_cq_prio(qp)];
init_attr.srq = srq;
init_attr.cap.max_send_sge = mca_btl_openib_component.ib_sg_list_size;
init_attr.cap.max_recv_sge = mca_btl_openib_component.ib_sg_list_size;
@@ -371,7 +370,6 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
/* Setup meta data on the endpoint */
endpoint->qps[qp].qp->lcl_psn = lrand48() & 0xffffff;
endpoint->qps[qp].credit_frag = NULL;
openib_btl->hca->cq_users[prio]++;
return OMPI_SUCCESS;
}

Просмотреть файл

@@ -250,8 +250,6 @@ static int xoob_qp_create(mca_btl_base_endpoint_t* endpoint, xoob_qp_type type)
qp_init_attr.send_cq = qp_init_attr.recv_cq = openib_btl->hca->ib_cq[prio];
openib_btl->hca->cq_users[prio]++;
/* no need recv queue; receives are posted to srq */
qp_init_attr.cap.max_recv_wr = 0;
qp_init_attr.cap.max_send_wr = send_wr;