From 453782797381998fec47fe6166b31cdedcee378b Mon Sep 17 00:00:00 2001
From: Pavel Shamis
Date: Thu, 19 Jun 2008 08:40:39 +0000
Subject: [PATCH] Make the QP allocation more efficient.

- the sq parameter was replaced with a max_inline parameter
- inline space is allocated only for the relevant QPs

This commit was SVN r18675.
---
 ompi/mca/btl/openib/btl_openib.c              |  2 +-
 ompi/mca/btl/openib/btl_openib.h              |  4 +--
 ompi/mca/btl/openib/btl_openib_endpoint.c     |  2 +-
 ompi/mca/btl/openib/btl_openib_endpoint.h     |  5 ++--
 ompi/mca/btl/openib/btl_openib_mca.c          |  6 ++--
 .../openib/connect/btl_openib_connect_ibcm.c  | 27 ++++++++++++++---
 .../openib/connect/btl_openib_connect_oob.c   | 26 ++++++++++++++--
 .../connect/btl_openib_connect_rdmacm.c       | 30 +++++++++++++++----
 .../openib/connect/btl_openib_connect_xoob.c  | 12 +++++---
 9 files changed, 88 insertions(+), 26 deletions(-)

diff --git a/ompi/mca/btl/openib/btl_openib.c b/ompi/mca/btl/openib/btl_openib.c
index f89f560808..1d3e3bdd13 100644
--- a/ompi/mca/btl/openib/btl_openib.c
+++ b/ompi/mca/btl/openib/btl_openib.c
@@ -1107,7 +1107,7 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
     /* Setting opcode on a frag constructor isn't enough since prepare_src
      * may return send_frag instead of put_frag */
     frag->sr_desc.opcode = IBV_WR_RDMA_WRITE;
-    frag->sr_desc.send_flags = ib_send_flags(descriptor->des_src->seg_len, ep);
+    frag->sr_desc.send_flags = ib_send_flags(descriptor->des_src->seg_len, &(ep->qps[qp]));
 
     if(ibv_post_send(ep->qps[qp].qp->lcl_qp, &frag->sr_desc, &bad_wr))
         return OMPI_ERROR;
diff --git a/ompi/mca/btl/openib/btl_openib.h b/ompi/mca/btl/openib/btl_openib.h
index 0b967c1a84..1a9283073f 100644
--- a/ompi/mca/btl/openib/btl_openib.h
+++ b/ompi/mca/btl/openib/btl_openib.h
@@ -172,7 +172,7 @@ struct mca_btl_openib_component_t {
     uint32_t ib_cq_size[2];      /**< Max outstanding CQE on the CQ */
 
-    uint32_t ib_sg_list_size;    /**< Max scatter/gather descriptor entries on the WQ */
+    uint32_t ib_max_inline_data; /**< Max size of inline data */
     uint32_t ib_pkey_ix;         /**< InfiniBand pkey index */
     uint32_t ib_pkey_val;
     uint32_t ib_psn;
@@ -374,8 +374,6 @@ struct mca_btl_openib_module_t {
     opal_mutex_t ib_lock;        /**< module level lock */
 
-    size_t ib_inline_max;        /**< max size of inline send*/
-
     size_t eager_rdma_frag_size;          /**< length of eager frag */
     volatile int32_t eager_rdma_channels; /**< number of open RDMA channels */
diff --git a/ompi/mca/btl/openib/btl_openib_endpoint.c b/ompi/mca/btl/openib/btl_openib_endpoint.c
index da4df1a246..fa0c11299d 100644
--- a/ompi/mca/btl/openib/btl_openib_endpoint.c
+++ b/ompi/mca/btl/openib/btl_openib_endpoint.c
@@ -62,7 +62,7 @@ static int post_send(mca_btl_openib_endpoint_t *ep,
     sg->length = seg->seg_len + sizeof(mca_btl_openib_header_t) + (rdma ?
             sizeof(mca_btl_openib_footer_t) : 0) + frag->coalesced_length;
 
-    sr_desc->send_flags = ib_send_flags(sg->length, ep);
+    sr_desc->send_flags = ib_send_flags(sg->length, &(ep->qps[qp]));
 
     if(ep->nbo)
         BTL_OPENIB_HEADER_HTON(*frag->hdr);
diff --git a/ompi/mca/btl/openib/btl_openib_endpoint.h b/ompi/mca/btl/openib/btl_openib_endpoint.h
index d81b34f089..4e4b0500a4 100644
--- a/ompi/mca/btl/openib/btl_openib_endpoint.h
+++ b/ompi/mca/btl/openib/btl_openib_endpoint.h
@@ -138,6 +138,7 @@ typedef struct mca_btl_openib_endpoint_qp_t {
                                            available */
     int32_t rd_credit_send_lock;       /**< Lock credit send fragment */
     mca_btl_openib_send_control_frag_t *credit_frag;
+    size_t ib_inline_max;              /**< max size of inline send */
     union {
         mca_btl_openib_endpoint_srq_qp_t srq_qp;
         mca_btl_openib_endpoint_pp_qp_t pp_qp;
@@ -403,10 +404,10 @@ static inline int check_endpoint_state(mca_btl_openib_endpoint_t *ep,
 }
 
 static inline __opal_attribute_always_inline__ int
-ib_send_flags(uint32_t size, mca_btl_openib_endpoint_t *ep)
+ib_send_flags(uint32_t size, mca_btl_openib_endpoint_qp_t *qp)
 {
     return IBV_SEND_SIGNALED |
-        ((size <= ep->endpoint_btl->ib_inline_max) ? IBV_SEND_INLINE : 0);
+        ((size <= qp->ib_inline_max) ? IBV_SEND_INLINE : 0);
 }
 
 END_C_DECLS
diff --git a/ompi/mca/btl/openib/btl_openib_mca.c b/ompi/mca/btl/openib/btl_openib_mca.c
index 0f06ee8c65..76862c9c2f 100644
--- a/ompi/mca/btl/openib/btl_openib_mca.c
+++ b/ompi/mca/btl/openib/btl_openib_mca.c
@@ -222,10 +222,10 @@ int btl_openib_register_mca_params(void)
     mca_btl_openib_component.ib_cq_size[BTL_OPENIB_LP_CQ] =
         mca_btl_openib_component.ib_cq_size[BTL_OPENIB_HP_CQ] = (uint32_t) ival;
 
-    CHECK(reg_int("ib_sg_list_size", "Size of IB segment list "
-                  "(must be >= 1)",
-                  4, &ival, REGINT_GE_ONE));
-    mca_btl_openib_component.ib_sg_list_size = (uint32_t) ival;
+    CHECK(reg_int("ib_max_inline_data", "Maximum size of inline data segment "
+                  "(must be >= 0)",
+                  128, &ival, REGINT_GE_ZERO));
+    mca_btl_openib_component.ib_max_inline_data = (uint32_t) ival;
 
     CHECK(reg_int("ib_pkey_ix", "InfiniBand pkey index "
                   "(must be >= 0)",
diff --git a/ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c b/ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c
index 6117b28c8e..661cf5511b 100644
--- a/ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c
+++ b/ompi/mca/btl/openib/connect/btl_openib_connect_ibcm.c
@@ -768,6 +768,23 @@ static int ibcm_component_query(mca_btl_openib_module_t *btl,
 /*******************************************************************
  * Module
  *******************************************************************/
+/* Returns the max inline size for QP #N */
+static int max_inline_size(int qp)
+{
+    if (mca_btl_openib_component.qp_infos[qp].size <=
+        mca_btl_openib_component.ib_max_inline_data) {
+        /* If the qp message size is smaller than the max inline size,
+         * we should enable inline messages */
+        return mca_btl_openib_component.qp_infos[qp].size;
+    } else if (mca_btl_openib_component.rdma_qp == qp || 0 == qp) {
+        /* If the qp message size is bigger than the max inline size,
+         * we should enable inline messages only for the RDMA QP
+         * (for PUT/GET fin messages) and for the first qp */
+        return mca_btl_openib_component.ib_max_inline_data;
+    }
+    /* Otherwise there is no reason to use inline */
+    return 0;
+}
 
 /*
  * Create the local side of one qp. The remote side will be connected
@@ -780,6 +797,7 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
     mca_btl_openib_module_t *openib_btl = endpoint->endpoint_btl;
     struct ibv_qp *my_qp;
     struct ibv_qp_init_attr init_attr;
+    size_t req_inline;
 
     memset(&init_attr, 0, sizeof(init_attr));
 
@@ -787,7 +805,8 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
     init_attr.send_cq = openib_btl->hca->ib_cq[BTL_OPENIB_LP_CQ];
     init_attr.recv_cq = openib_btl->hca->ib_cq[qp_cq_prio(qp)];
     init_attr.srq = srq;
-    init_attr.cap.max_send_sge = mca_btl_openib_component.ib_sg_list_size;
+    init_attr.cap.max_inline_data = req_inline = max_inline_size(qp);
+    init_attr.cap.max_send_sge = 1;
     init_attr.cap.max_recv_sge = 1; /* we do not use SG list */
     if(BTL_OPENIB_QP_TYPE_PP(qp)) {
         init_attr.cap.max_recv_wr = max_recv_wr;
@@ -801,10 +820,11 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
         BTL_ERROR(("error creating qp errno says %s", strerror(errno)));
         return OMPI_ERROR;
     }
     endpoint->qps[qp].qp->lcl_qp = my_qp;
-    openib_btl->ib_inline_max = init_attr.cap.max_inline_data;
-
+    endpoint->qps[qp].ib_inline_max =
+        init_attr.cap.max_inline_data < req_inline ?
+        init_attr.cap.max_inline_data : req_inline;
 
     /* Setup meta data on the endpoint */
     endpoint->qps[qp].qp->lcl_psn = lrand48() & 0xffffff;
     endpoint->qps[qp].credit_frag = NULL;
diff --git a/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c b/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c
index 3d63491aa3..d5c74ae95f 100644
--- a/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c
+++ b/ompi/mca/btl/openib/connect/btl_openib_connect_oob.c
@@ -403,6 +403,24 @@ static int qp_create_all(mca_btl_base_endpoint_t* endpoint)
 }
 
+/* Returns the max inline size for QP #N */
+static int max_inline_size(int qp)
+{
+    if (mca_btl_openib_component.qp_infos[qp].size <=
+        mca_btl_openib_component.ib_max_inline_data) {
+        /* If the qp message size is smaller than the max inline size,
+         * we should enable inline messages */
+        return mca_btl_openib_component.qp_infos[qp].size;
+    } else if (mca_btl_openib_component.rdma_qp == qp || 0 == qp) {
+        /* If the qp message size is bigger than the max inline size,
+         * we should enable inline messages only for the RDMA QP
+         * (for PUT/GET fin messages) and for the first qp */
+        return mca_btl_openib_component.ib_max_inline_data;
+    }
+    /* Otherwise there is no reason to use inline */
+    return 0;
+}
+
 /*
  * Create the local side of one qp. The remote side will be connected
 * later.
@@ -414,6 +432,7 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
     struct ibv_qp *my_qp;
     struct ibv_qp_init_attr init_attr;
     struct ibv_qp_attr attr;
+    size_t req_inline;
 
     memset(&init_attr, 0, sizeof(init_attr));
     memset(&attr, 0, sizeof(attr));
@@ -422,7 +441,8 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
     init_attr.send_cq = openib_btl->hca->ib_cq[BTL_OPENIB_LP_CQ];
     init_attr.recv_cq = openib_btl->hca->ib_cq[qp_cq_prio(qp)];
     init_attr.srq = srq;
-    init_attr.cap.max_send_sge = mca_btl_openib_component.ib_sg_list_size;
+    init_attr.cap.max_inline_data = req_inline = max_inline_size(qp);
+    init_attr.cap.max_send_sge = 1;
     init_attr.cap.max_recv_sge = 1; /* we do not use SG list */
     if(BTL_OPENIB_QP_TYPE_PP(qp)) {
         init_attr.cap.max_recv_wr = max_recv_wr;
@@ -438,7 +458,9 @@ static int qp_create_one(mca_btl_base_endpoint_t* endpoint, int qp,
         return OMPI_ERROR;
     }
     endpoint->qps[qp].qp->lcl_qp = my_qp;
-    openib_btl->ib_inline_max = init_attr.cap.max_inline_data;
+    endpoint->qps[qp].ib_inline_max =
+        init_attr.cap.max_inline_data < req_inline ?
+        init_attr.cap.max_inline_data : req_inline;
 
     attr.qp_state = IBV_QPS_INIT;
     attr.pkey_index = openib_btl->pkey_index;
diff --git a/ompi/mca/btl/openib/connect/btl_openib_connect_rdmacm.c b/ompi/mca/btl/openib/connect/btl_openib_connect_rdmacm.c
index ddc2aa39ca..d5dc5aebf4 100644
--- a/ompi/mca/btl/openib/connect/btl_openib_connect_rdmacm.c
+++ b/ompi/mca/btl/openib/connect/btl_openib_connect_rdmacm.c
@@ -198,6 +198,24 @@ static void rdmacm_cleanup(rdmacm_contents_t *local,
     }
 }
 
+/* Returns the max inline size for QP #N */
+static int max_inline_size(int qp)
+{
+    if (mca_btl_openib_component.qp_infos[qp].size <=
+        mca_btl_openib_component.ib_max_inline_data) {
+        /* If the qp message size is smaller than the max inline size,
+         * we should enable inline messages */
+        return mca_btl_openib_component.qp_infos[qp].size;
+    } else if (mca_btl_openib_component.rdma_qp == qp || 0 == qp) {
+        /* If the qp message size is bigger than the max inline size,
+         * we should enable inline messages only for the RDMA QP
+         * (for PUT/GET fin messages) and for the first qp */
+        return mca_btl_openib_component.ib_max_inline_data;
+    }
+    /* Otherwise there is no reason to use inline */
+    return 0;
+}
+
 static int rdmacm_setup_qp(rdmacm_contents_t *local,
                            mca_btl_openib_endpoint_t *endpoint,
                            struct rdma_cm_id *id,
@@ -207,6 +225,7 @@ static int rdmacm_setup_qp(rdmacm_contents_t *local,
     struct ibv_qp *qp;
     struct ibv_srq *srq = NULL;
     int credits = 0, reserved = 0, max_recv_wr, max_send_wr;
+    size_t req_inline;
 
     if (qpnum == mca_btl_openib_component.credits_qp) {
         int i;
@@ -233,7 +252,8 @@ static int rdmacm_setup_qp(rdmacm_contents_t *local,
     attr.srq = srq;
     attr.cap.max_recv_wr = max_recv_wr;
     attr.cap.max_send_wr = max_send_wr;
-    attr.cap.max_send_sge = mca_btl_openib_component.ib_sg_list_size;
+    attr.cap.max_inline_data = req_inline = max_inline_size(qpnum);
+    attr.cap.max_send_sge = 1;
     attr.cap.max_recv_sge = 1; /* we do not use SG list */
 
     qp = ibv_create_qp(local->openib_btl->hca->ib_pd, &attr);
@@ -243,13 +263,11 @@ static int rdmacm_setup_qp(rdmacm_contents_t *local,
     }
 
     endpoint->qps[qpnum].qp->lcl_qp = qp;
+    endpoint->qps[qpnum].ib_inline_max =
+        attr.cap.max_inline_data < req_inline ?
+        attr.cap.max_inline_data : req_inline;
     id->qp = qp;
 
-    /* After creating the qp, the driver will write the max_inline_data
-     * in the attributes. Update the btl with this data.
-     */
-    local->openib_btl->ib_inline_max = attr.cap.max_inline_data;
-
     return 0;
 
 out:
diff --git a/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c b/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c
index 3e7e83276b..57a0ad74bb 100644
--- a/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c
+++ b/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c
@@ -371,6 +371,7 @@ static int xoob_send_qp_create (mca_btl_base_endpoint_t* endpoint)
     struct ibv_qp_init_attr qp_init_attr;
     struct ibv_qp_attr attr;
     int ret;
+    size_t req_inline;
     mca_btl_openib_module_t *openib_btl =
         (mca_btl_openib_module_t*)endpoint->endpoint_btl;
 
@@ -391,19 +392,22 @@ static int xoob_send_qp_create (mca_btl_base_endpoint_t* endpoint)
     /* no need recv queue; receives are posted to srq */
     qp_init_attr.cap.max_recv_wr = 0;
     qp_init_attr.cap.max_send_wr = send_wr;
-
-    qp_init_attr.cap.max_send_sge = mca_btl_openib_component.ib_sg_list_size;
+    qp_init_attr.cap.max_inline_data = req_inline =
+        mca_btl_openib_component.ib_max_inline_data;
+    qp_init_attr.cap.max_send_sge = 1; /* this one is ignored by the driver */
     qp_init_attr.cap.max_recv_sge = 1; /* we do not use SG list */
     qp_init_attr.qp_type = IBV_QPT_XRC;
     qp_init_attr.xrc_domain = openib_btl->hca->xrc_domain;
     *qp = ibv_create_qp(openib_btl->hca->ib_pd, &qp_init_attr);
-
     if (NULL == *qp) {
         BTL_ERROR(("Error creating QP, errno says: %s", strerror(errno)));
         return OMPI_ERROR;
     }
-    openib_btl->ib_inline_max = qp_init_attr.cap.max_inline_data;
+    endpoint->qps[0].ib_inline_max =
+        qp_init_attr.cap.max_inline_data < req_inline ?
+        qp_init_attr.cap.max_inline_data : req_inline;
+
     attr.qp_state = IBV_QPS_INIT;
     attr.pkey_index = openib_btl->pkey_index;
     attr.port_num = openib_btl->port_num;
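
The same request-then-clamp pattern appears in every connect module above: each one asks for max_inline_size(qp) bytes in ibv_qp_init_attr.cap.max_inline_data, lets ibv_create_qp() write back what the driver actually granted, and stores the smaller of the two in the new per-QP ib_inline_max field, which ib_send_flags() then compares against each send size to decide whether to set IBV_SEND_INLINE. Below is a minimal, compilable sketch of that policy. It is not OMPI code: the QP sizes, the RDMA_QP index, and the "granted" value a driver might return are invented example numbers; only the selection and clamping logic mirrors the patch.

/* Standalone sketch of the inline-size policy in this patch.
 * All numeric values and the RDMA_QP index are invented for
 * illustration; they are not OMPI defaults. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_QPS 4
#define RDMA_QP 3   /* stand-in for mca_btl_openib_component.rdma_qp */

/* per-QP message sizes, as qp_infos[qp].size would report them */
static const uint32_t qp_size[NUM_QPS] = { 64, 4096, 65536, 12288 };
/* stand-in for the new ib_max_inline_data MCA parameter (default 128) */
static const uint32_t ib_max_inline_data = 128;

/* Mirrors max_inline_size(): a QP whose messages fit within the inline
 * limit requests its full message size; the RDMA QP and QP 0 request the
 * configured maximum (for PUT/GET fin messages and the first qp); every
 * other QP requests no inline space at all. */
static uint32_t max_inline_size(int qp)
{
    if (qp_size[qp] <= ib_max_inline_data) {
        return qp_size[qp];
    } else if (RDMA_QP == qp || 0 == qp) {
        return ib_max_inline_data;
    }
    return 0;
}

int main(void)
{
    for (int qp = 0; qp < NUM_QPS; ++qp) {
        uint32_t req_inline = max_inline_size(qp);
        /* ibv_create_qp() may write back a larger max_inline_data than
         * requested; simulate a driver that rounds up by 32 bytes. */
        uint32_t granted = req_inline ? req_inline + 32 : 0;
        /* Clamp to the requested size, as the patch does per endpoint QP,
         * so IBV_SEND_INLINE is used only where the policy intended. */
        uint32_t ib_inline_max = granted < req_inline ? granted : req_inline;
        printf("qp %d: requested %" PRIu32 ", granted %" PRIu32
               ", ib_inline_max %" PRIu32 "\n",
               qp, req_inline, granted, ib_inline_max);
    }
    return 0;
}

Moving the limit from the module-wide openib_btl->ib_inline_max into mca_btl_openib_endpoint_qp_t is what lets bulk QPs opt out of inline space entirely, instead of every QP inheriting whatever the last-created QP happened to report.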