From c9e0eda19037b33a06eeced7464c0dd09864721b Mon Sep 17 00:00:00 2001 From: Galen Shipman Date: Sun, 30 Jul 2006 00:58:40 +0000 Subject: [PATCH] Initialize the completion queue to a reasonable size based on maximum number of send/receives outstanding. Use ibv_resize_cq if available after initial creation of completion queue if cq_size is too small (based on number of peers). This commit was SVN r11053. --- config/ompi_check_openib.m4 | 5 ++ ompi/mca/btl/openib/btl_openib.c | 71 ++++++++++++++++++---- ompi/mca/btl/openib/btl_openib.h | 3 +- ompi/mca/btl/openib/btl_openib_component.c | 21 ++----- ompi/mca/btl/openib/btl_openib_endpoint.c | 2 - 5 files changed, 72 insertions(+), 30 deletions(-) diff --git a/config/ompi_check_openib.m4 b/config/ompi_check_openib.m4 index 3dd4d46278..9e75f172e5 100644 --- a/config/ompi_check_openib.m4 +++ b/config/ompi_check_openib.m4 @@ -134,6 +134,11 @@ AC_DEFUN([OMPI_CHECK_OPENIB],[ [$ompi_check_openib_have_device_list], [Whether install of OpenIB includes ibv_get_device_list API]) + AC_CHECK_FUNCS([ibv_resize_cq], [ompi_check_openib_have_resize_cq=1], [ompi_check_openib_have_resize_cq=0]) + AC_DEFINE_UNQUOTED([OMPI_MCA_]m4_translit([$1], [a-z], [A-Z])[_HAVE_RESIZE_CQ], + [$ompi_check_openib_have_resize_cq], + [Whether install of OpenIB includes resize completion queue support]) + CPPFLAGS="$ompi_check_openib_$1_save_CPPFLAGS" LDFLAGS="$ompi_check_openib_$1_save_LDFLAGS" LIBS="$ompi_check_openib_$1_save_LIBS"], diff --git a/ompi/mca/btl/openib/btl_openib.c b/ompi/mca/btl/openib/btl_openib.c index d20f52862d..a4d53b3915 100644 --- a/ompi/mca/btl/openib/btl_openib.c +++ b/ompi/mca/btl/openib/btl_openib.c @@ -122,21 +122,70 @@ int mca_btl_openib_add_procs( OPAL_THREAD_UNLOCK(&ib_proc->proc_lock); peers[i] = ib_peer; } -#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ - if( 0 == openib_btl->num_peers ) { + + do { + int min_cq_size; + int first_time = openib_btl->num_peers == 0; + int rc; openib_btl->num_peers += nprocs; +#ifdef
OMPI_MCA_BTL_OPENIB_HAVE_SRQ if(mca_btl_openib_component.use_srq) { openib_btl->rd_num = mca_btl_openib_component.rd_num + log2(nprocs) * mca_btl_openib_component.srq_rd_per_peer; if(openib_btl->rd_num > mca_btl_openib_component.srq_rd_max) - openib_btl->rd_num = mca_btl_openib_component.srq_rd_max; + openib_btl->rd_num = mca_btl_openib_component.srq_rd_max; openib_btl->rd_low = openib_btl->rd_num - 1; - free(openib_btl->rd_desc_post); - openib_btl->rd_desc_post = (struct ibv_recv_wr*) malloc((openib_btl->rd_num * sizeof(struct ibv_recv_wr))); - + min_cq_size = openib_btl->rd_num * 2 * openib_btl->num_peers; + if(!first_time) { + struct ibv_srq_attr srq_attr; + srq_attr.max_wr = openib_btl->rd_num; + rc = ibv_modify_srq( openib_btl->srq_hp, &srq_attr, IBV_SRQ_MAX_WR); + if(rc) { + BTL_ERROR(("cannot resize high priority shared receive queue, error: %d", rc)); + return OMPI_ERROR; + } + rc = ibv_modify_srq(openib_btl->srq_lp, &srq_attr, IBV_SRQ_MAX_WR); + if(rc) { + BTL_ERROR(("cannot resize low priority shared receive queue, error: %d", rc)); + return OMPI_ERROR; + } + + } + + } else +#endif + { + min_cq_size = ( mca_btl_openib_component.rd_num > (int32_t) mca_btl_openib_component.eager_rdma_num ? 
+ mca_btl_openib_component.rd_num : (int32_t) mca_btl_openib_component.eager_rdma_num ) * + 2 * openib_btl->num_peers; + } - } -#endif + +#ifdef OMPI_MCA_BTL_OPENIB_HAVE_RESIZE_CQ + + if(min_cq_size > (int32_t) mca_btl_openib_component.ib_cq_size) { + mca_btl_openib_component.ib_cq_size = min_cq_size; + if(!first_time) { + rc = ibv_resize_cq(openib_btl->ib_cq_lp, min_cq_size); + if(rc) { + BTL_ERROR(("cannot resize low priority completion queue, error: %d", rc)); + return OMPI_ERROR; + } + rc = ibv_resize_cq(openib_btl->ib_cq_hp, min_cq_size); + if(rc) { + BTL_ERROR(("cannot resize high priority completion queue, error: %d", rc)); + return OMPI_ERROR; + } + } + } +#endif + if(first_time) { + /* never been here before, setup cq and srq */ + mca_btl_openib_create_cq_srq(openib_btl); + } + } while(0); + return OMPI_SUCCESS; + } @@ -693,10 +742,10 @@ int mca_btl_openib_get( mca_btl_base_module_t* btl, } /* - * Initialize the btl module by allocating a protection domain - * and creating both the high and low priority completion queues + * create both the high and low priority completion queues + * and the shared receive queue (if requested) */ -int mca_btl_openib_module_init(mca_btl_openib_module_t *openib_btl) +int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t *openib_btl) { /* Allocate Protection Domain */ openib_btl->poll_cq = false; diff --git a/ompi/mca/btl/openib/btl_openib.h b/ompi/mca/btl/openib/btl_openib.h index 1b2232a088..cae6f60672 100644 --- a/ompi/mca/btl/openib/btl_openib.h +++ b/ompi/mca/btl/openib/btl_openib.h @@ -152,7 +152,6 @@ struct mca_btl_openib_module_t { struct ibv_cq *ib_cq_hp; struct ibv_cq *ib_cq_lp; struct ibv_port_attr ib_port_attr; - struct ibv_recv_wr* rd_desc_post; uint16_t lid; /**< lid that is actually used (for LMC) */ uint8_t src_path_bits; /**< offset from base lid (for LMC) */ @@ -424,7 +423,7 @@ extern void mca_btl_openib_send_frag_return( ); -int mca_btl_openib_module_init(mca_btl_openib_module_t* openib_btl); +int 
mca_btl_openib_create_cq_srq(mca_btl_openib_module_t* openib_btl); #ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ diff --git a/ompi/mca/btl/openib/btl_openib_component.c b/ompi/mca/btl/openib/btl_openib_component.c index 9edfbee4f5..068c7b2fc2 100644 --- a/ompi/mca/btl/openib/btl_openib_component.c +++ b/ompi/mca/btl/openib/btl_openib_component.c @@ -148,7 +148,9 @@ int mca_btl_openib_component_open(void) 16, (int*) &mca_btl_openib_component.reg_mru_len); mca_btl_openib_param_register_int("use_srq", "if 1 use the IB shared receive queue to post receive descriptors", 0, (int*) &mca_btl_openib_component.use_srq); - mca_btl_openib_param_register_int("ib_cq_size", "size of the IB completion queue", + mca_btl_openib_param_register_int("ib_cq_size", "size of the IB completion " + "queue, an override of this value may occur if set too small, " + "the override is 2*Number of Peers* btl_openib_rd_num", 1000, (int*) &mca_btl_openib_component.ib_cq_size); mca_btl_openib_param_register_int("ib_sg_list_size", "size of IB segment list", 4, (int*) &mca_btl_openib_component.ib_sg_list_size); @@ -600,16 +602,7 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules, OBJ_CONSTRUCT(&openib_btl->recv_free_eager, ompi_free_list_t); OBJ_CONSTRUCT(&openib_btl->recv_free_max, ompi_free_list_t); - - if(mca_btl_openib_module_init(openib_btl) != OMPI_SUCCESS) { -#if OMPI_MCA_BTL_OPENIB_HAVE_DEVICE_LIST - ibv_free_device_list(ib_devs); -#else - free(ib_devs); -#endif - return NULL; - } - + /* initialize the memory pool using the hca */ openib_btl->super.btl_mpool = openib_btl->hca->mpool; @@ -698,10 +691,7 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules, openib_btl->eager_rdma_buffers_count = 0; OBJ_CONSTRUCT(&openib_btl->eager_rdma_lock, opal_mutex_t); - /* Initialize the rd_desc_post array for posting of rr*/ - openib_btl->rd_desc_post = (struct ibv_recv_wr *) - malloc(((mca_btl_openib_component.rd_num + mca_btl_openib_component.rd_rsv) * 
sizeof(struct ibv_recv_wr))); - + btls[i] = &openib_btl->super; } @@ -987,6 +977,7 @@ int mca_btl_openib_component_progress() wc.status, wc.wr_id, wc.opcode)); if(wc.status == IBV_WC_RETRY_EXC_ERR) { opal_show_help("help-mpi-btl-openib.txt", "btl_openib:retry-exceeded", true); + abort(); } return OMPI_ERROR; diff --git a/ompi/mca/btl/openib/btl_openib_endpoint.c b/ompi/mca/btl/openib/btl_openib_endpoint.c index 68676a0b1d..5ef2ce7e2d 100644 --- a/ompi/mca/btl/openib/btl_openib_endpoint.c +++ b/ompi/mca/btl/openib/btl_openib_endpoint.c @@ -1065,7 +1065,6 @@ void mca_btl_openib_endpoint_send_credits_lp( mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl; mca_btl_openib_frag_t* frag; struct ibv_send_wr* bad_wr; - int rc; frag = endpoint->lp_credit_frag; @@ -1137,7 +1136,6 @@ void mca_btl_openib_endpoint_send_credits_hp( mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl; mca_btl_openib_frag_t* frag; struct ibv_send_wr* bad_wr; - int rc; frag = endpoint->hp_credit_frag;