From 6004e177e00b5d805cdf0d7ec9ef3a8a992059fd Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Fri, 14 Sep 2007 21:42:56 +0000 Subject: [PATCH] Fixes trac:1133: if you specify a max freelist size that is too small, you'll get a helpful error message and the openib BTL will deactivate itself. This commit was SVN r16133. The following Trac tickets were found above: Ticket 1133 --> https://svn.open-mpi.org/trac/ompi/ticket/1133 --- ompi/mca/btl/openib/btl_openib_mca.c | 37 +++++++++++++++++---- ompi/mca/btl/openib/help-mpi-btl-openib.txt | 12 +++++++ 2 files changed, 43 insertions(+), 6 deletions(-) diff --git a/ompi/mca/btl/openib/btl_openib_mca.c b/ompi/mca/btl/openib/btl_openib_mca.c index a29d3ae5ea..ea39b80e9a 100644 --- a/ompi/mca/btl/openib/btl_openib_mca.c +++ b/ompi/mca/btl/openib/btl_openib_mca.c @@ -443,13 +443,14 @@ int btl_openib_register_mca_params(void) ret = mca_btl_base_param_register( &mca_btl_openib_component.super.btl_version, &mca_btl_openib_module.super); - - if(ret != OMPI_SUCCESS) + if (OMPI_SUCCESS != ret) { return ret; + } /* setup all the qp stuff */ - if((ret = mca_btl_openib_mca_setup_qps()) != MPI_SUCCESS) + if (OMPI_SUCCESS != (ret = mca_btl_openib_mca_setup_qps())) { return ret; + } CHECK(reg_string("if_include", "Comma-delimited list of HCAs/ports to be used (e.g. \"mthca0,mthca1:2\"; empty value means to use all ports found). Mutually exclusive with btl_openib_if_exclude.", @@ -479,7 +480,7 @@ static int mca_btl_openib_mca_setup_qps(void) char **queues, **params = NULL; int num_pp_qps = 0, num_srq_qps = 0, qp = 0, ret = OMPI_ERROR; char *default_qps = "P,128,256,128,16:S,1024,256,128,32:S,4096,256,128,32:S,65536,256,128,32"; - uint32_t max_qp_size, max_size_needed; + uint32_t max_qp_size, max_size_needed, min_freelist_size = 0; reg_string("receive_queues", "Colon-delimited, coma delimited list of receive queues: P,4096,8,6,4:P,32768,8,6,4", @@ -549,6 +550,15 @@ static int mca_btl_openib_mca_setup_qps(void) ); mca_btl_openib_component.qp_infos[qp].type = MCA_BTL_OPENIB_PP_QP; + + /* Calculate the smallest freelist size that can be allowed */ + if (mca_btl_openib_component.qp_infos[qp].rd_num + + mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv > + min_freelist_size) { + min_freelist_size = + mca_btl_openib_component.qp_infos[qp].rd_num + + mca_btl_openib_component.qp_infos[qp].u.pp_qp.rd_rsv; + } } else if(params[0][0] =='S') { if(count < 3 || count > 5) { opal_show_help("help-mpi-btl-openib.txt", @@ -569,6 +579,13 @@ static int mca_btl_openib_mca_setup_qps(void) mca_btl_openib_component.qp_infos[qp].rd_low, mca_btl_openib_component.qp_infos[qp].u.srq_qp.sd_max); mca_btl_openib_component.qp_infos[qp].type = MCA_BTL_OPENIB_SRQ_QP; + + /* Calculate the smallest freelist size that can be allowed */ + if (mca_btl_openib_component.qp_infos[qp].rd_num > + min_freelist_size) { + min_freelist_size = + mca_btl_openib_component.qp_infos[qp].rd_num; + } } if (mca_btl_openib_component.qp_infos[qp].rd_num <= @@ -586,13 +603,13 @@ static int mca_btl_openib_mca_setup_qps(void) } params = NULL; - max_qp_size = mca_btl_openib_component.qp_infos[mca_btl_openib_component.num_qps - 1].size; + /* Sanity check some sizes */ + max_qp_size = mca_btl_openib_component.qp_infos[mca_btl_openib_component.num_qps - 1].size; max_size_needed = (mca_btl_openib_module.super.btl_eager_limit > mca_btl_openib_module.super.btl_max_send_size) ? mca_btl_openib_module.super.btl_eager_limit : mca_btl_openib_module.super.btl_max_send_size; - if (max_qp_size < max_size_needed) { opal_show_help("help-mpi-btl-openib.txt", "biggest qp size is too small", true, @@ -608,6 +625,14 @@ static int mca_btl_openib_mca_setup_qps(void) opal_output(0, "The biggest QP size is bigger than maximum send size. " "This is not optimal configuration as memory will be waisted.\n"); } + + if (min_freelist_size > mca_btl_openib_component.ib_free_list_max) { + opal_show_help("help-mpi-btl-openib.txt", "freelist too small", true, + orte_system_info.nodename, + mca_btl_openib_component.ib_free_list_max, + min_freelist_size); + goto error; + } mca_btl_openib_component.rdma_qp = mca_btl_openib_component.num_qps - 1; mca_btl_openib_component.eager_rdma_qp = 0; diff --git a/ompi/mca/btl/openib/help-mpi-btl-openib.txt b/ompi/mca/btl/openib/help-mpi-btl-openib.txt index 16a5325ca3..31a41604a3 100644 --- a/ompi/mca/btl/openib/help-mpi-btl-openib.txt +++ b/ompi/mca/btl/openib/help-mpi-btl-openib.txt @@ -352,3 +352,15 @@ Host: %s Largest buffer size: %d Maximum send fragment size: %d # +[freelist too small] + +WARNING: The maximum freelist size that was specified was too small +for the requested receive queue sizes. The maximum freelist size must +be at least equal to the sum of the largest number of buffers posted +to a single queue plus the corresponding number of reserved/credit +buffers for that queue. It is suggested that the maximum be quite a +bit larger than this for performance reasons. + +Host: %s +Specified freelist size: %d +Minimum required freelist size: %d