From e4d52b16b51617ba9f2b9e93c320517636f34e13 Mon Sep 17 00:00:00 2001 From: Terry Dontje Date: Mon, 10 Aug 2009 12:46:20 +0000 Subject: [PATCH] Add in eager limit checks in pmls. This commit was SVN r21778. --- ompi/mca/pml/csum/Makefile.am | 4 +++ ompi/mca/pml/csum/help-mpi-pml-csum.txt | 20 ++++++++++++++ ompi/mca/pml/csum/pml_csum.c | 33 +++++++++++++++++++++++ ompi/mca/pml/dr/Makefile.am | 4 +++ ompi/mca/pml/dr/help-mpi-pml-dr.txt | 20 ++++++++++++++ ompi/mca/pml/dr/pml_dr.c | 35 ++++++++++++++++++++++++- ompi/mca/pml/ob1/Makefile.am | 4 +++ ompi/mca/pml/ob1/help-mpi-pml-ob1.txt | 20 ++++++++++++++ ompi/mca/pml/ob1/pml_ob1.c | 34 ++++++++++++++++++++++++ 9 files changed, 173 insertions(+), 1 deletion(-) create mode 100644 ompi/mca/pml/csum/help-mpi-pml-csum.txt create mode 100644 ompi/mca/pml/dr/help-mpi-pml-dr.txt create mode 100644 ompi/mca/pml/ob1/help-mpi-pml-ob1.txt diff --git a/ompi/mca/pml/csum/Makefile.am b/ompi/mca/pml/csum/Makefile.am index 9f783a7bd3..f108237513 100644 --- a/ompi/mca/pml/csum/Makefile.am +++ b/ompi/mca/pml/csum/Makefile.am @@ -9,6 +9,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. # # $COPYRIGHT$ # @@ -17,6 +18,9 @@ # $HEADER$ # +dist_pkgdata_DATA = \ + help-mpi-pml-csum.txt + EXTRA_DIST = post_configure.sh pml_csum_endpoint.c pml_csum_endpoint.h csum_sources = \ diff --git a/ompi/mca/pml/csum/help-mpi-pml-csum.txt b/ompi/mca/pml/csum/help-mpi-pml-csum.txt new file mode 100644 index 0000000000..ed378d5003 --- /dev/null +++ b/ompi/mca/pml/csum/help-mpi-pml-csum.txt @@ -0,0 +1,20 @@ +# -*- text -*- +# +# Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +[eager_limit_too_small] +The "eager limit" MCA parameter in the %s BTL was set to a value which +is too low for Open MPI to function properly. Please re-run your job +with a higher eager limit value for this BTL; the exact MCA parameter +name and its corresponding minimum value are shown below. + + Local host: %s + BTL name: %s + BTL eager limit value: %d (set via btl_%s_eager_limit) + BTL eager limit minimum: %d + MCA parameter name: btl_%s_eager_limit diff --git a/ompi/mca/pml/csum/pml_csum.c b/ompi/mca/pml/csum/pml_csum.c index e650290559..8adfe8a97c 100644 --- a/ompi/mca/pml/csum/pml_csum.c +++ b/ompi/mca/pml/csum/pml_csum.c @@ -15,6 +15,7 @@ * Copyright (c) 2009 IBM Corporation. All rights reserved. * Copyright (c) 2009 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -299,6 +300,7 @@ int mca_pml_csum_add_procs(ompi_proc_t** procs, size_t nprocs) opal_bitmap_t reachable; int rc; size_t i; + opal_list_item_t *item; if(nprocs == 0) return OMPI_SUCCESS; @@ -341,6 +343,37 @@ int mca_pml_csum_add_procs(ompi_proc_t** procs, size_t nprocs) if(OMPI_SUCCESS != rc) goto cleanup_and_return; + /* Check that values supplied by all initialized btls will work + for us. Note that this is the list of all initialized BTLs, + not the ones used for the just added procs. This is a little + overkill and inaccurate, as we may end up not using the BTL in + question and all add_procs calls after the first one are + duplicating an already completed check. But the final + initialization of the PML occurs before the final + initialization of the BTLs, and iterating through the in-use + BTLs requires iterating over the procs, as the BML does not + expose all currently in use btls. 
*/ + + for (item = opal_list_get_first(&mca_btl_base_modules_initialized) ; + item != opal_list_get_end(&mca_btl_base_modules_initialized) ; + item = opal_list_get_next(item)) { + mca_btl_base_selected_module_t *sm = + (mca_btl_base_selected_module_t*) item; + if (sm->btl_module->btl_eager_limit < sizeof(mca_pml_csum_hdr_t)) { + orte_show_help("help-mpi-pml-csum.txt", "eager_limit_too_small", + true, + sm->btl_component->btl_version.mca_component_name, + orte_process_info.nodename, + sm->btl_component->btl_version.mca_component_name, + sm->btl_module->btl_eager_limit, + sm->btl_component->btl_version.mca_component_name, + sizeof(mca_pml_csum_hdr_t), + sm->btl_component->btl_version.mca_component_name); + rc = OMPI_ERR_BAD_PARAM; + goto cleanup_and_return; + } + } + rc = mca_bml.bml_register( MCA_PML_CSUM_HDR_TYPE_MATCH, mca_pml_csum_recv_frag_callback_match, NULL ); diff --git a/ompi/mca/pml/dr/Makefile.am b/ompi/mca/pml/dr/Makefile.am index dd8e60f2cc..f0637523fc 100644 --- a/ompi/mca/pml/dr/Makefile.am +++ b/ompi/mca/pml/dr/Makefile.am @@ -9,6 +9,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. # # $COPYRIGHT$ # @@ -17,6 +18,9 @@ # $HEADER$ # +dist_pkgdata_DATA = \ + help-mpi-pml-dr.txt + EXTRA_DIST = .windows dr_sources = \ diff --git a/ompi/mca/pml/dr/help-mpi-pml-dr.txt b/ompi/mca/pml/dr/help-mpi-pml-dr.txt new file mode 100644 index 0000000000..ed378d5003 --- /dev/null +++ b/ompi/mca/pml/dr/help-mpi-pml-dr.txt @@ -0,0 +1,20 @@ +# -*- text -*- +# +# Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +[eager_limit_too_small] +The "eager limit" MCA parameter in the %s BTL was set to a value which +is too low for Open MPI to function properly. 
Please re-run your job +with a higher eager limit value for this BTL; the exact MCA parameter +name and its corresponding minimum value are shown below. + + Local host: %s + BTL name: %s + BTL eager limit value: %d (set via btl_%s_eager_limit) + BTL eager limit minimum: %d + MCA parameter name: btl_%s_eager_limit diff --git a/ompi/mca/pml/dr/pml_dr.c b/ompi/mca/pml/dr/pml_dr.c index e8827bfeb0..ba9f052d3b 100644 --- a/ompi/mca/pml/dr/pml_dr.c +++ b/ompi/mca/pml/dr/pml_dr.c @@ -10,6 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -161,6 +162,7 @@ int mca_pml_dr_add_procs(ompi_proc_t** procs, size_t nprocs) opal_bitmap_t reachable; int rc; size_t i; + opal_list_item_t *item; if(nprocs == 0) return OMPI_SUCCESS; @@ -193,7 +195,38 @@ int mca_pml_dr_add_procs(ompi_proc_t** procs, size_t nprocs) ); if(OMPI_SUCCESS != rc) return rc; - + + /* Check that values supplied by all initialized btls will work + for us. Note that this is the list of all initialized BTLs, + not the ones used for the just added procs. This is a little + overkill and inaccurate, as we may end up not using the BTL in + question and all add_procs calls after the first one are + duplicating an already completed check. But the final + initialization of the PML occurs before the final + initialization of the BTLs, and iterating through the in-use + BTLs requires iterating over the procs, as the BML does not + expose all currently in use btls. 
*/ + + for (item = opal_list_get_first(&mca_btl_base_modules_initialized) ; + item != opal_list_get_end(&mca_btl_base_modules_initialized) ; + item = opal_list_get_next(item)) { + mca_btl_base_selected_module_t *sm = + (mca_btl_base_selected_module_t*) item; + if (sm->btl_module->btl_eager_limit < sizeof(mca_pml_dr_hdr_t)) { + orte_show_help("help-mpi-pml-dr.txt", "eager_limit_too_small", + true, + sm->btl_component->btl_version.mca_component_name, + orte_process_info.nodename, + sm->btl_component->btl_version.mca_component_name, + sm->btl_module->btl_eager_limit, + sm->btl_component->btl_version.mca_component_name, + sizeof(mca_pml_dr_hdr_t), + sm->btl_component->btl_version.mca_component_name); + rc = OMPI_ERR_BAD_PARAM; + return rc; + } + } + /* register recv handler */ rc = mca_bml.bml_register( MCA_BTL_TAG_PML, diff --git a/ompi/mca/pml/ob1/Makefile.am b/ompi/mca/pml/ob1/Makefile.am index 4295eae00e..8cfc5a137e 100644 --- a/ompi/mca/pml/ob1/Makefile.am +++ b/ompi/mca/pml/ob1/Makefile.am @@ -9,6 +9,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. # # $COPYRIGHT$ # @@ -17,6 +18,9 @@ # $HEADER$ # +dist_pkgdata_DATA = \ + help-mpi-pml-ob1.txt + EXTRA_DIST = post_configure.sh pml_ob1_endpoint.c pml_ob1_endpoint.h .windows ob1_sources = \ diff --git a/ompi/mca/pml/ob1/help-mpi-pml-ob1.txt b/ompi/mca/pml/ob1/help-mpi-pml-ob1.txt new file mode 100644 index 0000000000..ed378d5003 --- /dev/null +++ b/ompi/mca/pml/ob1/help-mpi-pml-ob1.txt @@ -0,0 +1,20 @@ +# -*- text -*- +# +# Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +[eager_limit_too_small] +The "eager limit" MCA parameter in the %s BTL was set to a value which +is too low for Open MPI to function properly. 
Please re-run your job +with a higher eager limit value for this BTL; the exact MCA parameter +name and its corresponding minimum value are shown below. + + Local host: %s + BTL name: %s + BTL eager limit value: %d (set via btl_%s_eager_limit) + BTL eager limit minimum: %d + MCA parameter name: btl_%s_eager_limit diff --git a/ompi/mca/pml/ob1/pml_ob1.c b/ompi/mca/pml/ob1/pml_ob1.c index 6a283d671c..62e3befb2b 100644 --- a/ompi/mca/pml/ob1/pml_ob1.c +++ b/ompi/mca/pml/ob1/pml_ob1.c @@ -12,6 +12,7 @@ * All rights reserved. * Copyright (c) 2008 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2006-2008 University of Houston. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -289,6 +290,7 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) opal_bitmap_t reachable; int rc; size_t i; + opal_list_item_t *item; if(nprocs == 0) return OMPI_SUCCESS; @@ -325,6 +327,38 @@ int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs) if(OMPI_SUCCESS != rc) goto cleanup_and_return; + /* Check that values supplied by all initialized btls will work + for us. Note that this is the list of all initialized BTLs, + not the ones used for the just added procs. This is a little + overkill and inaccurate, as we may end up not using the BTL in + question and all add_procs calls after the first one are + duplicating an already completed check. But the final + initialization of the PML occurs before the final + initialization of the BTLs, and iterating through the in-use + BTLs requires iterating over the procs, as the BML does not + expose all currently in use btls. 
*/ + + for (item = opal_list_get_first(&mca_btl_base_modules_initialized) ; + item != opal_list_get_end(&mca_btl_base_modules_initialized) ; + item = opal_list_get_next(item)) { + mca_btl_base_selected_module_t *sm = + (mca_btl_base_selected_module_t*) item; + if (sm->btl_module->btl_eager_limit < sizeof(mca_pml_ob1_hdr_t)) { + orte_show_help("help-mpi-pml-ob1.txt", "eager_limit_too_small", + true, + sm->btl_component->btl_version.mca_component_name, + orte_process_info.nodename, + sm->btl_component->btl_version.mca_component_name, + sm->btl_module->btl_eager_limit, + sm->btl_component->btl_version.mca_component_name, + sizeof(mca_pml_ob1_hdr_t), + sm->btl_component->btl_version.mca_component_name); + rc = OMPI_ERR_BAD_PARAM; + goto cleanup_and_return; + } + } + + /* TODO: Move these callback registration to another place */ rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_MATCH, mca_pml_ob1_recv_frag_callback_match,