From 6b6c08ef67d9e0507c1c3bf36d0d54ce2541c81e Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Tue, 28 Oct 2008 18:29:57 +0000 Subject: [PATCH] Fixes trac:1588: have several BTLs disable themselves in the presence of THREAD_MULTIPLE. There's a new (hidden) MCA parameter to re-enable these BTLs in the presence of THREAD_MULTIPLE: btl_base_thread_multiple_override. This MCA parameter should ''only'' be used by developers who are working on making their BTLs thread safe; it should ''not'' be used by end-users! This commit was SVN r19826. The following Trac tickets were found above: Ticket 1588 --> https://svn.open-mpi.org/trac/ompi/ticket/1588 --- ompi/mca/btl/base/base.h | 1 + ompi/mca/btl/base/btl_base_open.c | 12 ++++++ ompi/mca/btl/gm/btl_gm_component.c | 11 ++++- ompi/mca/btl/ofud/btl_ofud_component.c | 7 +++ ompi/mca/btl/openib/btl_openib_component.c | 8 +++- ompi/mca/btl/sctp/btl_sctp_component.c | 7 +++ ompi/mca/btl/udapl/btl_udapl_component.c | 10 ++++- ompi/op/op_predefined.c | 28 ++++++++++++ ompi/runtime/ompi_mpi_init.c | 50 ++++++++++++---------- 9 files changed, 108 insertions(+), 26 deletions(-) diff --git a/ompi/mca/btl/base/base.h b/ompi/mca/btl/base/base.h index 44c7da48e8..2c22cee67b 100644 --- a/ompi/mca/btl/base/base.h +++ b/ompi/mca/btl/base/base.h @@ -74,6 +74,7 @@ extern int mca_btl_base_warn_component_unused; extern int mca_btl_base_already_opened; OMPI_DECLSPEC extern opal_list_t mca_btl_base_components_opened; OMPI_DECLSPEC extern opal_list_t mca_btl_base_modules_initialized; +OMPI_DECLSPEC extern bool mca_btl_base_thread_multiple_override; END_C_DECLS diff --git a/ompi/mca/btl/base/btl_base_open.c b/ompi/mca/btl/base/btl_base_open.c index b4bccc2512..024a6a66d3 100644 --- a/ompi/mca/btl/base/btl_base_open.c +++ b/ompi/mca/btl/base/btl_base_open.c @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -79,6 +80,7 @@ int mca_btl_base_warn_component_unused = 1; opal_list_t mca_btl_base_components_opened; opal_list_t mca_btl_base_modules_initialized; int mca_btl_base_already_opened = 0; +bool mca_btl_base_thread_multiple_override = false; /** * Function for finding and opening either all MCA components, or the one @@ -86,6 +88,7 @@ int mca_btl_base_already_opened = 0; */ int mca_btl_base_open(void) { + int i; if( ++mca_btl_base_already_opened > 1 ) return OMPI_SUCCESS; /* Verbose output */ @@ -99,6 +102,15 @@ int mca_btl_base_open(void) mca_btl_base_output = opal_output_open(NULL); opal_output_set_verbosity(mca_btl_base_output, mca_btl_base_verbose); + /* Override the per-BTL "don't run if THREAD_MULTIPLE selected" + embargo? */ + mca_base_param_reg_int_name("btl", + "base_thread_multiple_override", + "Enable BTLs that are not normally enabled when MPI_THREAD_MULTIPLE is enabled (THIS IS FOR DEVELOPERS ONLY! SHOULD NOT BE USED BY END USERS!)", + true, false, + 0, &i); + mca_btl_base_thread_multiple_override = OPAL_INT_TO_BOOL(i); + /* Open up all available components */ if (OMPI_SUCCESS != diff --git a/ompi/mca/btl/gm/btl_gm_component.c b/ompi/mca/btl/gm/btl_gm_component.c index 4399a0da0e..c19fc5fba4 100644 --- a/ompi/mca/btl/gm/btl_gm_component.c +++ b/ompi/mca/btl/gm/btl_gm_component.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -47,6 +47,7 @@ #include "orte/runtime/orte_globals.h" #include "orte/util/name_fns.h" #include "ompi/runtime/ompi_module_exchange.h" +#include "ompi/runtime/mpiruntime.h" #if OMPI_ENABLE_PROGRESS_THREADS @@ -503,6 +504,14 @@ mca_btl_gm_component_init (int *num_btl_modules, OPAL_THREAD_LOCK(&mca_btl_gm_component.gm_lock); + /* Currently refuse to run if MPI_THREAD_MULTIPLE is enabled */ + if (ompi_mpi_thread_multiple && !mca_btl_base_thread_multiple_override) { + mca_btl_gm_component.gm_num_btls = 0; + mca_btl_gm_modex_send(); + OPAL_THREAD_UNLOCK(&mca_btl_gm_component.gm_lock); + return NULL; + } + /* try to initialize GM */ if( GM_SUCCESS != gm_init() ) { opal_output( 0, "[%s:%d] error in initializing the gm library\n", __FILE__, __LINE__ ); diff --git a/ompi/mca/btl/ofud/btl_ofud_component.c b/ompi/mca/btl/ofud/btl_ofud_component.c index f0dee11f40..68f2dbce7d 100644 --- a/ompi/mca/btl/ofud/btl_ofud_component.c +++ b/ompi/mca/btl/ofud/btl_ofud_component.c @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. + * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -34,6 +35,7 @@ #include "ompi/mca/btl/base/base.h" #include "ompi/mca/mpool/rdma/mpool_rdma.h" #include "ompi/runtime/ompi_module_exchange.h" +#include "ompi/runtime/mpiruntime.h" #include "orte/runtime/orte_globals.h" @@ -374,6 +376,11 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int* num_btl_modules, char* btl_str; char* tok; + /* Currently refuse to run if MPI_THREAD_MULTIPLE is enabled */ + if (ompi_mpi_thread_multiple && !mca_btl_base_thread_multiple_override) { + return NULL; + } + /* First, check if the UD BTL was specifically selected. If not, then short out right away. 
*/ mca_base_param_lookup_string( diff --git a/ompi/mca/btl/openib/btl_openib_component.c b/ompi/mca/btl/openib/btl_openib_component.c index 44e1584451..518be786dd 100644 --- a/ompi/mca/btl/openib/btl_openib_component.c +++ b/ompi/mca/btl/openib/btl_openib_component.c @@ -2002,15 +2002,19 @@ btl_openib_component_init(int *num_btl_modules, *num_btl_modules = 0; num_devs = 0; + /* Currently refuse to run if MPI_THREAD_MULTIPLE is enabled */ + if (ompi_mpi_thread_multiple && !mca_btl_base_thread_multiple_override) { + goto no_btls; + } + /* Per https://svn.open-mpi.org/trac/ompi/ticket/1305, check to see if $sysfsdir/class/infiniband exists. If it does not, assume that the RDMA hardware drivers are not loaded, and therefore we don't want OpenFabrics verbs support in this OMPI job. No need to print a warning. */ if (!check_basics()) { - return NULL; + goto no_btls; } - seedv[0] = ORTE_PROC_MY_NAME->vpid; seedv[1] = opal_sys_timer_get_cycles(); diff --git a/ompi/mca/btl/sctp/btl_sctp_component.c b/ompi/mca/btl/sctp/btl_sctp_component.c index db7c7bf383..ac164c3c3d 100644 --- a/ompi/mca/btl/sctp/btl_sctp_component.c +++ b/ompi/mca/btl/sctp/btl_sctp_component.c @@ -9,6 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -53,6 +54,7 @@ #include "opal/mca/base/mca_base_param.h" #include "ompi/runtime/ompi_module_exchange.h" +#include "ompi/runtime/mpiruntime.h" #include "orte/mca/errmgr/errmgr.h" #include "ompi/mca/mpool/base/base.h" #include "ompi/mca/btl/base/btl_base_error.h" @@ -691,6 +693,11 @@ mca_btl_base_module_t** mca_btl_sctp_component_init(int *num_btl_modules, bool enable_progress_threads, bool enable_mpi_threads) { + /* Currently refuse to run if MPI_THREAD_MULTIPLE is enabled */ + if (ompi_mpi_thread_multiple && !mca_btl_base_thread_multiple_override) { + return NULL; + } + if(mca_btl_sctp_component.sctp_if_11) { /* 1 to 1 */ mca_btl_base_module_t **btls; diff --git a/ompi/mca/btl/udapl/btl_udapl_component.c b/ompi/mca/btl/udapl/btl_udapl_component.c index bd2109cc5c..e54dac3aeb 100644 --- a/ompi/mca/btl/udapl/btl_udapl_component.c +++ b/ompi/mca/btl/udapl/btl_udapl_component.c @@ -13,7 +13,7 @@ * Copyright (c) 2006 Sandia National Laboratories. All rights * reserved. * Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. 
* * $COPYRIGHT$ * @@ -48,6 +48,7 @@ #include "btl_udapl_endpoint.h" #include "orte/util/proc_info.h" #include "ompi/runtime/ompi_module_exchange.h" +#include "ompi/runtime/mpiruntime.h" /* * Local Functions @@ -448,6 +449,13 @@ mca_btl_udapl_component_init (int *num_btl_modules, DAT_COUNT num_ias; int32_t i; + /* Currently refuse to run if MPI_THREAD_MULTIPLE is enabled */ + if (ompi_mpi_thread_multiple && !mca_btl_base_thread_multiple_override) { + mca_btl_udapl_component.udapl_num_btls = 0; + mca_btl_udapl_modex_send(); + return NULL; + } + /* parse the include and exclude lists, checking for errors */ mca_btl_udapl_component.if_include_list = mca_btl_udapl_component.if_exclude_list = diff --git a/ompi/op/op_predefined.c b/ompi/op/op_predefined.c index fc6fa0305a..26469be26d 100644 --- a/ompi/op/op_predefined.c +++ b/ompi/op/op_predefined.c @@ -293,8 +293,21 @@ OP_FUNC(sum, fortran_integer16, ompi_fortran_integer16_t, +=) OP_FUNC(sum, float, float, +=) OP_FUNC(sum, double, double, +=) #if HAVE_LONG_DOUBLE +#if 1 + void ompi_mpi_op_sum_long_double(void *in, void *out, int *count, \ + MPI_Datatype *dtype) \ + { \ + int i; \ + long double *a = (long double *) in; \ + long double *b = (long double *) out; \ + for (i = 0; i < *count; ++i) { \ + *(b++) += *(a++); \ + } \ + } +#else OP_FUNC(sum, long_double, long double, +=) #endif +#endif #if OMPI_HAVE_FORTRAN_REAL OP_FUNC(sum, fortran_real, ompi_fortran_real_t, +=) #endif @@ -311,8 +324,23 @@ OP_FUNC(sum, fortran_real4, ompi_fortran_real4_t, +=) OP_FUNC(sum, fortran_real8, ompi_fortran_real8_t, +=) #endif #if OMPI_HAVE_FORTRAN_REAL16 +#if 1 + void ompi_mpi_op_sum_fortran_real16(void *in, void *out, int *count, \ + MPI_Datatype *dtype) \ + { \ + int i; \ + ompi_fortran_real16_t *a = (ompi_fortran_real16_t *) in; \ + ompi_fortran_real16_t *b = (ompi_fortran_real16_t *) out; \ + printf("Adding %Lf with %Lf\n", *b, *a); \ + for (i = 0; i < *count; ++i) { \ + *(b++) += *(a++); \ + } \ + } + +#else OP_FUNC(sum, 
fortran_real16, ompi_fortran_real16_t, +=) #endif +#endif /* Complex */ #if OMPI_HAVE_FORTRAN_REAL && OMPI_HAVE_FORTRAN_COMPLEX COMPLEX_OP_FUNC_SUM(fortran_complex, ompi_fortran_complex_t) diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 77919fe4d8..8c15d5a7d7 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -365,6 +365,32 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) (ompistop.tv_usec - ompistart.tv_usec))); gettimeofday(&ompistart, NULL); } + + /* Figure out the final MPI thread levels. If we were not + compiled for support for MPI threads, then don't allow + MPI_THREAD_MULTIPLE. Set this stuff up here early in the + process so that other components can make decisions based on + this value. */ + + ompi_mpi_thread_requested = requested; + if (OMPI_HAVE_THREAD_SUPPORT == 0) { + ompi_mpi_thread_provided = *provided = MPI_THREAD_SINGLE; + ompi_mpi_main_thread = NULL; + } else if (OMPI_ENABLE_MPI_THREADS == 1) { + ompi_mpi_thread_provided = *provided = requested; + ompi_mpi_main_thread = opal_thread_get_self(); + } else { + if (MPI_THREAD_MULTIPLE == requested) { + ompi_mpi_thread_provided = *provided = MPI_THREAD_SERIALIZED; + } else { + ompi_mpi_thread_provided = *provided = requested; + } + ompi_mpi_main_thread = opal_thread_get_self(); + } + + ompi_mpi_thread_multiple = (ompi_mpi_thread_provided == + MPI_THREAD_MULTIPLE); + /* Once we've joined the RTE, see if any MCA parameters were passed to the MPI level */ @@ -629,29 +655,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) error = "ompi_proc_set_arch failed"; goto error; } - - /* Figure out the final MPI thread levels. If we were not - compiled for support for MPI threads, then don't allow - MPI_THREAD_MULTIPLE. 
*/ - ompi_mpi_thread_requested = requested; - if (OMPI_HAVE_THREAD_SUPPORT == 0) { - ompi_mpi_thread_provided = *provided = MPI_THREAD_SINGLE; - ompi_mpi_main_thread = NULL; - } else if (OMPI_ENABLE_MPI_THREADS == 1) { - ompi_mpi_thread_provided = *provided = requested; - ompi_mpi_main_thread = opal_thread_get_self(); - } else { - if (MPI_THREAD_MULTIPLE == requested) { - ompi_mpi_thread_provided = *provided = MPI_THREAD_SERIALIZED; - } else { - ompi_mpi_thread_provided = *provided = requested; - } - ompi_mpi_main_thread = opal_thread_get_self(); - } - - ompi_mpi_thread_multiple = (ompi_mpi_thread_provided == - MPI_THREAD_MULTIPLE); + /* If thread support was enabled, then setup OPAL to allow for + them. */ if ((OMPI_ENABLE_PROGRESS_THREADS == 1) || (*provided != MPI_THREAD_SINGLE)) { opal_set_using_threads(true);