From 2c086e56be3c7fa5bbdc3c3adebf91a933ddaae6 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 1 Jun 2016 17:01:15 -0700 Subject: [PATCH] Add an experimental ability to skip the RTE barriers at the end of MPI_Init and the beginning of MPI_Finalize --- ompi/runtime/ompi_mpi_finalize.c | 40 +++++++++++++++++--------------- ompi/runtime/ompi_mpi_init.c | 17 +++++++------- ompi/runtime/ompi_mpi_params.c | 21 ++++++++++++++++- ompi/runtime/params.h | 7 ++++++ 4 files changed, 57 insertions(+), 28 deletions(-) diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index d75f67be38..21565c3eb5 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -246,26 +246,28 @@ int ompi_mpi_finalize(void) del_procs behavior around May of 2014 (see https://svn.open-mpi.org/trac/ompi/ticket/4669#comment:4 for more details). */ - if (NULL != opal_pmix.fence_nb) { - active = true; - /* Note that use of the non-blocking PMIx fence will - * allow us to lazily cycle calling - * opal_progress(), which will allow any other pending - * communications/actions to complete. See - * https://github.com/open-mpi/ompi/issues/1576 for the - * original bug report. */ - opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active); - OMPI_LAZY_WAIT_FOR_COMPLETION(active); - } else { - /* However, we cannot guarantee that the provided PMIx has - * fence_nb. If it doesn't, then do the best we can: an MPI - * barrier on COMM_WORLD (which isn't the best because of the - * reasons cited above), followed by a blocking PMIx fence - * (which does not call opal_progress()). */ - ompi_communicator_t *comm = &ompi_mpi_comm_world.comm; - comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module); + if (!ompi_async_mpi_finalize) { + if (NULL != opal_pmix.fence_nb) { + active = true; + /* Note that use of the non-blocking PMIx fence will + * allow us to lazily cycle calling + * opal_progress(), which will allow any other pending + * communications/actions to complete. See + * https://github.com/open-mpi/ompi/issues/1576 for the + * original bug report. */ + opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active); + OMPI_LAZY_WAIT_FOR_COMPLETION(active); + } else { + /* However, we cannot guarantee that the provided PMIx has + * fence_nb. If it doesn't, then do the best we can: an MPI + * barrier on COMM_WORLD (which isn't the best because of the + * reasons cited above), followed by a blocking PMIx fence + * (which does not call opal_progress()). */ + ompi_communicator_t *comm = &ompi_mpi_comm_world.comm; + comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module); - opal_pmix.fence(NULL, 0); + opal_pmix.fence(NULL, 0); + } } /* check for timing request - get stop time and report elapsed diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 7ddf21484e..5616992456 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -819,14 +819,15 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* wait for everyone to reach this point - this is a hard * barrier requirement at this time, though we hope to relax * it at a later point */ - active = true; - opal_pmix.commit(); - if (NULL != opal_pmix.fence_nb) { - opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data, - fence_release, (void*)&active); - OMPI_WAIT_FOR_COMPLETION(active); - } else { - opal_pmix.fence(NULL, opal_pmix_collect_all_data); + if (!ompi_async_mpi_init) { + active = true; + if (NULL != opal_pmix.fence_nb) { + opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data, + fence_release, (void*)&active); + OMPI_WAIT_FOR_COMPLETION(active); + } else { + opal_pmix.fence(NULL, opal_pmix_collect_all_data); + } } /* check for timing request - get stop time and report elapsed diff --git a/ompi/runtime/ompi_mpi_params.c b/ompi/runtime/ompi_mpi_params.c index 5997231d18..ab02e20fec 100644 --- a/ompi/runtime/ompi_mpi_params.c +++ b/ompi/runtime/ompi_mpi_params.c @@ -14,7 +14,7 @@ * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2013-2016 Intel, Inc. All rights reserved * Copyright (c) 2015 Mellanox Technologies, Inc. * All rights reserved. * $COPYRIGHT$ @@ -65,6 +65,9 @@ char *ompi_mpi_show_mca_params_string = NULL; bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE); bool ompi_mpi_preconnect_mpi = false; +bool ompi_async_mpi_init = false; +bool ompi_async_mpi_finalize = false; + #define OMPI_ADD_PROCS_CUTOFF_DEFAULT 0 uint32_t ompi_add_procs_cutoff = OMPI_ADD_PROCS_CUTOFF_DEFAULT; bool ompi_mpi_dynamics_enabled = true; @@ -282,6 +285,22 @@ int ompi_mpi_register_params(void) MCA_BASE_VAR_SCOPE_READONLY, &ompi_mpi_dynamics_enabled); + ompi_async_mpi_init = false; + (void) mca_base_var_register("ompi", "async", "mpi", "init", + "Do not perform a barrier at the end of MPI_Init", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_async_mpi_init); + + ompi_async_mpi_finalize = false; + (void) mca_base_var_register("ompi", "async", "mpi", "finalize", + "Do not perform a barrier at the beginning of MPI_Finalize", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_async_mpi_finalize); + value = mca_base_var_find ("opal", "opal", NULL, "abort_delay"); if (0 <= value) { (void) mca_base_var_register_synonym(value, "ompi", "mpi", NULL, "abort_delay", diff --git a/ompi/runtime/params.h b/ompi/runtime/params.h index 1b4a5aeac7..5716e14252 100644 --- a/ompi/runtime/params.h +++ b/ompi/runtime/params.h @@ -135,6 +135,13 @@ OMPI_DECLSPEC extern uint32_t ompi_add_procs_cutoff; */ OMPI_DECLSPEC extern bool ompi_mpi_dynamics_enabled; +/* EXPERIMENTAL: do not perform an RTE barrier at the end of MPI_Init */ +OMPI_DECLSPEC extern bool ompi_async_mpi_init; + +/* EXPERIMENTAL: do not perform an RTE barrier at the beginning of MPI_Finalize */ +OMPI_DECLSPEC extern bool ompi_async_mpi_finalize; + + /** * Register MCA parameters used by the MPI layer. *