
Merge pull request #1740 from rhc54/topic/async

Add an experimental ability to skip the RTE barriers at the end of MPI_Init and the beginning of MPI_Finalize
This commit is contained in:
rhc54 2016-06-01 18:31:35 -07:00
parents f33bbfd381 2c086e56be
commit 3b68c1f8db
4 changed files with 57 additions and 28 deletions
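
The new behavior is opt-in via two MCA parameters registered in the diff below. Going by the (project, framework, component, variable) tuples passed to mca_base_var_register, the full parameter names should resolve to async_mpi_init and async_mpi_finalize, so a run skipping both barriers would look roughly like this (illustrative invocation, not part of the PR itself):

    shell$ mpirun --mca async_mpi_init 1 --mca async_mpi_finalize 1 ./a.out

With the init barrier skipped, a process may leave MPI_Init before its peers are ready, which is why the new flags are labeled EXPERIMENTAL in the headers below.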

View file

@@ -246,26 +246,28 @@ int ompi_mpi_finalize(void)
        del_procs behavior around May of 2014 (see
        https://svn.open-mpi.org/trac/ompi/ticket/4669#comment:4 for
        more details). */
-    if (NULL != opal_pmix.fence_nb) {
-        active = true;
-        /* Note that use of the non-blocking PMIx fence will
-         * allow us to lazily cycle calling
-         * opal_progress(), which will allow any other pending
-         * communications/actions to complete. See
-         * https://github.com/open-mpi/ompi/issues/1576 for the
-         * original bug report. */
-        opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active);
-        OMPI_LAZY_WAIT_FOR_COMPLETION(active);
-    } else {
-        /* However, we cannot guarantee that the provided PMIx has
-         * fence_nb. If it doesn't, then do the best we can: an MPI
-         * barrier on COMM_WORLD (which isn't the best because of the
-         * reasons cited above), followed by a blocking PMIx fence
-         * (which does not call opal_progress()). */
-        ompi_communicator_t *comm = &ompi_mpi_comm_world.comm;
-        comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module);
+    if (!ompi_async_mpi_finalize) {
+        if (NULL != opal_pmix.fence_nb) {
+            active = true;
+            /* Note that use of the non-blocking PMIx fence will
+             * allow us to lazily cycle calling
+             * opal_progress(), which will allow any other pending
+             * communications/actions to complete. See
+             * https://github.com/open-mpi/ompi/issues/1576 for the
+             * original bug report. */
+            opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active);
+            OMPI_LAZY_WAIT_FOR_COMPLETION(active);
+        } else {
+            /* However, we cannot guarantee that the provided PMIx has
+             * fence_nb. If it doesn't, then do the best we can: an MPI
+             * barrier on COMM_WORLD (which isn't the best because of the
+             * reasons cited above), followed by a blocking PMIx fence
+             * (which does not call opal_progress()). */
+            ompi_communicator_t *comm = &ompi_mpi_comm_world.comm;
+            comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module);
 
-        opal_pmix.fence(NULL, 0);
+            opal_pmix.fence(NULL, 0);
+        }
     }
 
     /* check for timing request - get stop time and report elapsed
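
For readers unfamiliar with the completion-flag idiom in this hunk: fence_nb() registers a callback that flips "active", and OMPI_LAZY_WAIT_FOR_COMPLETION() polls the flag while yielding. Below is a self-contained sketch of that pattern in plain C11 + pthreads; fake_fence_server stands in for the PMIx server and the usleep() poll loop stands in for the macro's opal_progress() cycling. Names and shapes are illustrative, not OMPI's.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <unistd.h>

    static atomic_bool active = true;

    /* Stands in for the fence_cbfunc() callback that PMIx invokes
     * when the fence completes: it just flips the completion flag. */
    static void fence_done(void *cbdata)
    {
        atomic_store((atomic_bool *)cbdata, false);
    }

    /* Stands in for the PMIx server finishing the fence asynchronously. */
    static void *fake_fence_server(void *cbdata)
    {
        sleep(1);
        fence_done(cbdata);
        return NULL;
    }

    int main(void)
    {
        pthread_t srv;
        pthread_create(&srv, NULL, fake_fence_server, (void *)&active);

        /* The lazy wait: poll the flag while yielding, leaving room
         * to drive other pending work (opal_progress() in the real code). */
        while (atomic_load(&active)) {
            usleep(100);
        }

        pthread_join(srv, NULL);
        puts("fence complete");
        return 0;
    }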

View file

@@ -819,14 +819,15 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
     /* wait for everyone to reach this point - this is a hard
      * barrier requirement at this time, though we hope to relax
      * it at a later point */
-    active = true;
     opal_pmix.commit();
-    if (NULL != opal_pmix.fence_nb) {
-        opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data,
-                           fence_release, (void*)&active);
-        OMPI_WAIT_FOR_COMPLETION(active);
-    } else {
-        opal_pmix.fence(NULL, opal_pmix_collect_all_data);
+    if (!ompi_async_mpi_init) {
+        active = true;
+        if (NULL != opal_pmix.fence_nb) {
+            opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data,
+                               fence_release, (void*)&active);
+            OMPI_WAIT_FOR_COMPLETION(active);
+        } else {
+            opal_pmix.fence(NULL, opal_pmix_collect_all_data);
+        }
     }
 
     /* check for timing request - get stop time and report elapsed
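
Note that the init path uses the non-lazy OMPI_WAIT_FOR_COMPLETION, versus the lazy variant in the finalize hunk. As a rough contrast, the two macros can be pictured like this (assumed shapes for illustration only; the real definitions live in the OMPI/OPAL headers):

    /* Assumed shapes, for contrast only: init spins hard on progress
     * (startup latency matters), finalize sleeps between progress calls. */
    #define OMPI_WAIT_FOR_COMPLETION(flag)      \
        while (flag) {                          \
            opal_progress();                    \
        }

    #define OMPI_LAZY_WAIT_FOR_COMPLETION(flag) \
        while (flag) {                          \
            opal_progress();                    \
            usleep(100);                        \
        }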

View file

@ -14,7 +14,7 @@
* Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved
* Copyright (c) 2015 Mellanox Technologies, Inc.
* All rights reserved.
* $COPYRIGHT$
@@ -65,6 +65,9 @@ char *ompi_mpi_show_mca_params_string = NULL;
 bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE);
 bool ompi_mpi_preconnect_mpi = false;
 
+bool ompi_async_mpi_init = false;
+bool ompi_async_mpi_finalize = false;
+
 #define OMPI_ADD_PROCS_CUTOFF_DEFAULT 0
 uint32_t ompi_add_procs_cutoff = OMPI_ADD_PROCS_CUTOFF_DEFAULT;
 bool ompi_mpi_dynamics_enabled = true;
@@ -282,6 +285,22 @@ int ompi_mpi_register_params(void)
                                  MCA_BASE_VAR_SCOPE_READONLY,
                                  &ompi_mpi_dynamics_enabled);
 
+    ompi_async_mpi_init = false;
+    (void) mca_base_var_register("ompi", "async", "mpi", "init",
+                                 "Do not perform a barrier at the end of MPI_Init",
+                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
+                                 OPAL_INFO_LVL_9,
+                                 MCA_BASE_VAR_SCOPE_READONLY,
+                                 &ompi_async_mpi_init);
+
+    ompi_async_mpi_finalize = false;
+    (void) mca_base_var_register("ompi", "async", "mpi", "finalize",
+                                 "Do not perform a barrier at the beginning of MPI_Finalize",
+                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
+                                 OPAL_INFO_LVL_9,
+                                 MCA_BASE_VAR_SCOPE_READONLY,
+                                 &ompi_async_mpi_finalize);
+
     value = mca_base_var_find ("opal", "opal", NULL, "abort_delay");
     if (0 <= value) {
         (void) mca_base_var_register_synonym(value, "ompi", "mpi", NULL, "abort_delay",
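
If needed, the registration can be sanity-checked via the variable index, mirroring the mca_base_var_find() call already in this hunk (a hypothetical check; per the framework_component_variable convention, the tuple above should yield the full name "async_mpi_init"):

    int idx = mca_base_var_find("ompi", "async", "mpi", "init");
    if (0 <= idx) {
        const mca_base_var_t *var = NULL;
        (void) mca_base_var_get(idx, &var);
        /* var->mbv_full_name is expected to read "async_mpi_init" */
    }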

View file

@@ -135,6 +135,13 @@ OMPI_DECLSPEC extern uint32_t ompi_add_procs_cutoff;
  */
 OMPI_DECLSPEC extern bool ompi_mpi_dynamics_enabled;
 
+/* EXPERIMENTAL: do not perform an RTE barrier at the end of MPI_Init */
+OMPI_DECLSPEC extern bool ompi_async_mpi_init;
+
+/* EXPERIMENTAL: do not perform an RTE barrier at the beginning of MPI_Finalize */
+OMPI_DECLSPEC extern bool ompi_async_mpi_finalize;
+
 /**
  * Register MCA parameters used by the MPI layer.
  *