Merge pull request #1740 from rhc54/topic/async
Add an experimental ability to skip the RTE barriers at the end of MPI_Init and the beginning of MPI_Finalize
Commit 3b68c1f8db
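The change makes the run-time environment (RTE) barriers at the end of MPI_Init and the beginning of MPI_Finalize conditional on two new, experimental MCA parameters that are registered later in this diff. A simple way to observe the effect is to time the two calls. The program below is an illustrative sketch, not part of this commit; the "--mca async_mpi_init 1 --mca async_mpi_finalize 1" spelling in the comment is an assumption inferred from the mca_base_var_register() calls further down and should be verified with ompi_info.

/* Illustrative sketch only -- not part of this commit.  Times MPI_Init and
 * MPI_Finalize so the effect of skipping the RTE barriers can be observed.
 * Assumed invocation (parameter names inferred from the registrations below,
 * verify with ompi_info):
 *     mpirun -n 4 --mca async_mpi_init 1 --mca async_mpi_finalize 1 ./timing
 */
#define _POSIX_C_SOURCE 199309L
#include <mpi.h>
#include <stdio.h>
#include <time.h>

static double now(void)
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return ts.tv_sec + 1e-9 * ts.tv_nsec;
}

int main(int argc, char **argv)
{
    double t0 = now();
    MPI_Init(&argc, &argv);            /* RTE barrier here may be skipped */
    double t1 = now();

    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    double t2 = now();
    MPI_Finalize();                    /* RTE barrier here may be skipped */
    double t3 = now();

    printf("rank %d: MPI_Init %.6f s, MPI_Finalize %.6f s\n",
           rank, t1 - t0, t3 - t2);
    return 0;
}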
@@ -246,26 +246,28 @@ int ompi_mpi_finalize(void)
        del_procs behavior around May of 2014 (see
        https://svn.open-mpi.org/trac/ompi/ticket/4669#comment:4 for
        more details). */
-    if (NULL != opal_pmix.fence_nb) {
-        active = true;
-        /* Note that use of the non-blocking PMIx fence will
-         * allow us to lazily cycle calling
-         * opal_progress(), which will allow any other pending
-         * communications/actions to complete.  See
-         * https://github.com/open-mpi/ompi/issues/1576 for the
-         * original bug report. */
-        opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active);
-        OMPI_LAZY_WAIT_FOR_COMPLETION(active);
-    } else {
-        /* However, we cannot guarantee that the provided PMIx has
-         * fence_nb.  If it doesn't, then do the best we can: an MPI
-         * barrier on COMM_WORLD (which isn't the best because of the
-         * reasons cited above), followed by a blocking PMIx fence
-         * (which does not call opal_progress()). */
-        ompi_communicator_t *comm = &ompi_mpi_comm_world.comm;
-        comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module);
-
-        opal_pmix.fence(NULL, 0);
+    if (!ompi_async_mpi_finalize) {
+        if (NULL != opal_pmix.fence_nb) {
+            active = true;
+            /* Note that use of the non-blocking PMIx fence will
+             * allow us to lazily cycle calling
+             * opal_progress(), which will allow any other pending
+             * communications/actions to complete.  See
+             * https://github.com/open-mpi/ompi/issues/1576 for the
+             * original bug report. */
+            opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active);
+            OMPI_LAZY_WAIT_FOR_COMPLETION(active);
+        } else {
+            /* However, we cannot guarantee that the provided PMIx has
+             * fence_nb.  If it doesn't, then do the best we can: an MPI
+             * barrier on COMM_WORLD (which isn't the best because of the
+             * reasons cited above), followed by a blocking PMIx fence
+             * (which does not call opal_progress()). */
+            ompi_communicator_t *comm = &ompi_mpi_comm_world.comm;
+            comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module);
+
+            opal_pmix.fence(NULL, 0);
+        }
     }

     /* check for timing request - get stop time and report elapsed
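Both hunks in this change rely on the same completion pattern around opal_pmix.fence_nb(): a volatile flag, a callback that clears it, and a loop that keeps driving the progress engine until the callback has run. The standalone sketch below illustrates that pattern with a stubbed progress function; fence_cbfunc/fence_release and the OMPI_LAZY_WAIT_FOR_COMPLETION / OMPI_WAIT_FOR_COMPLETION macros in the tree follow this general shape, but the names fake_progress and fence_done and the usleep() back-off are hypothetical, not copied from the Open MPI sources.

/* Illustrative sketch only: the completion-flag + progress-loop pattern
 * used around the non-blocking PMIx fence.  All names here are hypothetical. */
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static volatile bool active = true;

/* Completion callback: a non-blocking fence would invoke something of this
 * shape once every process has reached the fence. */
static void fence_done(int status, void *cbdata)
{
    (void)status;
    *(volatile bool *)cbdata = false;
}

/* Stand-in for the progress engine (opal_progress() in the real code);
 * pretend the fence completes after a few polls. */
static void fake_progress(void)
{
    static int calls = 0;
    if (++calls == 5) {
        fence_done(0, (void *)&active);
    }
}

int main(void)
{
    /* "Lazy" wait: keep progressing, but back off briefly between polls so
     * other pending work can complete without spinning a core. */
    while (active) {
        fake_progress();
        usleep(100);
    }
    printf("fence complete\n");
    return 0;
}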
@@ -819,14 +819,15 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
     /* wait for everyone to reach this point - this is a hard
      * barrier requirement at this time, though we hope to relax
      * it at a later point */
-    active = true;
     opal_pmix.commit();
-    if (NULL != opal_pmix.fence_nb) {
-        opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data,
-                           fence_release, (void*)&active);
-        OMPI_WAIT_FOR_COMPLETION(active);
-    } else {
-        opal_pmix.fence(NULL, opal_pmix_collect_all_data);
+    if (!ompi_async_mpi_init) {
+        active = true;
+        if (NULL != opal_pmix.fence_nb) {
+            opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data,
+                               fence_release, (void*)&active);
+            OMPI_WAIT_FOR_COMPLETION(active);
+        } else {
+            opal_pmix.fence(NULL, opal_pmix_collect_all_data);
+        }
     }

     /* check for timing request - get stop time and report elapsed
@@ -14,7 +14,7 @@
  * Copyright (c) 2007-2015 Los Alamos National Security, LLC.  All rights
  *                         reserved.
  * Copyright (c) 2013      NVIDIA Corporation.  All rights reserved.
- * Copyright (c) 2013-2014 Intel, Inc. All rights reserved
+ * Copyright (c) 2013-2016 Intel, Inc. All rights reserved
  * Copyright (c) 2015      Mellanox Technologies, Inc.
  *                         All rights reserved.
  * $COPYRIGHT$
@@ -65,6 +65,9 @@ char *ompi_mpi_show_mca_params_string = NULL;
 bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE);
 bool ompi_mpi_preconnect_mpi = false;

+bool ompi_async_mpi_init = false;
+bool ompi_async_mpi_finalize = false;
+
 #define OMPI_ADD_PROCS_CUTOFF_DEFAULT 0
 uint32_t ompi_add_procs_cutoff = OMPI_ADD_PROCS_CUTOFF_DEFAULT;
 bool ompi_mpi_dynamics_enabled = true;
@@ -282,6 +285,22 @@ int ompi_mpi_register_params(void)
                                  MCA_BASE_VAR_SCOPE_READONLY,
                                  &ompi_mpi_dynamics_enabled);

+    ompi_async_mpi_init = false;
+    (void) mca_base_var_register("ompi", "async", "mpi", "init",
+                                 "Do not perform a barrier at the end of MPI_Init",
+                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
+                                 OPAL_INFO_LVL_9,
+                                 MCA_BASE_VAR_SCOPE_READONLY,
+                                 &ompi_async_mpi_init);
+
+    ompi_async_mpi_finalize = false;
+    (void) mca_base_var_register("ompi", "async", "mpi", "finalize",
+                                 "Do not perform a barrier at the beginning of MPI_Finalize",
+                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
+                                 OPAL_INFO_LVL_9,
+                                 MCA_BASE_VAR_SCOPE_READONLY,
+                                 &ompi_async_mpi_finalize);
+
     value = mca_base_var_find ("opal", "opal", NULL, "abort_delay");
     if (0 <= value) {
         (void) mca_base_var_register_synonym(value, "ompi", "mpi", NULL, "abort_delay",
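Because the two flags are registered as ordinary read-only MCA variables, they can presumably also be set through Open MPI's usual OMPI_MCA_<name> environment-variable convention (or on the mpirun command line) before the library initializes. The snippet below is a sketch under that assumption; the user-visible names async_mpi_init and async_mpi_finalize are inferred from the framework/component/variable triples passed to mca_base_var_register() above and are not confirmed anywhere else in this diff.

/* Illustrative sketch only: sets the (assumed) parameter names through the
 * standard OMPI_MCA_ environment-variable convention before MPI starts.
 * In practice these would more commonly be exported in the launch
 * environment or passed to mpirun with --mca. */
#define _POSIX_C_SOURCE 200112L
#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    /* Must happen before MPI_Init so the MCA variable system sees them. */
    setenv("OMPI_MCA_async_mpi_init", "1", 1);      /* skip barrier at end of MPI_Init */
    setenv("OMPI_MCA_async_mpi_finalize", "1", 1);  /* skip barrier at start of MPI_Finalize */

    MPI_Init(&argc, &argv);
    MPI_Finalize();
    return 0;
}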
@@ -135,6 +135,13 @@ OMPI_DECLSPEC extern uint32_t ompi_add_procs_cutoff;
  */
 OMPI_DECLSPEC extern bool ompi_mpi_dynamics_enabled;

+/* EXPERIMENTAL: do not perform an RTE barrier at the end of MPI_Init */
+OMPI_DECLSPEC extern bool ompi_async_mpi_init;
+
+/* EXPERIMENTAL: do not perform an RTE barrier at the beginning of MPI_Finalize */
+OMPI_DECLSPEC extern bool ompi_async_mpi_finalize;
+
+
 /**
  * Register MCA parameters used by the MPI layer.
  *