Merge pull request #1740 from rhc54/topic/async
Add an experimental ability to skip the RTE barriers at the end of MPI_Init and the beginning of MPI_Finalize
Commit 3b68c1f8db
@@ -246,26 +246,28 @@ int ompi_mpi_finalize(void)
        del_procs behavior around May of 2014 (see
        https://svn.open-mpi.org/trac/ompi/ticket/4669#comment:4 for
        more details). */
-    if (NULL != opal_pmix.fence_nb) {
-        active = true;
-        /* Note that use of the non-blocking PMIx fence will
-         * allow us to lazily cycle calling
-         * opal_progress(), which will allow any other pending
-         * communications/actions to complete.  See
-         * https://github.com/open-mpi/ompi/issues/1576 for the
-         * original bug report. */
-        opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active);
-        OMPI_LAZY_WAIT_FOR_COMPLETION(active);
-    } else {
-        /* However, we cannot guarantee that the provided PMIx has
-         * fence_nb.  If it doesn't, then do the best we can: an MPI
-         * barrier on COMM_WORLD (which isn't the best because of the
-         * reasons cited above), followed by a blocking PMIx fence
-         * (which does not call opal_progress()). */
-        ompi_communicator_t *comm = &ompi_mpi_comm_world.comm;
-        comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module);
-
-        opal_pmix.fence(NULL, 0);
+    if (!ompi_async_mpi_finalize) {
+        if (NULL != opal_pmix.fence_nb) {
+            active = true;
+            /* Note that use of the non-blocking PMIx fence will
+             * allow us to lazily cycle calling
+             * opal_progress(), which will allow any other pending
+             * communications/actions to complete.  See
+             * https://github.com/open-mpi/ompi/issues/1576 for the
+             * original bug report. */
+            opal_pmix.fence_nb(NULL, 0, fence_cbfunc, (void*)&active);
+            OMPI_LAZY_WAIT_FOR_COMPLETION(active);
+        } else {
+            /* However, we cannot guarantee that the provided PMIx has
+             * fence_nb.  If it doesn't, then do the best we can: an MPI
+             * barrier on COMM_WORLD (which isn't the best because of the
+             * reasons cited above), followed by a blocking PMIx fence
+             * (which does not call opal_progress()). */
+            ompi_communicator_t *comm = &ompi_mpi_comm_world.comm;
+            comm->c_coll.coll_barrier(comm, comm->c_coll.coll_barrier_module);
+
+            opal_pmix.fence(NULL, 0);
+        }
     }
 
     /* check for timing request - get stop time and report elapsed
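The heart of this hunk is the completion-flag idiom: arm a volatile flag, kick off the non-blocking fence with a callback that clears the flag, then spin on opal_progress() so pending communication can drain while we wait. Below is a minimal self-contained sketch of that pattern in plain C; every name in it (fake_progress, pending_events) is an illustrative stand-in, not the real OMPI/PMIx API.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins only -- not the real OMPI/PMIx machinery. */
static volatile bool active;      /* mirrors 'active' in the diff    */
static int pending_events = 3;    /* pretend outstanding work items  */

/* Stand-in for opal_progress(): retires one event per call and, once
 * everything has drained, fires the "fence complete" callback (here
 * inlined as clearing the flag, as fence_cbfunc does with 'active'). */
static void fake_progress(void)
{
    if (pending_events > 0 && 0 == --pending_events) {
        active = false;
    }
}

int main(void)
{
    active = true;        /* arm the flag before starting the fence */
    while (active) {      /* roughly OMPI_LAZY_WAIT_FOR_COMPLETION: */
        fake_progress();  /* keep progressing until the completion  */
    }                     /* callback flips the flag                */
    printf("fence complete; pending work drained\n");
    return 0;
}

The payoff over a blocking fence is that each loop iteration gives the progress engine a chance to retire unrelated outstanding operations, which is exactly the issue #1576 fix the comment refers to.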
@@ -819,14 +819,15 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
     /* wait for everyone to reach this point - this is a hard
      * barrier requirement at this time, though we hope to relax
      * it at a later point */
-    active = true;
-    opal_pmix.commit();
-    if (NULL != opal_pmix.fence_nb) {
-        opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data,
-                           fence_release, (void*)&active);
-        OMPI_WAIT_FOR_COMPLETION(active);
-    } else {
-        opal_pmix.fence(NULL, opal_pmix_collect_all_data);
+    if (!ompi_async_mpi_init) {
+        active = true;
+        if (NULL != opal_pmix.fence_nb) {
+            opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data,
+                               fence_release, (void*)&active);
+            OMPI_WAIT_FOR_COMPLETION(active);
+        } else {
+            opal_pmix.fence(NULL, opal_pmix_collect_all_data);
+        }
     }
 
     /* check for timing request - get stop time and report elapsed
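Note the asymmetry between the two paths: MPI_Init waits with OMPI_WAIT_FOR_COMPLETION while MPI_Finalize uses OMPI_LAZY_WAIT_FOR_COMPLETION. Assuming the usual shape of these macros (a hard busy-poll versus a poll with a short back-off between iterations), the difference is roughly the sketch below; this is written under that assumption and is not the verbatim OMPI definitions.

#include <stdbool.h>
#include <unistd.h>   /* usleep */

/* Hedged sketches of the two wait flavors; the real OMPI macros live
 * in the runtime headers and may differ in detail. 'progress' is
 * whatever advances outstanding work -- opal_progress() in OMPI. */
#define SKETCH_WAIT_FOR_COMPLETION(flag, progress)       \
    do { while (flag) { progress(); } } while (0)

#define SKETCH_LAZY_WAIT_FOR_COMPLETION(flag, progress)  \
    do { while (flag) { progress(); usleep(100); } } while (0)

/* Tiny demo harness (illustrative only). */
static volatile bool busy = true;
static int steps = 5;
static void demo_progress(void) { if (0 == --steps) busy = false; }

int main(void)
{
    SKETCH_LAZY_WAIT_FOR_COMPLETION(busy, demo_progress);
    return 0;
}

A plausible reading of the choice: during startup, latency matters and nothing else is running yet, so polling hard is cheap; during teardown, easing off the CPU between progress calls costs little.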
@@ -14,7 +14,7 @@
  * Copyright (c) 2007-2015 Los Alamos National Security, LLC.  All rights
  *                         reserved.
  * Copyright (c) 2013      NVIDIA Corporation.  All rights reserved.
- * Copyright (c) 2013-2014 Intel, Inc. All rights reserved
+ * Copyright (c) 2013-2016 Intel, Inc. All rights reserved
  * Copyright (c) 2015      Mellanox Technologies, Inc.
  *                         All rights reserved.
  * $COPYRIGHT$
@@ -65,6 +65,9 @@ char *ompi_mpi_show_mca_params_string = NULL;
 bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE);
 bool ompi_mpi_preconnect_mpi = false;
 
+bool ompi_async_mpi_init = false;
+bool ompi_async_mpi_finalize = false;
+
 #define OMPI_ADD_PROCS_CUTOFF_DEFAULT 0
 uint32_t ompi_add_procs_cutoff = OMPI_ADD_PROCS_CUTOFF_DEFAULT;
 bool ompi_mpi_dynamics_enabled = true;
@@ -282,6 +285,22 @@ int ompi_mpi_register_params(void)
                                  MCA_BASE_VAR_SCOPE_READONLY,
                                  &ompi_mpi_dynamics_enabled);
 
+    ompi_async_mpi_init = false;
+    (void) mca_base_var_register("ompi", "async", "mpi", "init",
+                                 "Do not perform a barrier at the end of MPI_Init",
+                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
+                                 OPAL_INFO_LVL_9,
+                                 MCA_BASE_VAR_SCOPE_READONLY,
+                                 &ompi_async_mpi_init);
+
+    ompi_async_mpi_finalize = false;
+    (void) mca_base_var_register("ompi", "async", "mpi", "finalize",
+                                 "Do not perform a barrier at the beginning of MPI_Finalize",
+                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
+                                 OPAL_INFO_LVL_9,
+                                 MCA_BASE_VAR_SCOPE_READONLY,
+                                 &ompi_async_mpi_finalize);
+
     value = mca_base_var_find ("opal", "opal", NULL, "abort_delay");
     if (0 <= value) {
         (void) mca_base_var_register_synonym(value, "ompi", "mpi", NULL, "abort_delay",
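If the standard MCA naming scheme (framework_component_variable) applies to the register calls above, the two flags should surface as async_mpi_init and async_mpi_finalize -- an inference from the registration arguments, not something this diff states. Enabling the experiment for a run would then look something like:

    shell$ mpirun --mca async_mpi_init 1 --mca async_mpi_finalize 1 ./my_app

Both variables default to false, so applications keep the existing hard-barrier semantics unless they explicitly opt in.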
@@ -135,6 +135,13 @@ OMPI_DECLSPEC extern uint32_t ompi_add_procs_cutoff;
  */
 OMPI_DECLSPEC extern bool ompi_mpi_dynamics_enabled;
 
+/* EXPERIMENTAL: do not perform an RTE barrier at the end of MPI_Init */
+OMPI_DECLSPEC extern bool ompi_async_mpi_init;
+
+/* EXPERIMENTAL: do not perform an RTE barrier at the beginning of MPI_Finalize */
+OMPI_DECLSPEC extern bool ompi_async_mpi_finalize;
+
+
 /**
  * Register MCA parameters used by the MPI layer.
  *