1
1

Merge pull request #5234 from jsquyres/pr/oshmem-init-race

ompi_mpi_init: fix race condition
Этот коммит содержится в:
bosilca 2018-06-06 12:14:00 -04:00 коммит произвёл GitHub
родитель 356947fead 9b9cb5fef0
Коммит fa1386768f
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
5 изменённых файлов: 42 добавлений и 32 удалений

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -63,9 +63,9 @@ int MPI_Init(int *argc, char ***argv)
don't lose anything) */
if (NULL != argc && NULL != argv) {
err = ompi_mpi_init(*argc, *argv, required, &provided);
err = ompi_mpi_init(*argc, *argv, required, &provided, false);
} else {
err = ompi_mpi_init(0, NULL, required, &provided);
err = ompi_mpi_init(0, NULL, required, &provided, false);
}
/* Since we don't have a communicator to invoke an errorhandler on

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2010 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
@ -63,9 +63,9 @@ int MPI_Init_thread(int *argc, char ***argv, int required,
don't lose anything) */
if (NULL != argc && NULL != argv) {
err = ompi_mpi_init(*argc, *argv, required, provided);
err = ompi_mpi_init(*argc, *argv, required, provided, false);
} else {
err = ompi_mpi_init(0, NULL, required, provided);
err = ompi_mpi_init(0, NULL, required, provided, false);
}
/* Since we don't have a communicator to invoke an errorhandler on

Просмотреть файл

@ -175,6 +175,8 @@ void ompi_mpi_thread_level(int requested, int *provided);
* @param argv argv, typically from main() (IN)
* @param requested Thread support that is requested (IN)
* @param provided Thread support that is provided (OUT)
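* @param reinit_ok If true and ompi_mpi_init() has already been called,
* wait for that initialization to complete and return successfully (with no error); calling after MPI has been finalized is still an error (IN)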
*
* @returns MPI_SUCCESS if successful
* @returns Error code if unsuccessful
@ -186,7 +188,8 @@ void ompi_mpi_thread_level(int requested, int *provided);
*
* It is permissible to pass in (0, NULL) for (argc, argv).
*/
int ompi_mpi_init(int argc, char **argv, int requested, int *provided);
int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
bool reinit_ok);
/**
* Finalize the Open MPI MPI environment

Просмотреть файл

@ -368,7 +368,8 @@ static void fence_release(int status, void *cbdata)
OPAL_POST_OBJECT(active);
}
int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
bool reinit_ok)
{
int ret;
ompi_proc_t** procs;
@ -384,28 +385,36 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
ompi_hook_base_mpi_init_top(argc, argv, requested, provided);
/* Ensure that we were not already initialized or finalized.
This lock is held for the duration of ompi_mpi_init() and
ompi_mpi_finalize(). Hence, if we get it, then no other thread
is inside the critical section (and we don't have to check the
*_started bool variables). */
opal_atomic_rmb();
int32_t state = ompi_mpi_state;
if (state >= OMPI_MPI_STATE_FINALIZE_STARTED) {
opal_show_help("help-mpi-runtime.txt",
"mpi_init: already finalized", true);
return MPI_ERR_OTHER;
} else if (state >= OMPI_MPI_STATE_INIT_STARTED) {
opal_show_help("help-mpi-runtime.txt",
"mpi_init: invoked multiple times", true);
return MPI_ERR_OTHER;
}
/* Indicate that we have *started* MPI_INIT* */
/* Ensure that we were not already initialized or finalized. */
int32_t expected = OMPI_MPI_STATE_NOT_INITIALIZED;
int32_t desired = OMPI_MPI_STATE_INIT_STARTED;
opal_atomic_wmb();
opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_INIT_STARTED);
if (!opal_atomic_compare_exchange_strong_32(&ompi_mpi_state, &expected,
desired)) {
// If we failed to atomically transition ompi_mpi_state from
// NOT_INITIALIZED to INIT_STARTED, then someone else already
// did that, and we should return.
if (expected >= OMPI_MPI_STATE_FINALIZE_STARTED) {
opal_show_help("help-mpi-runtime.txt",
"mpi_init: already finalized", true);
return MPI_ERR_OTHER;
} else if (expected >= OMPI_MPI_STATE_INIT_STARTED) {
// In some cases (e.g., oshmem_shmem_init()), we may call
// ompi_mpi_init() multiple times. In such cases, just
// silently return successfully once the initializing
// thread has completed.
if (reinit_ok) {
while (ompi_mpi_state < OMPI_MPI_STATE_INIT_COMPLETED) {
usleep(1);
}
return MPI_SUCCESS;
}
opal_show_help("help-mpi-runtime.txt",
"mpi_init: invoked multiple times", true);
return MPI_ERR_OTHER;
}
}
/* Figure out the final MPI thread levels. If we were not
compiled for support for MPI threads, then don't allow

Просмотреть файл

@ -147,9 +147,7 @@ int oshmem_shmem_init(int argc, char **argv, int requested, int *provided)
OMPI_TIMING_INIT(32);
if (!oshmem_shmem_initialized) {
if (ompi_mpi_state < OMPI_MPI_STATE_INIT_COMPLETED) {
ret = ompi_mpi_init(argc, argv, requested, provided);
}
ret = ompi_mpi_init(argc, argv, requested, provided, true);
OMPI_TIMING_NEXT("ompi_mpi_init");
if (OSHMEM_SUCCESS != ret) {