1
1

mpi/finalized: revamp INITIALIZED/FINALIZED

Per MPI-3.1:8.7.1 p361:11-13, it's valid for MPI_FINALIZED to be
invoked during an attribute destruction callback (e.g., during the
destruction of keyvals on MPI_COMM_SELF during the very beginning of
MPI_FINALIZE).  In such cases, MPI_FINALIZED must return "false".

Prior to this commit, MPI_FINALIZED would hang if it was invoked during
a COMM_SELF attribute destruction callback in MPI_FINALIZE.  See
https://github.com/open-mpi/ompi/issues/5084.

This commit converts the MPI_INITIALIZED / MPI_FINALIZED
infrastructure to use a single enum (ompi_mpi_state, set atomically)
to represent the state of MPI:

- not initialized
- init started
- init completed
- finalize started
- finalize past COMM_SELF destruction
- finalize completed

The "finalize past COMM_SELF destruction" state is what allows us to
return "false" from MPI_FINALIZED before COMM_SELF has been fully
destroyed / all attribute callbacks have been invoked.

Since this state is checked at nearly every MPI API call (to see if
we're outside of the INIT/FINALIZE epoch), care was taken to use
atomics to *set* the ompi_mpi_state value in ompi_mpi_init() and
ompi_mpi_finalize(), but performance-critical code paths can simply
read the variable without needing to use a slow call to an
opal_atomic_*() function.

Thanks to @AndrewGaspar for reporting the issue.

Signed-off-by: Jeff Squyres <jsquyres@cisco.com>
Этот коммит содержится в:
Jeff Squyres 2018-04-24 09:57:12 -07:00
родитель 0d66e02179
Коммит 35438ae9b5
19 изменённых файлов: 135 добавлений и 100 удалений

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
@ -193,11 +193,22 @@ struct ompi_request_t;
* This macro directly invokes the ompi_mpi_errors_are_fatal_handler()
* when an error occurs because MPI_COMM_WORLD does not exist (because
* we're before MPI_Init() or after MPI_Finalize()).
*
* NOTE: The ompi_mpi_state variable is a volatile that is set
* atomically in ompi_mpi_init() and ompi_mpi_finalize(). The
* appropriate memory barriers are done in those 2 functions such that
* we do not need to do a read memory barrier here (in
* potentially-performance-critical code paths) before reading the
* variable.
*/
#define OMPI_ERR_INIT_FINALIZE(name) \
if( OPAL_UNLIKELY(!ompi_mpi_initialized || ompi_mpi_finalized) ) { \
ompi_mpi_errors_are_fatal_comm_handler(NULL, NULL, name); \
}
/* Definition based on the single ompi_mpi_state variable: take one
 * snapshot of ompi_mpi_state and invoke
 * ompi_mpi_errors_are_fatal_comm_handler(NULL, NULL, name) when that
 * snapshot is below OMPI_MPI_STATE_INIT_COMPLETED or above
 * OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT.
 *
 * NOTE(review): the upper bound is a strict '>', so the handler is
 * NOT invoked while the state equals
 * OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT, whereas several
 * other checks use '>=' against the same constant -- confirm that
 * this difference is intentional.
 *
 * The expansion is a bare { ... } block that declares a local
 * variable named "state". */
#define OMPI_ERR_INIT_FINALIZE(name) \
{ \
int32_t state = ompi_mpi_state; \
if (OPAL_UNLIKELY(state < OMPI_MPI_STATE_INIT_COMPLETED || \
state > OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT)) { \
ompi_mpi_errors_are_fatal_comm_handler(NULL, NULL, name); \
} \
}
/**
* This is the macro to invoke to directly invoke an MPI error

Просмотреть файл

@ -10,7 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 University of Houston. All rights reserved.
* Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC.
@ -149,7 +149,8 @@ void ompi_mpi_errors_return_win_handler(struct ompi_win_t **win,
static void out(char *str, char *arg)
{
if (ompi_rte_initialized && !ompi_mpi_finalized) {
if (ompi_rte_initialized &&
ompi_mpi_state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
if (NULL != arg) {
opal_output(0, str, arg);
} else {
@ -280,7 +281,9 @@ static void backend_fatal_no_aggregate(char *type,
{
char *arg;
assert(!ompi_mpi_initialized || ompi_mpi_finalized);
int32_t state = ompi_mpi_state;
assert(state < OMPI_MPI_STATE_INIT_COMPLETED ||
state >= OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT);
fflush(stdout);
fflush(stderr);
@ -289,7 +292,7 @@ static void backend_fatal_no_aggregate(char *type,
/* Per #2152, print out in plain english if something was invoked
before MPI_INIT* or after MPI_FINALIZE */
if (!ompi_mpi_init_started && !ompi_mpi_initialized) {
if (state < OMPI_MPI_STATE_INIT_STARTED) {
if (NULL != arg) {
out("*** The %s() function was called before MPI_INIT was invoked.\n"
"*** This is disallowed by the MPI standard.\n", arg);
@ -300,7 +303,7 @@ static void backend_fatal_no_aggregate(char *type,
"*** function was invoked, sorry. :-(\n", NULL);
}
out("*** Your MPI job will now abort.\n", NULL);
} else if (ompi_mpi_finalized) {
} else if (state >= OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
if (NULL != arg) {
out("*** The %s() function was called after MPI_FINALIZE was invoked.\n"
"*** This is disallowed by the MPI standard.\n", arg);

Просмотреть файл

@ -2,6 +2,7 @@
* Copyright (c) 2011 Mellanox Technologies. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -159,7 +160,7 @@ int mca_coll_fca_barrier(struct ompi_communicator_t *comm,
int ret;
FCA_VERBOSE(5,"Using FCA Barrier");
if (OPAL_UNLIKELY(ompi_mpi_finalize_started)) {
if (OPAL_UNLIKELY(ompi_mpi_state >= OMPI_MPI_STATE_FINALIZE_STARTED)) {
FCA_VERBOSE(5, "In finalize, reverting to previous barrier");
goto orig_barrier;
}

Просмотреть файл

@ -4,6 +4,7 @@
* Copyright (c) 2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -241,7 +242,7 @@ static int mca_coll_hcoll_module_enable(mca_coll_base_module_t *module,
int mca_coll_hcoll_progress(void)
{
if (ompi_mpi_finalized){
if (ompi_mpi_state >= OMPI_MPI_STATE_FINALIZE_STARTED) {
hcoll_rte_p2p_disabled_notify();
}

Просмотреть файл

@ -2,6 +2,7 @@
Copyright (c) 2011 Mellanox Technologies. All rights reserved.
Copyright (c) 2015 Research Organization for Information Science
and Technology (RIST). All rights reserved.
Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
$COPYRIGHT$
Additional copyrights may follow
@ -21,7 +22,7 @@ int mca_coll_hcoll_barrier(struct ompi_communicator_t *comm,
mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module;
HCOL_VERBOSE(20,"RUNNING HCOL BARRIER");
if (OPAL_UNLIKELY(ompi_mpi_finalize_started)) {
if (OPAL_UNLIKELY(ompi_mpi_state >= OMPI_MPI_STATE_FINALIZE_STARTED)) {
HCOL_VERBOSE(5, "In finalize, reverting to previous barrier");
goto orig_barrier;
}

Просмотреть файл

@ -12,6 +12,7 @@
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -69,7 +70,7 @@ mca_io_romio314_file_close (ompi_file_t *fh)
which we obviously can't do if we've started to MPI_Finalize).
The user didn't close the file, so they should expect
unexpected behavior. */
if (ompi_mpi_finalized) {
if (ompi_mpi_state >= OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
return OMPI_SUCCESS;
}

Просмотреть файл

@ -2,6 +2,7 @@
* Copyright (C) 2001-2011 Mellanox Technologies Ltd. ALL RIGHTS RESERVED.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -265,7 +266,7 @@ int mca_pml_yalla_del_procs(struct ompi_proc_t **procs, size_t nprocs)
{
size_t i;
if (ompi_mpi_finalized) {
if (ompi_mpi_state >= OMPI_MPI_STATE_FINALIZE_STARTED) {
PML_YALLA_VERBOSE(3, "%s", "using bulk powerdown");
mxm_ep_powerdown(ompi_pml_yalla.mxm_ep);
}

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2015 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
@ -44,13 +44,7 @@ int MPI_Finalized(int *flag)
ompi_hook_base_mpi_finalized_top(flag);
/* We must obtain the lock to guarantee consistent values of
ompi_mpi_initialized and ompi_mpi_finalized. Note, too, that
this lock is held for the bulk of the duration of
ompi_mpi_init() and ompi_mpi_finalize(), so when we get the
lock, we are guaranteed that some other thread is not part way
through initialization or finalization. */
opal_mutex_lock(&ompi_mpi_bootstrap_mutex);
int32_t state = ompi_mpi_state;
if (MPI_PARAM_CHECK) {
if (NULL == flag) {
@ -59,12 +53,11 @@ int MPI_Finalized(int *flag)
whether we're currently (after MPI_Init and before
MPI_Finalize) or not */
if (ompi_mpi_initialized && !ompi_mpi_finalized) {
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
if (state >= OMPI_MPI_STATE_INIT_COMPLETED &&
state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
FUNC_NAME);
} else {
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
/* We have no MPI object here so call ompi_errhandler_invoke
* directly */
return ompi_errhandler_invoke(NULL, NULL, -1,
@ -74,8 +67,7 @@ int MPI_Finalized(int *flag)
}
}
*flag = ompi_mpi_finalized;
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
*flag = (state >= OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT);
ompi_hook_base_mpi_finalized_bottom(flag);

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved
@ -58,7 +58,9 @@ int MPI_Get_library_version(char *version, int *resultlen)
(i.e., use a NULL communicator, which will end up at the
default errhandler, which is abort). */
if (ompi_mpi_initialized && !ompi_mpi_finalized) {
int32_t state = ompi_mpi_state;
if (state >= OMPI_MPI_STATE_INIT_COMPLETED &&
state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
FUNC_NAME);
} else {

Просмотреть файл

@ -12,6 +12,7 @@
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -54,7 +55,9 @@ int MPI_Get_version(int *version, int *subversion)
(i.e., use a NULL communicator, which will end up at the
default errhandler, which is abort). */
if (ompi_mpi_initialized && !ompi_mpi_finalized) {
int32_t state = ompi_mpi_state;
if (state >= OMPI_MPI_STATE_INIT_COMPLETED &&
state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
FUNC_NAME);
} else {

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2015 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
@ -44,13 +44,7 @@ int MPI_Initialized(int *flag)
ompi_hook_base_mpi_initialized_top(flag);
/* We must obtain the lock to guarantee consistent values of
ompi_mpi_initialized and ompi_mpi_finalized. Note, too, that
this lock is held for the bulk of the duration of
ompi_mpi_init() and ompi_mpi_finalize(), so when we get the
lock, we are guaranteed that some other thread is not part way
through initialization or finalization. */
opal_mutex_lock(&ompi_mpi_bootstrap_mutex);
int32_t state = ompi_mpi_state;
if (MPI_PARAM_CHECK) {
if (NULL == flag) {
@ -59,12 +53,11 @@ int MPI_Initialized(int *flag)
whether we're currently (after MPI_Init and before
MPI_Finalize) or not */
if (ompi_mpi_initialized && !ompi_mpi_finalized) {
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
if (state >= OMPI_MPI_STATE_INIT_COMPLETED &&
state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
FUNC_NAME);
} else {
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
/* We have no MPI object here so call ompi_errhandler_invoke
* directly */
return ompi_errhandler_invoke(NULL, NULL, -1,
@ -74,8 +67,7 @@ int MPI_Initialized(int *flag)
}
}
*flag = ompi_mpi_initialized;
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
*flag = (state >= OMPI_MPI_STATE_INIT_COMPLETED);
ompi_hook_base_mpi_initialized_bottom(flag);

Просмотреть файл

@ -2,7 +2,7 @@
/*
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
@ -39,7 +39,9 @@ int MPI_T_finalize (void)
if (0 == --ompi_mpit_init_count) {
(void) ompi_info_close_components ();
if ((!ompi_mpi_initialized || ompi_mpi_finalized) &&
int32_t state = ompi_mpi_state;
if ((state < OMPI_MPI_STATE_INIT_COMPLETED ||
state >= OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) &&
(NULL != ompi_mpi_main_thread)) {
/* we are not between MPI_Init and MPI_Finalize so we
* have to free the ompi_mpi_main_thread */

Просмотреть файл

@ -4,6 +4,7 @@
* reserved.
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -64,8 +65,11 @@ const int PERUSE_num_events = (sizeof(PERUSE_events) / sizeof(peruse_event_assoc
int PERUSE_Init (void)
{
if (MPI_PARAM_CHECK) {
if (!ompi_mpi_initialized || ompi_mpi_finalized)
int32_t state = ompi_mpi_state;
if (state < OMPI_MPI_STATE_INIT_COMPLETED ||
state >= OMPI_MPI_STATE_FINALIZE_STARTED) {
return PERUSE_ERR_INIT;
}
}
ompi_peruse_init ();
return PERUSE_SUCCESS;

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
@ -51,15 +51,9 @@ struct ompi_predefined_datatype_t;
/** Mutex to protect all the _init and _finalize variables */
OMPI_DECLSPEC extern opal_mutex_t ompi_mpi_bootstrap_mutex;
/** Did MPI start to initialize? */
OMPI_DECLSPEC extern volatile bool ompi_mpi_init_started;
OMPI_DECLSPEC extern volatile int32_t ompi_mpi_state;
/** Has the RTE been initialized? */
OMPI_DECLSPEC extern volatile bool ompi_rte_initialized;
/** Is MPI fully initialized? */
OMPI_DECLSPEC extern volatile bool ompi_mpi_initialized;
/** Did MPI start to finalize? */
OMPI_DECLSPEC extern volatile bool ompi_mpi_finalize_started;
/** Has MPI been fully finalized? */
OMPI_DECLSPEC extern volatile bool ompi_mpi_finalized;
/** Do we have multiple threads? */
OMPI_DECLSPEC extern bool ompi_mpi_thread_multiple;
@ -70,6 +64,29 @@ OMPI_DECLSPEC extern int ompi_mpi_thread_provided;
/** Identifier of the main thread */
OMPI_DECLSPEC extern struct opal_thread_t *ompi_mpi_main_thread;
/*
* State of the MPI runtime.
*
* Atomically set/read in the ompi_mpi_state global variable (for
* functions such as MPI_INITIALIZED and MPI_FINALIZED).
*/
/* The enumerators are listed in the order in which the states are
   entered.  Code that reads ompi_mpi_state compares it against these
   constants with < and >=, so the relative order must be kept. */
typedef enum {
/* MPI_INIT has not been started; this is the initial value of
   ompi_mpi_state */
OMPI_MPI_STATE_NOT_INITIALIZED = 0,
/* ompi_mpi_init() has started but has not yet finished */
OMPI_MPI_STATE_INIT_STARTED,
/* ompi_mpi_init() has finished; from this state onwards
   MPI_INITIALIZED reports true */
OMPI_MPI_STATE_INIT_COMPLETED,
/* The PAST_COMM_SELF_DESTRUCT state is needed because
   MPI_FINALIZED must still return FALSE when it is called from an
   attribute callback that is invoked during the very beginning of
   MPI_FINALIZE.  Hence, we need to distinguish between "We've
   started MPI_FINALIZE" and "We're far enough in MPI_FINALIZE
   that we now need to return TRUE from MPI_FINALIZED." */
OMPI_MPI_STATE_FINALIZE_STARTED,
/* From this state onwards MPI_FINALIZED reports true */
OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT,
/* Set at the end of ompi_mpi_finalize() */
OMPI_MPI_STATE_FINALIZE_COMPLETED
} ompi_mpi_state_t;
/*
* These variables are for the MPI F03 bindings (F03 must bind Fortran
* variables to symbols; it cannot bind Fortran variables to the

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -166,16 +166,20 @@ ompi_mpi_abort(struct ompi_communicator_t* comm,
/* If the RTE isn't setup yet/any more, then don't even try
killing everyone. Sorry, Charlie... */
int32_t state = ompi_mpi_state;
if (!ompi_rte_initialized) {
fprintf(stderr, "[%s:%05d] Local abort %s completed successfully, but am not able to aggregate error messages, and not able to guarantee that all other processes were killed!\n",
host, (int) pid, ompi_mpi_finalized ?
host, (int) pid,
state >= OMPI_MPI_STATE_FINALIZE_STARTED ?
"after MPI_FINALIZE started" : "before MPI_INIT completed");
_exit(errcode == 0 ? 1 : errcode);
}
/* If OMPI is initialized and we have a non-NULL communicator,
then try to kill just that set of processes */
if (ompi_mpi_initialized && !ompi_mpi_finalized && NULL != comm) {
if (state >= OMPI_MPI_STATE_INIT_COMPLETED &&
state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT &&
NULL != comm) {
try_kill_peers(comm, errcode);
}

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006 University of Houston. All rights reserved.
@ -115,17 +115,9 @@ int ompi_mpi_finalize(void)
ompi_hook_base_mpi_finalize_top();
/* Be a bit social if an erroneous program calls MPI_FINALIZE in
two different threads, otherwise we may deadlock in
ompi_comm_free() (or run into other nasty lions, tigers, or
bears).
This lock is held for the duration of ompi_mpi_init() and
ompi_mpi_finalize(). Hence, if we get it, then no other thread
is inside the critical section (and we don't have to check the
*_started bool variables). */
opal_mutex_lock(&ompi_mpi_bootstrap_mutex);
if (!ompi_mpi_initialized || ompi_mpi_finalized) {
int32_t state = ompi_mpi_state;
if (state < OMPI_MPI_STATE_INIT_COMPLETED ||
state >= OMPI_MPI_STATE_FINALIZE_STARTED) {
/* Note that if we're not initialized or already finalized, we
cannot raise an MPI exception. The best that we can do is
write something to stderr. */
@ -133,19 +125,19 @@ int ompi_mpi_finalize(void)
pid_t pid = getpid();
gethostname(hostname, sizeof(hostname));
if (ompi_mpi_initialized) {
if (state < OMPI_MPI_STATE_INIT_COMPLETED) {
opal_show_help("help-mpi-runtime.txt",
"mpi_finalize: not initialized",
true, hostname, pid);
} else if (ompi_mpi_finalized) {
} else if (state >= OMPI_MPI_STATE_FINALIZE_STARTED) {
opal_show_help("help-mpi-runtime.txt",
"mpi_finalize:invoked_multiple_times",
true, hostname, pid);
}
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
return MPI_ERR_OTHER;
}
ompi_mpi_finalize_started = true;
opal_atomic_wmb();
opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_FINALIZE_STARTED);
ompi_mpiext_fini();
@ -160,9 +152,14 @@ int ompi_mpi_finalize(void)
ompi_mpi_comm_self.comm.c_keyhash = NULL;
}
/* Proceed with MPI_FINALIZE */
ompi_mpi_finalized = true;
/* Mark that we are past COMM_SELF destruction so that
MPI_FINALIZED can return an accurate value (per MPI-3.1,
MPI_FINALIZED needs to return FALSE until after COMM_SELF is
destroyed / all the attribute callbacks have been invoked) */
opal_atomic_wmb();
opal_atomic_swap_32(&ompi_mpi_state,
OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT);
/* As finalize is the last legal MPI call, we are allowed to force the release
* of the user buffer used for bsend, before going anywhere further.
@ -513,8 +510,10 @@ int ompi_mpi_finalize(void)
/* All done */
done:
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
done:
opal_atomic_wmb();
opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_FINALIZE_COMPLETED);
ompi_hook_base_mpi_finalize_bottom();
return ret;

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006-2009 University of Houston. All rights reserved.
@ -130,11 +130,7 @@ const char ompi_version_string[] = OMPI_IDENT_STRING;
* Global variables and symbols for the MPI layer
*/
opal_mutex_t ompi_mpi_bootstrap_mutex = OPAL_MUTEX_STATIC_INIT;
volatile bool ompi_mpi_init_started = false;
volatile bool ompi_mpi_initialized = false;
volatile bool ompi_mpi_finalize_started = false;
volatile bool ompi_mpi_finalized = false;
volatile int32_t ompi_mpi_state = OMPI_MPI_STATE_NOT_INITIALIZED;
volatile bool ompi_rte_initialized = false;
bool ompi_mpi_thread_multiple = false;
@ -394,21 +390,22 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
ompi_mpi_finalize(). Hence, if we get it, then no other thread
is inside the critical section (and we don't have to check the
*_started bool variables). */
opal_mutex_lock(&ompi_mpi_bootstrap_mutex);
if (ompi_mpi_finalized) {
opal_atomic_rmb();
int32_t state = ompi_mpi_state;
if (state >= OMPI_MPI_STATE_FINALIZE_STARTED) {
opal_show_help("help-mpi-runtime.txt",
"mpi_init: already finalized", true);
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
return MPI_ERR_OTHER;
} else if (ompi_mpi_initialized) {
} else if (state >= OMPI_MPI_STATE_INIT_STARTED) {
opal_show_help("help-mpi-runtime.txt",
"mpi_init: invoked multiple times", true);
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
return MPI_ERR_OTHER;
}
/* Indicate that we have *started* MPI_INIT* */
ompi_mpi_init_started = true;
opal_atomic_wmb();
opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_INIT_STARTED);
/* Figure out the final MPI thread levels. If we were not
compiled for support for MPI threads, then don't allow
@ -988,7 +985,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
"mpi_init:startup:internal-failure", true,
"MPI_INIT", "MPI_INIT", error, err_msg, ret);
}
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
ompi_hook_base_mpi_init_error(argc, argv, requested, provided);
OMPI_TIMING_FINALIZE;
return ret;
@ -1010,8 +1006,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
opal_hash_table_init(&ompi_mpi_f90_complex_hashtable, FLT_MAX_10_EXP);
/* All done. Wasn't that simple? */
ompi_mpi_initialized = true;
opal_atomic_wmb();
opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_INIT_COMPLETED);
/* Finish last measurement, output results
* and clear timing structure */
@ -1019,8 +1015,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
OMPI_TIMING_OUT;
OMPI_TIMING_FINALIZE;
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
ompi_hook_base_mpi_init_bottom(argc, argv, requested, provided);
return MPI_SUCCESS;

Просмотреть файл

@ -3,6 +3,7 @@
* All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -75,8 +76,14 @@ int oshmem_shmem_finalize(void)
}
}
if ((OSHMEM_SUCCESS == ret) && ompi_mpi_initialized
&& !ompi_mpi_finalized && oshmem_shmem_globalexit_status == 0) {
/* Note: ompi_mpi_state is set atomically in ompi_mpi_init() and
ompi_mpi_finalize(). Those 2 functions have the appropriate
memory barriers such that we don't need one here. */
int32_t state = ompi_mpi_state;
if ((OSHMEM_SUCCESS == ret) &&
(state >= OMPI_MPI_STATE_INIT_COMPLETED &&
state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) &&
oshmem_shmem_globalexit_status == 0) {
PMPI_Comm_free(&oshmem_comm_world);
ret = ompi_mpi_finalize();
}

Просмотреть файл

@ -3,7 +3,7 @@
* All rights reserved.
* Copyright (c) 2015-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015-2018 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -147,7 +147,7 @@ int oshmem_shmem_init(int argc, char **argv, int requested, int *provided)
OMPI_TIMING_INIT(32);
if (!oshmem_shmem_initialized) {
if (!ompi_mpi_initialized && !ompi_mpi_finalized) {
if (ompi_mpi_state < OMPI_MPI_STATE_INIT_COMPLETED) {
ret = ompi_mpi_init(argc, argv, requested, provided);
}
OMPI_TIMING_NEXT("ompi_mpi_init");