1
1

Merge pull request #5092 from jsquyres/pr/finalized-attribute-fix

mpi/finalized: don't hang if called during MPI_FINALIZE
Этот коммит содержится в:
Jeff Squyres 2018-06-03 13:10:41 -04:00 коммит произвёл GitHub
родитель 623e36de8a 38ed70de6f
Коммит a1737ca3eb
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
19 изменённых файлов: 135 добавлений и 109 удалений

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
@ -193,11 +193,22 @@ struct ompi_request_t;
* This macro directly invokes the ompi_mpi_errors_are_fatal_handler()
* when an error occurs because MPI_COMM_WORLD does not exist (because
* we're before MPI_Init() or after MPI_Finalize()).
*
* NOTE: The ompi_mpi_state variable is a volatile that is set
* atomically in ompi_mpi_init() and ompi_mpi_finalize(). The
* appropriate memory barriers are done in those 2 functions such that
* we do not need to do a read memory barrier here (in
* potentially-performance-critical code paths) before reading the
* variable.
*/
#define OMPI_ERR_INIT_FINALIZE(name) \
if( OPAL_UNLIKELY(!ompi_mpi_initialized || ompi_mpi_finalized) ) { \
ompi_mpi_errors_are_fatal_comm_handler(NULL, NULL, name); \
}
/*
 * Init/finalize guard.  Copies ompi_mpi_state into a local once, so
 * that both comparisons below see the same value, and invokes
 * ompi_mpi_errors_are_fatal_comm_handler(NULL, NULL, name) when that
 * value is below OMPI_MPI_STATE_INIT_COMPLETED or above
 * OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT.
 *
 * NOTE(review): the upper bound is a strict '>', so the
 * FINALIZE_PAST_COMM_SELF_DESTRUCT state itself does not trigger the
 * handler -- confirm that this is intended.
 *
 * The expansion is a bare { } block (not do { } while (0)) and it
 * declares a local variable named "state".
 */
#define OMPI_ERR_INIT_FINALIZE(name) \
{ \
int32_t state = ompi_mpi_state; \
if (OPAL_UNLIKELY(state < OMPI_MPI_STATE_INIT_COMPLETED || \
state > OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT)) { \
ompi_mpi_errors_are_fatal_comm_handler(NULL, NULL, name); \
} \
}
/**
* This is the macro to invoke to directly invoke an MPI error

Просмотреть файл

@ -10,7 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 University of Houston. All rights reserved.
* Copyright (c) 2008-2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC.
@ -149,7 +149,8 @@ void ompi_mpi_errors_return_win_handler(struct ompi_win_t **win,
static void out(char *str, char *arg)
{
if (ompi_rte_initialized && !ompi_mpi_finalized) {
if (ompi_rte_initialized &&
ompi_mpi_state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
if (NULL != arg) {
opal_output(0, str, arg);
} else {
@ -280,7 +281,9 @@ static void backend_fatal_no_aggregate(char *type,
{
char *arg;
assert(!ompi_mpi_initialized || ompi_mpi_finalized);
int32_t state = ompi_mpi_state;
assert(state < OMPI_MPI_STATE_INIT_COMPLETED ||
state >= OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT);
fflush(stdout);
fflush(stderr);
@ -289,7 +292,7 @@ static void backend_fatal_no_aggregate(char *type,
/* Per #2152, print out in plain English if something was invoked
before MPI_INIT* or after MPI_FINALIZE */
if (!ompi_mpi_init_started && !ompi_mpi_initialized) {
if (state < OMPI_MPI_STATE_INIT_STARTED) {
if (NULL != arg) {
out("*** The %s() function was called before MPI_INIT was invoked.\n"
"*** This is disallowed by the MPI standard.\n", arg);
@ -300,7 +303,7 @@ static void backend_fatal_no_aggregate(char *type,
"*** function was invoked, sorry. :-(\n", NULL);
}
out("*** Your MPI job will now abort.\n", NULL);
} else if (ompi_mpi_finalized) {
} else if (state >= OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
if (NULL != arg) {
out("*** The %s() function was called after MPI_FINALIZE was invoked.\n"
"*** This is disallowed by the MPI standard.\n", arg);

Просмотреть файл

@ -2,6 +2,7 @@
* Copyright (c) 2011 Mellanox Technologies. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -159,7 +160,7 @@ int mca_coll_fca_barrier(struct ompi_communicator_t *comm,
int ret;
FCA_VERBOSE(5,"Using FCA Barrier");
if (OPAL_UNLIKELY(ompi_mpi_finalize_started)) {
if (OPAL_UNLIKELY(ompi_mpi_state >= OMPI_MPI_STATE_FINALIZE_STARTED)) {
FCA_VERBOSE(5, "In finalize, reverting to previous barrier");
goto orig_barrier;
}

Просмотреть файл

@ -4,6 +4,7 @@
* Copyright (c) 2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -241,7 +242,7 @@ static int mca_coll_hcoll_module_enable(mca_coll_base_module_t *module,
int mca_coll_hcoll_progress(void)
{
if (ompi_mpi_finalized){
if (ompi_mpi_state >= OMPI_MPI_STATE_FINALIZE_STARTED) {
hcoll_rte_p2p_disabled_notify();
}

Просмотреть файл

@ -2,6 +2,7 @@
Copyright (c) 2011 Mellanox Technologies. All rights reserved.
Copyright (c) 2015 Research Organization for Information Science
and Technology (RIST). All rights reserved.
Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
$COPYRIGHT$
Additional copyrights may follow
@ -21,7 +22,7 @@ int mca_coll_hcoll_barrier(struct ompi_communicator_t *comm,
mca_coll_hcoll_module_t *hcoll_module = (mca_coll_hcoll_module_t*)module;
HCOL_VERBOSE(20,"RUNNING HCOL BARRIER");
if (OPAL_UNLIKELY(ompi_mpi_finalize_started)) {
if (OPAL_UNLIKELY(ompi_mpi_state >= OMPI_MPI_STATE_FINALIZE_STARTED)) {
HCOL_VERBOSE(5, "In finalize, reverting to previous barrier");
goto orig_barrier;
}

Просмотреть файл

@ -12,6 +12,7 @@
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -69,7 +70,7 @@ mca_io_romio314_file_close (ompi_file_t *fh)
which we obviously can't do if we've started to MPI_Finalize).
The user didn't close the file, so they should expect
unexpected behavior. */
if (ompi_mpi_finalized) {
if (ompi_mpi_state >= OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
return OMPI_SUCCESS;
}

Просмотреть файл

@ -2,6 +2,7 @@
* Copyright (C) 2001-2011 Mellanox Technologies Ltd. ALL RIGHTS RESERVED.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -265,7 +266,7 @@ int mca_pml_yalla_del_procs(struct ompi_proc_t **procs, size_t nprocs)
{
size_t i;
if (ompi_mpi_finalized) {
if (ompi_mpi_state >= OMPI_MPI_STATE_FINALIZE_STARTED) {
PML_YALLA_VERBOSE(3, "%s", "using bulk powerdown");
mxm_ep_powerdown(ompi_pml_yalla.mxm_ep);
}

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2015 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
@ -44,13 +44,7 @@ int MPI_Finalized(int *flag)
ompi_hook_base_mpi_finalized_top(flag);
/* We must obtain the lock to guarantee consistent values of
ompi_mpi_initialized and ompi_mpi_finalized. Note, too, that
this lock is held for the bulk of the duration of
ompi_mpi_init() and ompi_mpi_finalize(), so when we get the
lock, we are guaranteed that some other thread is not part way
through initialization or finalization. */
opal_mutex_lock(&ompi_mpi_bootstrap_mutex);
int32_t state = ompi_mpi_state;
if (MPI_PARAM_CHECK) {
if (NULL == flag) {
@ -59,12 +53,11 @@ int MPI_Finalized(int *flag)
whether we're currently (after MPI_Init and before
MPI_Finalize) or not */
if (ompi_mpi_initialized && !ompi_mpi_finalized) {
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
if (state >= OMPI_MPI_STATE_INIT_COMPLETED &&
state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
FUNC_NAME);
} else {
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
/* We have no MPI object here so call ompi_errhandler_invoke
* directly */
return ompi_errhandler_invoke(NULL, NULL, -1,
@ -74,8 +67,7 @@ int MPI_Finalized(int *flag)
}
}
*flag = ompi_mpi_finalized;
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
*flag = (state >= OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT);
ompi_hook_base_mpi_finalized_bottom(flag);

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved
@ -58,7 +58,9 @@ int MPI_Get_library_version(char *version, int *resultlen)
(i.e., use a NULL communicator, which will end up at the
default errhandler, which is abort). */
if (ompi_mpi_initialized && !ompi_mpi_finalized) {
int32_t state = ompi_mpi_state;
if (state >= OMPI_MPI_STATE_INIT_COMPLETED &&
state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
FUNC_NAME);
} else {

Просмотреть файл

@ -12,6 +12,7 @@
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -54,7 +55,9 @@ int MPI_Get_version(int *version, int *subversion)
(i.e., use a NULL communicator, which will end up at the
default errhandler, which is abort). */
if (ompi_mpi_initialized && !ompi_mpi_finalized) {
int32_t state = ompi_mpi_state;
if (state >= OMPI_MPI_STATE_INIT_COMPLETED &&
state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
FUNC_NAME);
} else {

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2015 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
@ -44,13 +44,7 @@ int MPI_Initialized(int *flag)
ompi_hook_base_mpi_initialized_top(flag);
/* We must obtain the lock to guarantee consistent values of
ompi_mpi_initialized and ompi_mpi_finalized. Note, too, that
this lock is held for the bulk of the duration of
ompi_mpi_init() and ompi_mpi_finalize(), so when we get the
lock, we are guaranteed that some other thread is not part way
through initialization or finalization. */
opal_mutex_lock(&ompi_mpi_bootstrap_mutex);
int32_t state = ompi_mpi_state;
if (MPI_PARAM_CHECK) {
if (NULL == flag) {
@ -59,12 +53,11 @@ int MPI_Initialized(int *flag)
whether we're currently (after MPI_Init and before
MPI_Finalize) or not */
if (ompi_mpi_initialized && !ompi_mpi_finalized) {
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
if (state >= OMPI_MPI_STATE_INIT_COMPLETED &&
state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
FUNC_NAME);
} else {
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
/* We have no MPI object here so call ompi_errhandler_invoke
* directly */
return ompi_errhandler_invoke(NULL, NULL, -1,
@ -74,8 +67,7 @@ int MPI_Initialized(int *flag)
}
}
*flag = ompi_mpi_initialized;
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
*flag = (state >= OMPI_MPI_STATE_INIT_COMPLETED);
ompi_hook_base_mpi_initialized_bottom(flag);

Просмотреть файл

@ -2,7 +2,7 @@
/*
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
@ -39,7 +39,9 @@ int MPI_T_finalize (void)
if (0 == --ompi_mpit_init_count) {
(void) ompi_info_close_components ();
if ((!ompi_mpi_initialized || ompi_mpi_finalized) &&
int32_t state = ompi_mpi_state;
if ((state < OMPI_MPI_STATE_INIT_COMPLETED ||
state >= OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) &&
(NULL != ompi_mpi_main_thread)) {
/* we are not between MPI_Init and MPI_Finalize so we
* have to free the ompi_mpi_main_thread */

Просмотреть файл

@ -4,6 +4,7 @@
* reserved.
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -64,8 +65,11 @@ const int PERUSE_num_events = (sizeof(PERUSE_events) / sizeof(peruse_event_assoc
int PERUSE_Init (void)
{
if (MPI_PARAM_CHECK) {
if (!ompi_mpi_initialized || ompi_mpi_finalized)
int32_t state = ompi_mpi_state;
if (state < OMPI_MPI_STATE_INIT_COMPLETED ||
state >= OMPI_MPI_STATE_FINALIZE_STARTED) {
return PERUSE_ERR_INIT;
}
}
ompi_peruse_init ();
return PERUSE_SUCCESS;

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
@ -51,15 +51,9 @@ struct ompi_predefined_datatype_t;
/** Mutex to protect all the _init and _finalize variables */
OMPI_DECLSPEC extern opal_mutex_t ompi_mpi_bootstrap_mutex;
/** Did MPI start to initialize? */
OMPI_DECLSPEC extern volatile bool ompi_mpi_init_started;
OMPI_DECLSPEC extern volatile int32_t ompi_mpi_state;
/** Has the RTE been initialized? */
OMPI_DECLSPEC extern volatile bool ompi_rte_initialized;
/** Is MPI fully initialized? */
OMPI_DECLSPEC extern volatile bool ompi_mpi_initialized;
/** Did MPI start to finalize? */
OMPI_DECLSPEC extern volatile bool ompi_mpi_finalize_started;
/** Has MPI been fully finalized? */
OMPI_DECLSPEC extern volatile bool ompi_mpi_finalized;
/** Do we have multiple threads? */
OMPI_DECLSPEC extern bool ompi_mpi_thread_multiple;
@ -70,6 +64,29 @@ OMPI_DECLSPEC extern int ompi_mpi_thread_provided;
/** Identifier of the main thread */
OMPI_DECLSPEC extern struct opal_thread_t *ompi_mpi_main_thread;
/*
* State of the MPI runtime.
*
* Atomically set/read in the ompi_mpi_state global variable (for
* functions such as MPI_INITIALIZED and MPI_FINALIZED).
*/
typedef enum {
    /* The numeric order of these constants follows the lifecycle of
       the runtime and is significant: the state is tested with
       relational operators (<, >=), so any new state must be
       inserted at its lifecycle position. */

    /* Initial value of ompi_mpi_state */
    OMPI_MPI_STATE_NOT_INITIALIZED = 0,

    /* Set when ompi_mpi_init() begins its work */
    OMPI_MPI_STATE_INIT_STARTED = 1,

    /* Set when ompi_mpi_init() has finished successfully */
    OMPI_MPI_STATE_INIT_COMPLETED = 2,

    /* Set when ompi_mpi_finalize() begins its work.  MPI_FINALIZED
       must still report FALSE in this state: attribute callbacks
       run at the very beginning of MPI_FINALIZE, and a call to
       MPI_FINALIZED made from one of them has to see FALSE. */
    OMPI_MPI_STATE_FINALIZE_STARTED = 3,

    /* Set once finalization is far enough along that MPI_FINALIZED
       must report TRUE.  This extra state exists solely to separate
       "MPI_FINALIZE has started" from "MPI_FINALIZED now returns
       TRUE". */
    OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT = 4,

    /* Set at the very end of ompi_mpi_finalize() */
    OMPI_MPI_STATE_FINALIZE_COMPLETED = 5
} ompi_mpi_state_t;
/*
* These variables are for the MPI F03 bindings (F03 must bind Fortran
* variables to symbols; it cannot bind Fortran variables to the

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -166,16 +166,20 @@ ompi_mpi_abort(struct ompi_communicator_t* comm,
/* If the RTE isn't setup yet/any more, then don't even try
killing everyone. Sorry, Charlie... */
int32_t state = ompi_mpi_state;
if (!ompi_rte_initialized) {
fprintf(stderr, "[%s:%05d] Local abort %s completed successfully, but am not able to aggregate error messages, and not able to guarantee that all other processes were killed!\n",
host, (int) pid, ompi_mpi_finalized ?
host, (int) pid,
state >= OMPI_MPI_STATE_FINALIZE_STARTED ?
"after MPI_FINALIZE started" : "before MPI_INIT completed");
_exit(errcode == 0 ? 1 : errcode);
}
/* If OMPI is initialized and we have a non-NULL communicator,
then try to kill just that set of processes */
if (ompi_mpi_initialized && !ompi_mpi_finalized && NULL != comm) {
if (state >= OMPI_MPI_STATE_INIT_COMPLETED &&
state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT &&
NULL != comm) {
try_kill_peers(comm, errcode);
}

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006 University of Houston. All rights reserved.
@ -110,22 +110,12 @@ int ompi_mpi_finalize(void)
volatile bool active;
uint32_t key;
ompi_datatype_t * datatype;
//OPAL_TIMING_DECLARE(tm);
//OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY);
ompi_hook_base_mpi_finalize_top();
/* Be a bit social if an erroneous program calls MPI_FINALIZE in
two different threads, otherwise we may deadlock in
ompi_comm_free() (or run into other nasty lions, tigers, or
bears).
This lock is held for the duration of ompi_mpi_init() and
ompi_mpi_finalize(). Hence, if we get it, then no other thread
is inside the critical section (and we don't have to check the
*_started bool variables). */
opal_mutex_lock(&ompi_mpi_bootstrap_mutex);
if (!ompi_mpi_initialized || ompi_mpi_finalized) {
int32_t state = ompi_mpi_state;
if (state < OMPI_MPI_STATE_INIT_COMPLETED ||
state >= OMPI_MPI_STATE_FINALIZE_STARTED) {
/* Note that if we're not initialized or already finalized, we
cannot raise an MPI exception. The best that we can do is
write something to stderr. */
@ -133,19 +123,19 @@ int ompi_mpi_finalize(void)
pid_t pid = getpid();
gethostname(hostname, sizeof(hostname));
if (ompi_mpi_initialized) {
if (state < OMPI_MPI_STATE_INIT_COMPLETED) {
opal_show_help("help-mpi-runtime.txt",
"mpi_finalize: not initialized",
true, hostname, pid);
} else if (ompi_mpi_finalized) {
} else if (state >= OMPI_MPI_STATE_FINALIZE_STARTED) {
opal_show_help("help-mpi-runtime.txt",
"mpi_finalize:invoked_multiple_times",
true, hostname, pid);
}
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
return MPI_ERR_OTHER;
}
ompi_mpi_finalize_started = true;
opal_atomic_wmb();
opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_FINALIZE_STARTED);
ompi_mpiext_fini();
@ -160,9 +150,14 @@ int ompi_mpi_finalize(void)
ompi_mpi_comm_self.comm.c_keyhash = NULL;
}
/* Proceed with MPI_FINALIZE */
ompi_mpi_finalized = true;
/* Mark that we are past COMM_SELF destruction so that
MPI_FINALIZED can return an accurate value (per MPI-3.1,
MPI_FINALIZED needs to return FALSE until after COMM_SELF is
destroyed / all the attribute callbacks have been invoked) */
opal_atomic_wmb();
opal_atomic_swap_32(&ompi_mpi_state,
OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT);
/* As finalize is the last legal MPI call, we are allowed to force the release
* of the user buffer used for bsend, before going anywhere further.
@ -177,9 +172,6 @@ int ompi_mpi_finalize(void)
MPI lifetime, to get better latency when not using TCP */
opal_progress_event_users_increment();
/* check to see if we want timing information */
//OPAL_TIMING_MSTART((&tm,"time to execute finalize barrier"));
/* NOTE: MPI-2.1 requires that MPI_FINALIZE is "collective" across
*all* connected processes. This only means that all processes
have to call it. It does *not* mean that all connected
@ -280,10 +272,6 @@ int ompi_mpi_finalize(void)
}
}
/* check for timing request - get stop time and report elapsed
time if so */
//OPAL_TIMING_DELTAS(ompi_enable_timing, &tm);
/*
* Shutdown the Checkpoint/Restart Mech.
*/
@ -513,8 +501,10 @@ int ompi_mpi_finalize(void)
/* All done */
done:
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
done:
opal_atomic_wmb();
opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_FINALIZE_COMPLETED);
ompi_hook_base_mpi_finalize_bottom();
return ret;

Просмотреть файл

@ -10,7 +10,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2018 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006-2009 University of Houston. All rights reserved.
@ -130,11 +130,7 @@ const char ompi_version_string[] = OMPI_IDENT_STRING;
* Global variables and symbols for the MPI layer
*/
opal_mutex_t ompi_mpi_bootstrap_mutex = OPAL_MUTEX_STATIC_INIT;
volatile bool ompi_mpi_init_started = false;
volatile bool ompi_mpi_initialized = false;
volatile bool ompi_mpi_finalize_started = false;
volatile bool ompi_mpi_finalized = false;
volatile int32_t ompi_mpi_state = OMPI_MPI_STATE_NOT_INITIALIZED;
volatile bool ompi_rte_initialized = false;
bool ompi_mpi_thread_multiple = false;
@ -394,21 +390,22 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
ompi_mpi_finalize(). Hence, if we get it, then no other thread
is inside the critical section (and we don't have to check the
*_started bool variables). */
opal_mutex_lock(&ompi_mpi_bootstrap_mutex);
if (ompi_mpi_finalized) {
opal_atomic_rmb();
int32_t state = ompi_mpi_state;
if (state >= OMPI_MPI_STATE_FINALIZE_STARTED) {
opal_show_help("help-mpi-runtime.txt",
"mpi_init: already finalized", true);
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
return MPI_ERR_OTHER;
} else if (ompi_mpi_initialized) {
} else if (state >= OMPI_MPI_STATE_INIT_STARTED) {
opal_show_help("help-mpi-runtime.txt",
"mpi_init: invoked multiple times", true);
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
return MPI_ERR_OTHER;
}
/* Indicate that we have *started* MPI_INIT* */
ompi_mpi_init_started = true;
opal_atomic_wmb();
opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_INIT_STARTED);
/* Figure out the final MPI thread levels. If we were not
compiled for support for MPI threads, then don't allow
@ -988,7 +985,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
"mpi_init:startup:internal-failure", true,
"MPI_INIT", "MPI_INIT", error, err_msg, ret);
}
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
ompi_hook_base_mpi_init_error(argc, argv, requested, provided);
OMPI_TIMING_FINALIZE;
return ret;
@ -1010,8 +1006,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
opal_hash_table_init(&ompi_mpi_f90_complex_hashtable, FLT_MAX_10_EXP);
/* All done. Wasn't that simple? */
ompi_mpi_initialized = true;
opal_atomic_wmb();
opal_atomic_swap_32(&ompi_mpi_state, OMPI_MPI_STATE_INIT_COMPLETED);
/* Finish last measurement, output results
* and clear timing structure */
@ -1019,8 +1015,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
OMPI_TIMING_OUT;
OMPI_TIMING_FINALIZE;
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
ompi_hook_base_mpi_init_bottom(argc, argv, requested, provided);
return MPI_SUCCESS;

Просмотреть файл

@ -3,6 +3,7 @@
* All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -75,8 +76,14 @@ int oshmem_shmem_finalize(void)
}
}
if ((OSHMEM_SUCCESS == ret) && ompi_mpi_initialized
&& !ompi_mpi_finalized && oshmem_shmem_globalexit_status == 0) {
/* Note: ompi_mpi_state is set atomically in ompi_mpi_init() and
ompi_mpi_finalize(). Those 2 functions have the appropriate
memory barriers such that we don't need one here. */
int32_t state = ompi_mpi_state;
if ((OSHMEM_SUCCESS == ret) &&
(state >= OMPI_MPI_STATE_INIT_COMPLETED &&
state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) &&
oshmem_shmem_globalexit_status == 0) {
PMPI_Comm_free(&oshmem_comm_world);
ret = ompi_mpi_finalize();
}

Просмотреть файл

@ -3,7 +3,7 @@
* All rights reserved.
* Copyright (c) 2015-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2015-2018 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -147,7 +147,7 @@ int oshmem_shmem_init(int argc, char **argv, int requested, int *provided)
OMPI_TIMING_INIT(32);
if (!oshmem_shmem_initialized) {
if (!ompi_mpi_initialized && !ompi_mpi_finalized) {
if (ompi_mpi_state < OMPI_MPI_STATE_INIT_COMPLETED) {
ret = ompi_mpi_init(argc, argv, requested, provided);
}
OMPI_TIMING_NEXT("ompi_mpi_init");