Proposed extensions for Open MPI:

- If MPI_INITIALIZED is invoked and MPI is only partially initialized,
  wait until MPI is fully initialized before returning.
- If MPI_FINALIZED is invoked and MPI is only partially finalized,
  wait until MPI is fully finalized before returning.
- If the ompi_mpix_allow_multi_init MCA param is true, allow MPI_INIT
  and MPI_INIT_THREAD to be invoked multiple times without error (MPI
  will be safely initialized only the first time it is invoked).
This commit is contained in:
Jeff Squyres 2015-10-11 07:31:47 -05:00
parent 341b60dd57
commit f5ad90c920
9 changed files with 141 additions and 99 deletions

View file

@@ -11,6 +11,7 @@
* All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
+* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -41,6 +42,14 @@ int MPI_Finalized(int *flag)
OPAL_CR_NOOP_PROGRESS();
+/* We must obtain the lock to guarantee consistent values of
+ompi_mpi_initialized and ompi_mpi_finalized. Note, too, that
+this lock is held for the bulk of the duration of
+ompi_mpi_init() and ompi_mpi_finalize(), so when we get the
+lock, we are guaranteed that no other thread is partway
+through initialization or finalization. */
+opal_mutex_lock(&ompi_mpi_bootstrap_mutex);
if (MPI_PARAM_CHECK) {
if (NULL == flag) {
@@ -49,17 +58,19 @@ int MPI_Finalized(int *flag)
MPI_Finalize) or not */
if (ompi_mpi_initialized && !ompi_mpi_finalized) {
+opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
FUNC_NAME);
} else {
+opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
return OMPI_ERRHANDLER_INVOKE(NULL, MPI_ERR_ARG,
FUNC_NAME);
}
}
}
/* Pretty simple */
*flag = ompi_mpi_finalized;
+opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
return MPI_SUCCESS;
}
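The lock-then-read pattern above reappears verbatim in MPI_Initialized below. As a rough analogy (a minimal pthreads sketch of the bootstrap-mutex idea, not Open MPI's actual code), the guarantee it buys is that a query never observes a half-completed initialization; at worst it blocks until the in-flight init finishes:

/* toy_bootstrap.c -- illustrative pthreads analogue of the pattern above.
 * Build: cc toy_bootstrap.c -o toy_bootstrap -lpthread */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t bootstrap_mutex = PTHREAD_MUTEX_INITIALIZER;
static volatile bool initialized = false;

static void *init_thread(void *arg)
{
    (void) arg;
    pthread_mutex_lock(&bootstrap_mutex);
    /* ...long, multi-step initialization happens here... */
    initialized = true;
    pthread_mutex_unlock(&bootstrap_mutex);
    return NULL;
}

/* Like MPI_Initialized above: takes the mutex, so it waits out any
   initialization already in progress instead of seeing partial state. */
static bool query_initialized(void)
{
    pthread_mutex_lock(&bootstrap_mutex);
    bool flag = initialized;
    pthread_mutex_unlock(&bootstrap_mutex);
    return flag;
}

int main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, init_thread, NULL);
    /* Never a torn state: either init has not started or it is complete. */
    printf("initialized = %s\n", query_initialized() ? "yes" : "not yet");
    pthread_join(t, NULL);
    return 0;
}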

View file

@@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
-* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+* Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@@ -47,25 +47,6 @@ int MPI_Init(int *argc, char ***argv)
char *env;
int required = MPI_THREAD_SINGLE;
-/* Ensure that we were not already initialized or finalized */
-if (ompi_mpi_finalized) {
-if (0 == ompi_comm_rank(MPI_COMM_WORLD)) {
-opal_show_help("help-mpi-api.txt",
-"mpi-function-after-finalize", true, FUNC_NAME);
-}
-return ompi_errhandler_invoke(NULL, NULL,
-OMPI_ERRHANDLER_TYPE_COMM,
-MPI_ERR_OTHER, FUNC_NAME);
-} else if (ompi_mpi_initialized) {
-if (0 == ompi_comm_rank(MPI_COMM_WORLD)) {
-opal_show_help("help-mpi-api.txt", "mpi-initialize-twice",
-true, FUNC_NAME);
-}
-return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_OTHER,
-FUNC_NAME);
-}
/* check for environment overrides for required thread level. If
there is, check to see that it is a valid/supported thread level.
If not, default to MPI_THREAD_MULTIPLE. */

View file

@@ -12,6 +12,7 @@
* Copyright (c) 2010 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
+* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -60,23 +61,6 @@ int MPI_Init_thread(int *argc, char ***argv, int required,
*provided = MPI_THREAD_SINGLE;
#endif
-/* Ensure that we were not already initialized or finalized */
-if (ompi_mpi_finalized) {
-if (0 == ompi_comm_rank(MPI_COMM_WORLD)) {
-opal_show_help("help-mpi-api.txt", "mpi-function-after-finalize",
-true, FUNC_NAME);
-}
-return ompi_errhandler_invoke(NULL, NULL, OMPI_ERRHANDLER_TYPE_COMM,
-MPI_ERR_OTHER, FUNC_NAME);
-} else if (ompi_mpi_initialized) {
-if (0 == ompi_comm_rank(MPI_COMM_WORLD)) {
-opal_show_help("help-mpi-api.txt", "mpi-initialize-twice",
-true, FUNC_NAME);
-}
-return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_OTHER, FUNC_NAME);
-}
/* Call the back-end initialization function (we need to put as
little in this function as possible so that if it's profiled, we
don't lose anything) */

View file

@@ -11,6 +11,7 @@
* All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
+* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -41,6 +42,14 @@ int MPI_Initialized(int *flag)
OPAL_CR_NOOP_PROGRESS();
+/* We must obtain the lock to guarantee consistent values of
+ompi_mpi_initialized and ompi_mpi_finalized. Note, too, that
+this lock is held for the bulk of the duration of
+ompi_mpi_init() and ompi_mpi_finalize(), so when we get the
+lock, we are guaranteed that no other thread is partway
+through initialization or finalization. */
+opal_mutex_lock(&ompi_mpi_bootstrap_mutex);
if (MPI_PARAM_CHECK) {
if (NULL == flag) {
@@ -49,17 +58,19 @@ int MPI_Initialized(int *flag)
MPI_Finalize) or not */
if (ompi_mpi_initialized && !ompi_mpi_finalized) {
+opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG,
FUNC_NAME);
} else {
+opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
return OMPI_ERRHANDLER_INVOKE(NULL, MPI_ERR_ARG,
FUNC_NAME);
}
}
}
/* Pretty simple */
*flag = ompi_mpi_initialized;
+opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
return MPI_SUCCESS;
}

View file

@@ -50,6 +50,24 @@ You may wish to try to narrow down the problem;
WARNING: The MCA parameter mpi_param_check has been set to true, but
parameter checking has been compiled out of Open MPI. The
mpi_param_check value has therefore been ignored.
+#
+[mpi_init: invoked multiple times]
+Open MPI has detected that this process has attempted to initialize
+MPI (via MPI_INIT or MPI_INIT_THREAD) more than once. This is
+erroneous.
+#
+[mpi_init: already finalized]
+Open MPI has detected that this process has attempted to initialize
+MPI (via MPI_INIT or MPI_INIT_THREAD) after MPI_FINALIZE has been
+called. This is erroneous.
+#
+[mpi_finalize: not initialized]
+The function MPI_FINALIZE was invoked before MPI was initialized in a
+process on host %s, PID %d.
+This indicates an erroneous MPI program; MPI must be initialized
+before it can be finalized.
+#
+[mpi_finalize:invoked_multiple_times]
+The function MPI_FINALIZE was invoked multiple times in a single
+process on host %s, PID %d.
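Each bracketed tag above names one message in the help file; the %s and %d conversions are filled from the trailing arguments to opal_show_help(). The fragment below shows how the new finalize check selects one of these messages (compilable only inside the Open MPI tree; the include path is a best guess):

/* In-tree fragment, not a standalone program. */
#include <sys/types.h>
#include <sys/param.h>               /* MAXHOSTNAMELEN */
#include <unistd.h>                  /* gethostname(), getpid() */
#include "opal/util/show_help.h"

static void report_double_finalize(void)
{
    char hostname[MAXHOSTNAMELEN];
    pid_t pid = getpid();
    gethostname(hostname, sizeof(hostname));

    /* Selects [mpi_finalize:invoked_multiple_times] from
       help-mpi-runtime.txt; "true" requests the error banner, and
       hostname/pid fill the %s and %d conversions in the message. */
    opal_show_help("help-mpi-runtime.txt",
                   "mpi_finalize:invoked_multiple_times",
                   true, hostname, pid);
}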

View file

@@ -35,6 +35,7 @@
#include "opal/class/opal_list.h"
#include "opal/class/opal_hash_table.h"
+#include "opal/threads/mutex.h"
BEGIN_C_DECLS
@@ -47,16 +48,18 @@ struct ompi_predefined_datatype_t;
/* Global variables and symbols for the MPI layer */
-/** Did mpi start to initialize? */
-OMPI_DECLSPEC extern bool ompi_mpi_init_started;
-/** Is mpi initialized? */
-OMPI_DECLSPEC extern bool ompi_mpi_initialized;
-/** Has mpi been finalized? */
-OMPI_DECLSPEC extern bool ompi_mpi_finalized;
+/** Mutex to protect all the _init and _finalize variables */
+OMPI_DECLSPEC extern opal_mutex_t ompi_mpi_bootstrap_mutex;
+/** Did MPI start to initialize? */
+OMPI_DECLSPEC extern volatile bool ompi_mpi_init_started;
/** Has the RTE been initialized? */
-OMPI_DECLSPEC extern bool ompi_rte_initialized;
-/** Did mpi start to finalize? */
-OMPI_DECLSPEC extern int32_t ompi_mpi_finalize_started;
+OMPI_DECLSPEC extern volatile bool ompi_rte_initialized;
+/** Is MPI fully initialized? */
+OMPI_DECLSPEC extern volatile bool ompi_mpi_initialized;
+/** Did MPI start to finalize? */
+OMPI_DECLSPEC extern volatile bool ompi_mpi_finalize_started;
+/** Has MPI been fully finalized? */
+OMPI_DECLSPEC extern volatile bool ompi_mpi_finalized;
/** Do we have multiple threads? */
OMPI_DECLSPEC extern bool ompi_mpi_thread_multiple;

View file

@@ -92,32 +92,44 @@ extern bool ompi_enable_timing_ext;
int ompi_mpi_finalize(void)
{
-int ret;
+int ret = MPI_SUCCESS;
opal_list_item_t *item;
ompi_proc_t** procs;
size_t nprocs;
OPAL_TIMING_DECLARE(tm);
OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY);
/* Be a bit social if an erroneous program calls MPI_FINALIZE in
two different threads, otherwise we may deadlock in
ompi_comm_free() (or run into other nasty lions, tigers, or
-bears) */
-if (! opal_atomic_cmpset_32(&ompi_mpi_finalize_started, 0, 1)) {
-/* Note that if we're already finalized, we cannot raise an
-MPI exception. The best that we can do is write something
-to stderr. */
+bears).
+
+This lock is held for the duration of ompi_mpi_init() and
+ompi_mpi_finalize(). Hence, if we get it, then no other thread
+is inside the critical section (and we don't have to check the
+*_started bool variables). */
+opal_mutex_lock(&ompi_mpi_bootstrap_mutex);
+if (!ompi_mpi_initialized || ompi_mpi_finalized) {
+/* Note that if we're not initialized or already finalized, we
+cannot raise an MPI exception. The best that we can do is
+write something to stderr. */
char hostname[MAXHOSTNAMELEN];
pid_t pid = getpid();
gethostname(hostname, sizeof(hostname));
+if (!ompi_mpi_initialized) {
+opal_show_help("help-mpi-runtime.txt",
+"mpi_finalize: not initialized",
+true, hostname, pid);
+} else if (ompi_mpi_finalized) {
+opal_show_help("help-mpi-runtime.txt",
+"mpi_finalize:invoked_multiple_times",
+true, hostname, pid);
+}
+opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
return MPI_ERR_OTHER;
}
+ompi_mpi_finalize_started = true;
ompi_mpiext_fini();
@@ -267,21 +279,21 @@ int ompi_mpi_finalize(void)
/* free file resources */
if (OMPI_SUCCESS != (ret = ompi_file_finalize())) {
-return ret;
+goto done;
}
/* free window resources */
if (OMPI_SUCCESS != (ret = ompi_win_finalize())) {
-return ret;
+goto done;
}
if (OMPI_SUCCESS != (ret = ompi_osc_base_finalize())) {
-return ret;
+goto done;
}
/* free communicator resources. this MUST come before finalizing the PML
* as this will call into the pml */
if (OMPI_SUCCESS != (ret = ompi_comm_finalize())) {
-return ret;
+goto done;
}
/* call del_procs on all allocated procs even though some may not be known
@@ -294,16 +306,16 @@ int ompi_mpi_finalize(void)
/* free pml resource */
if(OMPI_SUCCESS != (ret = mca_pml_base_finalize())) {
-return ret;
+goto done;
}
/* free requests */
if (OMPI_SUCCESS != (ret = ompi_request_finalize())) {
-return ret;
+goto done;
}
if (OMPI_SUCCESS != (ret = ompi_message_finalize())) {
-return ret;
+goto done;
}
/* If requested, print out a list of memory allocated by ALLOC_MEM
@@ -316,7 +328,7 @@ int ompi_mpi_finalize(void)
shut down MCA types having to do with communications */
if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_pml_base_framework) ) ) {
OMPI_ERROR_LOG(ret);
-return ret;
+goto done;
}
/* shut down buffered send code */
@@ -328,7 +340,7 @@ int ompi_mpi_finalize(void)
*/
if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_crcp_base_framework) ) ) {
OMPI_ERROR_LOG(ret);
-return ret;
+goto done;
}
#endif
@@ -336,49 +348,49 @@ int ompi_mpi_finalize(void)
/* free attr resources */
if (OMPI_SUCCESS != (ret = ompi_attr_finalize())) {
-return ret;
+goto done;
}
/* free group resources */
if (OMPI_SUCCESS != (ret = ompi_group_finalize())) {
-return ret;
+goto done;
}
/* finalize the DPM subsystem */
if ( OMPI_SUCCESS != (ret = ompi_dpm_finalize())) {
-return ret;
+goto done;
}
/* free internal error resources */
if (OMPI_SUCCESS != (ret = ompi_errcode_intern_finalize())) {
-return ret;
+goto done;
}
/* free error code resources */
if (OMPI_SUCCESS != (ret = ompi_mpi_errcode_finalize())) {
-return ret;
+goto done;
}
/* free errhandler resources */
if (OMPI_SUCCESS != (ret = ompi_errhandler_finalize())) {
-return ret;
+goto done;
}
/* Free all other resources */
/* free op resources */
if (OMPI_SUCCESS != (ret = ompi_op_finalize())) {
-return ret;
+goto done;
}
/* free ddt resources */
if (OMPI_SUCCESS != (ret = ompi_datatype_finalize())) {
-return ret;
+goto done;
}
/* free info resources */
if (OMPI_SUCCESS != (ret = ompi_info_finalize())) {
-return ret;
+goto done;
}
/* Close down MCA modules */
@@ -390,32 +402,32 @@ int ompi_mpi_finalize(void)
ompi_io_base_framework.framework_refcnt = 1;
if (OMPI_SUCCESS != mca_base_framework_close(&ompi_io_base_framework)) {
-return ret;
+goto done;
}
}
(void) mca_base_framework_close(&ompi_topo_base_framework);
if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_osc_base_framework))) {
-return ret;
+goto done;
}
if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_coll_base_framework))) {
-return ret;
+goto done;
}
if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_bml_base_framework))) {
-return ret;
+goto done;
}
if (OMPI_SUCCESS != (ret = mca_base_framework_close(&opal_mpool_base_framework))) {
-return ret;
+goto done;
}
if (OMPI_SUCCESS != (ret = mca_base_framework_close(&opal_rcache_base_framework))) {
-return ret;
+goto done;
}
if (OMPI_SUCCESS != (ret = mca_base_framework_close(&opal_allocator_base_framework))) {
-return ret;
+goto done;
}
/* free proc resources */
if ( OMPI_SUCCESS != (ret = ompi_proc_finalize())) {
-return ret;
+goto done;
}
if (NULL != ompi_mpi_main_thread) {
@@ -430,21 +442,24 @@ int ompi_mpi_finalize(void)
/* Leave the RTE */
if (OMPI_SUCCESS != (ret = ompi_rte_finalize())) {
-return ret;
+goto done;
}
ompi_rte_initialized = false;
/* now close the rte framework */
if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_rte_base_framework) ) ) {
OMPI_ERROR_LOG(ret);
-return ret;
+goto done;
}
if (OPAL_SUCCESS != (ret = opal_finalize_util())) {
-return ret;
+goto done;
}
/* All done */
-return MPI_SUCCESS;
+done:
+opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
+return ret;
}
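Most of the hunks above are a mechanical conversion of early "return ret;" statements into "goto done;" so that the function has a single exit that always releases the bootstrap mutex. A condensed sketch of the idiom, with hypothetical step functions standing in for the many ompi_*_finalize() calls:

/* goto_done.c -- the single-exit cleanup idiom used by ompi_mpi_finalize().
 * step_a()/step_b() are placeholders, not real Open MPI calls.
 * Build: cc -c goto_done.c */
#include <pthread.h>

static pthread_mutex_t bootstrap_mutex = PTHREAD_MUTEX_INITIALIZER;

static int step_a(void) { return 0; }  /* stand-ins for teardown steps */
static int step_b(void) { return 0; }

int finalize_like(void)
{
    int ret = 0;  /* mirrors "int ret = MPI_SUCCESS;" above */

    pthread_mutex_lock(&bootstrap_mutex);

    if (0 != (ret = step_a())) {
        goto done;  /* "return ret;" here would leak the mutex */
    }
    if (0 != (ret = step_b())) {
        goto done;
    }

 done:
    pthread_mutex_unlock(&bootstrap_mutex);
    return ret;   /* success and failure paths unlock exactly once */
}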

View file

@@ -124,11 +124,12 @@ const char ompi_version_string[] = OMPI_IDENT_STRING;
* Global variables and symbols for the MPI layer
*/
-bool ompi_mpi_init_started = false;
-bool ompi_mpi_initialized = false;
-bool ompi_mpi_finalized = false;
-bool ompi_rte_initialized = false;
-int32_t ompi_mpi_finalize_started = false;
+opal_mutex_t ompi_mpi_bootstrap_mutex = OPAL_MUTEX_STATIC_INIT;
+volatile bool ompi_mpi_init_started = false;
+volatile bool ompi_mpi_initialized = false;
+volatile bool ompi_mpi_finalize_started = false;
+volatile bool ompi_mpi_finalized = false;
+volatile bool ompi_rte_initialized = false;
bool ompi_mpi_thread_multiple = false;
int ompi_mpi_thread_requested = MPI_THREAD_SINGLE;
@@ -384,9 +385,26 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
* for the modex in order to work in heterogeneous environments. */
uint8_t threadlevel_bf;
-/* Indicate that we have *started* MPI_INIT*. MPI_FINALIZE has
-something sorta similar in a static local variable in
-ompi_mpi_finalize(). */
+/* Ensure that we were not already initialized or finalized.
+This lock is held for the duration of ompi_mpi_init() and
+ompi_mpi_finalize(). Hence, if we get it, then no other thread
+is inside the critical section (and we don't have to check the
+*_started bool variables). */
+opal_mutex_lock(&ompi_mpi_bootstrap_mutex);
+if (ompi_mpi_finalized) {
+opal_show_help("help-mpi-runtime.txt",
+"mpi_init: already finalized", true);
+opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
+return MPI_ERR_OTHER;
+} else if (ompi_mpi_initialized) {
+opal_show_help("help-mpi-runtime.txt",
+"mpi_init: invoked multiple times", true);
+opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
+return MPI_ERR_OTHER;
+}
+/* Indicate that we have *started* MPI_INIT* */
ompi_mpi_init_started = true;
/* Setup enough to check get/set MCA params */
@@ -904,6 +922,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
"mpi_init:startup:internal-failure", true,
"MPI_INIT", "MPI_INIT", error, err_msg, ret);
}
+opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
return ret;
}
@@ -933,5 +952,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
OPAL_TIMING_REPORT(ompi_enable_timing_ext, &tm);
OPAL_TIMING_RELEASE(&tm);
+opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
return MPI_SUCCESS;
}
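Together with the checks deleted from MPI_Init and MPI_Init_thread above, the effect is that a repeated initialization is now rejected inside ompi_mpi_init() while the mutex is held, surfacing as MPI_ERR_OTHER. A hedged caller-side sketch (whether an error from a failed MPI_INIT reaches the caller as a return code rather than aborting is implementation-dependent; this only illustrates the intent):

/* double_init.c -- what a caller might observe (illustrative only).
 * Build: mpicc double_init.c -o double_init */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    /* Ask for errors to be returned instead of aborting the job. */
    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

    int rc = MPI_Init(&argc, &argv);  /* second init: rejected above */
    if (MPI_SUCCESS != rc) {
        fprintf(stderr, "second MPI_Init rejected (rc=%d)\n", rc);
    }

    MPI_Finalize();
    return 0;
}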

View file

@@ -299,7 +299,6 @@ int ompi_mpi_register_params(void)
0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
&ompi_add_procs_cutoff);
ompi_mpi_dynamics_enabled = true;
(void) mca_base_var_register("ompi", "mpi", NULL, "dynamics_enabled",
"Is the MPI dynamic process functionality enabled (e.g., MPI_COMM_SPAWN)? Default is yes, but certain transports and/or environments may disable it.",