Provide a warning message if a user's app executes a "fork" operation while using subsystems that may not cleanly support it - e.g., the openib btl. The provided warning is a generic one indicating that use of fork in current conditions is not recommended.
This is set up so that the warning is issued only once (as opposed to every time the app forks), and it goes through orte_show_help so the user doesn't get hammered by #procs copies of the warning. In addition, there is a new MCA param (can't have too many!) to shut the warning off altogether. This closes ticket #1244. This commit was SVN r19196.
Этот коммит содержится в:
родитель
3c8d43deed
Коммит
277e4ac292
@ -52,6 +52,7 @@
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "ompi/proc/proc.h"
|
||||
@ -63,6 +64,7 @@
|
||||
#include "ompi/datatype/convertor.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/runtime/ompi_module_exchange.h"
|
||||
#include "ompi/runtime/mpiruntime.h"
|
||||
|
||||
#include "btl_openib.h"
|
||||
#include "btl_openib_frag.h"
|
||||
@ -1898,6 +1900,7 @@ sort_devs_by_distance(struct ibv_device **ib_devs, int count)
|
||||
return devs;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* IB component initialization:
|
||||
* (1) read interface list from kernel and compare against component parameters
|
||||
@ -2255,6 +2258,12 @@ btl_openib_component_init(int *num_btl_modules,
|
||||
opal_argv_free(mca_btl_openib_component.if_exclude_list);
|
||||
mca_btl_openib_component.if_exclude_list = NULL;
|
||||
}
|
||||
|
||||
/* setup the fork warning message as we are sensitive
|
||||
* to memory corruption issues when fork is called
|
||||
*/
|
||||
ompi_warn_fork();
|
||||
|
||||
return btls;
|
||||
|
||||
no_btls:
|
||||
|
@ -75,3 +75,10 @@ Node: %s
|
||||
|
||||
In order to operate in a heterogeneous environment, please reconfigure
|
||||
Open MPI with --enable-heterogeneous.
|
||||
#
|
||||
[mpi_init:warn-fork]
|
||||
The program has executed an operation involving a call to "fork". Open MPI
|
||||
is currently operating in a condition that could result in memory corruption
|
||||
issues in this situation - for example, use of the OpenIB BTL. Use of fork
|
||||
is therefore not recommended in this situation.
|
||||
|
||||
|
@ -60,6 +60,9 @@ OMPI_DECLSPEC extern struct opal_thread_t *ompi_mpi_main_thread;
|
||||
it down during MPI_FINALIZE)? */
|
||||
OMPI_DECLSPEC extern bool ompi_mpi_maffinity_setup;
|
||||
|
||||
/** Do we want to be warned on fork or not? */
|
||||
OMPI_DECLSPEC extern bool ompi_do_not_warn_on_fork;
|
||||
|
||||
/** In ompi_mpi_init: a list of all memory associated with calling
|
||||
MPI_REGISTER_DATAREP so that we can free it during
|
||||
MPI_FINALIZE. */
|
||||
@ -68,6 +71,8 @@ OMPI_DECLSPEC extern opal_list_t ompi_registered_datareps;
|
||||
/** version string of ompi */
|
||||
OMPI_DECLSPEC extern const char ompi_version_string[];
|
||||
|
||||
OMPI_DECLSPEC void ompi_warn_fork(void);
|
||||
|
||||
/**
|
||||
* Initialize the Open MPI MPI environment
|
||||
*
|
||||
|
@ -27,6 +27,9 @@
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif /* HAVE_SYS_TIME_H */
|
||||
#if HAVE_PTHREAD_H
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
|
||||
#include "mpi.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
@ -115,6 +118,27 @@ opal_thread_t *ompi_mpi_main_thread = NULL;
|
||||
|
||||
bool ompi_mpi_maffinity_setup = false;
|
||||
|
||||
bool ompi_do_not_warn_on_fork;
|
||||
|
||||
static bool fork_warning_issued = false;
|
||||
|
||||
static void warn_fork(void)
|
||||
{
|
||||
if (ompi_mpi_initialized && !ompi_mpi_finalized && !fork_warning_issued) {
|
||||
orte_show_help("help-mpi-runtime.txt", "mpi_init:warn-fork", true);
|
||||
fork_warning_issued = true;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Arrange for warn_fork() to run whenever the process calls fork().
 *
 * Nothing is registered when ompi_do_not_warn_on_fork is set, or when
 * the build does not have <pthread.h> (HAVE_PTHREAD_H is false).
 *
 * This function may be called any number of times: the handler is
 * handed to pthread_atfork() at most once, so repeated calls do not
 * stack up duplicate copies of it (POSIX offers no way to unregister
 * an atfork handler).
 */
void ompi_warn_fork(void)
{
#if HAVE_PTHREAD_H
    /* Remembers whether the handler has been registered successfully */
    static bool handler_registered = false;

    if (ompi_do_not_warn_on_fork || handler_registered) {
        return;
    }

    /* warn_fork is installed as the "prepare" handler (first argument).
     * pthread_atfork() returns 0 on success; only then is the flag set,
     * so a later call can retry after a failure. */
    if (0 == pthread_atfork(warn_fork, NULL, NULL)) {
        handler_registered = true;
    }
#endif
}
|
||||
|
||||
/*
|
||||
* These variables are here, rather than under ompi/mpi/c/foo.c
|
||||
* because it is not sufficient to have a .c file that only contains
|
||||
|
@ -275,6 +275,12 @@ int ompi_mpi_register_params(void)
|
||||
(int) ompi_mpi_paffinity_alone, &value);
|
||||
ompi_mpi_paffinity_alone = OPAL_INT_TO_BOOL(value);
|
||||
|
||||
mca_base_param_reg_int_name("mpi", "do_not_warn_on_fork",
|
||||
"If nonzero, do not warn if program forks under conditions that could cause memory corruption issues",
|
||||
false, false,
|
||||
(int) false, &value);
|
||||
ompi_do_not_warn_on_fork = OPAL_INT_TO_BOOL(value);
|
||||
|
||||
/* Sparse group storage support */
|
||||
|
||||
mca_base_param_reg_int_name("mpi", "have_sparse_group_storage",
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user