1
1

Provide a warning message if a user's app executes a "fork" operation while using subsystems that may not cleanly support it - e.g., the openib btl. The provided warning is a generic one indicating that use of fork in current conditions is not recommended.

This is set up so that the warning is issued only once (as opposed to every time the app forks), and it goes through orte_show_help so the user doesn't get hammered by #procs copies of the warning. In addition, there is a new MCA param (can't have too many!) to shut the warning off altogether.

This closes ticket #1244

This commit was SVN r19196.
Этот коммит содержится в:
Ralph Castain 2008-08-06 14:22:03 +00:00
родитель 3c8d43deed
Коммит 277e4ac292
5 изменённых файлов: 51 добавлений и 0 удалений

Просмотреть файл

@ -52,6 +52,7 @@
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/proc_info.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "ompi/proc/proc.h"
@ -63,6 +64,7 @@
#include "ompi/datatype/convertor.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "ompi/runtime/mpiruntime.h"
#include "btl_openib.h"
#include "btl_openib_frag.h"
@ -1898,6 +1900,7 @@ sort_devs_by_distance(struct ibv_device **ib_devs, int count)
return devs;
}
/*
* IB component initialization:
* (1) read interface list from kernel and compare against component parameters
@ -2255,6 +2258,12 @@ btl_openib_component_init(int *num_btl_modules,
opal_argv_free(mca_btl_openib_component.if_exclude_list);
mca_btl_openib_component.if_exclude_list = NULL;
}
/* setup the fork warning message as we are sensitive
* to memory corruption issues when fork is called
*/
ompi_warn_fork();
return btls;
no_btls:

Просмотреть файл

@ -75,3 +75,10 @@ Node: %s
In order to operate in a heterogeneous environment, please reconfigure
Open MPI with --enable-heterogeneous.
#
[mpi_init:warn-fork]
The program has executed an operation involving a call to "fork". Open MPI
is currently operating in a condition that could result in memory corruption
issues in this situation - for example, use of the OpenIB BTL. Use of fork
is therefore not recommended in this situation.

Просмотреть файл

@ -60,6 +60,9 @@ OMPI_DECLSPEC extern struct opal_thread_t *ompi_mpi_main_thread;
it down during MPI_FINALIZE)? */
OMPI_DECLSPEC extern bool ompi_mpi_maffinity_setup;
/** If true, the fork warning is suppressed: ompi_warn_fork() will not
    install its fork handler. Set from the "mpi" / "do_not_warn_on_fork"
    MCA parameter, which defaults to false (i.e., warn). */
OMPI_DECLSPEC extern bool ompi_do_not_warn_on_fork;
/** In ompi_mpi_init: a list of all memory associated with calling
MPI_REGISTER_DATAREP so that we can free it during
MPI_FINALIZE. */
@ -68,6 +71,8 @@ OMPI_DECLSPEC extern opal_list_t ompi_registered_datareps;
/** version string of ompi */
OMPI_DECLSPEC extern const char ompi_version_string[];
/**
 * Arrange for a warning to be shown if the process calls fork.
 *
 * Intended to be called by subsystems (e.g., the openib BTL) that are
 * sensitive to memory corruption issues when fork is used. Does
 * nothing if ompi_do_not_warn_on_fork is true or if pthread support
 * (HAVE_PTHREAD_H) is not available.
 */
OMPI_DECLSPEC void ompi_warn_fork(void);
/**
* Initialize the Open MPI MPI environment
*

Просмотреть файл

@ -27,6 +27,9 @@
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif /* HAVE_SYS_TIME_H */
#if HAVE_PTHREAD_H
#include <pthread.h>
#endif
#include "mpi.h"
#include "opal/class/opal_list.h"
@ -115,6 +118,27 @@ opal_thread_t *ompi_mpi_main_thread = NULL;
bool ompi_mpi_maffinity_setup = false;
/* When true, ompi_warn_fork() does not register the fork warning handler */
bool ompi_do_not_warn_on_fork;
/* Set once the fork warning has been shown so it is not repeated */
static bool fork_warning_issued = false;
/*
 * Fork handler (registered through pthread_atfork as the "prepare"
 * callback): show the generic fork warning the first time fork is
 * invoked while MPI is initialized and not yet finalized.
 */
static void warn_fork(void)
{
    /* Stay silent outside the init/finalize window, and once the
       warning has already been delivered */
    if (!ompi_mpi_initialized || ompi_mpi_finalized || fork_warning_issued) {
        return;
    }

    orte_show_help("help-mpi-runtime.txt", "mpi_init:warn-fork", true);
    fork_warning_issued = true;
}
/*
 * Arrange for a warning to be shown if the process calls fork.
 *
 * Meant to be called by any subsystem that does not cleanly support
 * fork. The handler is registered at most once regardless of how many
 * times this function is invoked, since atfork handlers cannot be
 * removed and duplicates would only accumulate. Nothing is registered
 * when the user has disabled the warning (ompi_do_not_warn_on_fork)
 * or when pthread support is unavailable.
 */
void ompi_warn_fork(void)
{
#if HAVE_PTHREAD_H
    static bool handler_registered = false;

    if (ompi_do_not_warn_on_fork || handler_registered) {
        return;
    }

    /* pthread_atfork returns 0 on success; on failure leave the flag
       clear so a later caller can try again */
    if (0 == pthread_atfork(warn_fork, NULL, NULL)) {
        handler_registered = true;
    }
#endif
}
/*
* These variables are here, rather than under ompi/mpi/c/foo.c
* because it is not sufficient to have a .c file that only contains

Просмотреть файл

@ -275,6 +275,12 @@ int ompi_mpi_register_params(void)
(int) ompi_mpi_paffinity_alone, &value);
ompi_mpi_paffinity_alone = OPAL_INT_TO_BOOL(value);
mca_base_param_reg_int_name("mpi", "do_not_warn_on_fork",
"If nonzero, do not warn if program forks under conditions that could cause memory corruption issues",
false, false,
(int) false, &value);
ompi_do_not_warn_on_fork = OPAL_INT_TO_BOOL(value);
/* Sparse group storage support */
mca_base_param_reg_int_name("mpi", "have_sparse_group_storage",