Add the initial_errhandler info key to MPI_INFO_ENV and populate the
value from prun-populated parameters.

Allow errhandlers to invoke the initial error handler before MPI_INIT.

Signed-off-by: Aurélien Bouteiller <bouteill@icl.utk.edu>
Parent: 703b8c356f
Commit: 3cd85a9ec5
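For illustration (not part of the commit): once the launcher exports the OMPI_MCA_mpi_initial_errhandler environment variable, as prun does from its launch parameters, an application can read the resulting key back from MPI_INFO_ENV. A minimal sketch using only standard MPI calls; the setenv() here stands in for the launcher:

    #include <mpi.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(int argc, char **argv)
    {
        /* Stand-in for the launcher: prun would export this variable
         * from the mpi_initial_errhandler launch key. */
        setenv("OMPI_MCA_mpi_initial_errhandler", "mpi_errors_return", 1);

        MPI_Init(&argc, &argv);

        /* Read back the key this commit adds to MPI_INFO_ENV. */
        char value[MPI_MAX_INFO_VAL + 1];
        int flag = 0;
        MPI_Info_get(MPI_INFO_ENV, "mpi_initial_errhandler",
                     MPI_MAX_INFO_VAL, value, &flag);
        if (flag) {
            printf("initial errhandler: %s\n", value);
        }

        MPI_Finalize();
        return 0;
    }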
@@ -78,6 +78,55 @@ ompi_predefined_errhandler_t ompi_mpi_errors_throw_exceptions = {{{0}}};
 
 ompi_predefined_errhandler_t *ompi_mpi_errors_throw_exceptions_addr =
     &ompi_mpi_errors_throw_exceptions;
 
+static opal_mutex_t errhandler_init_lock = OPAL_MUTEX_STATIC_INIT;
+ompi_errhandler_t* ompi_initial_error_handler_eh = NULL;
+void (*ompi_initial_error_handler)(struct ompi_communicator_t **comm, int *error_code, ...) = NULL;
+
+/*
+ * Initialize the initial errhandler infrastructure only.
+ * This does not allocate any memory and does not require a corresponding fini.
+ */
+int ompi_initial_errhandler_init(void) {
+    opal_mutex_lock(&errhandler_init_lock);
+    if ( NULL != ompi_initial_error_handler ) {
+        /* Already initialized (presumably by an API call before MPI_init) */
+        opal_mutex_unlock(&errhandler_init_lock);
+        return OMPI_SUCCESS;
+    }
+
+    /* If it has been requested from the launch keys, set the initial
+     * error handler that will be attached by default with predefined
+     * communicators. We use an env because that can be obtained before
+     * OPAL and PMIx initialization.
+     */
+    char *env = getenv("OMPI_MCA_mpi_initial_errhandler");
+    if( NULL != env ) {
+        if( 0 == strcasecmp(env, "mpi_errors_are_fatal") ) {
+            ompi_initial_error_handler = &ompi_mpi_errors_are_fatal_comm_handler;
+            ompi_initial_error_handler_eh = &ompi_mpi_errors_are_fatal.eh;
+        }
+        else if( 0 == strcasecmp(env, "mpi_errors_abort") ) {
+            ompi_initial_error_handler = &ompi_mpi_errors_abort_comm_handler;
+            ompi_initial_error_handler_eh = &ompi_mpi_errors_abort.eh;
+        }
+        else if( 0 == strcasecmp(env, "mpi_errors_return") ) {
+            ompi_initial_error_handler = &ompi_mpi_errors_return_comm_handler;
+            ompi_initial_error_handler_eh = &ompi_mpi_errors_return.eh;
+        }
+        else {
+            /* invalid entry detected, ignore it, set fatal by default */
+            opal_output(0, "WARNING: invalid value for launch key 'mpi_initial_errhandler'; defaulting to 'mpi_errors_are_fatal'.");
+            ompi_initial_error_handler = &ompi_mpi_errors_are_fatal_comm_handler;
+            ompi_initial_error_handler_eh = &ompi_mpi_errors_are_fatal.eh;
+        }
+    }
+    else {
+        ompi_initial_error_handler = &ompi_mpi_errors_are_fatal_comm_handler;
+        ompi_initial_error_handler_eh = &ompi_mpi_errors_are_fatal.eh;
+    }
+    opal_mutex_unlock(&errhandler_init_lock);
+    return OMPI_SUCCESS;
+}
+
 /*
  * Initialize OMPI errhandler infrastructure
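If the launch key selects mpi_errors_return, errors raised on predefined communicators return an error code instead of aborting the job. A hypothetical demonstration program, assuming the job was launched with OMPI_MCA_mpi_initial_errhandler=mpi_errors_return in its environment:

    #include <mpi.h>
    #include <stdio.h>

    int main(int argc, char **argv)
    {
        MPI_Init(&argc, &argv);

        /* Deliberately invalid call: a negative count raises an error
         * on MPI_COMM_WORLD. With mpi_errors_return as the initial
         * error handler, the call returns a code instead of aborting. */
        int rc = MPI_Send(NULL, -1, MPI_INT, 0, 0, MPI_COMM_WORLD);
        if (MPI_SUCCESS != rc) {
            char msg[MPI_MAX_ERROR_STRING];
            int len;
            MPI_Error_string(rc, msg, &len);
            printf("recovered from: %s\n", msg);
        }

        MPI_Finalize();
        return 0;
    }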
@@ -163,9 +212,12 @@ int ompi_errhandler_init(void)
                     "MPI_ERRORS_THROW_EXCEPTIONS",
                     sizeof(ompi_mpi_errors_throw_exceptions.eh.eh_name));
 
-    /* All done */
-
-    return OMPI_SUCCESS;
+    /* Let's initialize the initial error handler if not already done */
+    char *env = getenv("OMPI_MCA_mpi_initial_errhandler");
+    if( NULL != env ) {
+        ompi_process_info.initial_errhandler = strndup(env, MPI_MAX_INFO_VAL);
+    }
+    return ompi_initial_errhandler_init();
 }
 
 
@@ -185,6 +185,26 @@ OMPI_DECLSPEC extern ompi_predefined_errhandler_t ompi_mpi_errors_throw_exceptions
  */
 OMPI_DECLSPEC extern opal_pointer_array_t ompi_errhandler_f_to_c_table;
 
+/**
+ * This function selects the initial error handler.
+ * It may be called during MPI_INIT, or during the first MPI call
+ * that raises an error. This function does not allocate memory,
+ * and will only populate the ompi_initial_error_handler_eh and
+ * ompi_initial_error_handler pointers with predefined error handler
+ * and error handler function aliases.
+ */
+OMPI_DECLSPEC int ompi_initial_errhandler_init(void);
+/**
+ * The initial error handler pointer. Will be set to alias one of the
+ * predefined error handlers through launch keys during the first MPI call,
+ * and will then be attached to predefined communicators.
+ */
+OMPI_DECLSPEC extern ompi_errhandler_t* ompi_initial_error_handler_eh;
+/**
+ * The initial error handler function pointer. Will be called when an error
+ * is raised before MPI_INIT or after MPI_FINALIZE.
+ */
+OMPI_DECLSPEC extern void (*ompi_initial_error_handler)(struct ompi_communicator_t **comm, int *error_code, ...);
+
 /**
  * Forward declaration so that we don't have to include
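These declarations let error paths outside the MPI_INIT..MPI_FINALIZE window reach a handler. A sketch of such a caller (raise_error_before_init is invented here for illustration; it is not part of this commit):

    /* Hypothetical internal caller, sketched against the declarations above. */
    int raise_error_before_init(int error_code)
    {
        /* Idempotent and lock-protected, so safe from any entry point. */
        ompi_initial_errhandler_init();
        /* No communicator exists yet, hence the NULL comm argument. */
        ompi_initial_error_handler(NULL, &error_code);
        return error_code;
    }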
@@ -131,6 +131,12 @@ int ompi_mpiinfo_init(void)
     opal_info_set(&ompi_mpi_info_env.info.super, "soft", cptr);
     free(cptr);
 
+    /* the initial error handler, set it as requested (nothing if not
+     * requested) */
+    if (NULL != ompi_process_info.initial_errhandler) {
+        opal_info_set(&ompi_mpi_info_env.info.super, "mpi_initial_errhandler", ompi_process_info.initial_errhandler);
+    }
+
     /* local host name */
     opal_info_set(&ompi_mpi_info_env.info.super, "host", ompi_process_info.nodename);
@@ -751,12 +751,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
         goto error;
     }
 
-    /* initialize info */
-    if (OMPI_SUCCESS != (ret = ompi_mpiinfo_init())) {
-        error = "ompi_info_init() failed";
-        goto error;
-    }
-
     /* initialize error handlers */
     if (OMPI_SUCCESS != (ret = ompi_errhandler_init())) {
         error = "ompi_errhandler_init() failed";
@@ -775,6 +769,12 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
         goto error;
     }
 
+    /* initialize info */
+    if (OMPI_SUCCESS != (ret = ompi_mpiinfo_init())) {
+        error = "ompi_info_init() failed";
+        goto error;
+    }
+
     /* initialize groups */
     if (OMPI_SUCCESS != (ret = ompi_group_init())) {
         error = "ompi_group_init() failed";
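The two hunks above move info initialization after errhandler initialization because ompi_errhandler_init() now fills in ompi_process_info.initial_errhandler, which ompi_mpiinfo_init() consumes when it builds MPI_INFO_ENV. A simplified fragment of the resulting ordering constraint (control flow condensed; names are from this commit):

    int ret;
    ret = ompi_errhandler_init();  /* copies OMPI_MCA_mpi_initial_errhandler
                                    * into ompi_process_info.initial_errhandler */
    if (OMPI_SUCCESS != ret) { /* goto error */ }
    ret = ompi_mpiinfo_init();     /* publishes it as the "mpi_initial_errhandler"
                                    * key of MPI_INFO_ENV */
    if (OMPI_SUCCESS != ret) { /* goto error */ }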
@@ -935,6 +935,11 @@ int ompi_rte_finalize(void)
         opal_process_info.initial_wdir = NULL;
     }
 
+    if (NULL != opal_process_info.initial_errhandler) {
+        free(opal_process_info.initial_errhandler);
+        opal_process_info.initial_errhandler = NULL;
+    }
+
     /* cleanup our internal nspace hack */
     opal_pmix_finalize_nspace_tracker();
 
@@ -51,7 +51,8 @@ opal_process_info_t opal_process_info = {
     .num_apps = 0,
     .initial_wdir = NULL,
     .reincarnation = 0,
-    .proc_is_bound = false
+    .proc_is_bound = false,
+    .initial_errhandler = NULL,
 };
 
 static opal_proc_t opal_local_proc = {
@@ -126,6 +126,7 @@ typedef struct opal_process_info_t {
     char *initial_wdir;
     uint32_t reincarnation;
     bool proc_is_bound;
+    char *initial_errhandler;
 } opal_process_info_t;
 OPAL_DECLSPEC extern opal_process_info_t opal_process_info;
 