1
1

Add the initial_errhandler info key to MPI_INFO_ENV and populate the

value from prun-populated parameters

Signed-off-by: Aurélien Bouteiller <bouteill@icl.utk.edu>

Allow errhandlers to invoke the initial error handler before MPI_INIT

Signed-off-by: Aurelien Bouteiller <bouteill@icl.utk.edu>

Indentation

Signed-off-by: Aurelien Bouteiller <bouteill@icl.utk.edu>
Этот коммит содержится в:
Aurélien Bouteiller 2020-06-09 08:25:28 -04:00 коммит произвёл Aurelien Bouteiller
родитель 703b8c356f
Коммит 3cd85a9ec5
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 08F60797C5941DB2
7 изменённых файлов: 95 добавлений и 10 удалений

Просмотреть файл

@ -78,6 +78,55 @@ ompi_predefined_errhandler_t ompi_mpi_errors_throw_exceptions = {{{0}}};
ompi_predefined_errhandler_t *ompi_mpi_errors_throw_exceptions_addr =
&ompi_mpi_errors_throw_exceptions;
/* Held by ompi_initial_errhandler_init() while it checks and sets the two
 * initial error handler pointers below. */
static opal_mutex_t errhandler_init_lock = OPAL_MUTEX_STATIC_INIT;
/* Errhandler object selected as the initial error handler; stays NULL until
 * ompi_initial_errhandler_init() has made its selection. */
ompi_errhandler_t* ompi_initial_error_handler_eh = NULL;
/* Handler function paired with ompi_initial_error_handler_eh. A NULL value is
 * also what ompi_initial_errhandler_init() tests to decide whether the
 * selection still has to be made. */
void (*ompi_initial_error_handler)(struct ompi_communicator_t **comm, int *error_code, ...) = NULL;
/*
 * Select the initial error handler (handler function and errhandler object).
 *
 * The selection is made at most once: if ompi_initial_error_handler is
 * already set (presumably by an API call made before MPI_Init), the call
 * is a no-op. Nothing is allocated here, so no matching fini is needed.
 *
 * Always returns OMPI_SUCCESS.
 */
int ompi_initial_errhandler_init(void)
{
    enum { PICK_FATAL, PICK_ABORT, PICK_RETURN } pick = PICK_FATAL;
    const char *requested;

    opal_mutex_lock(&errhandler_init_lock);

    if (NULL == ompi_initial_error_handler) {
        /* The launch key is read from the environment because that is
         * available before OPAL and PMIx have been initialized. When the
         * key is absent the selection stays at "errors are fatal". */
        requested = getenv("OMPI_MCA_mpi_initial_errhandler");
        if (NULL != requested) {
            if (0 == strcasecmp(requested, "mpi_errors_abort")) {
                pick = PICK_ABORT;
            } else if (0 == strcasecmp(requested, "mpi_errors_return")) {
                pick = PICK_RETURN;
            } else if (0 != strcasecmp(requested, "mpi_errors_are_fatal")) {
                /* Unrecognized value: warn and keep the fatal default. */
                opal_output(0, "WARNING: invalid value for launch key 'mpi_initial_errhandler'; defaulting to 'mpi_errors_are_fatal'.");
            }
        }

        /* Publish the handler function first, then the errhandler object. */
        switch (pick) {
        case PICK_ABORT:
            ompi_initial_error_handler = &ompi_mpi_errors_abort_comm_handler;
            ompi_initial_error_handler_eh = &ompi_mpi_errors_abort.eh;
            break;
        case PICK_RETURN:
            ompi_initial_error_handler = &ompi_mpi_errors_return_comm_handler;
            ompi_initial_error_handler_eh = &ompi_mpi_errors_return.eh;
            break;
        case PICK_FATAL:
        default:
            ompi_initial_error_handler = &ompi_mpi_errors_are_fatal_comm_handler;
            ompi_initial_error_handler_eh = &ompi_mpi_errors_are_fatal.eh;
            break;
        }
    }

    opal_mutex_unlock(&errhandler_init_lock);
    return OMPI_SUCCESS;
}
/*
* Initialize OMPI errhandler infrastructure
@ -163,9 +212,12 @@ int ompi_errhandler_init(void)
"MPI_ERRORS_THROW_EXCEPTIONS",
sizeof(ompi_mpi_errors_throw_exceptions.eh.eh_name));
/* All done */
return OMPI_SUCCESS;
/* Let's initialize the initial error handler if not already done */
char *env = getenv("OMPI_MCA_mpi_initial_errhandler");
if( NULL != env ) {
ompi_process_info.initial_errhandler = strndup(env, MPI_MAX_INFO_VAL);
}
return ompi_initial_errhandler_init();
}

Просмотреть файл

@ -185,6 +185,26 @@ OMPI_DECLSPEC extern ompi_predefined_errhandler_t ompi_mpi_errors_throw_exceptio
*/
OMPI_DECLSPEC extern opal_pointer_array_t ompi_errhandler_f_to_c_table;
/**
 * Select the initial error handler.
 *
 * This function may be called during MPI_INIT, or during the first MPI
 * call that raises an error. It does not allocate memory: it only points
 * ompi_initial_error_handler_eh and ompi_initial_error_handler at one of
 * the predefined error handlers and at its handler function.
 *
 * The selection is read from the OMPI_MCA_mpi_initial_errhandler
 * environment variable ("mpi_errors_are_fatal", "mpi_errors_abort" or
 * "mpi_errors_return", compared case-insensitively). If the variable is
 * unset, or holds any other value, the errors-are-fatal handler is used.
 * Once a selection has been made, further calls leave it unchanged.
 *
 * @return OMPI_SUCCESS
 */
OMPI_DECLSPEC int ompi_initial_errhandler_init(void);
/**
 * The initial error handler object. NULL until
 * ompi_initial_errhandler_init() has run; afterwards it aliases one of the
 * predefined error handlers, chosen through the launch keys, and is the
 * handler that will then be attached to predefined communicators.
 */
OMPI_DECLSPEC extern ompi_errhandler_t* ompi_initial_error_handler_eh;
/**
 * The initial error handler function pointer. NULL until
 * ompi_initial_errhandler_init() has run. Will be called when an error
 * is raised before MPI_INIT or after MPI_FINALIZE.
 */
OMPI_DECLSPEC extern void (*ompi_initial_error_handler)(struct ompi_communicator_t **comm, int *error_code, ...);
/**
* Forward declaration so that we don't have to include

Просмотреть файл

@ -131,6 +131,12 @@ int ompi_mpiinfo_init(void)
opal_info_set(&ompi_mpi_info_env.info.super, "soft", cptr);
free(cptr);
/* the initial error handler, set it as requested (nothing if not
* requested) */
if (NULL != ompi_process_info.initial_errhandler) {
opal_info_set(&ompi_mpi_info_env.info.super, "mpi_initial_errhandler", ompi_process_info.initial_errhandler);
}
/* local host name */
opal_info_set(&ompi_mpi_info_env.info.super, "host", ompi_process_info.nodename);

Просмотреть файл

@ -751,12 +751,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
goto error;
}
/* initialize info */
if (OMPI_SUCCESS != (ret = ompi_mpiinfo_init())) {
error = "ompi_info_init() failed";
goto error;
}
/* initialize error handlers */
if (OMPI_SUCCESS != (ret = ompi_errhandler_init())) {
error = "ompi_errhandler_init() failed";
@ -775,6 +769,12 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided,
goto error;
}
/* initialize info */
if (OMPI_SUCCESS != (ret = ompi_mpiinfo_init())) {
error = "ompi_info_init() failed";
goto error;
}
/* initialize groups */
if (OMPI_SUCCESS != (ret = ompi_group_init())) {
error = "ompi_group_init() failed";

Просмотреть файл

@ -935,6 +935,11 @@ int ompi_rte_finalize(void)
opal_process_info.initial_wdir = NULL;
}
if (NULL != opal_process_info.initial_errhandler) {
free(opal_process_info.initial_errhandler);
opal_process_info.initial_errhandler = NULL;
}
/* cleanup our internal nspace hack */
opal_pmix_finalize_nspace_tracker();

Просмотреть файл

@ -51,7 +51,8 @@ opal_process_info_t opal_process_info = {
.num_apps = 0,
.initial_wdir = NULL,
.reincarnation = 0,
.proc_is_bound = false
.proc_is_bound = false,
.initial_errhandler = NULL,
};
static opal_proc_t opal_local_proc = {

Просмотреть файл

@ -126,6 +126,7 @@ typedef struct opal_process_info_t {
char *initial_wdir;
uint32_t reincarnation;
bool proc_is_bound;
char *initial_errhandler;
} opal_process_info_t;
OPAL_DECLSPEC extern opal_process_info_t opal_process_info;