Merge pull request #7800 from abouteiller/mpi-next/errors_abort
MPI4: Add ERRORS_ABORT infrastructure
Этот коммит содержится в:
Коммит
3ed466e629
@ -3,7 +3,7 @@
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# Copyright (c) 2004-2020 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -186,7 +186,7 @@ SEARCH_HEADER[5]="ompi/datatype/convertor.h OMPI_COMM_INTER OMPI_COMM_CART OMPI_
|
||||
SEARCH_HEADER[6]="ompi/datatype/datatype.h MPI_Datatype DT_MAX_PREDEFINED DT_FLAG_ MAX_DT_COMPONENT_COUNT opal_ddt_count_t dt_type_desc_t ompi_datatype_t ompi_predefined_datatype_t ompi_ddt_init ompi_ddt_finalize ompi_ddt_create_ ompi_ddt_duplicate ompi_ddt_is_predefined ompi_ddt_create_from_packed_description"
|
||||
SEARCH_HEADER[7]="ompi/datatype/datatype_internal.h DDT_DUMP_STACK DT_ ddt_elem_id_description ddt_elem_desc ddt_elem_desc_t ddt_loop_desc ddt_loop_desc_t ddt_endloop_desc ddt_endloop_desc_t dt_elem_desc CREATE_LOOP_START CREATE_LOOP_END CREATE_ELEM ompi_complex_float_t ompi_complex_double_t ompi_complex_long_double_t ompi_ddt_basicDatatypes BASIC_DDT_FROM_ELEM ompi_ddt_default_convertors_init ompi_ddt_default_convertors_fini SAVE_STACK PUSH_STACK ompi_ddt_safeguard_pointer_debug_breakpoint OMPI_DDT_SAFEGUARD_POINTER GET_FIRST_NON_LOOP UPDATE_INTERNAL_COUNTERS ompi_ddt_print_args"
|
||||
SEARCH_HEADER[8]="ompi/errhandler/errhandler.h OMPI_ERRHANDLER_LANG_ ompi_errhandler_lang_t OMPI_ERRHANDLER_TYPE_ ompi_errhandler_type_t ompi_errhandler_t ompi_predefined_errhandler_t ompi_mpi_errhandler_null OMPI_ERRHANDLER_CHECK OMPI_ERRHANDLER_RETURN ompi_errhandler_init ompi_errhandler_finalize OMPI_ERRHANDLER_INVOKE ompi_errhandler_invoke ompi_errhandler_request_invoke ompi_errhandler_create ompi_errhandler_is_intrinsic ompi_errhandler_fortran_handler_fn_t OMPI_ERR_INIT_FINALIZE MPI_Errhandler"
|
||||
SEARCH_HEADER[9]="ompi/errhandler/errhandler_predefined.h ompi_mpi_errors_are_fatal_ ompi_mpi_errors_return_ ompi_mpi_errors_throw_exceptions"
|
||||
SEARCH_HEADER[9]="ompi/errhandler/errhandler_predefined.h ompi_mpi_errors_are_fatal_ ompi_mpi_errors_return_ ompi_mpi_errors_abort_ ompi_mpi_errors_throw_exceptions"
|
||||
###
|
||||
SEARCH_HEADER[10]="ompi/file/file.h OMPI_FILE_ISCLOSED OMPI_FILE_HIDDEN ompi_file_t ompi_predefined_file_t ompi_mpi_file_null ompi_file_f_to_c_table ompi_file_init ompi_file_open ompi_file_set_name ompi_file_close ompi_file_finalize ompi_file_invalid MPI_File MPI_FILE_NULL ompi_mpi_cxx_file_errhandler_invoke" # THE LAST ONE WAS FOR THE CXX INTERFACE
|
||||
SEARCH_HEADER[11]="ompi/group/group.h ompi_group_sporadic_list_t ompi_group_sporadic_data_t ompi_group_strided_data_t ompi_group_bitmap_data_t ompi_group_t ompi_predefined_group_t OMPI_GROUP_ ompi_group_f_to_c_table ompi_mpi_group_null ompi_group_allocate ompi_group_increment_proc_count ompi_group_decrement_proc_count ompi_group_size ompi_group_rank ompi_set_group_rank ompi_group_translate_ranks ompi_group_free ompi_group_get_proc_ptr ompi_group_calc_ ompi_group_peer_lookup ompi_group_div_ceil MPI_Group"
|
||||
|
@ -314,8 +314,8 @@ C++: MPI::Errhandler
|
||||
|
||||
MPI allows applications to define their own error handlers. The
|
||||
default error handler is to abort the MPI job. Error handlers can be
|
||||
attached to communicators, files, and windows. There are 3 predefined
|
||||
error handlers (MPI_ERRORS_ARE_FATAL, MPI_ERRORS_RETURN,
|
||||
attached to communicators, files, and windows. There are 4 predefined
|
||||
error handlers (MPI_ERRORS_ARE_FATAL, MPI_ERRORS_RETURN, MPI_ERRORS_ABORT,
|
||||
MPI::ERRORS_THROW_EXCEPTIONS), and applications can create their own
|
||||
error handlers.
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2020 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
@ -384,6 +384,8 @@ int mpidbg_init_per_process(mqs_process *process,
|
||||
int i = 0;
|
||||
fill_map(image, "MPI_ERRORS_ARE_FATAL", "ompi_mpi_errors_are_fatal",
|
||||
&mpidbg_errhandler_name_map[i++]);
|
||||
fill_map(image, "MPI_ERRORS_ABORT", "ompi_mpi_errors_abort",
|
||||
&mpidbg_errhandler_name_map[i++]);
|
||||
fill_map(image, "MPI_ERRORS_RETURN", "ompi_mpi_errors_return",
|
||||
&mpidbg_errhandler_name_map[i++]);
|
||||
fill_map(image, "MPI_ERRHANDLER_NULL", "ompi_mpi_errhandler_null",
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2020 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -85,6 +85,7 @@ static ompi_mpi_errcode_t ompi_err_not_same;
|
||||
static ompi_mpi_errcode_t ompi_err_no_space;
|
||||
static ompi_mpi_errcode_t ompi_err_no_such_file;
|
||||
static ompi_mpi_errcode_t ompi_err_port;
|
||||
static ompi_mpi_errcode_t ompi_err_proc_aborted;
|
||||
static ompi_mpi_errcode_t ompi_err_quota;
|
||||
static ompi_mpi_errcode_t ompi_err_read_only;
|
||||
static ompi_mpi_errcode_t ompi_err_rma_conflict;
|
||||
@ -186,6 +187,7 @@ int ompi_mpi_errcode_init (void)
|
||||
CONSTRUCT_ERRCODE( ompi_err_no_space, MPI_ERR_NO_SPACE, "MPI_ERR_NO_SPACE: no space left on device" );
|
||||
CONSTRUCT_ERRCODE( ompi_err_no_such_file, MPI_ERR_NO_SUCH_FILE, "MPI_ERR_NO_SUCH_FILE: no such file or directory" );
|
||||
CONSTRUCT_ERRCODE( ompi_err_port, MPI_ERR_PORT, "MPI_ERR_PORT: invalid port" );
|
||||
CONSTRUCT_ERRCODE( ompi_err_proc_aborted, MPI_ERR_PROC_ABORTED, "MPI_ERR_PROC_ABORTED: operation failed because a remote peer has aborted" );
|
||||
CONSTRUCT_ERRCODE( ompi_err_quota, MPI_ERR_QUOTA, "MPI_ERR_QUOTA: out of quota" );
|
||||
CONSTRUCT_ERRCODE( ompi_err_read_only, MPI_ERR_READ_ONLY, "MPI_ERR_READ_ONLY: file is read only" );
|
||||
CONSTRUCT_ERRCODE( ompi_err_rma_conflict, MPI_ERR_RMA_CONFLICT, "MPI_ERR_RMA_CONFLICT: rma conflict during operation" );
|
||||
@ -282,6 +284,7 @@ int ompi_mpi_errcode_finalize(void)
|
||||
OBJ_DESTRUCT(&ompi_err_no_space);
|
||||
OBJ_DESTRUCT(&ompi_err_no_such_file);
|
||||
OBJ_DESTRUCT(&ompi_err_port);
|
||||
OBJ_DESTRUCT(&ompi_err_proc_aborted);
|
||||
OBJ_DESTRUCT(&ompi_err_quota);
|
||||
OBJ_DESTRUCT(&ompi_err_read_only);
|
||||
OBJ_DESTRUCT(&ompi_err_rma_conflict);
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2020 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -68,6 +68,9 @@ ompi_predefined_errhandler_t *ompi_mpi_errhandler_null_addr =
|
||||
ompi_predefined_errhandler_t ompi_mpi_errors_are_fatal = {{{0}}};
|
||||
ompi_predefined_errhandler_t *ompi_mpi_errors_are_fatal_addr =
|
||||
&ompi_mpi_errors_are_fatal;
|
||||
ompi_predefined_errhandler_t ompi_mpi_errors_abort = {{{0}}};
|
||||
ompi_predefined_errhandler_t *ompi_mpi_errors_abort_addr =
|
||||
&ompi_mpi_errors_abort;
|
||||
ompi_predefined_errhandler_t ompi_mpi_errors_return = {{{0}}};
|
||||
ompi_predefined_errhandler_t *ompi_mpi_errors_return_addr =
|
||||
&ompi_mpi_errors_return;
|
||||
@ -127,6 +130,19 @@ int ompi_errhandler_init(void)
|
||||
opal_string_copy(ompi_mpi_errors_return.eh.eh_name, "MPI_ERRORS_RETURN",
|
||||
sizeof(ompi_mpi_errors_return.eh.eh_name));
|
||||
|
||||
OBJ_CONSTRUCT( &ompi_mpi_errors_abort.eh, ompi_errhandler_t );
|
||||
if( ompi_mpi_errors_abort.eh.eh_f_to_c_index != OMPI_ERRORS_ABORT_FORTRAN )
|
||||
return OMPI_ERROR;
|
||||
ompi_mpi_errors_abort.eh.eh_mpi_object_type = OMPI_ERRHANDLER_TYPE_PREDEFINED;
|
||||
ompi_mpi_errors_abort.eh.eh_lang = OMPI_ERRHANDLER_LANG_C;
|
||||
ompi_mpi_errors_abort.eh.eh_comm_fn = ompi_mpi_errors_abort_comm_handler;
|
||||
ompi_mpi_errors_abort.eh.eh_file_fn = ompi_mpi_errors_abort_file_handler;
|
||||
ompi_mpi_errors_abort.eh.eh_win_fn = ompi_mpi_errors_abort_win_handler ;
|
||||
ompi_mpi_errors_abort.eh.eh_fort_fn = NULL;
|
||||
opal_string_copy(ompi_mpi_errors_abort.eh.eh_name,
|
||||
"MPI_ERRORS_ABORT",
|
||||
sizeof(ompi_mpi_errors_abort.eh.eh_name));
|
||||
|
||||
/* If we're going to use C++, functions will be fixed up during
|
||||
MPI::Init. Note that it is proper to use ERRHANDLER_LANG_C here;
|
||||
the dispatch function is in C (although in libmpi_cxx); the
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2020 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -49,7 +49,8 @@ BEGIN_C_DECLS
|
||||
/*
 * Fortran handle indices of the predefined error handlers.
 *
 * ompi_errhandler_init() compares each predefined handler's
 * eh_f_to_c_index against these constants, so the values are spelled
 * out explicitly. New predefined handlers must be appended, never
 * inserted, so the existing indices do not move.
 */
enum {
    OMPI_ERRHANDLER_NULL_FORTRAN  = 0,
    OMPI_ERRORS_ARE_FATAL_FORTRAN = 1,
    OMPI_ERRORS_RETURN_FORTRAN    = 2,
    OMPI_ERRORS_ABORT_FORTRAN     = 3,
};
|
||||
|
||||
|
||||
@ -167,6 +168,12 @@ OMPI_DECLSPEC extern ompi_predefined_errhandler_t *ompi_mpi_errors_are_fatal_add
|
||||
OMPI_DECLSPEC extern ompi_predefined_errhandler_t ompi_mpi_errors_return;
|
||||
OMPI_DECLSPEC extern ompi_predefined_errhandler_t *ompi_mpi_errors_return_addr;
|
||||
|
||||
/*
|
||||
* Global variable for MPI_ERRORS_ABORT (_addr flavor is for F03 bindings)
|
||||
*/
|
||||
OMPI_DECLSPEC extern ompi_predefined_errhandler_t ompi_mpi_errors_abort;
|
||||
OMPI_DECLSPEC extern ompi_predefined_errhandler_t *ompi_mpi_errors_abort_addr;
|
||||
|
||||
/**
|
||||
* Global variable for MPI::ERRORS_THROW_EXCEPTIONS. Will abort if
|
||||
* MPI_INIT wasn't called as MPI::INIT (_addr flavor is for F03 bindings)
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2020 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -41,10 +41,10 @@ int ompi_errhandler_invoke(ompi_errhandler_t *errhandler, void *mpi_object,
|
||||
ompi_win_t *win;
|
||||
ompi_file_t *file;
|
||||
|
||||
/* If we got no errorhandler, then just invoke errors_abort */
|
||||
/* If we got no errorhandler, then just invoke errors_are_fatal */
|
||||
if (NULL == errhandler) {
|
||||
ompi_mpi_errors_are_fatal_comm_handler(NULL, NULL, message);
|
||||
return err_code;
|
||||
return err_code;
|
||||
}
|
||||
|
||||
/* Figure out what kind of errhandler it is, figure out if it's
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2020 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -47,7 +47,7 @@
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
static void backend_fatal(char *type, struct ompi_communicator_t *comm,
|
||||
static void backend_abort(int fatal, char *type, struct ompi_communicator_t *comm,
|
||||
char *name, int *error_code, va_list arglist);
|
||||
static void out(char *str, char *arg);
|
||||
|
||||
@ -68,7 +68,7 @@ void ompi_mpi_errors_are_fatal_comm_handler(struct ompi_communicator_t **comm,
|
||||
name = NULL;
|
||||
abort_comm = NULL;
|
||||
}
|
||||
backend_fatal("communicator", abort_comm, name, error_code, arglist);
|
||||
backend_abort(true, "communicator", abort_comm, name, error_code, arglist);
|
||||
va_end(arglist);
|
||||
}
|
||||
|
||||
@ -89,7 +89,7 @@ void ompi_mpi_errors_are_fatal_file_handler(struct ompi_file_t **file,
|
||||
name = NULL;
|
||||
abort_comm = NULL;
|
||||
}
|
||||
backend_fatal("file", abort_comm, name, error_code, arglist);
|
||||
backend_abort(true, "file", abort_comm, name, error_code, arglist);
|
||||
va_end(arglist);
|
||||
}
|
||||
|
||||
@ -108,7 +108,67 @@ void ompi_mpi_errors_are_fatal_win_handler(struct ompi_win_t **win,
|
||||
} else {
|
||||
name = NULL;
|
||||
}
|
||||
backend_fatal("win", abort_comm, name, error_code, arglist);
|
||||
backend_abort(true, "win", abort_comm, name, error_code, arglist);
|
||||
va_end(arglist);
|
||||
}
|
||||
|
||||
void ompi_mpi_errors_abort_comm_handler(struct ompi_communicator_t **comm,
|
||||
int *error_code, ...)
|
||||
{
|
||||
char *name;
|
||||
struct ompi_communicator_t *abort_comm;
|
||||
va_list arglist;
|
||||
|
||||
va_start(arglist, error_code);
|
||||
|
||||
if ( (NULL != comm) && (NULL != *comm) ) {
|
||||
name = (*comm)->c_name;
|
||||
abort_comm = *comm;
|
||||
} else {
|
||||
name = NULL;
|
||||
abort_comm = NULL;
|
||||
}
|
||||
backend_abort(false, "communicator", abort_comm, name, error_code, arglist);
|
||||
va_end(arglist);
|
||||
}
|
||||
|
||||
|
||||
void ompi_mpi_errors_abort_file_handler(struct ompi_file_t **file,
|
||||
int *error_code, ...)
|
||||
{
|
||||
char *name;
|
||||
struct ompi_communicator_t *abort_comm;
|
||||
va_list arglist;
|
||||
|
||||
va_start(arglist, error_code);
|
||||
|
||||
if (NULL != file) {
|
||||
name = (*file)->f_filename;
|
||||
abort_comm = (*file)->f_comm;
|
||||
} else {
|
||||
name = NULL;
|
||||
abort_comm = NULL;
|
||||
}
|
||||
backend_abort(false, "file", abort_comm, name, error_code, arglist);
|
||||
va_end(arglist);
|
||||
}
|
||||
|
||||
|
||||
void ompi_mpi_errors_abort_win_handler(struct ompi_win_t **win,
|
||||
int *error_code, ...)
|
||||
{
|
||||
char *name;
|
||||
struct ompi_communicator_t *abort_comm = NULL;
|
||||
va_list arglist;
|
||||
|
||||
va_start(arglist, error_code);
|
||||
|
||||
if (NULL != win) {
|
||||
name = (*win)->w_name;
|
||||
} else {
|
||||
name = NULL;
|
||||
}
|
||||
backend_abort(false, "win", abort_comm, name, error_code, arglist);
|
||||
va_end(arglist);
|
||||
}
|
||||
|
||||
@ -175,7 +235,7 @@ static void out(char *str, char *arg)
|
||||
* there's no need to handle the pre-MPI_INIT and post-MPI_FINALIZE
|
||||
* errors here.
|
||||
*/
|
||||
static void backend_fatal_aggregate(char *type,
|
||||
static void backend_abort_aggregate(int fatal, char *type,
|
||||
struct ompi_communicator_t *comm,
|
||||
char *name, int *error_code,
|
||||
va_list arglist)
|
||||
@ -199,7 +259,7 @@ static void backend_fatal_aggregate(char *type,
|
||||
ompi_process_info.nodename,
|
||||
(int) ompi_process_info.pid) == -1) {
|
||||
prefix = NULL;
|
||||
// non-fatal, we could still go on to give useful information here...
|
||||
// not a fatal failure: we can still go on to give useful information here...
|
||||
opal_output(0, "%s", "Could not write node and PID to prefix");
|
||||
opal_output(0, "Node: %s", ompi_process_info.nodename);
|
||||
opal_output(0, "PID: %d", (int) ompi_process_info.pid);
|
||||
@ -224,7 +284,7 @@ static void backend_fatal_aggregate(char *type,
|
||||
|
||||
if (NULL != name) {
|
||||
opal_show_help("help-mpi-errors.txt",
|
||||
"mpi_errors_are_fatal",
|
||||
fatal? "mpi_errors_are_fatal": "mpi_errors_abort",
|
||||
false,
|
||||
usable_prefix,
|
||||
(NULL == arg) ? "" : "in",
|
||||
@ -267,7 +327,7 @@ static void backend_fatal_aggregate(char *type,
|
||||
|
||||
/*
|
||||
* Note that this function has to handle pre-MPI_INIT and
|
||||
* post-MPI_FINALIZE errors, which backend_fatal_aggregate() does not
|
||||
* post-MPI_FINALIZE errors, which backend_abort_aggregate() does not
|
||||
* have to handle.
|
||||
*
|
||||
* This function also intentionally does not call malloc(), just in
|
||||
@ -275,7 +335,7 @@ static void backend_fatal_aggregate(char *type,
|
||||
* we *might* be able to get a message out if we're not further
|
||||
* corrupting the stack by calling malloc()...
|
||||
*/
|
||||
static void backend_fatal_no_aggregate(char *type,
|
||||
static void backend_abort_no_aggregate(int fatal, char *type,
|
||||
struct ompi_communicator_t *comm,
|
||||
char *name, int *error_code,
|
||||
va_list arglist)
|
||||
@ -303,7 +363,7 @@ static void backend_fatal_no_aggregate(char *type,
|
||||
"*** Unfortunately, no further information is available on *which* MPI\n"
|
||||
"*** function was invoked, sorry. :-(\n", NULL);
|
||||
}
|
||||
out("*** Your MPI job will now abort.\n", NULL);
|
||||
if(fatal) out("*** Your MPI job will now abort.\n", NULL);
|
||||
} else if (state >= OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
|
||||
if (NULL != arg) {
|
||||
out("*** The %s() function was called after MPI_FINALIZE was invoked.\n"
|
||||
@ -314,7 +374,7 @@ static void backend_fatal_no_aggregate(char *type,
|
||||
"*** Unfortunately, no further information is available on *which* MPI\n"
|
||||
"*** function was invoked, sorry. :-(\n", NULL);
|
||||
}
|
||||
out("*** Your MPI job will now abort.\n", NULL);
|
||||
if(fatal) out("*** Your MPI job will now abort.\n", NULL);
|
||||
}
|
||||
|
||||
else {
|
||||
@ -365,23 +425,30 @@ static void backend_fatal_no_aggregate(char *type,
|
||||
out("*** Error code: %d (no associated error message)\n", intbuf);
|
||||
}
|
||||
}
|
||||
/* out("*** MPI_ERRORS_ARE_FATAL: your MPI job will now abort\n", NULL); */
|
||||
out("*** MPI_ERRORS_ARE_FATAL (processes in this %s will now abort,\n", type);
|
||||
out("*** and potentially your MPI job)\n", NULL);
|
||||
|
||||
/* out("*** MPI_ERRORS_ABORT: your MPI job will now abort\n", NULL); */
|
||||
if(fatal) {
|
||||
out("*** MPI_ERRORS_ARE_FATAL (processes in this %s will now abort,\n", type);
|
||||
out("*** and MPI will try to terminate your MPI job as well)\n", NULL);
|
||||
}
|
||||
else {
|
||||
out("*** MPI_ERRORS_ABORT (processes in this %s will now abort,\n", type);
|
||||
out("*** and potentially the rest of your MPI job)\n", NULL);
|
||||
}
|
||||
}
|
||||
va_end(arglist);
|
||||
}
|
||||
|
||||
static void backend_fatal(char *type, struct ompi_communicator_t *comm,
|
||||
static void backend_abort(int fatal, char *type, struct ompi_communicator_t *comm,
|
||||
char *name, int *error_code,
|
||||
va_list arglist)
|
||||
{
|
||||
int err = MPI_ERR_UNKNOWN;
|
||||
|
||||
/* We only want aggregation while the rte is initialized */
|
||||
if (ompi_rte_initialized) {
|
||||
backend_fatal_aggregate(type, comm, name, error_code, arglist);
|
||||
backend_abort_aggregate(fatal, type, comm, name, error_code, arglist);
|
||||
} else {
|
||||
backend_fatal_no_aggregate(type, comm, name, error_code, arglist);
|
||||
backend_abort_no_aggregate(fatal, type, comm, name, error_code, arglist);
|
||||
}
|
||||
|
||||
/* In most instances the communicator will be valid. If not, we are either early in
|
||||
@ -392,9 +459,9 @@ static void backend_fatal(char *type, struct ompi_communicator_t *comm,
|
||||
comm = &ompi_mpi_comm_self.comm;
|
||||
}
|
||||
|
||||
if (NULL != error_code) {
|
||||
ompi_mpi_abort(comm, *error_code);
|
||||
} else {
|
||||
ompi_mpi_abort(comm, 1);
|
||||
}
|
||||
if (NULL != error_code)
|
||||
err = *error_code;
|
||||
|
||||
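/* For MPI_ERRORS_ARE_FATAL, call abort without a specified comm to force RTE
 * job termination; otherwise pass comm so that ompi_mpi_abort() first tries
 * to kill only the processes of that communicator. */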
|
||||
ompi_mpi_abort(fatal? NULL: comm, err);
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2020 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -35,6 +35,16 @@ OMPI_DECLSPEC void ompi_mpi_errors_are_fatal_file_handler(struct ompi_file_t **f
|
||||
OMPI_DECLSPEC void ompi_mpi_errors_are_fatal_win_handler(struct ompi_win_t **win,
|
||||
int *error_code, ...);
|
||||
|
||||
/**
|
||||
* Handler functions for MPI_ERRORS_ABORT (communicator, file and window variants)
|
||||
*/
|
||||
OMPI_DECLSPEC void ompi_mpi_errors_abort_comm_handler(struct ompi_communicator_t **comm,
|
||||
int *error_code, ...);
|
||||
OMPI_DECLSPEC void ompi_mpi_errors_abort_file_handler(struct ompi_file_t **file,
|
||||
int *error_code, ...);
|
||||
OMPI_DECLSPEC void ompi_mpi_errors_abort_win_handler(struct ompi_win_t **win,
|
||||
int *error_code, ...);
|
||||
|
||||
/**
|
||||
* Handler function for MPI_ERRORS_RETURN
|
||||
*/
|
||||
|
@ -3,7 +3,7 @@
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# Copyright (c) 2004-2020 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -28,7 +28,7 @@
|
||||
%s *** on %s %s
|
||||
%s *** %s
|
||||
%s *** MPI_ERRORS_ARE_FATAL (processes in this %s will now abort,
|
||||
%s *** and potentially your MPI job)
|
||||
%s *** and MPI will try to terminate your MPI job as well)
|
||||
#
|
||||
[mpi_errors_are_fatal unknown handle]
|
||||
%s *** An error occurred %s %s
|
||||
@ -36,5 +36,13 @@
|
||||
%s *** on a NULL %s
|
||||
%s *** %s
|
||||
%s *** MPI_ERRORS_ARE_FATAL (processes in this %s will now abort,
|
||||
%s *** and potentially your MPI job)
|
||||
%s *** and MPI will try to terminate your MPI job as well)
|
||||
#
|
||||
[mpi_errors_abort]
|
||||
%s *** An error occurred %s %s
|
||||
%s *** reported by process [%lu,%lu]
|
||||
%s *** on %s %s
|
||||
%s *** %s
|
||||
%s *** MPI_ERRORS_ABORT (processes in this %s will now abort,
|
||||
%s *** and potentially the rest of your MPI job)
|
||||
#
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2020 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
|
||||
@ -658,6 +658,7 @@ enum {
|
||||
#define MPI_ERR_RMA_SHARED 71
|
||||
#define MPI_T_ERR_INVALID 72
|
||||
#define MPI_T_ERR_INVALID_NAME 73
|
||||
#define MPI_ERR_PROC_ABORTED 74
|
||||
|
||||
/* Per MPI-3 p349 47, MPI_ERR_LASTCODE must be >= the last predefined
|
||||
MPI_ERR_<foo> code. Set the last code to allow some room for adding
|
||||
@ -1046,6 +1047,7 @@ OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_c_long_double_co
|
||||
|
||||
OMPI_DECLSPEC extern struct ompi_predefined_errhandler_t ompi_mpi_errhandler_null;
|
||||
OMPI_DECLSPEC extern struct ompi_predefined_errhandler_t ompi_mpi_errors_are_fatal;
|
||||
OMPI_DECLSPEC extern struct ompi_predefined_errhandler_t ompi_mpi_errors_abort;
|
||||
OMPI_DECLSPEC extern struct ompi_predefined_errhandler_t ompi_mpi_errors_return;
|
||||
|
||||
OMPI_DECLSPEC extern struct ompi_predefined_win_t ompi_mpi_win_null;
|
||||
@ -1242,6 +1244,7 @@ OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_ub;
|
||||
#define MPI_COUNT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_count)
|
||||
|
||||
#define MPI_ERRORS_ARE_FATAL OMPI_PREDEFINED_GLOBAL(MPI_Errhandler, ompi_mpi_errors_are_fatal)
|
||||
#define MPI_ERRORS_ABORT OMPI_PREDEFINED_GLOBAL(MPI_Errhandler, ompi_mpi_errors_abort)
|
||||
#define MPI_ERRORS_RETURN OMPI_PREDEFINED_GLOBAL(MPI_Errhandler, ompi_mpi_errors_return)
|
||||
|
||||
/* Typeclass definition for MPI_Type_match_size */
|
||||
|
@ -4,6 +4,9 @@
|
||||
# Copyright (c) 2016-2019 Research Organization for Information Science
|
||||
# and Technology (RIST). All rights reserved.
|
||||
# Copyright (c) 2016-2018 FUJITSU LIMITED. All rights reserved.
|
||||
# Copyright (c) 2020 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -93,6 +96,7 @@ $handles->{MPI_COMM_SELF} = 1;
|
||||
$handles->{MPI_GROUP_EMPTY} = 1;
|
||||
$handles->{MPI_ERRORS_ARE_FATAL} = 1;
|
||||
$handles->{MPI_ERRORS_RETURN} = 2;
|
||||
$handles->{MPI_ERRORS_ABORT} = 3;
|
||||
|
||||
$handles->{MPI_MAX} = 1;
|
||||
$handles->{MPI_MIN} = 2;
|
||||
@ -312,6 +316,7 @@ $constants->{MPI_ERR_NOT_SAME} = 40;
|
||||
$constants->{MPI_ERR_NO_SPACE} = 41;
|
||||
$constants->{MPI_ERR_NO_SUCH_FILE} = 42;
|
||||
$constants->{MPI_ERR_PORT} = 43;
|
||||
$constants->{MPI_ERR_PROC_ABORTED} = 74;
|
||||
$constants->{MPI_ERR_QUOTA} = 44;
|
||||
$constants->{MPI_ERR_READ_ONLY} = 45;
|
||||
$constants->{MPI_ERR_RMA_CONFLICT} = 46;
|
||||
|
@ -6,6 +6,9 @@
|
||||
! Copyright (c) 2015-2019 Research Organization for Information Science
|
||||
! and Technology (RIST). All rights reserved.
|
||||
! Copyright (c) 2018 FUJITSU LIMITED. All rights reserved.
|
||||
! Copyright (c) 2020 The University of Tennessee and The University
|
||||
! of Tennessee Research Foundation. All rights
|
||||
! reserved.
|
||||
! $COPYRIGHT$
|
||||
!
|
||||
! This file creates mappings between MPI C types (e.g., MPI_Comm) and
|
||||
@ -85,6 +88,7 @@ module mpi_f08_types
|
||||
type(MPI_Group), parameter :: MPI_GROUP_EMPTY = MPI_Group(OMPI_MPI_GROUP_EMPTY)
|
||||
|
||||
type(MPI_Errhandler), parameter :: MPI_ERRORS_ARE_FATAL = MPI_Errhandler(OMPI_MPI_ERRORS_ARE_FATAL)
|
||||
type(MPI_Errhandler), parameter :: MPI_ERRORS_ABORT = MPI_Errhandler(OMPI_MPI_ERRORS_ABORT)
|
||||
type(MPI_Errhandler), parameter :: MPI_ERRORS_RETURN = MPI_Errhandler(OMPI_MPI_ERRORS_RETURN)
|
||||
|
||||
type(MPI_Message), parameter :: MPI_MESSAGE_NO_PROC = MPI_Message(OMPI_MPI_MESSAGE_NO_PROC)
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2020 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -160,6 +160,7 @@ JNIEXPORT void JNICALL Java_mpi_Constant_setConstant(JNIEnv *env, jobject obj)
|
||||
ompi_java_setIntField(env, c, obj, "ERR_NO_SPACE", MPI_ERR_NO_SPACE);
|
||||
ompi_java_setIntField(env, c, obj, "ERR_NO_SUCH_FILE", MPI_ERR_NO_SUCH_FILE);
|
||||
ompi_java_setIntField(env, c, obj, "ERR_PORT", MPI_ERR_PORT);
|
||||
ompi_java_setIntField(env, c, obj, "ERR_PROC_ABORTED", MPI_ERR_PROC_ABORTED);
|
||||
ompi_java_setIntField(env, c, obj, "ERR_QUOTA", MPI_ERR_QUOTA);
|
||||
ompi_java_setIntField(env, c, obj, "ERR_READ_ONLY", MPI_ERR_READ_ONLY);
|
||||
ompi_java_setIntField(env, c, obj, "ERR_RMA_CONFLICT", MPI_ERR_RMA_CONFLICT);
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2020 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -59,6 +59,11 @@ JNIEXPORT jlong JNICALL Java_mpi_Errhandler_getFatal(JNIEnv *env, jclass clazz)
|
||||
return (jlong)MPI_ERRORS_ARE_FATAL;
|
||||
}
|
||||
|
||||
/*
 * JNI accessor: hands the native MPI_ERRORS_ABORT error handler handle
 * to the Java side, converted to a jlong. Neither env nor clazz is used.
 */
JNIEXPORT jlong JNICALL Java_mpi_Errhandler_getAbort(JNIEnv *env, jclass clazz)
{
    MPI_Errhandler abort_handler = MPI_ERRORS_ABORT;

    return (jlong)abort_handler;
}
|
||||
|
||||
JNIEXPORT jlong JNICALL Java_mpi_Errhandler_getReturn(JNIEnv *env, jclass clazz)
|
||||
{
|
||||
return (jlong)MPI_ERRORS_RETURN;
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2020 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -98,6 +98,7 @@ class Constant
|
||||
protected int ERR_NO_SPACE;
|
||||
protected int ERR_NO_SUCH_FILE;
|
||||
protected int ERR_PORT;
|
||||
protected int ERR_PROC_ABORTED;
|
||||
protected int ERR_QUOTA;
|
||||
protected int ERR_READ_ONLY;
|
||||
protected int ERR_RMA_CONFLICT;
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2020 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -141,7 +141,7 @@ public final class MPI
|
||||
MODE_NOSTORE, MODE_NOSUCCEED;
|
||||
public static final int LOCK_EXCLUSIVE, LOCK_SHARED;
|
||||
|
||||
public static final Errhandler ERRORS_ARE_FATAL, ERRORS_RETURN;
|
||||
public static final Errhandler ERRORS_ARE_FATAL, ERRORS_ABORT, ERRORS_RETURN;
|
||||
|
||||
// Error classes and codes
|
||||
public static final int SUCCESS;
|
||||
@ -188,6 +188,7 @@ public final class MPI
|
||||
public static final int ERR_NO_SPACE;
|
||||
public static final int ERR_NO_SUCH_FILE;
|
||||
public static final int ERR_PORT;
|
||||
public static final int ERR_PROC_ABORTED;
|
||||
public static final int ERR_QUOTA;
|
||||
public static final int ERR_READ_ONLY;
|
||||
public static final int ERR_RMA_CONFLICT;
|
||||
@ -332,6 +333,7 @@ public final class MPI
|
||||
LOCK_SHARED = c.LOCK_SHARED;
|
||||
|
||||
ERRORS_ARE_FATAL = new Errhandler(Errhandler.getFatal());
|
||||
ERRORS_ABORT = new Errhandler(Errhandler.getAbort());
|
||||
ERRORS_RETURN = new Errhandler(Errhandler.getReturn());
|
||||
|
||||
COMM_WORLD = new Intracomm();
|
||||
@ -382,6 +384,7 @@ public final class MPI
|
||||
ERR_NO_SPACE = c.ERR_NO_SPACE;
|
||||
ERR_NO_SUCH_FILE = c.ERR_NO_SUCH_FILE;
|
||||
ERR_PORT = c.ERR_PORT;
|
||||
ERR_PROC_ABORTED = c.ERR_PROC_ABORTED;
|
||||
ERR_QUOTA = c.ERR_QUOTA;
|
||||
ERR_READ_ONLY = c.ERR_READ_ONLY;
|
||||
ERR_RMA_CONFLICT = c.ERR_RMA_CONFLICT;
|
||||
|
@ -107,6 +107,7 @@ Standard error return classes for Open MPI:
|
||||
| MPI_ERR_NO_SPACE | 41 | Not enough space. |
|
||||
| MPI_ERR_NO_SUCH_FILE | 42 | File (or directory) does not exist. |
|
||||
| MPI_ERR_PORT | 43 | Invalid port. |
|
||||
| MPI_ERR_PROC_ABORTED | 74 | Operation failed because a remote peer has aborted. |
|
||||
| MPI_ERR_QUOTA | 44 | Quota exceeded. |
|
||||
| MPI_ERR_READ_ONLY | 45 | Read-only file system. |
|
||||
| MPI_ERR_RMA_CONFLICT | 46 | Conflicting accesses to window. |
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2020 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
@ -69,6 +69,7 @@ static bool have_been_invoked = false;
|
||||
* It would be nifty if we could differentiate between the
|
||||
* abort scenarios (but we don't, currently):
|
||||
* - MPI_Abort()
|
||||
* - MPI_ERRORS_ABORT
|
||||
* - MPI_ERRORS_ARE_FATAL
|
||||
* - Victim of MPI_Abort()
|
||||
*/
|
||||
@ -182,7 +183,7 @@ ompi_mpi_abort(struct ompi_communicator_t* comm,
|
||||
if (state >= OMPI_MPI_STATE_INIT_COMPLETED &&
|
||||
state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT &&
|
||||
NULL != comm) {
|
||||
try_kill_peers(comm, errcode);
|
||||
try_kill_peers(comm, errcode); /* kill only the specified groups, no return if it worked. */
|
||||
}
|
||||
|
||||
/* We can fall through to here in a few cases:
|
||||
|
@ -1,4 +1,4 @@
|
||||
PROGS = mpi_no_op mpi_barrier hello hello_nodename abort multi_abort simple_spawn \
|
||||
PROGS = mpi_no_op mpi_barrier hello hello_nodename abort multi_abort comm_abort simple_spawn \
|
||||
concurrent_spawn spawn_multiple mpi_spin delayed_abort loop_spawn loop_child \
|
||||
bad_exit pubsub hello_barrier segv accept connect hello_output hello_show_help \
|
||||
crisscross read_write ziatest slave reduce-hang ziaprobe ziatest bcast_loop \
|
||||
|
174
test/simple/comm_abort.c
Обычный файл
174
test/simple/comm_abort.c
Обычный файл
@ -0,0 +1,174 @@
|
||||
/* -*- C -*-
|
||||
* Copyright (c) 2020 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
* Test aborting communicators
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include "mpi.h"
|
||||
|
||||
/* Print a printf-style message from rank 0 only.
 * Relies on an `int rank` variable being in scope at the call site.
 * Uses the standard C99 variadic form (not the GNU named-variadic extension)
 * and is wrapped in do/while(0) so it behaves as a single statement, e.g.
 * when used as the body of an if/else. */
#define print1(...) do { if (0 == rank) printf(__VA_ARGS__); } while (0)
|
||||
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int rank, size, more;
|
||||
double start, now;
|
||||
MPI_Comm comm_pair_fatal, comm_pair_return, comm_pair_abort;
|
||||
|
||||
MPI_Init(&argc, &argv);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &size);
|
||||
|
||||
if(0 == rank && size%2) {
|
||||
fprintf(stderr, "This test requires an even number of processes\n\n");
|
||||
MPI_Abort(MPI_COMM_WORLD, size);
|
||||
}
|
||||
|
||||
/* Setup: split our world in a set of 2-processes islands */
|
||||
MPI_Comm_split(MPI_COMM_WORLD, rank/2, rank, &comm_pair_fatal);
|
||||
MPI_Comm_set_errhandler(comm_pair_fatal, MPI_ERRORS_ARE_FATAL);
|
||||
MPI_Comm_split(MPI_COMM_WORLD, rank/2, rank, &comm_pair_return);
|
||||
MPI_Comm_set_errhandler(comm_pair_return, MPI_ERRORS_RETURN);
|
||||
MPI_Comm_split(MPI_COMM_WORLD, rank/2, rank, &comm_pair_abort);
|
||||
/* If this code fails to compile, the MPI implementation is not compliant
|
||||
* with MPI-4 (TODO: add ref to chapter/line when MPI-4 published). */
|
||||
MPI_Comm_set_errhandler(comm_pair_abort, MPI_ERRORS_ABORT);
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
||||
print1(
|
||||
"This program will test partial abort functionality (communicator scoped abort).\n"
|
||||
" Each test will perform a loop of communication on a subcommunicator for about\n"
|
||||
" 1 second between printouts, and then, a 1 second cooldown.\n");
|
||||
|
||||
print1("\n\n"
|
||||
"Test1: MPI_Abort(MPI_COMM_SELF) aborts only one process?\n"
|
||||
" In a high quality implementation, all ranks except %d\n"
|
||||
" should report their presence.\n", 1);
|
||||
if(rank == 1) {
|
||||
MPI_Abort(MPI_COMM_SELF, 1);
|
||||
}
|
||||
/* Spin on communication for 1 second to let time for Abort to have an
|
||||
* effect, if any. */
|
||||
more = 1; start = MPI_Wtime();
|
||||
do {
|
||||
now = MPI_Wtime();
|
||||
if(now - start > 1.) more = 0;
|
||||
if(rank > 1) /* don't reduce on aborted pairs */
|
||||
MPI_Allreduce(MPI_IN_PLACE, &more, 1, MPI_INT, MPI_MIN, comm_pair_fatal);
|
||||
} while(more);
|
||||
printf(" This is rank %d: still kickin after %d MPI_Abort'ed self\n", rank, 1);
|
||||
|
||||
sleep(1);
|
||||
print1("===============================================================\n");
|
||||
|
||||
print1("\n\n"
|
||||
"Test2: MPI_Abort(comm) aborts all processes in comm?\n"
|
||||
" In a high quality implementation, all ranks except %d--%d\n"
|
||||
" should report their presence.\n", 1, 3);
|
||||
if(rank == 3) {
|
||||
MPI_Abort(comm_pair_return, 2);
|
||||
}
|
||||
/* Spin on communication for 1 second to let time for Abort to have an
|
||||
* effect, if any. */
|
||||
more = 1; start = MPI_Wtime();
|
||||
do {
|
||||
now = MPI_Wtime();
|
||||
if(now - start > 1.) more = 0;
|
||||
if(rank > 3) /* don't reduce on aborted pairs */
|
||||
MPI_Allreduce(MPI_IN_PLACE, &more, 1, MPI_INT, MPI_MIN, comm_pair_fatal);
|
||||
} while(more);
|
||||
printf(" This is rank %d: still kickin after %d aborted comm pair %d-%d\n", rank, 3, 2, 3);
|
||||
|
||||
/* This process should have aborted, give it an opportunity to do so if no
|
||||
* async progress: message to self to spin MPI progress. */
|
||||
if(rank == 2) {
|
||||
MPI_Sendrecv(&start, 1, MPI_DOUBLE, 0, 0,
|
||||
&now, 1, MPI_DOUBLE, 0, 0,
|
||||
MPI_COMM_SELF, MPI_STATUS_IGNORE);
|
||||
printf(" This is rank %d: ERROR: I SHOULD HAVE ABORTED!\n", 2);
|
||||
}
|
||||
|
||||
sleep(1);
|
||||
print1("===============================================================\n");
|
||||
|
||||
print1("\n\n"
|
||||
"Test3: MPI_ERRORS_ABORT aborts all processes in comm?\n"
|
||||
" In a high quality implementation, all ranks except %d--%d\n"
|
||||
" should report their presence.\n", 1, 5);
|
||||
if(rank == 5) {
|
||||
MPI_Comm_call_errhandler(comm_pair_abort, 3);
|
||||
}
|
||||
/* Spin on communication for 1 second to let time for Abort to have an
|
||||
* effect, if any. */
|
||||
more = 1; start = MPI_Wtime();
|
||||
do {
|
||||
now = MPI_Wtime();
|
||||
if(now - start > 1.) more = 0;
|
||||
if(rank > 5) /* don't reduce on aborted pairs */
|
||||
MPI_Allreduce(MPI_IN_PLACE, &more, 1, MPI_INT, MPI_MIN, comm_pair_fatal);
|
||||
} while(more);
|
||||
printf(" This is rank %d: still kickin after %d aborted comm pair %d-%d\n", rank, 5, 4, 5);
|
||||
|
||||
/* This process should have aborted, give it an opportunity to do so if no
|
||||
* async progress: message to self to spin MPI progress. */
|
||||
if(rank == 4) {
|
||||
MPI_Sendrecv(&start, 1, MPI_DOUBLE, 0, 0,
|
||||
&now, 1, MPI_DOUBLE, 0, 0,
|
||||
MPI_COMM_SELF, MPI_STATUS_IGNORE);
|
||||
printf(" This is rank %d: ERROR: I SHOULD HAVE ABORTED!\n", 4);
|
||||
}
|
||||
|
||||
sleep(1);
|
||||
print1("===============================================================\n");
|
||||
|
||||
print1("\n\n"\
|
||||
"Test4: Communicating with an aborted process %d returns a good error code?\n"
|
||||
" In a high quality implementation, rank %d should print an error string;\n"
|
||||
" In a higher quality implementation the error should be of class\n"
|
||||
" MPI_ERR_PROC_ABORTED.\n", 1, 0);
|
||||
if(rank == 0) {
|
||||
int err, class, slen;
|
||||
char str[MPI_MAX_ERROR_STRING];
|
||||
/* remember, 1 aborted in test1 */
|
||||
MPI_Error_class(err, &class);
|
||||
MPI_Error_string(err, str, &slen);
|
||||
err = MPI_Recv(&more, 1, MPI_INT, 1, 0, comm_pair_return, MPI_STATUS_IGNORE);
|
||||
printf(" This is rank %d: Recv(from=%d) returned code=%d: class=%d: %s\n", 0, 1, err, class, str);
|
||||
}
|
||||
|
||||
sleep(1);
|
||||
print1("===============================================================\n");
|
||||
|
||||
print1("\n\n"
|
||||
"Test5: MPI_ERRORS_ARE_FATAL aborts all processes?\n");
|
||||
if(rank == 0) {
|
||||
MPI_Comm_call_errhandler(comm_pair_fatal, 5);
|
||||
}
|
||||
/* Spin on communication for 1 second to let time for Abort to have an
|
||||
* effect, if any. */
|
||||
more = 1; start = MPI_Wtime();
|
||||
do {
|
||||
now = MPI_Wtime();
|
||||
if(now - start > 1.) more = 0;
|
||||
if(rank > 5) /* don't reduce on aborted pairs */
|
||||
MPI_Allreduce(MPI_IN_PLACE, &more, 1, MPI_INT, MPI_MIN, comm_pair_fatal);
|
||||
} while(more);
|
||||
MPI_Sendrecv(&start, 1, MPI_DOUBLE, 0, 0,
|
||||
&now, 1, MPI_DOUBLE, 0, 0,
|
||||
MPI_COMM_SELF, MPI_STATUS_IGNORE);
|
||||
printf(" This is rank %d: ERROR: I SHOULD HAVE ABORTED!\n", rank);
|
||||
|
||||
/* Should never get there */
|
||||
|
||||
MPI_Finalize();
|
||||
return 0;
|
||||
}
|
Загрузка…
x
Ссылка в новой задаче
Block a user