Minor fixes for MPI-level aborting:
- Fix some fprintf's in ompi_mpi_abort() that incorrectly assumed that we were always being invoked from MPI_ABORT (ompi_mpi_abort() may be invoked from a bunch of different places).
- Also try to opal_backtrace_print() if opal_backtrace_buffer() is not supported.
- Print a message in MPI_ABORT if we're aborting.

This commit was SVN r12998.
Этот коммит содержится в:
родитель
48ec0b2071
Коммит
75df4ca602
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -17,8 +18,16 @@
|
||||
*/
|
||||
#include "ompi_config.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_PARAM_H
|
||||
#include <sys/param.h>
|
||||
#endif
|
||||
|
||||
#include "ompi/mpi/c/bindings.h"
|
||||
#include "ompi/runtime/mpiruntime.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
|
||||
#if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES
|
||||
#pragma weak MPI_Abort = PMPI_Abort
|
||||
@ -40,5 +49,7 @@ int MPI_Abort(MPI_Comm comm, int errorcode)
|
||||
OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
|
||||
}
|
||||
|
||||
opal_output(0, "MPI_ABORT invoked on rank %d in communicator %s with errorcode %d\n",
|
||||
ompi_comm_rank(comm), comm->c_name, errorcode);
|
||||
return ompi_mpi_abort(comm, errorcode, true);
|
||||
}
|
||||
|
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -87,9 +87,10 @@ ompi_mpi_abort(struct ompi_communicator_t* comm,
|
||||
}
|
||||
free(messages);
|
||||
} else {
|
||||
fprintf(stderr, "[%s:%d] Abort is unable to print a stack trace\n",
|
||||
hostname, (int) pid);
|
||||
fflush(stderr);
|
||||
/* This will print a message if it's unable to print the
|
||||
backtrace, so we don't need an additional "else" clause
|
||||
if opal_backtrace_print() is not supported. */
|
||||
opal_backtrace_print(stderr);
|
||||
}
|
||||
}
|
||||
|
||||
@ -97,14 +98,14 @@ ompi_mpi_abort(struct ompi_communicator_t* comm,
|
||||
|
||||
if (0 != ompi_mpi_abort_delay) {
|
||||
if (ompi_mpi_abort_delay < 0) {
|
||||
fprintf(stderr ,"[%s:%d] Looping forever in MPI abort\n",
|
||||
fprintf(stderr ,"[%s:%d] Looping forever (MCA parameter mpi_abort_delay is < 0)\n",
|
||||
hostname, (int) pid);
|
||||
fflush(stderr);
|
||||
while (1) {
|
||||
sleep(5);
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "[%s:%d] Delaying for %d seconds in MPI_abort\n",
|
||||
fprintf(stderr, "[%s:%d] Delaying for %d seconds before aborting\n",
|
||||
hostname, (int) pid, ompi_mpi_abort_delay);
|
||||
do {
|
||||
sleep(1);
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user