Minor fixes for MPI-level aborting:
- Fix some fprintf's in ompi_mpi_abort() that incorrectly assumed that we were always being invoked from MPI_ABORT (ompi_mpi_abort() may be invoked from a bunch of different places)
- Also try to opal_backtrace_print() if opal_backtrace_buffer() is not supported.
- Print a message in MPI_ABORT if we're aborting.
This commit was SVN r12998.
Этот коммит содержится в:
родитель
48ec0b2071
Коммит
75df4ca602
@ -9,6 +9,7 @@
|
|||||||
* University of Stuttgart. All rights reserved.
|
* University of Stuttgart. All rights reserved.
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
|
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -17,8 +18,16 @@
|
|||||||
*/
|
*/
|
||||||
#include "ompi_config.h"
|
#include "ompi_config.h"
|
||||||
|
|
||||||
|
#ifdef HAVE_UNISTD_H
|
||||||
|
#include <unistd.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_SYS_PARAM_H
|
||||||
|
#include <sys/param.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "ompi/mpi/c/bindings.h"
|
#include "ompi/mpi/c/bindings.h"
|
||||||
#include "ompi/runtime/mpiruntime.h"
|
#include "ompi/runtime/mpiruntime.h"
|
||||||
|
#include "ompi/communicator/communicator.h"
|
||||||
|
|
||||||
#if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES
|
#if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES
|
||||||
#pragma weak MPI_Abort = PMPI_Abort
|
#pragma weak MPI_Abort = PMPI_Abort
|
||||||
@ -40,5 +49,7 @@ int MPI_Abort(MPI_Comm comm, int errorcode)
|
|||||||
OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
|
OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
opal_output(0, "MPI_ABORT invoked on rank %d in communicator %s with errorcode %d\n",
|
||||||
|
ompi_comm_rank(comm), comm->c_name, errorcode);
|
||||||
return ompi_mpi_abort(comm, errorcode, true);
|
return ompi_mpi_abort(comm, errorcode, true);
|
||||||
}
|
}
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
* University of Stuttgart. All rights reserved.
|
* University of Stuttgart. All rights reserved.
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -87,9 +87,10 @@ ompi_mpi_abort(struct ompi_communicator_t* comm,
|
|||||||
}
|
}
|
||||||
free(messages);
|
free(messages);
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "[%s:%d] Abort is unable to print a stack trace\n",
|
/* This will print a message if it's unable to print the
|
||||||
hostname, (int) pid);
|
backtrace, so we don't need an additional "else" clause
|
||||||
fflush(stderr);
|
if opal_backtrace_print() is not supported. */
|
||||||
|
opal_backtrace_print(stderr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -97,14 +98,14 @@ ompi_mpi_abort(struct ompi_communicator_t* comm,
|
|||||||
|
|
||||||
if (0 != ompi_mpi_abort_delay) {
|
if (0 != ompi_mpi_abort_delay) {
|
||||||
if (ompi_mpi_abort_delay < 0) {
|
if (ompi_mpi_abort_delay < 0) {
|
||||||
fprintf(stderr ,"[%s:%d] Looping forever in MPI abort\n",
|
fprintf(stderr ,"[%s:%d] Looping forever (MCA parameter mpi_abort_delay is < 0)\n",
|
||||||
hostname, (int) pid);
|
hostname, (int) pid);
|
||||||
fflush(stderr);
|
fflush(stderr);
|
||||||
while (1) {
|
while (1) {
|
||||||
sleep(5);
|
sleep(5);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "[%s:%d] Delaying for %d seconds in MPI_abort\n",
|
fprintf(stderr, "[%s:%d] Delaying for %d seconds before aborting\n",
|
||||||
hostname, (int) pid, ompi_mpi_abort_delay);
|
hostname, (int) pid, ompi_mpi_abort_delay);
|
||||||
do {
|
do {
|
||||||
sleep(1);
|
sleep(1);
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user