1
1

Minor fixes for MPI-level aborting:

- Fix some fprintf's in ompi_mpi_abort() that incorrectly assumed that
  we were always being invoked from MPI_ABORT (ompi_mpi_abort() may be
  invoked from a bunch of different places)
- Also try to call opal_backtrace_print() if opal_backtrace_buffer() is
  not supported.
- Print a message in MPI_ABORT if we're aborting.

This commit was SVN r12998.
Этот коммит содержится в:
Jeff Squyres 2007-01-04 22:30:28 +00:00
родитель 48ec0b2071
Коммит 75df4ca602
2 изменённых файлов: 18 добавлений и 6 удалений

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -17,8 +18,16 @@
*/ */
#include "ompi_config.h" #include "ompi_config.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif
#include "ompi/mpi/c/bindings.h" #include "ompi/mpi/c/bindings.h"
#include "ompi/runtime/mpiruntime.h" #include "ompi/runtime/mpiruntime.h"
#include "ompi/communicator/communicator.h"
#if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #if OMPI_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES
#pragma weak MPI_Abort = PMPI_Abort #pragma weak MPI_Abort = PMPI_Abort
@ -40,5 +49,7 @@ int MPI_Abort(MPI_Comm comm, int errorcode)
OMPI_ERR_INIT_FINALIZE(FUNC_NAME); OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
} }
opal_output(0, "MPI_ABORT invoked on rank %d in communicator %s with errorcode %d\n",
ompi_comm_rank(comm), comm->c_name, errorcode);
return ompi_mpi_abort(comm, errorcode, true); return ompi_mpi_abort(comm, errorcode, true);
} }

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -87,9 +87,10 @@ ompi_mpi_abort(struct ompi_communicator_t* comm,
} }
free(messages); free(messages);
} else { } else {
fprintf(stderr, "[%s:%d] Abort is unable to print a stack trace\n", /* This will print an message if it's unable to print the
hostname, (int) pid); backtrace, so we don't need an additional "else" clause
fflush(stderr); if opal_backtrace_print() is not supported. */
opal_backtrace_print(stderr);
} }
} }
@ -97,14 +98,14 @@ ompi_mpi_abort(struct ompi_communicator_t* comm,
if (0 != ompi_mpi_abort_delay) { if (0 != ompi_mpi_abort_delay) {
if (ompi_mpi_abort_delay < 0) { if (ompi_mpi_abort_delay < 0) {
fprintf(stderr ,"[%s:%d] Looping forever in MPI abort\n", fprintf(stderr ,"[%s:%d] Looping forever (MCA parameter mpi_abort_delay is < 0)\n",
hostname, (int) pid); hostname, (int) pid);
fflush(stderr); fflush(stderr);
while (1) { while (1) {
sleep(5); sleep(5);
} }
} else { } else {
fprintf(stderr, "[%s:%d] Delaying for %d seconds in MPI_abort\n", fprintf(stderr, "[%s:%d] Delaying for %d seconds before aborting\n",
hostname, (int) pid, ompi_mpi_abort_delay); hostname, (int) pid, ompi_mpi_abort_delay);
do { do {
sleep(1); sleep(1);