1
1
openmpi/opal/util/stacktrace.c
Brian Barrett 60ac1cb5f4 print stack traces (when available) for opal and orte processes, as well as
ompi processes.  Also add SIGABRT to the list of signals that are intercepted
to print out pretty messages.

This commit was SVN r8672.
2006-01-11 04:36:39 +00:00

372 строки
9.7 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_EXECINFO_H
#include <execinfo.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_SIGNAL_H
#include <signal.h>
#endif
#include "opal/util/stacktrace.h"
#include "opal/mca/base/mca_base_param.h"
#include "ompi/include/constants.h"
#ifndef _NSIG
#define _NSIG 32
#endif
/**
 * Signal handler that prints a description of the received signal and,
 * where available, a backtrace of the current stack.
 *
 * It is installed for the user-specified signals (e.g. SIGFPE or SIGSEGV).
 * The signal description and the closing marker are written to file
 * descriptor 1 with write(); the backtrace lines are printed to stderr.
 * For Linux/Glibc, backtrace and backtrace_symbols are used to obtain the
 * stack. Where available, the BSD libexecinfo is used to provide
 * Linux/Glibc compatible backtrace and backtrace_symbols functions.
 *
 * Note that snprintf, strerror, fprintf and the malloc performed by
 * backtrace_symbols are not async-signal-safe per se; this handler relies
 * on them working in practice.
 *
 * @param signo with the signal number raised
 * @param info  with information regarding the reason/sender of the signal
 * @param p     unused (third argument of an SA_SIGINFO handler)
 *
 * FIXME: Should distinguish for systems, which don't have siginfo...
 */
#if OMPI_WANT_PRETTY_PRINT_STACKTRACE && ! defined(__WINDOWS__)

/**
 * Map a (signal, si_code) pair to the symbolic name of the si_code.
 *
 * Only the codes belonging to the given signal are considered; signals
 * without a dedicated table are matched against the generic SI_* codes.
 *
 * @param signo   signal number
 * @param si_code value of siginfo_t::si_code
 * @return the constant's name, or "" if the code is not recognised
 */
static const char * opal_stackframe_si_code_name (int signo, int si_code)
{
    switch (signo) {
    case SIGILL:
        switch (si_code) {
#ifdef ILL_ILLOPC
        case ILL_ILLOPC: return "ILL_ILLOPC";
#endif
#ifdef ILL_ILLOPN
        case ILL_ILLOPN: return "ILL_ILLOPN";
#endif
#ifdef ILL_ILLADR
        case ILL_ILLADR: return "ILL_ILLADR";
#endif
#ifdef ILL_ILLTRP
        case ILL_ILLTRP: return "ILL_ILLTRP";
#endif
#ifdef ILL_PRVOPC
        case ILL_PRVOPC: return "ILL_PRVOPC";
#endif
#ifdef ILL_PRVREG
        case ILL_PRVREG: return "ILL_PRVREG";
#endif
#ifdef ILL_COPROC
        case ILL_COPROC: return "ILL_COPROC";
#endif
#ifdef ILL_BADSTK
        case ILL_BADSTK: return "ILL_BADSTK";
#endif
        default: break;
        }
        break;

    case SIGFPE:
        switch (si_code) {
#ifdef FPE_INTDIV
        case FPE_INTDIV: return "FPE_INTDIV";
#endif
#ifdef FPE_INTOVF
        case FPE_INTOVF: return "FPE_INTOVF";
#endif
        case FPE_FLTDIV: return "FPE_FLTDIV";
        case FPE_FLTOVF: return "FPE_FLTOVF";
        case FPE_FLTUND: return "FPE_FLTUND";
        case FPE_FLTRES: return "FPE_FLTRES";
        case FPE_FLTINV: return "FPE_FLTINV";
#ifdef FPE_FLTSUB
        case FPE_FLTSUB: return "FPE_FLTSUB";
#endif
        default: break;
        }
        break;

    case SIGSEGV:
        switch (si_code) {
#ifdef SEGV_MAPERR
        case SEGV_MAPERR: return "SEGV_MAPERR";
#endif
#ifdef SEGV_ACCERR
        case SEGV_ACCERR: return "SEGV_ACCERR";
#endif
        default: break;
        }
        break;

    case SIGBUS:
        switch (si_code) {
#ifdef BUS_ADRALN
        case BUS_ADRALN: return "BUS_ADRALN";
#endif
#ifdef BUS_ADRERR
        case BUS_ADRERR: return "BUS_ADRERR";
#endif
#ifdef BUS_OBJERR
        case BUS_OBJERR: return "BUS_OBJERR";
#endif
        default: break;
        }
        break;

    case SIGTRAP:
        switch (si_code) {
#ifdef TRAP_BRKPT
        case TRAP_BRKPT: return "TRAP_BRKPT";
#endif
#ifdef TRAP_TRACE
        case TRAP_TRACE: return "TRAP_TRACE";
#endif
        default: break;
        }
        break;

    case SIGCHLD:
        switch (si_code) {
#ifdef CLD_EXITED
        case CLD_EXITED: return "CLD_EXITED";
#endif
#ifdef CLD_KILLED
        case CLD_KILLED: return "CLD_KILLED";
#endif
#ifdef CLD_DUMPED
        case CLD_DUMPED: return "CLD_DUMPED";
#endif
#ifdef CLD_TRAPPED
        case CLD_TRAPPED: return "CLD_TRAPPED";
#endif
#ifdef CLD_STOPPED
        case CLD_STOPPED: return "CLD_STOPPED";
#endif
#ifdef CLD_CONTINUED
        case CLD_CONTINUED: return "CLD_CONTINUED";
#endif
        default: break;
        }
        break;

#ifdef SIGPOLL
    case SIGPOLL:
        switch (si_code) {
#ifdef POLL_IN
        case POLL_IN: return "POLL_IN";
#endif
#ifdef POLL_OUT
        case POLL_OUT: return "POLL_OUT";
#endif
#ifdef POLL_MSG
        case POLL_MSG: return "POLL_MSG";
#endif
#ifdef POLL_ERR
        case POLL_ERR: return "POLL_ERR";
#endif
#ifdef POLL_PRI
        case POLL_PRI: return "POLL_PRI";
#endif
#ifdef POLL_HUP
        case POLL_HUP: return "POLL_HUP";
#endif
        default: break;
        }
        break;
#endif /* SIGPOLL */

    default:
        switch (si_code) {
#ifdef SI_ASYNCNL
        case SI_ASYNCNL: return "SI_ASYNCNL";
#endif
#ifdef SI_SIGIO
        case SI_SIGIO: return "SI_SIGIO";
#endif
        case SI_ASYNCIO: return "SI_ASYNCIO";
        case SI_MESGQ: return "SI_MESGQ";
        case SI_TIMER: return "SI_TIMER";
        case SI_QUEUE: return "SI_QUEUE";
        case SI_USER: return "SI_USER";
#ifdef SI_KERNEL
        case SI_KERNEL: return "SI_KERNEL";
#endif
#ifdef SI_UNDEFINED
        case SI_UNDEFINED: return "SI_UNDEFINED";
#endif
        default: break;
        }
        break;
    }

    return "";
}

/**
 * Advance the write cursor of the message buffer after one snprintf call.
 *
 * snprintf returns the length the output would have had, which can exceed
 * the space that was available, and a negative value on error. The cursor
 * must only move by the number of characters actually stored, otherwise it
 * leaves the buffer and the remaining size becomes negative.
 *
 * @param pos       in/out: next write position inside the buffer
 * @param remaining in/out: bytes still available at *pos
 * @param written   return value of the snprintf call just performed
 */
static void opal_stackframe_advance (char ** pos, int * remaining, int written)
{
    if (written <= 0) {
        return;
    }
    if (written >= *remaining) {
        /* Truncated: only remaining-1 characters (plus the NUL) were stored */
        written = (*remaining > 0) ? (*remaining - 1) : 0;
    }
    *pos += written;
    *remaining -= written;
}

static void opal_show_stackframe (int signo, siginfo_t * info, void * p)
{
#ifdef HAVE_BACKTRACE
    int i;
    int trace_size;
    void * trace[32];
    char ** messages = NULL;
#endif
    char print_buffer[1024];
    char * tmp = print_buffer;
    int size = sizeof (print_buffer);
    int ret;
    const char * str;
    static const char eof_msg[] = "*** End of error message ***\n";

    (void) p;

    /*
     * Yes, we are doing printf inside a signal-handler.
     * However, backtrace itself calls malloc (which may not be signal-safe,
     * under linux, printf and malloc are)
     *
     * We could use backtrace_symbols_fd and write directly into a
     * file descriptor, however, without formatting -- also this fd
     * should be opened in a sensible way...
     */
    memset (print_buffer, 0, sizeof (print_buffer));

    str = opal_stackframe_si_code_name (signo, info->si_code);

    ret = snprintf (tmp, size, "Signal:%d info.si_errno:%d(%s) si_code:%d(%s)\n",
                    signo, info->si_errno, strerror (info->si_errno),
                    info->si_code, str);
    opal_stackframe_advance (&tmp, &size, ret);

    /* Append the siginfo fields that are meaningful for this signal */
    switch (signo) {
    case SIGILL:
    case SIGFPE:
    case SIGSEGV:
    case SIGBUS:
        ret = snprintf (tmp, size, "Failing at addr:%p\n",
                        info->si_addr);
        opal_stackframe_advance (&tmp, &size, ret);
        break;

    case SIGCHLD:
        ret = snprintf (tmp, size, "si_pid:%d si_uid:%d si_status:%d\n",
                        (int) info->si_pid, (int) info->si_uid,
                        info->si_status);
        opal_stackframe_advance (&tmp, &size, ret);
        break;

#ifdef SIGPOLL
    case SIGPOLL:
#ifdef HAVE_SIGINFO_T_SI_FD
        ret = snprintf (tmp, size, "si_band:%ld si_fd:%d\n",
                        (long) info->si_band, (int) info->si_fd);
#elif HAVE_SIGINFO_T_SI_BAND
        ret = snprintf (tmp, size, "si_band:%ld\n",
                        (long) info->si_band);
#else
        /* Neither field is available: nothing to append */
        ret = 0;
#endif
        opal_stackframe_advance (&tmp, &size, ret);
        break;
#endif

    default:
        break;
    }

    /*
     * Emit exactly the bytes produced so far: `size` holds the space that
     * is still free, so the used length is the buffer size minus `size`.
     */
    (void) write (1, print_buffer, sizeof (print_buffer) - (size_t) size);
    fflush (stderr);

#ifdef HAVE_BACKTRACE
    trace_size = backtrace (trace, (int) (sizeof (trace) / sizeof (trace[0])));
    messages = backtrace_symbols (trace, trace_size);
    /* backtrace_symbols returns NULL if it cannot allocate the array */
    if (NULL != messages) {
        for (i = 0; i < trace_size; i++) {
            fprintf (stderr, "[%d] func:%s\n", i, messages[i]);
            fflush (stderr);
        }
        /* Only the array itself is to be freed, not the strings */
        free (messages);
    }
#endif

    /* sizeof - 1: do not write the string's terminating NUL byte */
    (void) write (1, eof_msg, sizeof (eof_msg) - 1);
    fflush (stderr);
}
#endif /* OMPI_WANT_PRETTY_PRINT_STACKTRACE && ! defined(__WINDOWS__) */
/**
 * Register opal_show_stackframe as the handler for every signal listed in
 * the MCA parameter located with mca_base_param_find("opal", NULL, "signal").
 *
 * The parameter value is parsed as a comma-separated list of decimal
 * signal numbers. Each handler is installed with SA_SIGINFO and as a
 * one-shot handler (SA_ONESHOT where defined, SA_RESETHAND otherwise).
 * When pretty-printed stack traces are compiled out, or on Windows, the
 * function does nothing and reports success.
 *
 * Parsing stops at the first error; handlers installed for earlier list
 * entries stay installed.
 *
 * @returnvalue OMPI_SUCCESS
 * @returnvalue OMPI_ERR_BAD_PARAM if an entry in the signal-list is not a
 *              number in the range 1 .. _NSIG-1, or is followed by
 *              anything other than ',' or the end of the string
 * @returnvalue OMPI_ERR_IN_ERRNO if sigaction() fails
 */
int opal_util_register_stackhandlers (void)
{
#if OMPI_WANT_PRETTY_PRINT_STACKTRACE && ! defined(__WINDOWS__)
    struct sigaction act;
    /*
     * Start from NULL so that a failed lookup which leaves the pointer
     * untouched results in an empty list instead of reading garbage.
     */
    char * string_value = NULL;
    char * tmp;
    char * next;
    int param;

    param = mca_base_param_find ("opal", NULL, "signal");
    mca_base_param_lookup_string (param, &string_value);
    /*
     * NOTE(review): string_value is never released here; whether the
     * caller owns it depends on mca_base_param_lookup_string -- confirm.
     */

    memset (&act, 0, sizeof (act));
    act.sa_sigaction = opal_show_stackframe;
    act.sa_flags = SA_SIGINFO;
#ifdef SA_ONESHOT
    act.sa_flags |= SA_ONESHOT;
#else
    act.sa_flags |= SA_RESETHAND;
#endif

    for (tmp = next = string_value ;
         next != NULL && *next != '\0';
         tmp = next + 1)
    {
        /* Keep the full long so out-of-range input cannot wrap into range */
        long sig = strtol (tmp, &next, 10);

        /*
         * If there is no sensible number in the string, exit.
         * Similarly for any number which is not in the signal-number range
         * (0 is not a signal; overflow yields LONG_MIN/LONG_MAX).
         */
        if ((tmp == next) || (0 >= sig) || (_NSIG <= sig)) {
            return OMPI_ERR_BAD_PARAM;
        }

        /* A number must be followed by a separator or the end of the list */
        if ((*next != ',') && (*next != '\0')) {
            return OMPI_ERR_BAD_PARAM;
        }

        if (0 != sigaction ((int) sig, &act, NULL)) {
            return OMPI_ERR_IN_ERRNO;
        }
    }
#endif /* OMPI_WANT_PRETTY_PRINT_STACKTRACE && ! defined(__WINDOWS__) */
    return OMPI_SUCCESS;
}