/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "ompi_config.h" #include #ifdef HAVE_UNISTD_H #include #endif #ifdef HAVE_EXECINFO_H #include #endif #ifdef HAVE_STRING_H #include #endif #ifdef HAVE_SIGNAL_H #include #endif #include "opal/util/stacktrace.h" #include "opal/mca/base/mca_base_param.h" #include "ompi/include/constants.h" #ifndef _NSIG #define _NSIG 32 #endif /** * This function is being called as a signal-handler in response * to a user-specified signal (e.g. SIGFPE or SIGSEGV). * For Linux/Glibc, it then uses backtrace and backtrace_symbols * to figure the current stack and then prints that out to stdout. * Where available, the BSD libexecinfo is used to provide Linux/Glibc * compatable backtrace and backtrace_symbols functions. * Yes, printf and malloc are not signal-safe per se, but should be * on Linux? * * @param signo with the signal number raised * @param info with information regarding the reason/send of the signal * @param p * * FIXME: Should distinguish for systems, which don't have siginfo... */ #if OMPI_WANT_PRETTY_PRINT_STACKTRACE && ! defined(__WINDOWS__) static void opal_show_stackframe (int signo, siginfo_t * info, void * p) { #ifdef HAVE_BACKTRACE int i; int trace_size; void * trace[32]; char ** messages = (char **)NULL; #endif char print_buffer[1024]; char * tmp = print_buffer; int size = sizeof (print_buffer); int ret; char * str = ""; char eof_msg[] = "*** End of error message ***\n"; /* * Yes, we are doing printf inside a signal-handler. * However, backtrace itself calls malloc (which may not be signal-safe, * under linux, printf and malloc are) * * We could use backtrace_symbols_fd and write directly into an * filedescriptor, however, without formatting -- also this fd * should be opened in a sensible way... */ memset (print_buffer, 0, sizeof (print_buffer)); switch (signo) { case SIGILL: switch (info->si_code) { #ifdef ILL_ILLOPC case ILL_ILLOPC: str = "ILL_ILLOPC"; break; #endif #ifdef ILL_ILLOPN case ILL_ILLOPN: str = "ILL_ILLOPN"; break; #endif #ifdef ILL_ILLADR case ILL_ILLADR: str = "ILL_ILLADR"; break; #endif #ifdef ILL_ILLTRP case ILL_ILLTRP: str = "ILL_ILLTRP"; break; #endif #ifdef ILL_PRVOPC case ILL_PRVOPC: str = "ILL_PRVOPC"; break; #endif #ifdef ILL_PRVREG case ILL_PRVREG: str = "ILL_PRVREG"; break; #endif #ifdef ILL_COPROC case ILL_COPROC: str = "ILL_COPROC"; break; #endif #ifdef ILL_BADSTK case ILL_BADSTK: str = "ILL_BADSTK"; break; #endif } break; case SIGFPE: switch (info->si_code) { #ifdef FPE_INTDIV case FPE_INTDIV: str = "FPE_INTDIV"; break; #endif #ifdef FPE_INTOVF case FPE_INTOVF: str = "FPE_INTOVF"; break; #endif case FPE_FLTDIV: str = "FPE_FLTDIV"; break; case FPE_FLTOVF: str = "FPE_FLTOVF"; break; case FPE_FLTUND: str = "FPE_FLTUND"; break; case FPE_FLTRES: str = "FPE_FLTRES"; break; case FPE_FLTINV: str = "FPE_FLTINV"; break; #ifdef FPE_FLTSUB case FPE_FLTSUB: str = "FPE_FLTSUB"; break; #endif } break; case SIGSEGV: switch (info->si_code) { #ifdef SEGV_MAPERR case SEGV_MAPERR: str = "SEGV_MAPERR"; break; #endif #ifdef SEGV_ACCERR case SEGV_ACCERR: str = "SEGV_ACCERR"; break; #endif } break; case SIGBUS: switch (info->si_code) { #ifdef BUS_ADRALN case BUS_ADRALN: str = "BUS_ADRALN"; break; #endif #ifdef BUSADRERR case BUS_ADRERR: str = "BUS_ADRERR"; break; #endif #ifdef BUS_OBJERR case BUS_OBJERR: str = "BUS_OBJERR"; break; #endif } break; case SIGTRAP: switch (info->si_code) { #ifdef TRAP_BRKPT case TRAP_BRKPT: str = "TRAP_BRKPT"; break; #endif #ifdef TRAP_TRACE case TRAP_TRACE: str = "TRAP_TRACE"; break; #endif } break; case SIGCHLD: switch (info->si_code) { #ifdef CLD_EXITED case CLD_EXITED: str = "CLD_EXITED"; break; #endif #ifdef CLD_KILLED case CLD_KILLED: str = "CLD_KILLED"; break; #endif #ifdef CLD_DUMPED case CLD_DUMPED: str = "CLD_DUMPED"; break; #endif #ifdef CLD_WTRAPPED case CLD_TRAPPED: str = "CLD_TRAPPED"; break; #endif #ifdef CLD_STOPPED case CLD_STOPPED: str = "CLD_STOPPED"; break; #endif #ifdef CLD_CONTINUED case CLD_CONTINUED: str = "CLD_CONTINUED"; break; #endif } break; #ifdef SIGPOLL case SIGPOLL: switch (info->si_code) { #ifdef POLL_IN case POLL_IN: str = "POLL_IN"; break; #endif #ifdef POLL_OUT case POLL_OUT: str = "POLL_OUT"; break; #endif #ifdef POLL_MSG case POLL_MSG: str = "POLL_MSG"; break; #endif #ifdef POLL_ERR case POLL_ERR: str = "POLL_ERR"; break; #endif #ifdef POLL_PRI case POLL_PRI: str = "POLL_PRI"; break; #endif #ifdef POLL_HUP case POLL_HUP: str = "POLL_HUP"; break; #endif } break; #endif /* SIGPOLL */ default: switch (info->si_code) { #ifdef SI_ASYNCNL case SI_ASYNCNL: str = "SI_ASYNCNL"; break; #endif #ifdef SI_SIGIO case SI_SIGIO: str = "SI_SIGIO"; break; #endif case SI_ASYNCIO: str = "SI_ASYNCIO"; break; case SI_MESGQ: str = "SI_MESGQ"; break; case SI_TIMER: str = "SI_TIMER"; break; case SI_QUEUE: str = "SI_QUEUE"; break; case SI_USER: str = "SI_USER"; break; #ifdef SI_KERNEL case SI_KERNEL: str = "SI_KERNEL"; break; #endif #ifdef SI_UNDEFINED case SI_UNDEFINED: str = "SI_UNDEFINED"; break; #endif } } ret = snprintf (tmp, size, "Signal:%d info.si_errno:%d(%s) si_code:%d(%s)\n", signo, info->si_errno, strerror (info->si_errno), info->si_code, str); size -= ret; tmp += ret; switch (signo) { case SIGILL: case SIGFPE: case SIGSEGV: case SIGBUS: { ret = snprintf (tmp, size, "Failing at addr:%p\n", info->si_addr); size -= ret; tmp += ret; break; } case SIGCHLD: { ret = snprintf (tmp, size, "si_pid:%d si_uid:%d si_status:%d\n", info->si_pid, info->si_uid, info->si_status); size -= ret; tmp += ret; break; } #ifdef SIGPOLL case SIGPOLL: { #ifdef HAVE_SIGINFO_T_SI_FD ret = snprintf (tmp, size, "si_band:%ld si_fd:%d\n", info->si_band, info->si_fd); #elif HAVE_SIGINFO_T_SI_BAND ret = snprintf (tmp, size, "si_band:%ld\n", info->si_band); #else size = 0; #endif size -= ret; tmp += ret; break; } #endif } write(1, print_buffer, size); fflush(stderr); #ifdef HAVE_BACKTRACE trace_size = backtrace (trace, 32); messages = backtrace_symbols (trace, trace_size); for (i = 0; i < trace_size; i++) { fprintf(stderr, "[%d] func:%s\n", i, messages[i]); fflush(stderr); } #endif write(1, eof_msg, sizeof(eof_msg)); fflush(stderr); } #endif /* OMPI_WANT_PRETTY_PRINT_STACKTRACE && ! defined(__WINDOWS__) */ /** * Here we register the opal_show_stackframe function for signals * passed to OpenMPI by the mpi_signal-parameter passed to mpirun * by the user. * * @returnvalue OMPI_SUCCESS * @returnvalue OMPI_ERR_BAD_PARAM if the value in the signal-list * is not a valid signal-number * */ int opal_util_register_stackhandlers (void) { #if OMPI_WANT_PRETTY_PRINT_STACKTRACE && ! defined(__WINDOWS__) struct sigaction act; char * string_value; char * tmp; char * next; int param; param = mca_base_param_find ("opal", NULL, "signal"); mca_base_param_lookup_string (param, &string_value); memset(&act, 0, sizeof(act)); act.sa_sigaction = opal_show_stackframe; act.sa_flags = SA_SIGINFO; #ifdef SA_ONESHOT act.sa_flags |= SA_ONESHOT; #else act.sa_flags |= SA_RESETHAND; #endif for (tmp = next = string_value ; next != NULL && *next != '\0'; tmp = next + 1) { int sig; int ret; sig = strtol (tmp, &next, 10); /* * If there is no sensible number in the string, exit. * Similarly for any number which is not in the signal-number range */ if (((0 == sig) && (tmp == next)) || (0 > sig) || (_NSIG <= sig)) { return OMPI_ERR_BAD_PARAM; } if ((next == NULL) || ((*next != ',') && (*next != '\0'))) { return OMPI_ERR_BAD_PARAM; } ret = sigaction (sig, &act, NULL); if (ret != 0) { return OMPI_ERR_IN_ERRNO; } } #endif /* OMPI_WANT_PRETTY_PRINT_STACKTRACE && ! defined(__WINDOWS__) */ return OMPI_SUCCESS; }