1
1
http://www.open-mpi.org/community/lists/devel/2013/12/13412.php

Fix the backtrace function to avoid async-signal-safety issues when it is called from a signal handler. Thanks to Takahiro Kawashima for the patch.

This commit was SVN r29955.
Этот коммит содержится в:
Ralph Castain 2013-12-18 17:57:37 +00:00
родитель c3d2b3e9b8
Коммит 77553f72be
7 изменённых файлов: 47 добавлений и 59 удалений

Просмотреть файл

@ -87,7 +87,7 @@ ompi_mpi_abort(struct ompi_communicator_t* comm,
/* This will print a message if it's unable to print the /* This will print a message if it's unable to print the
backtrace, so we don't need an additional "else" clause backtrace, so we don't need an additional "else" clause
if opal_backtrace_print() is not supported. */ if opal_backtrace_print() is not supported. */
opal_backtrace_print(stderr); opal_backtrace_print(stderr, NULL, 1);
} }
} }

Просмотреть файл

@ -34,11 +34,12 @@ BEGIN_C_DECLS
/* /*
* print back trace to FILE file * Print back trace to FILE file with a prefix for each line.
* The first `strip` lines of the backtrace are not printed.
* *
* \note some attempts made to be signal safe. * \note some attempts made to be signal safe.
*/ */
OPAL_DECLSPEC void opal_backtrace_print(FILE *file); OPAL_DECLSPEC int opal_backtrace_print(FILE *file, char *prefix, int strip);
/* /*
* Return back trace in buffer. buffer will be allocated by the * Return back trace in buffer. buffer will be allocated by the

Просмотреть файл

@ -20,6 +20,10 @@
#include "opal_config.h" #include "opal_config.h"
#include <stdio.h> #include <stdio.h>
#include <string.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_EXECINFO_H #ifdef HAVE_EXECINFO_H
#include <execinfo.h> #include <execinfo.h>
#endif #endif
@ -27,23 +31,31 @@
#include "opal/constants.h" #include "opal/constants.h"
#include "opal/mca/backtrace/backtrace.h" #include "opal/mca/backtrace/backtrace.h"
void int
opal_backtrace_print(FILE *file) opal_backtrace_print(FILE *file, char *prefix, int strip)
{ {
int i; int i, fd, len;
int trace_size; int trace_size;
void * trace[32]; void * trace[32];
char ** messages = (char **)NULL; char buf[6];
trace_size = backtrace (trace, 32); fd = fileno (file);
messages = backtrace_symbols (trace, trace_size); if (-1 == fd) {
return OPAL_ERR_BAD_PARAM;
for (i = 0; i < trace_size; i++) {
fprintf(file, "[%d] func:%s\n", i, messages[i]);
fflush(file);
} }
free(messages); trace_size = backtrace (trace, 32);
for (i = strip; i < trace_size; i++) {
if (NULL != prefix) {
write (fd, prefix, strlen (prefix));
}
len = snprintf (buf, sizeof(buf), "[%2d] ", i - strip);
write (fd, buf, len);
backtrace_symbols_fd (&trace[i], 1, fd);
}
return OPAL_SUCCESS;
} }

Просмотреть файл

@ -23,9 +23,10 @@
#include "opal/constants.h" #include "opal/constants.h"
#include "opal/mca/backtrace/backtrace.h" #include "opal/mca/backtrace/backtrace.h"
void int
opal_backtrace_print(FILE *file) opal_backtrace_print(FILE *file, char *prefix, int strip)
{ {
return OPAL_ERR_NOT_IMPLEMENTED;
} }

Просмотреть файл

@ -24,10 +24,12 @@
#include "opal/constants.h" #include "opal/constants.h"
#include "opal/mca/backtrace/backtrace.h" #include "opal/mca/backtrace/backtrace.h"
void int
opal_backtrace_print(FILE *file) opal_backtrace_print(FILE *file, char *prefix, int strip)
{ {
printstack(fileno(file)); printstack(fileno(file));
return OPAL_SUCCESS;
} }

Просмотреть файл

@ -53,12 +53,10 @@ static char *unable_to_print_msg = "Unable to print stack trace!\n";
/** /**
* This function is being called as a signal-handler in response * This function is being called as a signal-handler in response
* to a user-specified signal (e.g. SIGFPE or SIGSEGV). * to a user-specified signal (e.g. SIGFPE or SIGSEGV).
* For Linux/Glibc, it then uses backtrace and backtrace_symbols * For Linux/Glibc, it then uses backtrace and backtrace_symbols_fd
* to figure the current stack and then prints that out to stdout. * to figure the current stack and print that out to stderr.
* Where available, the BSD libexecinfo is used to provide Linux/Glibc * Where available, the BSD libexecinfo is used to provide Linux/Glibc
* compatible backtrace and backtrace_symbols functions. * compatible backtrace and backtrace_symbols_fd functions.
* Yes, printf and malloc are not signal-safe per se, but should be
* on Linux?
* *
* @param signo with the signal number raised * @param signo with the signal number raised
* @param info with information regarding the reason/sender of the signal * @param info with information regarding the reason/sender of the signal
@ -72,9 +70,8 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
char print_buffer[1024]; char print_buffer[1024];
char * tmp = print_buffer; char * tmp = print_buffer;
int size = sizeof (print_buffer); int size = sizeof (print_buffer);
int ret, traces_size; int ret;
char *si_code_str = ""; char *si_code_str = "";
char **traces;
/* write out the footer information */ /* write out the footer information */
memset (print_buffer, 0, sizeof (print_buffer)); memset (print_buffer, 0, sizeof (print_buffer));
@ -82,18 +79,8 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
HOSTFORMAT "*** Process received signal ***\n", HOSTFORMAT "*** Process received signal ***\n",
stacktrace_hostname, getpid()); stacktrace_hostname, getpid());
write(fileno(stderr), print_buffer, ret); write(fileno(stderr), print_buffer, ret);
fflush(stderr);
/*
* Yes, we are doing printf inside a signal-handler.
* However, backtrace itself calls malloc (which may not be signal-safe,
* under linux, printf and malloc are)
*
* We could use backtrace_symbols_fd and write directly into an
* filedescriptor, however, without formatting -- also this fd
* should be opened in a sensible way...
*/
memset (print_buffer, 0, sizeof (print_buffer)); memset (print_buffer, 0, sizeof (print_buffer));
#ifdef HAVE_STRSIGNAL #ifdef HAVE_STRSIGNAL
@ -342,28 +329,14 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
/* write out the signal information generated above */ /* write out the signal information generated above */
write(fileno(stderr), print_buffer, sizeof(print_buffer)-size); write(fileno(stderr), print_buffer, sizeof(print_buffer)-size);
fflush(stderr);
/* print out the stack trace */ /* print out the stack trace */
ret = opal_backtrace_buffer(&traces, &traces_size); snprintf(print_buffer, sizeof(print_buffer), HOSTFORMAT,
if (OPAL_SUCCESS == ret) { stacktrace_hostname, getpid());
int i; print_buffer[sizeof(print_buffer) - 1] = '\0';
/* since we have the opportunity, strip off the bottom two ret = opal_backtrace_print(stderr, print_buffer, 2);
function calls, which will be this function and if (OPAL_SUCCESS != ret) {
opal_backtrace_buffer(). */ write(fileno(stderr), unable_to_print_msg, strlen(unable_to_print_msg));
for (i = 2 ; i < traces_size ; ++i) {
ret = snprintf(print_buffer, sizeof(print_buffer),
HOSTFORMAT "[%2d] %s\n",
stacktrace_hostname, getpid(), i - 2, traces[i]);
if (ret > 0) {
write(fileno(stderr), print_buffer, ret);
} else {
write(fileno(stderr), unable_to_print_msg,
strlen(unable_to_print_msg));
}
}
} else {
opal_backtrace_print(stderr);
} }
/* write out the footer information */ /* write out the footer information */
@ -376,7 +349,6 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
} else { } else {
write(fileno(stderr), unable_to_print_msg, strlen(unable_to_print_msg)); write(fileno(stderr), unable_to_print_msg, strlen(unable_to_print_msg));
} }
fflush(stderr);
} }
#endif /* OPAL_WANT_PRETTY_PRINT_STACKTRACE */ #endif /* OPAL_WANT_PRETTY_PRINT_STACKTRACE */
@ -393,12 +365,12 @@ void opal_stackframe_output(int stream)
int i; int i;
/* since we have the opportunity, strip off the bottom two /* since we have the opportunity, strip off the bottom two
function calls, which will be this function and function calls, which will be this function and
opa_backtrace_buffer(). */ opal_backtrace_buffer(). */
for (i = 2; i < traces_size; ++i) { for (i = 2; i < traces_size; ++i) {
opal_output(stream, "%s", traces[i]); opal_output(stream, "%s", traces[i]);
} }
} else { } else {
opal_backtrace_print(stderr); opal_backtrace_print(stderr, NULL, 2);
} }
} }

Просмотреть файл

@ -79,7 +79,7 @@ void orte_oob_tcp_set_socket_options(int sd)
int optval; int optval;
optval = 1; optval = 1;
if(setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval)) < 0) { if(setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval)) < 0) {
opal_backtrace_print(stderr); opal_backtrace_print(stderr, NULL, 1);
opal_output(0, "[%s:%d] setsockopt(TCP_NODELAY) failed: %s (%d)", opal_output(0, "[%s:%d] setsockopt(TCP_NODELAY) failed: %s (%d)",
__FILE__, __LINE__, __FILE__, __LINE__,
strerror(opal_socket_errno), strerror(opal_socket_errno),