1
1

stacktrace: Add flexibility in stacktrace output

- New MCA option: opal_stacktrace_output
   - Specifies where the stack trace output stream goes.
   - Accepts: none, stdout, stderr, file[:filename]
   - Default filename 'stacktrace'
     - Filename will be `stacktrace.PID`, or if VPID is available,
       then the filename will be `stacktrace.VPID.PID`
 - Update util/stacktrace to allow for different output avenues
   including files. Previously this was hardcoded to 'stderr'.
 - Since opal_backtrace_print needs to be signal safe, passing it a
   FILE object that actually represents a file stream is difficult. This
   is because we cannot open the file in the signal handler using
   `fopen` (not safe), but have to use `open` (safe). Additionally, we
   cannot use `fdopen` to convert the `int fd` to a `FILE *fh` since it
   is also not signal safe.
   - I did not want to break the backtrace.h API so I introduced a new
     rule (documented in `backtrace.c`) that if the `FILE *file`
     argument is `NULL` then look for the `opal_stacktrace_output_fileno`
     variable to tell you which file descriptor to use for output.

Signed-off-by: Joshua Hursey <jhursey@us.ibm.com>
Этот коммит содержится в:
Joshua Hursey 2017-01-26 11:20:41 -06:00
родитель f8918e37a9
Коммит 6d98559be9
7 изменённых файлов: 194 добавлений и 11 удалений

Просмотреть файл

@ -12,6 +12,7 @@
* All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -26,6 +27,7 @@
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/util/stacktrace.h"
BEGIN_C_DECLS
@ -39,6 +41,8 @@ BEGIN_C_DECLS
/*
* Print back trace to FILE file with a prefix for each line.
* First strip lines are not printed.
* If 'file' is NULL then the component should try to use the file descriptor
* saved in opal_stacktrace_output_fileno
*
* \note some attempts made to be signal safe.
*/

Просмотреть файл

@ -10,6 +10,7 @@
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -34,12 +35,16 @@
int
opal_backtrace_print(FILE *file, char *prefix, int strip)
{
int i, fd, len;
int i, len;
int trace_size;
void * trace[32];
char buf[6];
int fd = opal_stacktrace_output_fileno;
if( NULL != file ) {
fd = fileno(file);
}
if (-1 == fd) {
return OPAL_ERR_BAD_PARAM;
}

Просмотреть файл

@ -10,6 +10,7 @@
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -27,7 +28,13 @@
/*
 * Print the current stack trace by handing a file descriptor to printstack().
 *
 * file   - stream to print to. If NULL, the descriptor stored in
 *          opal_stacktrace_output_fileno is used instead, so that callers in
 *          a signal handler do not need a FILE object.
 * prefix - not used by this implementation.
 * strip  - not used by this implementation.
 *
 * Returns OPAL_SUCCESS.
 */
int
opal_backtrace_print(FILE *file, char *prefix, int strip)
{
    int fd = opal_stacktrace_output_fileno;

    /* Only touch 'file' after the NULL check: fileno(NULL) is invalid, and
     * the trace must be emitted exactly once on the selected descriptor. */
    if( NULL != file ) {
        fd = fileno(file);
    }

    printstack(fd);

    return OPAL_SUCCESS;
}

Просмотреть файл

@ -21,6 +21,7 @@
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -48,6 +49,7 @@
#include "opal/util/timings.h"
char *opal_signal_string = NULL;
char *opal_stacktrace_output_filename = NULL;
char *opal_net_private_ipv4 = NULL;
char *opal_set_max_sys_limits = NULL;
@ -76,6 +78,7 @@ static bool opal_register_done = false;
int opal_register_params(void)
{
int ret;
char *string = NULL;
if (opal_register_done) {
return OPAL_SUCCESS;
@ -87,7 +90,6 @@ int opal_register_params(void)
* This string is going to be used in opal/util/stacktrace.c
*/
{
char *string = NULL;
int j;
int signals[] = {
#ifdef SIGABRT
@ -127,6 +129,28 @@ int opal_register_params(void)
}
}
/*
* Where should the stack trace output be directed
* This string is going to be used in opal/util/stacktrace.c
*/
string = strdup("stderr");
opal_stacktrace_output_filename = string;
ret = mca_base_var_register ("opal", "opal", NULL, "stacktrace_output",
"Specifies where the stack trace output stream goes. "
"Accepts one of the following: none (disabled), stderr (default), stdout, file[:filename]. "
"If 'filename' is not specified, a default filename of 'stacktrace' is used. "
"The 'filename' is appended with either '.PID' or '.RANK.PID', if RANK is available. "
"The 'filename' can be an absolute path or a relative path to the current working directory.",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL,
&opal_stacktrace_output_filename);
free (string);
if (0 > ret) {
return ret;
}
#if defined(HAVE_SCHED_YIELD)
opal_progress_yield_when_idle = false;
ret = mca_base_var_register ("opal", "opal", "progress", "yield_when_idle",

Просмотреть файл

@ -18,6 +18,7 @@
* Copyright (c) 2014 Hochschule Esslingen. All rights reserved.
* Copyright (c) 2015 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -29,6 +30,7 @@
#define OPAL_PARAMS_H
extern char *opal_signal_string;
extern char *opal_stacktrace_output_filename;
extern char *opal_net_private_ipv4;
extern char *opal_set_max_sys_limits;

Просмотреть файл

@ -25,6 +25,15 @@
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_SYS_FCNTL_H
#include <fcntl.h>
#endif
#include <string.h>
#include <signal.h>
@ -35,6 +44,7 @@
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "opal/util/argv.h"
#include "opal/util/proc.h"
#include "opal/runtime/opal_params.h"
#ifndef _NSIG
@ -43,9 +53,35 @@
#define HOSTFORMAT "[%s:%05d] "
int opal_stacktrace_output_fileno = -1;
static char *opal_stacktrace_output_filename_base = NULL;
static size_t opal_stacktrace_output_filename_max_len = 0;
static char stacktrace_hostname[OPAL_MAXHOSTNAMELEN];
static char *unable_to_print_msg = "Unable to print stack trace!\n";
/*
 * Format the stack trace output filename into
 * opal_stacktrace_output_filename (at most
 * opal_stacktrace_output_filename_max_len bytes, including the terminator):
 *   <base>.PID        when no local process object is available
 *   <base>.VPID.PID   when the local process object is available
 * where <base> is opal_stacktrace_output_filename_base.
 */
static void set_stacktrace_filename(void) {
    opal_proc_t *local_proc = opal_proc_local_get();
    unsigned long pid = (unsigned long)getpid();

    if( NULL != local_proc ) {
        /* The local process is known: include its VPID in the name */
        unsigned long vpid = (unsigned long)local_proc->proc_name.vpid;

        snprintf(opal_stacktrace_output_filename,
                 opal_stacktrace_output_filename_max_len,
                 "%s.%lu.%lu",
                 opal_stacktrace_output_filename_base, vpid, pid);
        return;
    }

    /* No local process information: the PID alone identifies the file */
    snprintf(opal_stacktrace_output_filename,
             opal_stacktrace_output_filename_max_len,
             "%s.%lu",
             opal_stacktrace_output_filename_base, pid);
}
/**
* This function is being called as a signal-handler in response
* to a user-specified signal (e.g. SIGFPE or SIGSEGV).
@ -69,12 +105,37 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
int ret;
char *si_code_str = "";
/* Do not print the stack trace */
if( 0 > opal_stacktrace_output_fileno && 0 == opal_stacktrace_output_filename_max_len ) {
/* Raise the signal again, so we don't accidentally mask critical signals.
* For critical signals, it is preferred that we call 'raise' instead of
* 'exit' or 'abort' so that the return status is set properly for this
* process.
*/
signal(signo, SIG_DFL);
raise(signo);
return;
}
/* Update the file name with the RANK, if available */
if( 0 < opal_stacktrace_output_filename_max_len ) {
set_stacktrace_filename();
opal_stacktrace_output_fileno = open(opal_stacktrace_output_filename,
O_CREAT|O_WRONLY|O_TRUNC, S_IRUSR|S_IWUSR);
if( 0 > opal_stacktrace_output_fileno ) {
opal_output(0, "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s",
opal_stacktrace_output_filename, strerror(errno));
opal_stacktrace_output_fileno = fileno(stderr);
}
}
/* write out the header information */
memset (print_buffer, 0, sizeof (print_buffer));
ret = snprintf(print_buffer, sizeof(print_buffer),
HOSTFORMAT "*** Process received signal ***\n",
stacktrace_hostname, getpid());
write(fileno(stderr), print_buffer, ret);
write(opal_stacktrace_output_fileno, print_buffer, ret);
memset (print_buffer, 0, sizeof (print_buffer));
@ -324,14 +385,14 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
}
/* write out the signal information generated above */
write(fileno(stderr), print_buffer, sizeof(print_buffer)-size);
write(opal_stacktrace_output_fileno, print_buffer, sizeof(print_buffer)-size);
/* print out the stack trace */
snprintf(print_buffer, sizeof(print_buffer), HOSTFORMAT,
stacktrace_hostname, getpid());
ret = opal_backtrace_print(stderr, print_buffer, 2);
ret = opal_backtrace_print(NULL, print_buffer, 2);
if (OPAL_SUCCESS != ret) {
write(fileno(stderr), unable_to_print_msg, strlen(unable_to_print_msg));
write(opal_stacktrace_output_fileno, unable_to_print_msg, strlen(unable_to_print_msg));
}
/* write out the footer information */
@ -340,9 +401,15 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
HOSTFORMAT "*** End of error message ***\n",
stacktrace_hostname, getpid());
if (ret > 0) {
write(fileno(stderr), print_buffer, ret);
write(opal_stacktrace_output_fileno, print_buffer, ret);
} else {
write(fileno(stderr), unable_to_print_msg, strlen(unable_to_print_msg));
write(opal_stacktrace_output_fileno, unable_to_print_msg, strlen(unable_to_print_msg));
}
if( fileno(stdout) != opal_stacktrace_output_fileno &&
fileno(stderr) != opal_stacktrace_output_fileno ) {
close(opal_stacktrace_output_fileno);
opal_stacktrace_output_fileno = -1;
}
/* Raise the signal again, so we don't accidentally mask critical signals.
@ -373,7 +440,30 @@ void opal_stackframe_output(int stream)
opal_output(stream, "%s", traces[i]);
}
} else {
opal_backtrace_print(stderr, NULL, 2);
/* Do not print the stack trace */
if( 0 > opal_stacktrace_output_fileno && 0 == opal_stacktrace_output_filename_max_len ) {
return;
}
/* Update the file name with the RANK, if available */
if( 0 < opal_stacktrace_output_filename_max_len ) {
set_stacktrace_filename();
opal_stacktrace_output_fileno = open(opal_stacktrace_output_filename,
O_CREAT|O_WRONLY|O_TRUNC, S_IRUSR|S_IWUSR);
if( 0 > opal_stacktrace_output_fileno ) {
opal_output(0, "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s",
opal_stacktrace_output_filename, strerror(errno));
opal_stacktrace_output_fileno = fileno(stderr);
}
}
opal_backtrace_print(NULL, NULL, 2);
if( fileno(stdout) != opal_stacktrace_output_fileno &&
fileno(stderr) != opal_stacktrace_output_fileno ) {
close(opal_stacktrace_output_fileno);
opal_stacktrace_output_fileno = -1;
}
}
}
@ -444,6 +534,50 @@ int opal_util_register_stackhandlers (void)
}
}
/*
 * Setup the output stream to use, based on opal_stacktrace_output_filename:
 *   unset or "none"  - stack trace output is disabled (fileno = -1, and no
 *                      filename buffer is set up)
 *   "stdout"         - write to stdout
 *   "stderr"         - write to stderr
 *   "file[:name]"    - write to a file; only the filename buffer is prepared
 *                      here and fileno stays -1 for now. "stacktrace" is
 *                      used as the base name if none is given.
 *   anything else    - fall back to stderr
 * All keyword comparisons are case-insensitive.
 */
if( NULL == opal_stacktrace_output_filename ||
    0 == strcasecmp(opal_stacktrace_output_filename, "none") ) {
    opal_stacktrace_output_fileno = -1;
}
else if( 0 == strcasecmp(opal_stacktrace_output_filename, "stdout") ) {
    opal_stacktrace_output_fileno = fileno(stdout);
}
else if( 0 == strcasecmp(opal_stacktrace_output_filename, "stderr") ) {
    opal_stacktrace_output_fileno = fileno(stderr);
}
else if( 0 == strncasecmp(opal_stacktrace_output_filename, "file", 4) &&
         ('\0' == opal_stacktrace_output_filename[4] ||
          ':'  == opal_stacktrace_output_filename[4]) ) {
    const char *requested_base = "stacktrace";
    char *base_copy = NULL;
    char *name_buf = NULL;
    size_t name_buf_len = 0;

    /* "file:<name>" selects a user supplied base name; a bare "file" or
     * "file:" keeps the default. */
    if( ':' == opal_stacktrace_output_filename[4] &&
        '\0' != opal_stacktrace_output_filename[5] ) {
        requested_base = &opal_stacktrace_output_filename[5];
    }

    /* Copy the base name before releasing the parameter string, since
     * requested_base may point into it. */
    base_copy = strdup(requested_base);
    if( NULL != base_copy ) {
        // Magic number: 8 = space for .PID and .RANK (allow 7 digits each)
        name_buf_len = strlen(base_copy) + 8 + 8;
        name_buf = (char*)malloc(sizeof(char) * name_buf_len);
    }

    if( NULL == name_buf ) {
        /* Out of memory: leave the file machinery disabled and use stderr
         * rather than formatting a filename into a NULL buffer. */
        free(base_copy);
        opal_stacktrace_output_fileno = fileno(stderr);
    }
    else {
        free(opal_stacktrace_output_filename);
        opal_stacktrace_output_filename_base = base_copy;
        opal_stacktrace_output_filename = name_buf;
        opal_stacktrace_output_filename_max_len = name_buf_len;
        set_stacktrace_filename();
        /* No descriptor yet: a non-zero max_len marks file output */
        opal_stacktrace_output_fileno = -1;
    }
}
else {
    opal_stacktrace_output_fileno = fileno(stderr);
}
/* Setup the signals to catch */
memset(&act, 0, sizeof(act));
act.sa_sigaction = show_stackframe;
act.sa_flags = SA_SIGINFO;

Просмотреть файл

@ -10,6 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -24,6 +25,12 @@
#include "opal_config.h"
/*
* File descriptor to be used by the backtrace framework if opal_backtrace_print
* is passed NULL for its FILE file pointer.
*/
extern int opal_stacktrace_output_fileno;
/**
* Output the current stack trace (not including the call to this
* function) to the stream indicated.