stacktrace: Add flexibility in stacktrace output
- New MCA option: opal_stacktrace_output - Specifies where the stack trace output stream goes. - Accepts: none, stdout, stderr, file[:filename] - Default filename 'stacktrace' - Filename will be `stacktrace.PID`, or if VPID is available, then the filename will be `stacktrace.VPID.PID` - Update util/stacktrace to allow for different output avenues including files. Previously this was hardcoded to 'stderr'. - Since opal_backtrace_print needs to be signal safe, passing it a FILE object that actually represents a file stream is difficult. This is because we cannot open the file in the signal handler using `fopen` (not safe), but have to use `open` (safe). Additionally, we cannot use `fdopen` to convert the `int fd` to a `FILE *fh` since it is also not signal safe. - I did not want to break the backtrace.h API so I introduced a new rule (documented in `backtrace.c`) that if the `FILE *file` argument is `NULL` then look for the `opal_stacktrace_output_fileno` variable to tell you which file descriptor to use for output. Signed-off-by: Joshua Hursey <jhursey@us.ibm.com>
Этот коммит содержится в:
родитель
f8918e37a9
Коммит
6d98559be9
@ -12,6 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -26,6 +27,7 @@
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/util/stacktrace.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
@ -39,6 +41,8 @@ BEGIN_C_DECLS
|
||||
/*
|
||||
* Print back trace to FILE file with a prefix for each line.
|
||||
* The first 'strip' lines of the trace are not printed.
|
||||
* If 'file' is NULL then the component should try to use the file descriptor
|
||||
* saved in opal_stacktrace_output_fileno
|
||||
*
|
||||
* \note some attempts made to be signal safe.
|
||||
*/
|
||||
|
@ -10,6 +10,7 @@
|
||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -34,12 +35,16 @@
|
||||
int
|
||||
opal_backtrace_print(FILE *file, char *prefix, int strip)
|
||||
{
|
||||
int i, fd, len;
|
||||
int i, len;
|
||||
int trace_size;
|
||||
void * trace[32];
|
||||
char buf[6];
|
||||
int fd = opal_stacktrace_output_fileno;
|
||||
|
||||
if( NULL != file ) {
|
||||
fd = fileno(file);
|
||||
}
|
||||
|
||||
fd = fileno (file);
|
||||
if (-1 == fd) {
|
||||
return OPAL_ERR_BAD_PARAM;
|
||||
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -27,7 +28,13 @@
|
||||
int
|
||||
opal_backtrace_print(FILE *file, char *prefix, int strip)
|
||||
{
|
||||
printstack(fileno(file));
|
||||
int fd = opal_stacktrace_output_fileno;
|
||||
|
||||
if( NULL != file ) {
|
||||
fd = fileno(file);
|
||||
}
|
||||
|
||||
printstack(fd);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
@ -21,6 +21,7 @@
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -48,6 +49,7 @@
|
||||
#include "opal/util/timings.h"
|
||||
|
||||
char *opal_signal_string = NULL;
|
||||
char *opal_stacktrace_output_filename = NULL;
|
||||
char *opal_net_private_ipv4 = NULL;
|
||||
char *opal_set_max_sys_limits = NULL;
|
||||
|
||||
@ -76,6 +78,7 @@ static bool opal_register_done = false;
|
||||
int opal_register_params(void)
|
||||
{
|
||||
int ret;
|
||||
char *string = NULL;
|
||||
|
||||
if (opal_register_done) {
|
||||
return OPAL_SUCCESS;
|
||||
@ -87,7 +90,6 @@ int opal_register_params(void)
|
||||
* This string is going to be used in opal/util/stacktrace.c
|
||||
*/
|
||||
{
|
||||
char *string = NULL;
|
||||
int j;
|
||||
int signals[] = {
|
||||
#ifdef SIGABRT
|
||||
@ -127,6 +129,28 @@ int opal_register_params(void)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Where should the stack trace output be directed
|
||||
* This string is going to be used in opal/util/stacktrace.c
|
||||
*/
|
||||
string = strdup("stderr");
|
||||
opal_stacktrace_output_filename = string;
|
||||
ret = mca_base_var_register ("opal", "opal", NULL, "stacktrace_output",
|
||||
"Specifies where the stack trace output stream goes. "
|
||||
"Accepts one of the following: none (disabled), stderr (default), stdout, file[:filename]. "
|
||||
"If 'filename' is not specified, a default filename of 'stacktrace' is used. "
|
||||
"The 'filename' is appended with either '.PID' or '.RANK.PID', if RANK is available. "
|
||||
"The 'filename' can be an absolute path or a relative path to the current working directory.",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&opal_stacktrace_output_filename);
|
||||
free (string);
|
||||
if (0 > ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
#if defined(HAVE_SCHED_YIELD)
|
||||
opal_progress_yield_when_idle = false;
|
||||
ret = mca_base_var_register ("opal", "opal", "progress", "yield_when_idle",
|
||||
|
@ -18,6 +18,7 @@
|
||||
* Copyright (c) 2014 Hochschule Esslingen. All rights reserved.
|
||||
* Copyright (c) 2015 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -29,6 +30,7 @@
|
||||
#define OPAL_PARAMS_H
|
||||
|
||||
extern char *opal_signal_string;
|
||||
extern char *opal_stacktrace_output_filename;
|
||||
extern char *opal_net_private_ipv4;
|
||||
extern char *opal_set_max_sys_limits;
|
||||
|
||||
|
@ -25,6 +25,15 @@
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_STAT_H
|
||||
#include <sys/stat.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_FCNTL_H
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
#include <signal.h>
|
||||
@ -35,6 +44,7 @@
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/proc.h"
|
||||
#include "opal/runtime/opal_params.h"
|
||||
|
||||
#ifndef _NSIG
|
||||
@ -43,9 +53,35 @@
|
||||
|
||||
#define HOSTFORMAT "[%s:%05d] "
|
||||
|
||||
int opal_stacktrace_output_fileno = -1;
|
||||
static char *opal_stacktrace_output_filename_base = NULL;
|
||||
static size_t opal_stacktrace_output_filename_max_len = 0;
|
||||
static char stacktrace_hostname[OPAL_MAXHOSTNAMELEN];
|
||||
static char *unable_to_print_msg = "Unable to print stack trace!\n";
|
||||
|
||||
/*
|
||||
* Set the stacktrace filename:
|
||||
* stacktrace.PID
|
||||
* -or, if VPID is available-
|
||||
* stacktrace.VPID.PID
|
||||
*/
|
||||
/*
 * Rebuild opal_stacktrace_output_filename in place from
 * opal_stacktrace_output_filename_base plus a ".PID" or ".VPID.PID" suffix.
 * The write is bounded by opal_stacktrace_output_filename_max_len, so a
 * name longer than that is truncated by snprintf rather than overflowing.
 * The destination buffer must already have been allocated by the caller.
 *
 * NOTE(review): this is also called from show_stackframe, which runs as a
 * signal handler; snprintf is not on the POSIX async-signal-safe list -
 * confirm this is an accepted risk.
 */
static void set_stacktrace_filename(void) {
|
||||
opal_proc_t *my_proc = opal_proc_local_get();
|
||||
|
||||
/* No local proc object available: only the PID can be appended. */
if( NULL == my_proc ) {
|
||||
snprintf(opal_stacktrace_output_filename, opal_stacktrace_output_filename_max_len,
|
||||
"%s.%lu",
|
||||
opal_stacktrace_output_filename_base, (unsigned long)getpid());
|
||||
}
|
||||
/* Local proc object available: append its VPID, then the PID. */
else {
|
||||
snprintf(opal_stacktrace_output_filename, opal_stacktrace_output_filename_max_len,
|
||||
"%s.%lu.%lu",
|
||||
opal_stacktrace_output_filename_base, (unsigned long)my_proc->proc_name.vpid, (unsigned long)getpid());
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/**
|
||||
* This function is being called as a signal-handler in response
|
||||
* to a user-specified signal (e.g. SIGFPE or SIGSEGV).
|
||||
@ -69,12 +105,37 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
|
||||
int ret;
|
||||
char *si_code_str = "";
|
||||
|
||||
/* Do not print the stack trace */
|
||||
if( 0 > opal_stacktrace_output_fileno && 0 == opal_stacktrace_output_filename_max_len ) {
|
||||
/* Raise the signal again, so we don't accidentally mask critical signals.
|
||||
* For critical signals, it is preferred that we call 'raise' instead of
|
||||
* 'exit' or 'abort' so that the return status is set properly for this
|
||||
* process.
|
||||
*/
|
||||
signal(signo, SIG_DFL);
|
||||
raise(signo);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/* Update the file name with the RANK, if available */
|
||||
if( 0 < opal_stacktrace_output_filename_max_len ) {
|
||||
set_stacktrace_filename();
|
||||
opal_stacktrace_output_fileno = open(opal_stacktrace_output_filename,
|
||||
O_CREAT|O_WRONLY|O_TRUNC, S_IRUSR|S_IWUSR);
|
||||
if( 0 > opal_stacktrace_output_fileno ) {
|
||||
opal_output(0, "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s",
|
||||
opal_stacktrace_output_filename, strerror(errno));
|
||||
opal_stacktrace_output_fileno = fileno(stderr);
|
||||
}
|
||||
}
|
||||
|
||||
/* write out the header information */
|
||||
memset (print_buffer, 0, sizeof (print_buffer));
|
||||
ret = snprintf(print_buffer, sizeof(print_buffer),
|
||||
HOSTFORMAT "*** Process received signal ***\n",
|
||||
stacktrace_hostname, getpid());
|
||||
write(fileno(stderr), print_buffer, ret);
|
||||
write(opal_stacktrace_output_fileno, print_buffer, ret);
|
||||
|
||||
|
||||
memset (print_buffer, 0, sizeof (print_buffer));
|
||||
@ -324,14 +385,14 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
|
||||
}
|
||||
|
||||
/* write out the signal information generated above */
|
||||
write(fileno(stderr), print_buffer, sizeof(print_buffer)-size);
|
||||
write(opal_stacktrace_output_fileno, print_buffer, sizeof(print_buffer)-size);
|
||||
|
||||
/* print out the stack trace */
|
||||
snprintf(print_buffer, sizeof(print_buffer), HOSTFORMAT,
|
||||
stacktrace_hostname, getpid());
|
||||
ret = opal_backtrace_print(stderr, print_buffer, 2);
|
||||
ret = opal_backtrace_print(NULL, print_buffer, 2);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
write(fileno(stderr), unable_to_print_msg, strlen(unable_to_print_msg));
|
||||
write(opal_stacktrace_output_fileno, unable_to_print_msg, strlen(unable_to_print_msg));
|
||||
}
|
||||
|
||||
/* write out the footer information */
|
||||
@ -340,9 +401,15 @@ static void show_stackframe (int signo, siginfo_t * info, void * p)
|
||||
HOSTFORMAT "*** End of error message ***\n",
|
||||
stacktrace_hostname, getpid());
|
||||
if (ret > 0) {
|
||||
write(fileno(stderr), print_buffer, ret);
|
||||
write(opal_stacktrace_output_fileno, print_buffer, ret);
|
||||
} else {
|
||||
write(fileno(stderr), unable_to_print_msg, strlen(unable_to_print_msg));
|
||||
write(opal_stacktrace_output_fileno, unable_to_print_msg, strlen(unable_to_print_msg));
|
||||
}
|
||||
|
||||
if( fileno(stdout) != opal_stacktrace_output_fileno &&
|
||||
fileno(stderr) != opal_stacktrace_output_fileno ) {
|
||||
close(opal_stacktrace_output_fileno);
|
||||
opal_stacktrace_output_fileno = -1;
|
||||
}
|
||||
|
||||
/* Raise the signal again, so we don't accidentally mask critical signals.
|
||||
@ -373,7 +440,30 @@ void opal_stackframe_output(int stream)
|
||||
opal_output(stream, "%s", traces[i]);
|
||||
}
|
||||
} else {
|
||||
opal_backtrace_print(stderr, NULL, 2);
|
||||
/* Do not print the stack trace */
|
||||
if( 0 > opal_stacktrace_output_fileno && 0 == opal_stacktrace_output_filename_max_len ) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Update the file name with the RANK, if available */
|
||||
if( 0 < opal_stacktrace_output_filename_max_len ) {
|
||||
set_stacktrace_filename();
|
||||
opal_stacktrace_output_fileno = open(opal_stacktrace_output_filename,
|
||||
O_CREAT|O_WRONLY|O_TRUNC, S_IRUSR|S_IWUSR);
|
||||
if( 0 > opal_stacktrace_output_fileno ) {
|
||||
opal_output(0, "Error: Failed to open the stacktrace output file. Default: stderr\n\tFilename: %s\n\tErrno: %s",
|
||||
opal_stacktrace_output_filename, strerror(errno));
|
||||
opal_stacktrace_output_fileno = fileno(stderr);
|
||||
}
|
||||
}
|
||||
|
||||
opal_backtrace_print(NULL, NULL, 2);
|
||||
|
||||
if( fileno(stdout) != opal_stacktrace_output_fileno &&
|
||||
fileno(stderr) != opal_stacktrace_output_fileno ) {
|
||||
close(opal_stacktrace_output_fileno);
|
||||
opal_stacktrace_output_fileno = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -444,6 +534,50 @@ int opal_util_register_stackhandlers (void)
|
||||
}
|
||||
}
|
||||
|
||||
/* Setup the output stream to use */
|
||||
if( NULL == opal_stacktrace_output_filename ||
|
||||
0 == strcasecmp(opal_stacktrace_output_filename, "none") ) {
|
||||
opal_stacktrace_output_fileno = -1;
|
||||
}
|
||||
else if( 0 == strcasecmp(opal_stacktrace_output_filename, "stdout") ) {
|
||||
opal_stacktrace_output_fileno = fileno(stdout);
|
||||
}
|
||||
else if( 0 == strcasecmp(opal_stacktrace_output_filename, "stderr") ) {
|
||||
opal_stacktrace_output_fileno = fileno(stdout);
|
||||
}
|
||||
else if( 0 == strcasecmp(opal_stacktrace_output_filename, "file" ) ||
|
||||
0 == strcasecmp(opal_stacktrace_output_filename, "file:") ) {
|
||||
opal_stacktrace_output_filename_base = strdup("stacktrace");
|
||||
|
||||
free(opal_stacktrace_output_filename);
|
||||
// Magic number: 8 = space for .PID and .RANK (allow 7 digits each)
|
||||
opal_stacktrace_output_filename_max_len = strlen("stacktrace") + 8 + 8;
|
||||
opal_stacktrace_output_filename = (char*)malloc(sizeof(char) * opal_stacktrace_output_filename_max_len);
|
||||
set_stacktrace_filename();
|
||||
opal_stacktrace_output_fileno = -1;
|
||||
}
|
||||
else if( 0 == strncasecmp(opal_stacktrace_output_filename, "file:", 5) ) {
|
||||
char *filename_cpy = NULL;
|
||||
next = strchr(opal_stacktrace_output_filename, ':');
|
||||
next++; // move past the ':' to the filename specified
|
||||
|
||||
opal_stacktrace_output_filename_base = strdup(next);
|
||||
|
||||
free(opal_stacktrace_output_filename);
|
||||
// Magic number: 8 = space for .PID and .RANK (allow 7 digits each)
|
||||
opal_stacktrace_output_filename_max_len = strlen(opal_stacktrace_output_filename_base) + 8 + 8;
|
||||
opal_stacktrace_output_filename = (char*)malloc(sizeof(char) * opal_stacktrace_output_filename_max_len);
|
||||
set_stacktrace_filename();
|
||||
opal_stacktrace_output_fileno = -1;
|
||||
|
||||
free(filename_cpy);
|
||||
}
|
||||
else {
|
||||
opal_stacktrace_output_fileno = fileno(stderr);
|
||||
}
|
||||
|
||||
|
||||
/* Setup the signals to catch */
|
||||
memset(&act, 0, sizeof(act));
|
||||
act.sa_sigaction = show_stackframe;
|
||||
act.sa_flags = SA_SIGINFO;
|
||||
|
@ -10,6 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -24,6 +25,12 @@
|
||||
|
||||
#include "opal_config.h"
|
||||
|
||||
/*
|
||||
* File descriptor to be used by the backtrace framework if opal_backtrace_print
|
||||
* is passed NULL for its FILE file pointer.
|
||||
*/
|
||||
extern int opal_stacktrace_output_fileno;
|
||||
|
||||
/**
|
||||
* Output the current stack trace (not including the call to this
|
||||
* function) to the stream indicated.
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user