From 1e93437cd4b3b5d220e430d7c9b090c67e77aee7 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sat, 16 Oct 2010 03:29:47 +0000 Subject: [PATCH] To help with debugging, add a new mca param that instructs ORTE_ERROR_LOG to output "silent" errors. Helps to track down silent errors that don't have an associated error message (e.g., via show_help). This commit was SVN r23893. --- orte/mca/errmgr/base/errmgr_base_fns.c | 8 ++++++-- orte/runtime/orte_globals.c | 1 + orte/runtime/orte_globals.h | 1 + orte/runtime/orte_mca_params.c | 6 +++++- orte/util/error_strings.c | 13 +++++++++++-- 5 files changed, 24 insertions(+), 5 deletions(-) diff --git a/orte/mca/errmgr/base/errmgr_base_fns.c b/orte/mca/errmgr/base/errmgr_base_fns.c index bd85d0acd4..3058d7e1e9 100644 --- a/orte/mca/errmgr/base/errmgr_base_fns.c +++ b/orte/mca/errmgr/base/errmgr_base_fns.c @@ -177,16 +177,20 @@ void orte_errmgr_predicted_map_destruct( orte_errmgr_predicted_map_t *item) */ void orte_errmgr_base_log(int error_code, char *filename, int line) { + char *errstring = NULL; + OPAL_TRACE(1); - if (ORTE_ERR_SILENT == OPAL_SOS_GET_ERROR_CODE(error_code)) { + errstring = (char*)ORTE_ERROR_NAME(error_code); + + if (NULL == errstring) { /* if the error is silent, say nothing */ return; } opal_output(0, "%s ORTE_ERROR_LOG: %s in file %s at line %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_ERROR_NAME(error_code), filename, line); + errstring, filename, line); } void orte_errmgr_base_abort(int error_code, char *fmt, ...) diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index 1d29a094cf..11d7b56f29 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -194,6 +194,7 @@ bool orte_xml_output = false; FILE *orte_xml_fp = NULL; char *orte_job_ident = NULL; bool orte_execute_quiet = false; +bool orte_report_silent_errors = false; /* See comment in orte/tools/orterun/debuggers.c about this MCA param */ diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index bb2d11d582..a70beeb612 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -56,6 +56,7 @@ ORTE_DECLSPEC extern bool orte_help_want_aggregate; /* instantiated in orte/uti ORTE_DECLSPEC extern char *orte_job_ident; /* instantiated in orte/runtime/orte_globals.c */ ORTE_DECLSPEC extern bool orte_create_session_dirs; /* instantiated in orte/runtime/orte_init.c */ ORTE_DECLSPEC extern bool orte_execute_quiet; /* instantiated in orte/runtime/orte_globals.c */ +ORTE_DECLSPEC extern bool orte_report_silent_errors; /* instantiated in orte/runtime/orte_globals.c */ /* Shortcut for some commonly used names */ #define ORTE_NAME_WILDCARD (&orte_name_wildcard) diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index 63b52855c8..a1b14ad942 100644 --- a/orte/runtime/orte_mca_params.c +++ b/orte/runtime/orte_mca_params.c @@ -93,7 +93,11 @@ int orte_register_params(void) false, false, (int) false, &value); orte_execute_quiet = OPAL_INT_TO_BOOL(value); - + mca_base_param_reg_int_name("orte", "report_silent_errors", + "Report all errors, including silent ones", + false, false, (int) false, &value); + orte_report_silent_errors = OPAL_INT_TO_BOOL(value); + #if !ORTE_DISABLE_FULL_SUPPORT mca_base_param_reg_int_name("orte", "send_profile", diff --git a/orte/util/error_strings.c b/orte/util/error_strings.c index 4b9b690157..2e11b59e4a 100644 --- a/orte/util/error_strings.c +++ b/orte/util/error_strings.c @@ -26,6 +26,7 @@ #include "opal/util/opal_sos.h" #include "orte/util/error_strings.h" +#include "orte/runtime/orte_globals.h" const char *orte_err2str(int errnum) { @@ -80,7 +81,11 @@ const char *orte_err2str(int errnum) retval = "Multiple applications were specified, but at least one failed to specify the number of processes to run"; break; case ORTE_ERR_SILENT: - retval = NULL; + if (orte_report_silent_errors) { + retval = "Silent error"; + } else { + retval = NULL; + } break; case ORTE_ERR_ADDRESSEE_UNKNOWN: retval = "A message is attempting to be sent to a process whose contact information is unknown"; @@ -136,7 +141,11 @@ const char *orte_err2str(int errnum) default: - retval = NULL; + if (orte_report_silent_errors) { + retval = "Unknown error"; + } else { + retval = NULL; + } } return retval;