From 5686e8119ef524273dcb2e278390589da063feb4 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 20 Sep 2005 17:09:11 +0000 Subject: [PATCH] Move the error name macro to the errmgr framework. Add a second level of tracing. Remove an obsolete file. This commit was SVN r7445. --- opal/util/trace.h | 13 ++++++++--- orte/mca/errmgr/base/errmgr_base_log.c | 32 -------------------------- orte/mca/errmgr/errmgr.h | 23 +++++++++++------- orte/mca/schema/schema_types.h | 7 ------ orte/tools/orterun/orterun.c | 24 +++++++++---------- 5 files changed, 37 insertions(+), 62 deletions(-) delete mode 100644 orte/mca/errmgr/base/errmgr_base_log.c diff --git a/opal/util/trace.h b/opal/util/trace.h index 0edcc98e15..1cdb26ab93 100644 --- a/opal/util/trace.h +++ b/opal/util/trace.h @@ -38,16 +38,23 @@ extern "C" { __func__, __FILE__, __LINE__); \ } while (0) -#define OPAL_TRACE_ARG(verbose, foo) \ +#define OPAL_TRACE_ARG1(verbose, foo) \ do { \ opal_output_verbose(verbose, opal_trace_handle, "TRACE: %s @ %s:%d arg: %lu", \ __func__, __FILE__, __LINE__, (unsigned long)foo); \ } while (0) - + +#define OPAL_TRACE_ARG2(verbose, foo, foo2) \ + do { \ + opal_output_verbose(verbose, opal_trace_handle, "TRACE: %s @ %s:%d arg: %lu\n\t0x%lx", \ + __func__, __FILE__, __LINE__, (unsigned long)(foo), (unsigned long)(foo2)); \ + } while (0) + #else #define OPAL_TRACE(verbose) -#define OPAL_TRACE_ARG(verbose, foo) +#define OPAL_TRACE_ARG1(verbose, foo) +#define OPAL_TRACE_ARG2(verbose, foo, foo2) #endif /* ENABLE_TRACE */ diff --git a/orte/mca/errmgr/base/errmgr_base_log.c b/orte/mca/errmgr/base/errmgr_base_log.c deleted file mode 100644 index cd4ad29826..0000000000 --- a/orte/mca/errmgr/base/errmgr_base_log.c +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "ompi_config.h" - -#include "opal/util/output.h" -#include "runtime/runtime.h" -#include "mca/ns/ns.h" - -#include "mca/errmgr/base/base.h" - - -void mca_errmgr_base_log(char *msg, char *filename, int line) -{ - opal_output(0, "[%lu,%lu,%lu] ORTE_ERROR_LOG: %s in file %s at line %d", - ORTE_NAME_ARGS(orte_process_info.my_name), - msg, filename, line); -} diff --git a/orte/mca/errmgr/errmgr.h b/orte/mca/errmgr/errmgr.h index aa9cab9c91..08a88e52dd 100644 --- a/orte/mca/errmgr/errmgr.h +++ b/orte/mca/errmgr/errmgr.h @@ -3,14 +3,14 @@ * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file: @@ -39,6 +39,13 @@ extern "C" { /* * Macro definitions */ +/* + * These macros and associated error name array are used to output intelligible error + * messages. + */ + +#define ORTE_ERROR_NAME(n) opal_strerror(n) + #define ORTE_ERROR_LOG(n) \ orte_errmgr.log((n), __FILE__, __LINE__) @@ -51,7 +58,7 @@ extern "C" { * Log an error * Log an error that occurred in the runtime environment, and call the "error_detected" * interface to see if further action is required. 
- * + * * @code * orte_errmgr.log("this is an error", __FILE__, __LINE__); * @endcode @@ -65,7 +72,7 @@ typedef void (*orte_errmgr_base_module_log_fn_t)(int error_code, char *filename, * process errors should always be reported through the error_detected interface and * NOT here. The function is called when a message is received from the universe daemon * indicating that another process in the job failed. For now, this function will - * simply cause the local process to gracefully finalize and terminate. + * simply cause the local process to gracefully finalize and terminate. */ typedef void (*orte_errmgr_base_module_proc_aborted_fn_t)(orte_process_name_t *proc); @@ -75,7 +82,7 @@ typedef void (*orte_errmgr_base_module_proc_aborted_fn_t)(orte_process_name_t *p * one or more processes to start. The function decides on the strategy for dealing * with this "incomplete start" situation - for now, it simply orders the resource * manager to terminate the entire job. - * + * * This function is only called by the respective process launcher, which is responsible * for detecting incomplete starts. */ @@ -96,7 +103,7 @@ typedef void (*orte_errmgr_base_module_error_detected_fn_t)(int error_code); * problems occur - i.e., when process status entries change to abnormal termination * values. Process status entries are changed by the appropriate state-of-health monitor * and/or the process launcher, depending upon the stage at which the problem occurs. - * + * * Monitoring of the job begins once the job has reached the "executing" stage. 
Prior * to that time, failure of processes to start are the responsibility of the respective * process launcher - which is expected to call the error manager via the "incomplete @@ -136,7 +143,7 @@ typedef orte_errmgr_base_module_t* (*orte_errmgr_base_component_init_fn_t)( int *priority); typedef int (*orte_errmgr_base_component_finalize_fn_t)(void); - + /* * the standard component data structure */ diff --git a/orte/mca/schema/schema_types.h b/orte/mca/schema/schema_types.h index bb91bd01c7..d2c5703f46 100644 --- a/orte/mca/schema/schema_types.h +++ b/orte/mca/schema/schema_types.h @@ -21,13 +21,6 @@ #include "mca/ns/ns_types.h" #include "opal/util/error.h" -/* - * This macro and array are used to output intelligible error - * messages. - */ - -#define ORTE_ERROR_NAME(n) opal_strerror(n) - /* * Standard names used across the system */ diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index 934a1c4a3b..ce1def96d5 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -4,14 +4,14 @@ * All rights reserved. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -354,10 +354,10 @@ int orterun(int argc, char *argv[]) /* Prep to start the application */ - opal_signal_set(&term_handler, SIGTERM, + opal_signal_set(&term_handler, SIGTERM, signal_callback, NULL); opal_signal_add(&term_handler, NULL); - opal_signal_set(&int_handler, SIGINT, + opal_signal_set(&int_handler, SIGINT, signal_callback, NULL); opal_signal_add(&int_handler, NULL); @@ -436,8 +436,8 @@ static void dump_aborted_procs(orte_jobid_t jobid) NULL }; - OPAL_TRACE_ARG(1, jobid); - + OPAL_TRACE_ARG1(1, jobid); + /* query the job segment on the registry */ if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) { ORTE_ERROR_LOG(rc); @@ -538,8 +538,8 @@ static void dump_aborted_procs(orte_jobid_t jobid) static void job_state_callback(orte_jobid_t jobid, orte_proc_state_t state) { - OPAL_TRACE_ARG(1, jobid); - + OPAL_TRACE_ARG2(1, jobid, state); + OPAL_THREAD_LOCK(&orterun_globals.lock); /* Note that there's only two states that we're interested in @@ -588,7 +588,7 @@ static void job_state_callback(orte_jobid_t jobid, orte_proc_state_t state) static void exit_callback(int fd, short event, void *arg) { OPAL_TRACE(1); - + opal_show_help("help-orterun.txt", "orterun:abnormal-exit", true, orterun_basename, orterun_basename); @@ -616,9 +616,9 @@ static void signal_callback(int fd, short flags, void *arg) opal_event_t* event; static int signalled = 0; - + OPAL_TRACE(1); - + if (0 != signalled++) { return; }