1
1

Move the error name macro to the errmgr framework. Add a second level of tracing. Remove an obsolete file.

This commit was SVN r7445.
Этот коммит содержится в:
Ralph Castain 2005-09-20 17:09:11 +00:00
родитель e4985c2a07
Коммит 5686e8119e
5 изменённых файлов: 37 добавлений и 62 удалений

Просмотреть файл

@ -38,16 +38,23 @@ extern "C" {
__func__, __FILE__, __LINE__); \ __func__, __FILE__, __LINE__); \
} while (0) } while (0)
#define OPAL_TRACE_ARG(verbose, foo) \ #define OPAL_TRACE_ARG1(verbose, foo) \
do { \ do { \
opal_output_verbose(verbose, opal_trace_handle, "TRACE: %s @ %s:%d arg: %lu", \ opal_output_verbose(verbose, opal_trace_handle, "TRACE: %s @ %s:%d arg: %lu", \
__func__, __FILE__, __LINE__, (unsigned long)foo); \ __func__, __FILE__, __LINE__, (unsigned long)foo); \
} while (0) } while (0)
/*
 * Trace the current location together with two caller-supplied values.
 *
 * Emits one message through opal_output_verbose() on opal_trace_handle at
 * the given verbosity level.  The message carries the enclosing function
 * name, source file and line, then the first value in decimal and, on a
 * new tab-indented line, the second value in hexadecimal.
 *
 * Both values are cast to unsigned long before formatting, so the
 * conversion specifiers must carry the 'l' length modifier (%lu / %lx);
 * using plain %x with an unsigned long argument is undefined behaviour.
 * The arguments are parenthesized so that expression arguments are cast
 * as a whole.
 */
#define OPAL_TRACE_ARG2(verbose, foo, foo2) \
    do { \
        opal_output_verbose((verbose), opal_trace_handle, \
                            "TRACE: %s @ %s:%d arg: %lu\n\t0x%lx", \
                            __func__, __FILE__, __LINE__, \
                            (unsigned long)(foo), (unsigned long)(foo2)); \
    } while (0)
#else #else
#define OPAL_TRACE(verbose) #define OPAL_TRACE(verbose)
#define OPAL_TRACE_ARG(verbose, foo) #define OPAL_TRACE_ARG1(verbose, foo)
#define OPAL_TRACE_ARG2(verbose, foo, foo2)
#endif /* ENABLE_TRACE */ #endif /* ENABLE_TRACE */

Просмотреть файл

@ -1,32 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/util/output.h"
#include "runtime/runtime.h"
#include "mca/ns/ns.h"
#include "mca/errmgr/base/base.h"
/**
 * Write an ORTE_ERROR_LOG line to opal_output stream 0.
 *
 * The line is prefixed with three unsigned values produced by
 * ORTE_NAME_ARGS() from orte_process_info.my_name (presumably the
 * fields of this process's name -- confirm against the macro
 * definition), followed by the message and the source location.
 *
 * @param msg       Text describing the error.
 * @param filename  Source file in which the error was logged.
 * @param line      Line number within that file.
 */
void mca_errmgr_base_log(char *msg, char *filename, int line)
{
    opal_output(0,
                "[%lu,%lu,%lu] ORTE_ERROR_LOG: %s in file %s at line %d",
                ORTE_NAME_ARGS(orte_process_info.my_name),
                msg,
                filename,
                line);
}

Просмотреть файл

@ -3,14 +3,14 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved. * All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
* *
* $HEADER$ * $HEADER$
*/ */
/** @file: /** @file:
@ -39,6 +39,13 @@ extern "C" {
/* /*
* Macro definitions * Macro definitions
*/ */
/*
* These macros and associated error name array are used to output intelligible error
* messages.
*/
#define ORTE_ERROR_NAME(n) opal_strerror(n)
#define ORTE_ERROR_LOG(n) \ #define ORTE_ERROR_LOG(n) \
orte_errmgr.log((n), __FILE__, __LINE__) orte_errmgr.log((n), __FILE__, __LINE__)
@ -51,7 +58,7 @@ extern "C" {
* Log an error * Log an error
* Log an error that occurred in the runtime environment, and call the "error_detected" * Log an error that occurred in the runtime environment, and call the "error_detected"
* interface to see if further action is required. * interface to see if further action is required.
* *
* @code * @code
* orte_errmgr.log("this is an error", __FILE__, __LINE__); * orte_errmgr.log("this is an error", __FILE__, __LINE__);
* @endcode * @endcode
@ -65,7 +72,7 @@ typedef void (*orte_errmgr_base_module_log_fn_t)(int error_code, char *filename,
* process errors should always be reported through the error_detected interface and * process errors should always be reported through the error_detected interface and
* NOT here. The function is called when a message is received from the universe daemon * NOT here. The function is called when a message is received from the universe daemon
* indicating that another process in the job failed. For now, this function will * indicating that another process in the job failed. For now, this function will
* simply cause the local process to gracefully finalize and terminate. * simply cause the local process to gracefully finalize and terminate.
*/ */
typedef void (*orte_errmgr_base_module_proc_aborted_fn_t)(orte_process_name_t *proc); typedef void (*orte_errmgr_base_module_proc_aborted_fn_t)(orte_process_name_t *proc);
@ -75,7 +82,7 @@ typedef void (*orte_errmgr_base_module_proc_aborted_fn_t)(orte_process_name_t *p
* one or more processes to start. The function decides on the strategy for dealing * one or more processes to start. The function decides on the strategy for dealing
* with this "incomplete start" situation - for now, it simply orders the resource * with this "incomplete start" situation - for now, it simply orders the resource
* manager to terminate the entire job. * manager to terminate the entire job.
* *
* This function is only called by the respective process launcher, which is responsible * This function is only called by the respective process launcher, which is responsible
* for detecting incomplete starts. * for detecting incomplete starts.
*/ */
@ -96,7 +103,7 @@ typedef void (*orte_errmgr_base_module_error_detected_fn_t)(int error_code);
* problems occur - i.e., when process status entries change to abnormal termination * problems occur - i.e., when process status entries change to abnormal termination
* values. Process status entries are changed by the appropriate state-of-health monitor * values. Process status entries are changed by the appropriate state-of-health monitor
* and/or the process launcher, depending upon the stage at which the problem occurs. * and/or the process launcher, depending upon the stage at which the problem occurs.
* *
* Monitoring of the job begins once the job has reached the "executing" stage. Prior * Monitoring of the job begins once the job has reached the "executing" stage. Prior
* to that time, failure of processes to start are the responsibility of the respective * to that time, failure of processes to start are the responsibility of the respective
* process launcher - which is expected to call the error manager via the "incomplete * process launcher - which is expected to call the error manager via the "incomplete
@ -136,7 +143,7 @@ typedef orte_errmgr_base_module_t* (*orte_errmgr_base_component_init_fn_t)(
int *priority); int *priority);
typedef int (*orte_errmgr_base_component_finalize_fn_t)(void); typedef int (*orte_errmgr_base_component_finalize_fn_t)(void);
/* /*
* the standard component data structure * the standard component data structure
*/ */

Просмотреть файл

@ -21,13 +21,6 @@
#include "mca/ns/ns_types.h" #include "mca/ns/ns_types.h"
#include "opal/util/error.h" #include "opal/util/error.h"
/*
* This macro and array are used to output intelligible error
* messages.
*/
#define ORTE_ERROR_NAME(n) opal_strerror(n)
/* /*
* Standard names used across the system * Standard names used across the system
*/ */

Просмотреть файл

@ -4,14 +4,14 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee. * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved. * All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
* *
* $HEADER$ * $HEADER$
*/ */
@ -354,10 +354,10 @@ int orterun(int argc, char *argv[])
/* Prep to start the application */ /* Prep to start the application */
opal_signal_set(&term_handler, SIGTERM, opal_signal_set(&term_handler, SIGTERM,
signal_callback, NULL); signal_callback, NULL);
opal_signal_add(&term_handler, NULL); opal_signal_add(&term_handler, NULL);
opal_signal_set(&int_handler, SIGINT, opal_signal_set(&int_handler, SIGINT,
signal_callback, NULL); signal_callback, NULL);
opal_signal_add(&int_handler, NULL); opal_signal_add(&int_handler, NULL);
@ -436,8 +436,8 @@ static void dump_aborted_procs(orte_jobid_t jobid)
NULL NULL
}; };
OPAL_TRACE_ARG(1, jobid); OPAL_TRACE_ARG1(1, jobid);
/* query the job segment on the registry */ /* query the job segment on the registry */
if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) { if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -538,8 +538,8 @@ static void dump_aborted_procs(orte_jobid_t jobid)
static void job_state_callback(orte_jobid_t jobid, orte_proc_state_t state) static void job_state_callback(orte_jobid_t jobid, orte_proc_state_t state)
{ {
OPAL_TRACE_ARG(1, jobid); OPAL_TRACE_ARG2(1, jobid, state);
OPAL_THREAD_LOCK(&orterun_globals.lock); OPAL_THREAD_LOCK(&orterun_globals.lock);
/* Note that there's only two states that we're interested in /* Note that there's only two states that we're interested in
@ -588,7 +588,7 @@ static void job_state_callback(orte_jobid_t jobid, orte_proc_state_t state)
static void exit_callback(int fd, short event, void *arg) static void exit_callback(int fd, short event, void *arg)
{ {
OPAL_TRACE(1); OPAL_TRACE(1);
opal_show_help("help-orterun.txt", "orterun:abnormal-exit", opal_show_help("help-orterun.txt", "orterun:abnormal-exit",
true, orterun_basename, orterun_basename); true, orterun_basename, orterun_basename);
@ -616,9 +616,9 @@ static void signal_callback(int fd, short flags, void *arg)
opal_event_t* event; opal_event_t* event;
static int signalled = 0; static int signalled = 0;
OPAL_TRACE(1); OPAL_TRACE(1);
if (0 != signalled++) { if (0 != signalled++) {
return; return;
} }