Move the error name macro to the errmgr framework. Add a second level of tracing. Remove an obsolete file.
This commit was SVN r7445.
Этот коммит содержится в:
родитель
e4985c2a07
Коммит
5686e8119e
@ -38,16 +38,23 @@ extern "C" {
|
|||||||
__func__, __FILE__, __LINE__); \
|
__func__, __FILE__, __LINE__); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define OPAL_TRACE_ARG(verbose, foo) \
|
#define OPAL_TRACE_ARG1(verbose, foo) \
|
||||||
do { \
|
do { \
|
||||||
opal_output_verbose(verbose, opal_trace_handle, "TRACE: %s @ %s:%d arg: %lu", \
|
opal_output_verbose(verbose, opal_trace_handle, "TRACE: %s @ %s:%d arg: %lu", \
|
||||||
__func__, __FILE__, __LINE__, (unsigned long)foo); \
|
__func__, __FILE__, __LINE__, (unsigned long)foo); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
/*
 * Emit a trace record for the current function, file and line together
 * with two caller-supplied values. Both values are converted to
 * unsigned long; the first is printed in decimal and the second in
 * hexadecimal on the following line. The hexadecimal conversion uses
 * the "l" length modifier so that it matches the unsigned long argument,
 * and the arguments are parenthesized so the cast applies to the whole
 * expression passed in.
 */
#define OPAL_TRACE_ARG2(verbose, foo, foo2) \
    do { \
        opal_output_verbose(verbose, opal_trace_handle, "TRACE: %s @ %s:%d arg: %lu\n\t0x%lx", \
                            __func__, __FILE__, __LINE__, (unsigned long)(foo), (unsigned long)(foo2)); \
    } while (0)
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define OPAL_TRACE(verbose)
|
#define OPAL_TRACE(verbose)
|
||||||
#define OPAL_TRACE_ARG(verbose, foo)
|
#define OPAL_TRACE_ARG1(verbose, foo)
|
||||||
|
#define OPAL_TRACE_ARG2(verbose, foo, foo2)
|
||||||
|
|
||||||
#endif /* ENABLE_TRACE */
|
#endif /* ENABLE_TRACE */
|
||||||
|
|
||||||
|
@ -1,32 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
|
||||||
* All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
||||||
* University of Stuttgart. All rights reserved.
|
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
||||||
* All rights reserved.
|
|
||||||
* $COPYRIGHT$
|
|
||||||
*
|
|
||||||
* Additional copyrights may follow
|
|
||||||
*
|
|
||||||
* $HEADER$
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#include "ompi_config.h"
|
|
||||||
|
|
||||||
#include "opal/util/output.h"
|
|
||||||
#include "runtime/runtime.h"
|
|
||||||
#include "mca/ns/ns.h"
|
|
||||||
|
|
||||||
#include "mca/errmgr/base/base.h"
|
|
||||||
|
|
||||||
|
|
||||||
void mca_errmgr_base_log(char *msg, char *filename, int line)
|
|
||||||
{
|
|
||||||
opal_output(0, "[%lu,%lu,%lu] ORTE_ERROR_LOG: %s in file %s at line %d",
|
|
||||||
ORTE_NAME_ARGS(orte_process_info.my_name),
|
|
||||||
msg, filename, line);
|
|
||||||
}
|
|
@ -3,14 +3,14 @@
|
|||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
* University of Stuttgart. All rights reserved.
|
* University of Stuttgart. All rights reserved.
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
*
|
*
|
||||||
* $HEADER$
|
* $HEADER$
|
||||||
*/
|
*/
|
||||||
/** @file:
|
/** @file:
|
||||||
@ -39,6 +39,13 @@ extern "C" {
|
|||||||
/*
|
/*
|
||||||
* Macro definitions
|
* Macro definitions
|
||||||
*/
|
*/
|
||||||
|
/*
|
||||||
|
* These macros and associated error name array are used to output intelligible error
|
||||||
|
* messages.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define ORTE_ERROR_NAME(n) opal_strerror(n)
|
||||||
|
|
||||||
#define ORTE_ERROR_LOG(n) \
|
#define ORTE_ERROR_LOG(n) \
|
||||||
orte_errmgr.log((n), __FILE__, __LINE__)
|
orte_errmgr.log((n), __FILE__, __LINE__)
|
||||||
|
|
||||||
@ -51,7 +58,7 @@ extern "C" {
|
|||||||
* Log an error
|
* Log an error
|
||||||
* Log an error that occurred in the runtime environment, and call the "error_detected"
|
* Log an error that occurred in the runtime environment, and call the "error_detected"
|
||||||
* interface to see if further action is required.
|
* interface to see if further action is required.
|
||||||
*
|
*
|
||||||
* @code
|
* @code
|
||||||
* orte_errmgr.log("this is an error", __FILE__, __LINE__);
|
* orte_errmgr.log("this is an error", __FILE__, __LINE__);
|
||||||
* @endcode
|
* @endcode
|
||||||
@ -65,7 +72,7 @@ typedef void (*orte_errmgr_base_module_log_fn_t)(int error_code, char *filename,
|
|||||||
* process errors should always be reported through the error_detected interface and
|
* process errors should always be reported through the error_detected interface and
|
||||||
* NOT here. The function is called when a message is received from the universe daemon
|
* NOT here. The function is called when a message is received from the universe daemon
|
||||||
* indicating that another process in the job failed. For now, this function will
|
* indicating that another process in the job failed. For now, this function will
|
||||||
* simply cause the local process to gracefully finalize and terminate.
|
* simply cause the local process to gracefully finalize and terminate.
|
||||||
*/
|
*/
|
||||||
typedef void (*orte_errmgr_base_module_proc_aborted_fn_t)(orte_process_name_t *proc);
|
typedef void (*orte_errmgr_base_module_proc_aborted_fn_t)(orte_process_name_t *proc);
|
||||||
|
|
||||||
@ -75,7 +82,7 @@ typedef void (*orte_errmgr_base_module_proc_aborted_fn_t)(orte_process_name_t *p
|
|||||||
* one or more processes to start. The function decides on the strategy for dealing
|
* one or more processes to start. The function decides on the strategy for dealing
|
||||||
* with this "incomplete start" situation - for now, it simply orders the resource
|
* with this "incomplete start" situation - for now, it simply orders the resource
|
||||||
* manager to terminate the entire job.
|
* manager to terminate the entire job.
|
||||||
*
|
*
|
||||||
* This function is only called by the respective process launcher, which is responsible
|
* This function is only called by the respective process launcher, which is responsible
|
||||||
* for detecting incomplete starts.
|
* for detecting incomplete starts.
|
||||||
*/
|
*/
|
||||||
@ -96,7 +103,7 @@ typedef void (*orte_errmgr_base_module_error_detected_fn_t)(int error_code);
|
|||||||
* problems occur - i.e., when process status entries change to abnormal termination
|
* problems occur - i.e., when process status entries change to abnormal termination
|
||||||
* values. Process status entries are changed by the appropriate state-of-health monitor
|
* values. Process status entries are changed by the appropriate state-of-health monitor
|
||||||
* and/or the process launcher, depending upon the stage at which the problem occurs.
|
* and/or the process launcher, depending upon the stage at which the problem occurs.
|
||||||
*
|
*
|
||||||
* Monitoring of the job begins once the job has reached the "executing" stage. Prior
|
* Monitoring of the job begins once the job has reached the "executing" stage. Prior
|
||||||
* to that time, failure of processes to start are the responsibility of the respective
|
* to that time, failure of processes to start are the responsibility of the respective
|
||||||
* process launcher - which is expected to call the error manager via the "incomplete
|
* process launcher - which is expected to call the error manager via the "incomplete
|
||||||
@ -136,7 +143,7 @@ typedef orte_errmgr_base_module_t* (*orte_errmgr_base_component_init_fn_t)(
|
|||||||
int *priority);
|
int *priority);
|
||||||
|
|
||||||
typedef int (*orte_errmgr_base_component_finalize_fn_t)(void);
|
typedef int (*orte_errmgr_base_component_finalize_fn_t)(void);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* the standard component data structure
|
* the standard component data structure
|
||||||
*/
|
*/
|
||||||
|
@ -21,13 +21,6 @@
|
|||||||
#include "mca/ns/ns_types.h"
|
#include "mca/ns/ns_types.h"
|
||||||
#include "opal/util/error.h"
|
#include "opal/util/error.h"
|
||||||
|
|
||||||
/*
|
|
||||||
* This macro and array are used to output intelligible error
|
|
||||||
* messages.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define ORTE_ERROR_NAME(n) opal_strerror(n)
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Standard names used across the system
|
* Standard names used across the system
|
||||||
*/
|
*/
|
||||||
|
@ -4,14 +4,14 @@
|
|||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||||
* University of Stuttgart. All rights reserved.
|
* University of Stuttgart. All rights reserved.
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
*
|
*
|
||||||
* $HEADER$
|
* $HEADER$
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -354,10 +354,10 @@ int orterun(int argc, char *argv[])
|
|||||||
|
|
||||||
/* Prep to start the application */
|
/* Prep to start the application */
|
||||||
|
|
||||||
opal_signal_set(&term_handler, SIGTERM,
|
opal_signal_set(&term_handler, SIGTERM,
|
||||||
signal_callback, NULL);
|
signal_callback, NULL);
|
||||||
opal_signal_add(&term_handler, NULL);
|
opal_signal_add(&term_handler, NULL);
|
||||||
opal_signal_set(&int_handler, SIGINT,
|
opal_signal_set(&int_handler, SIGINT,
|
||||||
signal_callback, NULL);
|
signal_callback, NULL);
|
||||||
opal_signal_add(&int_handler, NULL);
|
opal_signal_add(&int_handler, NULL);
|
||||||
|
|
||||||
@ -436,8 +436,8 @@ static void dump_aborted_procs(orte_jobid_t jobid)
|
|||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
|
|
||||||
OPAL_TRACE_ARG(1, jobid);
|
OPAL_TRACE_ARG1(1, jobid);
|
||||||
|
|
||||||
/* query the job segment on the registry */
|
/* query the job segment on the registry */
|
||||||
if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
|
if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
@ -538,8 +538,8 @@ static void dump_aborted_procs(orte_jobid_t jobid)
|
|||||||
|
|
||||||
static void job_state_callback(orte_jobid_t jobid, orte_proc_state_t state)
|
static void job_state_callback(orte_jobid_t jobid, orte_proc_state_t state)
|
||||||
{
|
{
|
||||||
OPAL_TRACE_ARG(1, jobid);
|
OPAL_TRACE_ARG2(1, jobid, state);
|
||||||
|
|
||||||
OPAL_THREAD_LOCK(&orterun_globals.lock);
|
OPAL_THREAD_LOCK(&orterun_globals.lock);
|
||||||
|
|
||||||
/* Note that there's only two states that we're interested in
|
/* Note that there's only two states that we're interested in
|
||||||
@ -588,7 +588,7 @@ static void job_state_callback(orte_jobid_t jobid, orte_proc_state_t state)
|
|||||||
static void exit_callback(int fd, short event, void *arg)
|
static void exit_callback(int fd, short event, void *arg)
|
||||||
{
|
{
|
||||||
OPAL_TRACE(1);
|
OPAL_TRACE(1);
|
||||||
|
|
||||||
opal_show_help("help-orterun.txt", "orterun:abnormal-exit",
|
opal_show_help("help-orterun.txt", "orterun:abnormal-exit",
|
||||||
true, orterun_basename, orterun_basename);
|
true, orterun_basename, orterun_basename);
|
||||||
|
|
||||||
@ -616,9 +616,9 @@ static void signal_callback(int fd, short flags, void *arg)
|
|||||||
opal_event_t* event;
|
opal_event_t* event;
|
||||||
|
|
||||||
static int signalled = 0;
|
static int signalled = 0;
|
||||||
|
|
||||||
OPAL_TRACE(1);
|
OPAL_TRACE(1);
|
||||||
|
|
||||||
if (0 != signalled++) {
|
if (0 != signalled++) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user