Merge improvements to the "notifier" framework from the OPAL SOS and the ORTE WDC mercurial branches into the SVN trunk.
A brief description of the improvements can be found at https://svn.open-mpi.org/trac/ompi/wiki/ORTEWDC#ChangesdonetotheORTEnotifier This commit was SVN r23157.
Этот коммит содержится в:
родитель
f5b9bc4ff1
Коммит
9c5860706f
@ -24,7 +24,7 @@ libmca_notifier_la_SOURCES =
|
||||
nobase_orte_HEADERS =
|
||||
|
||||
# local files
|
||||
headers = notifier.h
|
||||
headers = notifier.h notifier_event_types.h notifier_event_calls.h
|
||||
|
||||
libmca_notifier_la_SOURCES += $(headers)
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -22,4 +23,6 @@ headers += \
|
||||
libmca_notifier_la_SOURCES += \
|
||||
base/notifier_base_close.c \
|
||||
base/notifier_base_select.c \
|
||||
base/notifier_base_wrappers.c \
|
||||
base/notifier_base_events.c \
|
||||
base/notifier_base_open.c
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -26,39 +27,95 @@
|
||||
*/
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/class/opal_object.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/notifier/notifier.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* Global functions for MCA overall collective open and close
|
||||
* Type for holding selected module / component pairs
|
||||
*/
|
||||
BEGIN_C_DECLS
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
/* Component */
|
||||
orte_notifier_base_component_t *onbsp_component;
|
||||
/* Module */
|
||||
orte_notifier_base_module_t *onbsp_module;
|
||||
/* Priority */
|
||||
int onbsp_priority;
|
||||
} orte_notifier_base_selected_pair_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(orte_notifier_base_selected_pair_t);
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
|
||||
/*
|
||||
* function definitions
|
||||
*/
|
||||
ORTE_DECLSPEC int orte_notifier_base_open(void);
|
||||
ORTE_DECLSPEC int orte_notifier_base_select(void);
|
||||
ORTE_DECLSPEC int orte_notifier_base_close(void);
|
||||
ORTE_DECLSPEC int orte_notifier_base_open(void);
|
||||
ORTE_DECLSPEC int orte_notifier_base_select(void);
|
||||
ORTE_DECLSPEC void orte_notifier_log(orte_notifier_base_severity_t severity,
|
||||
int errcode,
|
||||
const char *msg, ...);
|
||||
ORTE_DECLSPEC void orte_notifier_show_help(orte_notifier_base_severity_t severity,
|
||||
int errcode,
|
||||
const char *file,
|
||||
const char *topic, ...);
|
||||
ORTE_DECLSPEC void orte_notifier_log_peer(orte_notifier_base_severity_t severity,
|
||||
int errcode,
|
||||
orte_process_name_t *peer_proc,
|
||||
const char *msg, ...);
|
||||
ORTE_DECLSPEC const char* orte_notifier_base_sev2str(orte_notifier_base_severity_t severity);
|
||||
ORTE_DECLSPEC char *orte_notifier_base_peer_log(int errcode,
|
||||
orte_process_name_t *peer_proc,
|
||||
const char *msg, va_list ap);
|
||||
ORTE_DECLSPEC int orte_notifier_base_close(void);
|
||||
|
||||
#if ORTE_WANT_NOTIFIER_LOG_EVENT
|
||||
|
||||
ORTE_DECLSPEC int orte_notifier_base_events_init(void);
|
||||
ORTE_DECLSPEC void orte_notifier_base_events_finalize(void);
|
||||
|
||||
#else /* ORTE_WANT_NOTIFIER_LOG_EVENT */
|
||||
|
||||
#define orte_notifier_base_events_init() do {} while (0)
|
||||
#define orte_notifier_base_events_finalize() do {} while (0)
|
||||
|
||||
#endif /* ORTE_WANT_NOTIFIER_LOG_EVENT */
|
||||
|
||||
/*
|
||||
* globals that might be needed
|
||||
* global variables in the base
|
||||
* Needs to be declspec'ed for ompi_info and others
|
||||
*/
|
||||
|
||||
/*
|
||||
* Indication of whether a component was successfully selected or not
|
||||
* (1 component per interface)
|
||||
*/
|
||||
ORTE_DECLSPEC extern bool orte_notifier_base_log_selected;
|
||||
ORTE_DECLSPEC extern bool orte_notifier_base_help_selected;
|
||||
ORTE_DECLSPEC extern bool orte_notifier_base_log_peer_selected;
|
||||
ORTE_DECLSPEC extern bool orte_notifier_base_log_event_selected;
|
||||
/*
|
||||
* Lists of selected modules (1 per interface)
|
||||
*/
|
||||
ORTE_DECLSPEC extern opal_list_t orte_notifier_log_selected_modules;
|
||||
ORTE_DECLSPEC extern opal_list_t orte_notifier_help_selected_modules;
|
||||
ORTE_DECLSPEC extern opal_list_t orte_notifier_log_peer_selected_modules;
|
||||
ORTE_DECLSPEC extern opal_list_t orte_notifier_log_event_selected_modules;
|
||||
/*
|
||||
* That one is a merge of the per interface lists
|
||||
* It is used during finalize phase to finalize only once each selected module
|
||||
*/
|
||||
ORTE_DECLSPEC extern opal_list_t orte_notifier_base_selected_modules;
|
||||
ORTE_DECLSPEC extern int orte_notifier_base_output;
|
||||
ORTE_DECLSPEC extern int orte_notifier_threshold_severity;
|
||||
ORTE_DECLSPEC extern bool mca_notifier_base_selected;
|
||||
ORTE_DECLSPEC extern opal_list_t mca_notifier_base_components_available;
|
||||
ORTE_DECLSPEC extern orte_notifier_base_component_t mca_notifier_base_selected_component;
|
||||
ORTE_DECLSPEC extern orte_notifier_base_severity_t orte_notifier_threshold_severity;
|
||||
ORTE_DECLSPEC extern opal_list_t orte_notifier_base_components_available;
|
||||
|
||||
#if !ORTE_DISABLE_FULL_SUPPORT
|
||||
|
||||
/* no base functions to protect at this time */
|
||||
|
||||
#endif /* ORTE_DISABLE_FULL_SUPPORT */
|
||||
#endif /* !ORTE_DISABLE_FULL_SUPPORT */
|
||||
|
||||
END_C_DECLS
|
||||
#endif
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -27,19 +28,37 @@
|
||||
|
||||
int orte_notifier_base_close(void)
|
||||
{
|
||||
/* If we have a selected component and module, then finalize it */
|
||||
|
||||
if (NULL != orte_notifier.finalize) {
|
||||
orte_notifier.finalize();
|
||||
opal_list_item_t *item;
|
||||
orte_notifier_base_selected_pair_t *pair;
|
||||
|
||||
if (orte_notifier_base_log_event_selected) {
|
||||
orte_notifier_base_events_finalize();
|
||||
}
|
||||
|
||||
/* Close all remaining available components (may be one if this is a
|
||||
OpenRTE program, or [possibly] multiple if this is ompi_info) */
|
||||
|
||||
|
||||
/* Finalize all the selected modules
|
||||
* orte_notifier_base_selected_modules has been built as a merge of the
|
||||
* per interface selected modules lists, so only going through that list
|
||||
* to invoke the finalize routines is enough.
|
||||
*/
|
||||
for (item = opal_list_remove_first(&orte_notifier_base_selected_modules);
|
||||
NULL != item;
|
||||
item = opal_list_remove_first(&orte_notifier_base_selected_modules)) {
|
||||
pair = (orte_notifier_base_selected_pair_t*) item;
|
||||
if (NULL != pair->onbsp_module->finalize) {
|
||||
pair->onbsp_module->finalize();
|
||||
}
|
||||
free(pair);
|
||||
}
|
||||
OBJ_DESTRUCT(&orte_notifier_base_selected_modules);
|
||||
OBJ_DESTRUCT(&orte_notifier_log_selected_modules);
|
||||
OBJ_DESTRUCT(&orte_notifier_help_selected_modules);
|
||||
OBJ_DESTRUCT(&orte_notifier_log_peer_selected_modules);
|
||||
OBJ_DESTRUCT(&orte_notifier_log_event_selected_modules);
|
||||
|
||||
/* Close all remaining available components */
|
||||
mca_base_components_close(orte_notifier_base_output,
|
||||
&mca_notifier_base_components_available, NULL);
|
||||
&orte_notifier_base_components_available, NULL);
|
||||
|
||||
/* All done */
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
197
orte/mca/notifier/base/notifier_base_events.c
Обычный файл
197
orte/mca/notifier/base/notifier_base_events.c
Обычный файл
@ -0,0 +1,197 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Bull SAS. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/types.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#ifdef HAVE_STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif /* HAVE_STDLIB_H */
|
||||
#ifdef HAVE_STDIO_H
|
||||
#include <stdio.h>
|
||||
#endif /* HAVE_STDIO_H */
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif /* HAVE_SYS_TIME_H */
|
||||
|
||||
#include "opal/class/opal_object.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
|
||||
#if ORTE_WANT_NOTIFIER_LOG_EVENT
|
||||
|
||||
/*
|
||||
* Definitions for the events that are accounted for before being logged.
|
||||
* They are stored in a list to ensure they are all unconditionally traced
|
||||
* out during finalize.
|
||||
*/
|
||||
opal_list_t orte_notifier_events_list;
|
||||
|
||||
|
||||
/*
|
||||
* Log format differs depending on the phase we are in.
|
||||
*/
|
||||
#define ORTE_NOTIFIER_LOG_FORMAT_0 "TIME=%ld MPI_NOTIFIER_EVENT FAMILY=%u JOB=%u VPID=%u HOST=%s EVENT=%d COUNT=%u: %s"
|
||||
|
||||
#define ORTE_NOTIFIER_LOG_FORMAT_1 "TIME=%ld MPI_NOTIFIER_EVENT FAMILY=%u JOB=%u VPID=%u HOST=%s EVENT=%d COUNT=%u (in %ld seconds): %s"
|
||||
|
||||
#define ORTE_NOTIFIER_LOG_FORMAT_2 "TIME=%ld MPI_NOTIFIER_EVENT FAMILY=%u JOB=%u VPID=%u HOST=%s EVENT=%d COUNT=%u (Finalize): %s"
|
||||
|
||||
|
||||
/*
 * Constructor for orte_notifier_event_t: resets the occurrence counter,
 * clears the already-traced flag and leaves the event without a message.
 */
static void orte_notifier_event_construct(orte_notifier_event_t *ev)
{
    ev->ev_msg            = NULL;
    ev->ev_already_traced = 0;
    ev->ev_cnt            = 0;
}
|
||||
|
||||
/*
 * Destructor for orte_notifier_event_t: releases the message string
 * attached to the event, if any.
 */
static void orte_notifier_event_destruct(orte_notifier_event_t *ev)
{
    /* free(NULL) is a no-op, so an event without a message needs no guard */
    free(ev->ev_msg);
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(orte_notifier_event_t,
|
||||
opal_list_item_t,
|
||||
orte_notifier_event_construct,
|
||||
orte_notifier_event_destruct);
|
||||
|
||||
|
||||
int orte_notifier_base_events_init(void)
|
||||
{
|
||||
if (!ORTE_PROC_IS_HNP) {
|
||||
OBJ_CONSTRUCT(&orte_notifier_events_list, opal_list_t);
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
void orte_notifier_base_events_finalize(void)
|
||||
{
|
||||
orte_notifier_event_t *ev;
|
||||
opal_list_item_t *item;
|
||||
int32_t count;
|
||||
|
||||
if (ORTE_PROC_IS_HNP) {
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unconditionally trace any event that has been accounted for
|
||||
*/
|
||||
for (item = opal_list_remove_first(&orte_notifier_events_list);
|
||||
NULL != item;
|
||||
item = opal_list_remove_first(&orte_notifier_events_list)) {
|
||||
ev = (orte_notifier_event_t *) item;
|
||||
if ((count = ev->ev_cnt) && notifier_log_event_enabled()) {
|
||||
notifier_trace_event(ORTE_NOTIFIER_LOG_2, ev->ev_id, count,
|
||||
time(NULL), 0, ev->ev_msg);
|
||||
}
|
||||
OBJ_RELEASE(ev);
|
||||
}
|
||||
|
||||
OBJ_DESTRUCT(&orte_notifier_events_list);
|
||||
}
|
||||
|
||||
/*
|
||||
* log_type indicates whether we are tracing one of the following:
|
||||
* . ORTE_NOTIFIER_LOG_0 --> Very first trace
|
||||
* . ORTE_NOTIFIER_LOG_1 --> Intermediate trace
|
||||
* . ORTE_NOTIFIER_LOG_2 --> during finalize
|
||||
* Depending on the log_type the output format is different.
|
||||
*/
|
||||
void notifier_trace_event(int log_type, int ev_id, int32_t count, time_t t,
|
||||
time_t delay, const char *msg)
|
||||
{
|
||||
opal_list_item_t *item;
|
||||
orte_notifier_base_selected_pair_t *pair;
|
||||
orte_process_name_t *pname = ORTE_PROC_MY_NAME;
|
||||
char *out = NULL;
|
||||
|
||||
switch (log_type) {
|
||||
case ORTE_NOTIFIER_LOG_0:
|
||||
asprintf(&out, ORTE_NOTIFIER_LOG_FORMAT_0, t,
|
||||
ORTE_JOB_FAMILY(pname->jobid),
|
||||
ORTE_LOCAL_JOBID(pname->jobid),
|
||||
pname->vpid,
|
||||
orte_process_info.nodename,
|
||||
ev_id,
|
||||
count,
|
||||
msg);
|
||||
break;
|
||||
case ORTE_NOTIFIER_LOG_1:
|
||||
asprintf(&out, ORTE_NOTIFIER_LOG_FORMAT_1, t,
|
||||
ORTE_JOB_FAMILY(pname->jobid),
|
||||
ORTE_LOCAL_JOBID(pname->jobid),
|
||||
pname->vpid,
|
||||
orte_process_info.nodename,
|
||||
ev_id,
|
||||
count,
|
||||
delay,
|
||||
msg);
|
||||
break;
|
||||
case ORTE_NOTIFIER_LOG_2:
|
||||
asprintf(&out, ORTE_NOTIFIER_LOG_FORMAT_2, t,
|
||||
ORTE_JOB_FAMILY(pname->jobid),
|
||||
ORTE_LOCAL_JOBID(pname->jobid),
|
||||
pname->vpid,
|
||||
orte_process_info.nodename,
|
||||
ev_id,
|
||||
count,
|
||||
msg);
|
||||
break;
|
||||
default:
|
||||
asprintf(&out, "UNKNOWN!!!!!!!!!");
|
||||
break;
|
||||
}
|
||||
|
||||
if (NULL == out) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (item = opal_list_get_first(&orte_notifier_log_event_selected_modules);
|
||||
opal_list_get_end(&orte_notifier_log_event_selected_modules) != item;
|
||||
item = opal_list_get_next(item)) {
|
||||
pair = (orte_notifier_base_selected_pair_t*) item;
|
||||
if (NULL != pair->onbsp_module->log_event) {
|
||||
pair->onbsp_module->log_event(out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Append an event to orte_notifier_events_list, the list that
 * orte_notifier_base_events_finalize() later walks.
 */
void notifier_event_store(orte_notifier_event_t *ev)
{
    opal_list_item_t *node = &ev->super;

    opal_list_append(&orte_notifier_events_list, node);
}
|
||||
|
||||
bool notifier_log_event_enabled(void)
|
||||
{
|
||||
return orte_notifier_base_log_event_selected &&
|
||||
(ORTE_NOTIFIER_NOTICE <= orte_notifier_threshold_severity);
|
||||
}
|
||||
|
||||
#endif /* ORTE_WANT_NOTIFIER_LOG_EVENT */
|
@ -9,7 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -32,7 +32,6 @@
|
||||
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
|
||||
|
||||
/*
|
||||
* The following file was created by configure. It contains extern
|
||||
* statements and the definition of an array of pointers to each
|
||||
@ -41,25 +40,24 @@
|
||||
|
||||
#include "orte/mca/notifier/base/static-components.h"
|
||||
|
||||
static void orte_base_log(int severity, int errcode, const char *msg, ...) __opal_attribute_format__(__printf__, 3, 4);
|
||||
static void orte_log_show_help(int severity, int errcode, const char *file, const char *topic, ...);
|
||||
static void orte_log_peer(int severity, int errcode, orte_process_name_t *peer_proc, const char *msg, ...) __opal_attribute_format__(__printf__, 4, 5);
|
||||
|
||||
/*
|
||||
* Global variables
|
||||
*/
|
||||
int orte_notifier_base_output = -1;
|
||||
int orte_notifier_threshold_severity = ORTE_NOTIFIER_INFRA;
|
||||
orte_notifier_base_module_t orte_notifier = {
|
||||
NULL,
|
||||
NULL,
|
||||
orte_base_log,
|
||||
orte_log_show_help,
|
||||
orte_log_peer
|
||||
};
|
||||
opal_list_t mca_notifier_base_components_available;
|
||||
orte_notifier_base_component_t mca_notifier_base_selected_component;
|
||||
orte_notifier_base_severity_t orte_notifier_threshold_severity =
|
||||
ORTE_NOTIFIER_ERROR;
|
||||
opal_list_t orte_notifier_base_components_available;
|
||||
opal_list_t orte_notifier_base_selected_modules;
|
||||
opal_list_t orte_notifier_log_selected_modules;
|
||||
opal_list_t orte_notifier_help_selected_modules;
|
||||
opal_list_t orte_notifier_log_peer_selected_modules;
|
||||
opal_list_t orte_notifier_log_event_selected_modules;
|
||||
|
||||
orte_notifier_API_module_t orte_notifier = {
|
||||
orte_notifier_log,
|
||||
orte_notifier_show_help,
|
||||
orte_notifier_log_peer,
|
||||
};
|
||||
|
||||
/**
|
||||
* Function for finding and opening either all MCA components, or the one
|
||||
@ -75,20 +73,40 @@ int orte_notifier_base_open(void)
|
||||
|
||||
/* let the user define a base level of severity to report */
|
||||
mca_base_param_reg_string_name("notifier", "threshold_severity",
|
||||
"Report all events at or above this severity [default: critical]",
|
||||
false, false, "critical", &level);
|
||||
if (0 == strcmp(level, "warning")) {
|
||||
orte_notifier_threshold_severity = ORTE_NOTIFIER_WARNING;
|
||||
} else if (0 == strcmp(level, "notice")) {
|
||||
"Report all events at or above this severity [default: error]",
|
||||
false, false, "error", &level);
|
||||
if (0 == strncasecmp(level, "emerg", strlen("emerg"))) {
|
||||
orte_notifier_threshold_severity = ORTE_NOTIFIER_EMERG;
|
||||
} else if (0 == strncasecmp(level, "alert", strlen("alert"))) {
|
||||
orte_notifier_threshold_severity = ORTE_NOTIFIER_ALERT;
|
||||
} else if (0 == strncasecmp(level, "crit", strlen("crit"))) {
|
||||
orte_notifier_threshold_severity = ORTE_NOTIFIER_CRIT;
|
||||
} else if (0 == strncasecmp(level, "warn", strlen("warn"))) {
|
||||
orte_notifier_threshold_severity = ORTE_NOTIFIER_WARN;
|
||||
} else if (0 == strncasecmp(level, "notice", strlen("notice"))) {
|
||||
orte_notifier_threshold_severity = ORTE_NOTIFIER_NOTICE;
|
||||
} else if (0 == strncasecmp(level, "info", strlen("info"))) {
|
||||
orte_notifier_threshold_severity = ORTE_NOTIFIER_INFO;
|
||||
} else if (0 == strncasecmp(level, "debug", strlen("debug"))) {
|
||||
orte_notifier_threshold_severity = ORTE_NOTIFIER_DEBUG;
|
||||
} else if (0 != strncasecmp(level, "error", strlen("error"))) {
|
||||
opal_output(0, "Unknown notifier level");
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
OBJ_CONSTRUCT(&orte_notifier_base_selected_modules, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_notifier_log_selected_modules, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_notifier_help_selected_modules, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_notifier_log_peer_selected_modules, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_notifier_log_event_selected_modules, opal_list_t);
|
||||
|
||||
/* Open up all available components */
|
||||
|
||||
if (ORTE_SUCCESS !=
|
||||
mca_base_components_open("notifier", orte_notifier_base_output,
|
||||
mca_notifier_base_static_components,
|
||||
&mca_notifier_base_components_available, true)) {
|
||||
&orte_notifier_base_components_available,
|
||||
true)) {
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
@ -96,36 +114,3 @@ int orte_notifier_base_open(void)
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static void orte_base_log(int severity, int errcode, const char *msg, ...)
|
||||
{
|
||||
/* just do nothing - it is here just so someone calling it won't
|
||||
* segv. Put in va_start/va_end just so that compilers won't
|
||||
* complain.
|
||||
*/
|
||||
va_list ap;
|
||||
va_start(ap, msg);
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
static void orte_log_show_help(int severity, int errcode, const char *file, const char *topic, ...)
|
||||
{
|
||||
/* just do nothing - it is here just so someone calling it won't
|
||||
* segv. Put in va_start/va_end just so that compilers won't
|
||||
* complain.
|
||||
*/
|
||||
va_list ap;
|
||||
va_start(ap, topic);
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
static void orte_log_peer(int severity, int errcode, orte_process_name_t *peer_proc, const char *msg, ...)
|
||||
{
|
||||
/* just do nothing - it is here just so someone calling it won't
|
||||
* segv. Put in va_start/va_end just so that compilers won't
|
||||
* complain.
|
||||
*/
|
||||
va_list ap;
|
||||
va_start(ap, msg);
|
||||
va_end(ap);
|
||||
}
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -19,67 +20,376 @@
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/opal_sos.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/util/output.h"
|
||||
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
|
||||
/* Global variables */
|
||||
/*
|
||||
* orte_notifier_base_XXX_selected is set to true if at least 1 module has
|
||||
* been selected for the notifier XXX API interface.
|
||||
*/
|
||||
bool orte_notifier_base_log_selected = false;
|
||||
bool orte_notifier_base_help_selected = false;
|
||||
bool orte_notifier_base_log_peer_selected = false;
|
||||
bool orte_notifier_base_log_event_selected = false;
|
||||
|
||||
static opal_sos_reporter_callback_fn_t prev_reporter_callback;
|
||||
static inline char **orte_notifier_get_include_list(const char *,
|
||||
const char *,
|
||||
char **);
|
||||
static bool orte_notifier_add_module(mca_base_component_t *component,
|
||||
orte_notifier_base_module_t *module,
|
||||
int priority,
|
||||
char **include_list,
|
||||
opal_list_t *selected_modules);
|
||||
|
||||
/*
 * Constructor for a selected component/module pair: no component, no
 * module, and a priority of -1.
 */
static void onbsp_construct(orte_notifier_base_selected_pair_t *obj)
{
    obj->onbsp_priority  = -1;
    obj->onbsp_module    = NULL;
    obj->onbsp_component = NULL;
}
|
||||
|
||||
/*
 * Destructor for a selected component/module pair: puts the fields back
 * to the values the constructor gives them. Nothing is freed here.
 */
static void onbsp_destruct(orte_notifier_base_selected_pair_t *obj)
{
    obj->onbsp_component = NULL;
    obj->onbsp_module    = NULL;
    obj->onbsp_priority  = -1;
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(orte_notifier_base_selected_pair_t,
|
||||
opal_list_item_t,
|
||||
onbsp_construct,
|
||||
onbsp_destruct);
|
||||
|
||||
|
||||
/**
|
||||
* Function for selecting one component from all those that are
|
||||
* Function for selecting a set of components from all those that are
|
||||
* available.
|
||||
*
|
||||
* It is possible to select a subset of these components for any interface.
|
||||
* The syntax is the following:
|
||||
* [ -mca notifier <list0> ] [ -mca notifier_log <list1> ]
|
||||
* [ -mca notifier_help <list2> ]
|
||||
* [ -mca notifier_log_peer <list3> ]
|
||||
* [ -mca notifier_log_event <list4> ]
|
||||
* Rules:
|
||||
* . <list0> empty means nothing selected
|
||||
* . <list0> to <list4> = comma separated lists of component names
|
||||
* . <list1> to <list4> may be one of:
|
||||
* . subsets of <list0>
|
||||
* . "none" keyword (means empty)
|
||||
* . 1 of <list1> to <list4> empty means = <list0>
|
||||
* Last point makes it possible to preserve the way it works today
|
||||
*
|
||||
* Examples:
|
||||
* 1)
|
||||
* -mca notifier syslog,smtp
|
||||
* --> syslog and smtp are selected for the log, show_help, log_peer and
|
||||
* log_event interfaces.
|
||||
* 2)
|
||||
* -mca notifier_log syslog
|
||||
* --> no interface is activated, no component is selected
|
||||
* 3)
|
||||
* -mca notifier syslog -mca notifier_help none
|
||||
* -mca notifier_log_peer none
|
||||
* -mca notifier_log_event none
|
||||
* --> only the log interface is activated, with the syslog component
|
||||
* 4)
|
||||
* -mca notifier syslog,smtp,hnp -mca notifier_help syslog
|
||||
* -mca notifier_log_peer smtp
|
||||
* -mca notifier_log_event none
|
||||
* --> the log interface is activated, with the syslog, smtp and hnp
|
||||
* components
|
||||
* the show_help interface is activated, with the syslog component
|
||||
* the log_peer interface is activated, with the smtp component
|
||||
* the log_event interface is not activated
|
||||
*/
|
||||
int orte_notifier_base_select(void)
|
||||
{
|
||||
int ret, exit_status = ORTE_SUCCESS;
|
||||
orte_notifier_base_component_t *best_component = NULL;
|
||||
orte_notifier_base_module_t *best_module = NULL;
|
||||
char *include_list = NULL;
|
||||
mca_base_component_list_item_t *cli = NULL;
|
||||
mca_base_component_t *component = NULL;
|
||||
mca_base_module_t *module = NULL;
|
||||
int i, ret, priority, exit_status = ORTE_SUCCESS;
|
||||
opal_list_item_t *item;
|
||||
orte_notifier_base_module_t *nmodule;
|
||||
char **imodules;
|
||||
char **imodules_log, **imodules_help, **imodules_log_peer;
|
||||
char **imodules_log_event = NULL;
|
||||
bool module_needed;
|
||||
|
||||
/*
|
||||
* Register the framework MCA param and look up include list
|
||||
*/
|
||||
mca_base_param_reg_string_name("notifier", NULL,
|
||||
"Which notifier component to use (empty = none)",
|
||||
false, false,
|
||||
NULL, &include_list);
|
||||
|
||||
/* If we do not have any components to select this is ok. Just use the default
|
||||
* "no-op" component and move on.
|
||||
*/
|
||||
if( 0 >= opal_list_get_size(&mca_notifier_base_components_available) || NULL == include_list) {
|
||||
/* Close all components since none will be used */
|
||||
mca_base_components_close(0, /* Pass 0 to keep this from closing the output handle */
|
||||
&mca_notifier_base_components_available,
|
||||
NULL);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/*
|
||||
* Select the best component
|
||||
*/
|
||||
if( OPAL_SUCCESS != mca_base_select("notifier", orte_notifier_base_output,
|
||||
&mca_notifier_base_components_available,
|
||||
(mca_base_module_t **) &best_module,
|
||||
(mca_base_component_t **) &best_component) ) {
|
||||
/* It is okay if no component was selected - we just leave
|
||||
* the orte_notifier module as the default
|
||||
*/
|
||||
exit_status = ORTE_SUCCESS;
|
||||
goto cleanup;
|
||||
imodules = orte_notifier_get_include_list("notifier",
|
||||
"Comma-delimisted list of notifier components to use "
|
||||
"(empty = none)", NULL);
|
||||
if (NULL == imodules) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
if (NULL != orte_notifier.init) {
|
||||
/* if an init function is provided, use it */
|
||||
if (ORTE_SUCCESS != (ret = orte_notifier.init()) ) {
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
/*
|
||||
* Also get the include lists for each interface
|
||||
*/
|
||||
imodules_log = orte_notifier_get_include_list("notifier_log",
|
||||
"Comma-delimisted list of notifier components to use "
|
||||
"for orte_notifier_log (empty = all selected)",
|
||||
imodules);
|
||||
|
||||
imodules_help = orte_notifier_get_include_list("notifier_help",
|
||||
"Comma-delimisted list of notifier components to use "
|
||||
"for orte_notifier_show_help (empty = all selected)",
|
||||
imodules);
|
||||
|
||||
imodules_log_peer = orte_notifier_get_include_list("notifier_log_peer",
|
||||
"Comma-delimisted list of notifier components to "
|
||||
"use for orte_notifier_log_peer (empty = all "
|
||||
"selected)", imodules);
|
||||
|
||||
#if ORTE_WANT_NOTIFIER_LOG_EVENT
|
||||
imodules_log_event = orte_notifier_get_include_list("notifier_log_event",
|
||||
"Comma-delimisted list of notifier components to "
|
||||
"use for ORTE_NOTIFIER_LOG_EVENT (empty = all "
|
||||
"selected)",
|
||||
imodules);
|
||||
#endif /* ORTE_WANT_NOTIFIER_LOG_EVENT */
|
||||
|
||||
/* Query all available components and ask if they have a module */
|
||||
for (item = opal_list_get_first(&orte_notifier_base_components_available);
|
||||
opal_list_get_end(&orte_notifier_base_components_available) != item;
|
||||
item = opal_list_get_next(item)) {
|
||||
cli = (mca_base_component_list_item_t *) item;
|
||||
component = (mca_base_component_t *) cli->cli_component;
|
||||
|
||||
/* If this component is not in the include list, skip it */
|
||||
for (i = 0; NULL != imodules[i]; ++i) {
|
||||
if (0 == strcmp(imodules[i], component->mca_component_name)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (NULL == imodules[i]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If there's no query function, skip it */
|
||||
if (NULL == component->mca_query_component) {
|
||||
opal_output_verbose(5, orte_notifier_base_output,
|
||||
"mca:notify:select: Skipping component [%s]. It does not implement a query function",
|
||||
component->mca_component_name );
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Query the component */
|
||||
opal_output_verbose(5, orte_notifier_base_output,
|
||||
"mca:notify:select: Querying component [%s]",
|
||||
component->mca_component_name);
|
||||
ret = component->mca_query_component(&module, &priority);
|
||||
|
||||
/* If no module was returned, then skip component */
|
||||
if (ORTE_SUCCESS != ret || NULL == module) {
|
||||
opal_output_verbose(5, orte_notifier_base_output,
|
||||
"mca:notify:select: Skipping component [%s]. Query failed to return a module",
|
||||
component->mca_component_name );
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If we got a module, initialize it */
|
||||
nmodule = (orte_notifier_base_module_t*) module;
|
||||
if (NULL != nmodule->init) {
|
||||
/* If the module doesn't want to be used, skip it */
|
||||
if (ORTE_SUCCESS != (ret = nmodule->init()) ) {
|
||||
if (ORTE_ERR_NOT_SUPPORTED != OPAL_SOS_GET_ERROR_CODE(ret) &&
|
||||
ORTE_ERR_NOT_IMPLEMENTED != OPAL_SOS_GET_ERROR_CODE(ret)) {
|
||||
exit_status = ret;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (NULL != nmodule->finalize) {
|
||||
nmodule->finalize();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* OK, one module has been selected for the notifier framework, and
|
||||
* successfully initialized.
|
||||
* Now we have to include it in the per interface selected modules
|
||||
* lists if needed.
|
||||
*/
|
||||
ret = orte_notifier_add_module(component,
|
||||
nmodule,
|
||||
priority,
|
||||
imodules_log,
|
||||
&orte_notifier_log_selected_modules);
|
||||
|
||||
orte_notifier_base_log_selected = orte_notifier_base_log_selected
|
||||
|| ret;
|
||||
/*
|
||||
* This variable is set to check if the module is needed by at least
|
||||
* one interface.
|
||||
*/
|
||||
module_needed = ret;
|
||||
|
||||
ret = orte_notifier_add_module(component,
|
||||
nmodule,
|
||||
priority,
|
||||
imodules_help,
|
||||
&orte_notifier_help_selected_modules);
|
||||
orte_notifier_base_help_selected = orte_notifier_base_help_selected
|
||||
|| ret;
|
||||
module_needed = module_needed || ret;
|
||||
|
||||
ret = orte_notifier_add_module(component,
|
||||
nmodule,
|
||||
priority,
|
||||
imodules_log_peer,
|
||||
&orte_notifier_log_peer_selected_modules);
|
||||
orte_notifier_base_log_peer_selected =
|
||||
orte_notifier_base_log_peer_selected || ret;
|
||||
module_needed = module_needed || ret;
|
||||
|
||||
ret = orte_notifier_add_module(component,
|
||||
nmodule,
|
||||
priority,
|
||||
imodules_log_event,
|
||||
&orte_notifier_log_event_selected_modules);
|
||||
orte_notifier_base_log_event_selected =
|
||||
orte_notifier_base_log_event_selected || ret;
|
||||
module_needed = module_needed || ret;
|
||||
|
||||
/*
|
||||
* If the module is needed by at least one interface:
|
||||
* Unconditionally update the global list that will be used during
|
||||
* the close step. Else unload it.
|
||||
*/
|
||||
if (module_needed) {
|
||||
orte_notifier_add_module(component,
|
||||
nmodule,
|
||||
priority,
|
||||
imodules,
|
||||
&orte_notifier_base_selected_modules);
|
||||
} else {
|
||||
if (NULL != nmodule->finalize) {
|
||||
nmodule->finalize();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Save the winner */
|
||||
orte_notifier = *best_module;
|
||||
if (orte_notifier_base_log_event_selected) {
|
||||
/*
|
||||
* This has to be done whatever the selected module. That's why it's
|
||||
* done here.
|
||||
*/
|
||||
orte_notifier_base_events_init();
|
||||
}
|
||||
|
||||
/* Register a callback with OPAL SOS so that we can intercept
|
||||
* error messages */
|
||||
opal_sos_reg_reporter_callback((opal_sos_reporter_callback_fn_t) orte_notifier_log,
|
||||
&prev_reporter_callback);
|
||||
|
||||
cleanup:
|
||||
return exit_status;
|
||||
}
|
||||
|
||||
/**
 * Register an mca param that represents an include list and build that list.
 *
 * @param param_name      (IN) param name to be registered
 * @param help_message    (IN) help message for that param
 * @param default_modules (IN) list of module names to be inherited if an
 *                             empty include list is provided
 * @return                NULL if the param value is exactly "none",
 *                        default_modules if no list could be built from
 *                        the param value, otherwise a newly split array
 *                        of the comma separated module names
 */
static inline char **orte_notifier_get_include_list(const char *param_name,
                                                    const char *help_message,
                                                    char **default_modules)
{
    char *include_list = NULL;
    char **imodules;

    mca_base_param_reg_string_name(param_name, NULL, help_message,
                                   false, false, NULL, &include_list);

    /*
     * "none" means an explicitly empty list. Test for it before splitting
     * so that no argv array is built only to be dropped (and leaked).
     */
    if (NULL != include_list && 0 == strcmp(include_list, "none")) {
        return NULL;
    }

    imodules = opal_argv_split(include_list, ',');
    if (NULL == imodules) {
        /*
         * Inherit the default list if nothing specified
         */
        return default_modules;
    }

    /*
     * NOTE(review): include_list itself is not released here (unchanged
     * from the previous behavior) -- confirm whether
     * mca_base_param_reg_string_name() hands ownership to the caller.
     */
    return imodules;
}
|
||||
|
||||
|
||||
/**
|
||||
* Check if a component name belongs to an include list and add it to the
|
||||
* list of selected modules.
|
||||
*
|
||||
* @param component (IN) component to be included
|
||||
* @param module (IN) module to be included
|
||||
* @param priority (IN) module priority
|
||||
* @param include_list (IN) list of module names to go through
|
||||
* @param selected_modules (OUT) list of selected modules to be updated
|
||||
* @return true/false depending on whether the module
|
||||
* has been added or not
|
||||
*/
|
||||
static bool orte_notifier_add_module(mca_base_component_t *component,
|
||||
orte_notifier_base_module_t *module,
|
||||
int priority,
|
||||
char **include_list,
|
||||
opal_list_t *selected_modules)
|
||||
{
|
||||
orte_notifier_base_selected_pair_t *pair, *pair2;
|
||||
char *module_name;
|
||||
opal_list_item_t *item;
|
||||
int i;
|
||||
|
||||
if (NULL == include_list) {
|
||||
return false;
|
||||
}
|
||||
|
||||
module_name = component->mca_component_name;
|
||||
|
||||
/* If this component is not in the include list, skip it */
|
||||
for (i = 0; NULL != include_list[i]; i++) {
|
||||
if (!strcmp(include_list[i], module_name)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (NULL == include_list[i]) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Make an item for the list */
|
||||
pair = OBJ_NEW(orte_notifier_base_selected_pair_t);
|
||||
pair->onbsp_component = (orte_notifier_base_component_t*) component;
|
||||
pair->onbsp_module = module;
|
||||
pair->onbsp_priority = priority;
|
||||
|
||||
/* Put it in the list in priority order */
|
||||
for (item = opal_list_get_first(selected_modules);
|
||||
opal_list_get_end(selected_modules) != item;
|
||||
item = opal_list_get_next(item)) {
|
||||
pair2 = (orte_notifier_base_selected_pair_t*) item;
|
||||
if (priority > pair2->onbsp_priority) {
|
||||
opal_list_insert_pos(selected_modules, item, &(pair->super));
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (opal_list_get_end(selected_modules) == item) {
|
||||
opal_list_append(selected_modules, &(pair->super));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
190
orte/mca/notifier/base/notifier_base_wrappers.c
Обычный файл
190
orte/mca/notifier/base/notifier_base_wrappers.c
Обычный файл
@ -0,0 +1,190 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008-2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/util/opal_sos.h"
|
||||
|
||||
#include "orte/constants.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/util/error_strings.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
|
||||
|
||||
/*
 * Dispatch a printf-style message to the "log" entry point of every module
 * selected for the log interface.
 */
void orte_notifier_log(orte_notifier_base_severity_t severity,
                       int errcode, const char *msg, ...)
{
    opal_list_t *modules = &orte_notifier_log_selected_modules;
    opal_list_item_t *cur;

    /* Bail out when no module was selected for this interface, or when the
     * severity value is above the threshold - lower severity values are
     * considered "more severe", so a larger value means "less important".
     */
    if (!orte_notifier_base_log_selected ||
        severity > orte_notifier_threshold_severity) {
        return;
    }

    for (cur = opal_list_get_first(modules);
         cur != opal_list_get_end(modules);
         cur = opal_list_get_next(cur)) {
        orte_notifier_base_module_t *mod =
            ((orte_notifier_base_selected_pair_t*) cur)->onbsp_module;
        va_list args;

        if (NULL == mod->log) {
            continue;
        }

        /* Each module gets its own freshly started argument list */
        va_start(args, msg);
        mod->log(severity, errcode, msg, args);
        va_end(args);
    }
}
|
||||
|
||||
/*
 * Dispatch a show_help-style notification (help file + topic + arguments) to
 * the "help" entry point of every module selected for the help interface.
 */
void orte_notifier_show_help(orte_notifier_base_severity_t severity,
                             int errcode, const char *file,
                             const char *topic, ...)
{
    opal_list_t *modules = &orte_notifier_help_selected_modules;
    opal_list_item_t *cur;

    /* Bail out when no module was selected for this interface, or when the
     * severity value is above the threshold - lower severity values are
     * considered "more severe", so a larger value means "less important".
     */
    if (!orte_notifier_base_help_selected ||
        severity > orte_notifier_threshold_severity) {
        return;
    }

    for (cur = opal_list_get_first(modules);
         cur != opal_list_get_end(modules);
         cur = opal_list_get_next(cur)) {
        orte_notifier_base_module_t *mod =
            ((orte_notifier_base_selected_pair_t*) cur)->onbsp_module;
        va_list args;

        if (NULL == mod->help) {
            continue;
        }

        /* Each module gets its own freshly started argument list */
        va_start(args, topic);
        mod->help(severity, errcode, file, topic, args);
        va_end(args);
    }
}
|
||||
|
||||
void orte_notifier_log_peer(orte_notifier_base_severity_t severity,
|
||||
int errcode,
|
||||
orte_process_name_t *peer_proc,
|
||||
const char *msg, ...)
|
||||
{
|
||||
va_list ap;
|
||||
opal_list_item_t *item;
|
||||
orte_notifier_base_selected_pair_t *pair;
|
||||
|
||||
if (!orte_notifier_base_log_peer_selected) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* is the severity value above the threshold - I know
|
||||
* this seems backward, but lower severity values are
|
||||
* considered "more severe"
|
||||
*/
|
||||
if (severity > orte_notifier_threshold_severity) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (item = opal_list_get_first(&orte_notifier_log_peer_selected_modules);
|
||||
opal_list_get_end(&orte_notifier_log_peer_selected_modules) != item;
|
||||
item = opal_list_get_next(item)) {
|
||||
pair = (orte_notifier_base_selected_pair_t*) item;
|
||||
if (NULL != pair->onbsp_module->peer) {
|
||||
va_start(ap, msg);
|
||||
pair->onbsp_module->peer(severity, errcode, peer_proc, msg, ap);
|
||||
va_end(ap);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
 * Map a notifier severity to its printable name.
 *
 * Returns a pointer to a string literal; any value that is not one of the
 * severities listed below maps to "UNKNOWN".
 */
const char* orte_notifier_base_sev2str(orte_notifier_base_severity_t severity)
{
    const char *name = "UNKNOWN";

    switch (severity) {
    case ORTE_NOTIFIER_EMERG:
        name = "EMERG";
        break;
    case ORTE_NOTIFIER_ALERT:
        name = "ALERT";
        break;
    case ORTE_NOTIFIER_CRIT:
        name = "CRIT";
        break;
    case ORTE_NOTIFIER_ERROR:
        name = "ERROR";
        break;
    case ORTE_NOTIFIER_WARN:
        name = "WARN";
        break;
    case ORTE_NOTIFIER_NOTICE:
        name = "NOTICE";
        break;
    case ORTE_NOTIFIER_INFO:
        name = "INFO";
        break;
    case ORTE_NOTIFIER_DEBUG:
        name = "DEBUG";
        break;
    default:
        break;
    }

    return name;
}
|
||||
|
||||
|
||||
/*
 * Build the text of a "peer" notification.
 *
 * The result is "While communicating to proc <peer> on node <host>, proc
 * <me> on node <node> encountered an error " followed by the error string
 * (or the numeric code when no string is available) and then the
 * caller-supplied printf-style message.  Output is truncated to
 * ORTE_NOTIFIER_MAX_BUF characters.
 *
 * @param errcode   (IN) error code to describe
 * @param peer_proc (IN) peer process, or NULL ("UNKNOWN" is printed)
 * @param msg       (IN) printf-style format for the trailing message
 * @param ap        (IN) arguments for msg
 * @return               malloc()ed, NUL-terminated buffer that the caller
 *                       must free(), or NULL if the allocation failed
 */
char *orte_notifier_base_peer_log(int errcode, orte_process_name_t *peer_proc,
                                  const char *msg, va_list ap)
{
    char *buf = malloc(ORTE_NOTIFIER_MAX_BUF + 1);
    char *peer_host = NULL, *peer_name = NULL;
    char *pos = buf;
    const char *errstr = orte_err2str(errcode);
    int len, space = ORTE_NOTIFIER_MAX_BUF;

    if (NULL == buf) {
        return NULL;
    }

    if (peer_proc) {
        peer_host = orte_ess.proc_get_hostname(peer_proc);
        peer_name = ORTE_NAME_PRINT(peer_proc);
    }

    len = snprintf(pos, space,
                   "While communicating to proc %s on node %s,"
                   " proc %s on node %s encountered an error ",
                   peer_name ? peer_name : "UNKNOWN",
                   peer_host ? peer_host : "UNKNOWN",
                   ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                   orte_process_info.nodename);
    /* snprintf() reports the length it *wanted* to write, or a negative
     * value on error.  Clamp it so that pos never leaves the buffer and
     * space never grows or goes negative. */
    if (len < 0) {
        *pos = '\0';
        len = 0;
    } else if (len > space) {
        len = space;
    }
    space -= len;
    pos += len;

    if (0 < space) {
        if (errstr) {
            len = snprintf(pos, space, "'%s':", errstr);
        } else {
            len = snprintf(pos, space, "(%d):", errcode);
        }
        if (len < 0) {
            *pos = '\0';
            len = 0;
        } else if (len > space) {
            len = space;
        }
        space -= len;
        pos += len;
    }

    if (0 < space) {
        vsnprintf(pos, space, msg, ap);
    }

    /* The buffer holds ORTE_NOTIFIER_MAX_BUF + 1 bytes, so this index is
     * always valid and guarantees termination whatever happened above. */
    buf[ORTE_NOTIFIER_MAX_BUF] = '\0';
    return buf;
}
|
@ -12,7 +12,21 @@
|
||||
# MCA_notifier_command_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_notifier_command_CONFIG], [
|
||||
OMPI_VAR_SCOPE_PUSH(notifier_happy)
|
||||
|
||||
notifier_happy=no
|
||||
|
||||
# We need fork() and pipe()
|
||||
AC_CHECK_FUNC([fork],
|
||||
[AC_CHECK_FUNC([pipe], [$1], [$2])], [$2])
|
||||
[AC_CHECK_FUNC([pipe], [notifier_happy=yes])])
|
||||
|
||||
# We also need thread support
|
||||
AS_IF([test "$notifier_happy" = "yes"],
|
||||
[AC_MSG_CHECKING([for thread support])
|
||||
AC_MSG_RESULT([$THREAD_TYPE])
|
||||
AS_IF([test "$THREAD_TYPE" != "none"],
|
||||
[notifier_happy=yes])])
|
||||
|
||||
AS_IF([test "$notifier_happy" = "yes"], [$1], [$2])
|
||||
OMPI_VAR_SCOPE_POP
|
||||
])
|
||||
|
@ -51,6 +51,9 @@ typedef struct {
|
||||
|
||||
/* Pipe to the parent */
|
||||
int to_parent[2];
|
||||
|
||||
/* Do we want data sent to child via stdin? */
|
||||
bool pass_via_stdin;
|
||||
} orte_notifier_command_component_t;
|
||||
|
||||
|
||||
|
@ -42,11 +42,21 @@
|
||||
#include <signal.h>
|
||||
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/threads/threads.h"
|
||||
|
||||
#include "orte/constants.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
|
||||
#include "notifier_command.h"
|
||||
|
||||
/* Structure for holding the argument to stdin_main() */
|
||||
typedef struct {
|
||||
/* File descriptor that stdin_main() writes the formatted message to and
   then closes */
int sat_pipe_fd;
|
||||
/* Severity, emitted both as an integer and as the string returned by
   orte_notifier_base_sev2str() */
int sat_severity;
|
||||
/* Error code emitted in the "errcode" attribute */
int sat_errcode;
|
||||
/* Message text emitted inside the <message> element */
char *sat_msg;
|
||||
} stdin_arg_t;
|
||||
|
||||
|
||||
int orte_notifier_command_split(const char *cmd_arg, char ***argv_arg)
|
||||
{
|
||||
@ -171,6 +181,30 @@ static void diediedie(int status)
|
||||
_exit(status);
|
||||
}
|
||||
|
||||
/*
|
||||
* Main entry point for stdin thread
|
||||
*/
|
||||
static void *stdin_main(opal_object_t *obj)
|
||||
{
|
||||
char *data;
|
||||
opal_thread_t *t = (opal_thread_t*) obj;
|
||||
stdin_arg_t *arg = (stdin_arg_t*) t->t_arg;
|
||||
|
||||
asprintf(&data, "<stdin>\n<notifier severity_int=\"%d\" severity_str=\"%s\" errcode=\"%d\">\n<message>%s</message>\n</notifier>\n</stdin>\n",
|
||||
arg->sat_severity,
|
||||
orte_notifier_base_sev2str(arg->sat_severity),
|
||||
arg->sat_errcode,
|
||||
arg->sat_msg);
|
||||
if (NULL != data) {
|
||||
orte_notifier_command_write_fd(arg->sat_pipe_fd,
|
||||
strlen(data) + 1, data);
|
||||
free(data);
|
||||
close(arg->sat_pipe_fd);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Loop over waiting for a child to die
|
||||
*/
|
||||
@ -212,8 +246,11 @@ static void do_exec(void)
|
||||
pid_t pid;
|
||||
bool exited, killed;
|
||||
int sel[3], status;
|
||||
int pipe_to_stdin[2];
|
||||
char *msg, *p, *cmd, **argv = NULL;
|
||||
orte_notifier_command_component_t *c = &mca_notifier_command_component;
|
||||
opal_thread_t stdin_thread;
|
||||
stdin_arg_t arg;
|
||||
|
||||
/* First three items on the pipe are: severity, errcode, and
|
||||
string length (sel = Severity, Errcode, string Length). */
|
||||
@ -250,10 +287,7 @@ static void do_exec(void)
|
||||
while (NULL != (p = strstr(cmd, "$S"))) {
|
||||
*p = '\0';
|
||||
asprintf(&temp, "%s%s%s", cmd,
|
||||
((ORTE_NOTIFIER_INFRA == sel[0]) ? "INFRA" :
|
||||
((ORTE_NOTIFIER_WARNING == sel[0]) ? "WARNING" :
|
||||
((ORTE_NOTIFIER_NOTICE == sel[0]) ? "NOTICE" :
|
||||
"UNKNOWN"))), p + 2);
|
||||
orte_notifier_base_sev2str(sel[0]), p + 2);
|
||||
free(cmd);
|
||||
cmd = temp;
|
||||
}
|
||||
@ -279,6 +313,13 @@ static void do_exec(void)
|
||||
/* What else can we do? */
|
||||
}
|
||||
|
||||
/* Do we need a stdin pipe? */
|
||||
if (mca_notifier_command_component.pass_via_stdin) {
|
||||
if (0 != pipe(pipe_to_stdin)) {
|
||||
diediedie(8);
|
||||
}
|
||||
}
|
||||
|
||||
/* Fork off the child and run the command */
|
||||
pid = fork();
|
||||
if (pid < 0) {
|
||||
@ -286,8 +327,23 @@ static void do_exec(void)
|
||||
} else if (pid == 0) {
|
||||
int i;
|
||||
int fdmax = sysconf(_SC_OPEN_MAX);
|
||||
close(0);
|
||||
for (i = 3; i < fdmax; ++i) {
|
||||
close(i);
|
||||
if (!mca_notifier_command_component.pass_via_stdin ||
|
||||
pipe_to_stdin[0] != i) {
|
||||
close(i);
|
||||
}
|
||||
}
|
||||
|
||||
/* If we have a pipe to stdin, dup it */
|
||||
if (mca_notifier_command_component.pass_via_stdin) {
|
||||
close(pipe_to_stdin[1]);
|
||||
if (0 != pipe_to_stdin[0]) {
|
||||
if (dup2(pipe_to_stdin[0], 0) < 0) {
|
||||
diediedie(13);
|
||||
}
|
||||
close(pipe_to_stdin[0]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Run it! */
|
||||
@ -295,14 +351,28 @@ static void do_exec(void)
|
||||
/* If we get here, bad */
|
||||
diediedie(9);
|
||||
}
|
||||
free(cmd);
|
||||
free(msg);
|
||||
opal_argv_free(argv);
|
||||
|
||||
/* Write down stdin. Start a thread because this has to run in
|
||||
parallel to the timer to kill the grandchild if it runs too
|
||||
long. */
|
||||
if (mca_notifier_command_component.pass_via_stdin) {
|
||||
close(pipe_to_stdin[0]);
|
||||
OBJ_CONSTRUCT(&stdin_thread, opal_thread_t);
|
||||
stdin_thread.t_run = stdin_main;
|
||||
arg.sat_pipe_fd = pipe_to_stdin[1];
|
||||
arg.sat_severity = sel[0];
|
||||
arg.sat_errcode = sel[1];
|
||||
arg.sat_msg = msg;
|
||||
stdin_thread.t_arg = (void *) &arg;
|
||||
if (OPAL_SUCCESS != opal_thread_start(&stdin_thread)) {
|
||||
diediedie(9);
|
||||
}
|
||||
}
|
||||
|
||||
/* Parent: wait for / reap the child. */
|
||||
do_wait(pid, mca_notifier_command_component.timeout, &status, &exited);
|
||||
|
||||
/* If it didn't die, try killing it nicely. If that fails, kill
|
||||
/* If the child didn't die, try killing it nicely. If that fails, kill
|
||||
it dead. */
|
||||
killed = false;
|
||||
if (!exited) {
|
||||
@ -316,6 +386,20 @@ static void do_exec(void)
|
||||
}
|
||||
}
|
||||
|
||||
/* Wait for the thread to complete */
|
||||
if (mca_notifier_command_component.pass_via_stdin) {
|
||||
void *ret;
|
||||
|
||||
close(pipe_to_stdin[1]);
|
||||
opal_thread_join(&stdin_thread, &ret);
|
||||
OBJ_DESTRUCT(&stdin_thread);
|
||||
}
|
||||
|
||||
/* Free stuff */
|
||||
free(cmd);
|
||||
free(msg);
|
||||
opal_argv_free(argv);
|
||||
|
||||
/* Handshake back up to the parent: just send the status value
|
||||
back up to the parent and let all interpretation occur up
|
||||
there. */
|
||||
|
@ -46,11 +46,11 @@
|
||||
|
||||
#include "notifier_command.h"
|
||||
|
||||
static int command_open(void);
|
||||
static int command_component_query(mca_base_module_t **module, int *priority);
|
||||
static int command_close(void);
|
||||
static int command_register(void);
|
||||
|
||||
|
||||
/*
|
||||
* Struct of function pointers that need to be initialized
|
||||
*/
|
||||
@ -65,7 +65,7 @@ orte_notifier_command_component_t mca_notifier_command_component = {
|
||||
ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION,
|
||||
|
||||
command_open,
|
||||
NULL,
|
||||
command_close,
|
||||
command_component_query,
|
||||
command_register,
|
||||
@ -93,6 +93,9 @@ orte_notifier_command_component_t mca_notifier_command_component = {
|
||||
|
||||
/* To-parent pipe FDs */
|
||||
{ -1, -1 },
|
||||
|
||||
/* Pass via stdin? */
|
||||
true,
|
||||
};
|
||||
|
||||
/* Safety to ensure we don't try to write down a dead pipe */
|
||||
@ -107,6 +110,8 @@ static void child_death_cb(pid_t pid, int status, void *data)
|
||||
|
||||
static int command_register(void)
|
||||
{
|
||||
int val;
|
||||
|
||||
mca_base_param_reg_string(&mca_notifier_command_component.super.base_version,
|
||||
"cmd",
|
||||
"Command to execute, with substitution. $s = integer severity; $S = string severity; $e = integer error code; $m = string message",
|
||||
@ -121,6 +126,14 @@ static int command_register(void)
|
||||
mca_notifier_command_component.timeout,
|
||||
&mca_notifier_command_component.timeout);
|
||||
|
||||
mca_base_param_reg_int(&mca_notifier_command_component.super.base_version,
|
||||
"use_stdin",
|
||||
"If true, pass parameters to the command via stdin, formatted with trivial XML",
|
||||
false, false,
|
||||
(int) mca_notifier_command_component.pass_via_stdin,
|
||||
&val);
|
||||
mca_notifier_command_component.pass_via_stdin = OPAL_INT_TO_BOOL(val);
|
||||
|
||||
/* Priority */
|
||||
mca_base_param_reg_int(&mca_notifier_command_component.super.base_version,
|
||||
"priority",
|
||||
@ -132,11 +145,6 @@ static int command_register(void)
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int command_open(void)
|
||||
{
|
||||
/* Nothing to do */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int command_close(void)
|
||||
{
|
||||
|
@ -55,12 +55,14 @@
|
||||
#include "notifier_command.h"
|
||||
|
||||
|
||||
static void command_log(int severity, int errcode, const char *msg, ...);
|
||||
static void command_help(int severity, int errcode, const char *filename,
|
||||
const char *topic, ...);
|
||||
static void command_peer(int severity, int errcode,
|
||||
orte_process_name_t *peer_proc,
|
||||
const char *msg, ...);
|
||||
static void command_log(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *msg, va_list ap);
|
||||
static void command_help(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *filename,
|
||||
const char *topic, va_list ap);
|
||||
static void command_peer(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc,
|
||||
const char *msg, va_list ap);
|
||||
|
||||
/* Module */
|
||||
orte_notifier_base_module_t orte_notifier_command_module = {
|
||||
@ -68,16 +70,20 @@ orte_notifier_base_module_t orte_notifier_command_module = {
|
||||
NULL,
|
||||
command_log,
|
||||
command_help,
|
||||
command_peer
|
||||
command_peer,
|
||||
NULL
|
||||
};
|
||||
|
||||
/*
|
||||
* Back-end function to actually tell the child to fork the command
|
||||
*/
|
||||
static int send_command(int severity, int errcode, char *msg)
|
||||
static int send_command(orte_notifier_base_severity_t severity, int errcode,
|
||||
char *msg)
|
||||
{
|
||||
/* csel = Command, Severity, Errcode, string Length */
|
||||
int rc, csel[4];
|
||||
char *errmsg = NULL;
|
||||
|
||||
csel[0] = CMD_EXEC;
|
||||
csel[1] = severity;
|
||||
csel[2] = errcode;
|
||||
@ -87,6 +93,7 @@ static int send_command(int severity, int errcode, char *msg)
|
||||
if (ORTE_SUCCESS !=
|
||||
(rc = orte_notifier_command_write_fd(mca_notifier_command_component.to_child[1],
|
||||
sizeof(csel), csel))) {
|
||||
errmsg = "write";
|
||||
goto error;
|
||||
}
|
||||
|
||||
@ -94,6 +101,7 @@ static int send_command(int severity, int errcode, char *msg)
|
||||
if (ORTE_SUCCESS !=
|
||||
(rc = orte_notifier_command_write_fd(mca_notifier_command_component.to_child[1],
|
||||
csel[3] + 1, msg))) {
|
||||
errmsg = "write";
|
||||
goto error;
|
||||
}
|
||||
|
||||
@ -104,6 +112,7 @@ static int send_command(int severity, int errcode, char *msg)
|
||||
if (ORTE_SUCCESS !=
|
||||
(rc = orte_notifier_command_read_fd(mca_notifier_command_component.to_parent[0],
|
||||
sizeof(int) * 3, csel))) {
|
||||
errmsg = "read";
|
||||
goto error;
|
||||
}
|
||||
/* Did the grandchild exit? */
|
||||
@ -144,27 +153,17 @@ static int send_command(int severity, int errcode, char *msg)
|
||||
error:
|
||||
orte_show_help("help-orte-notifier-command.txt",
|
||||
"system call fail", true, orte_process_info.nodename,
|
||||
"write", opal_strerror(rc), rc);
|
||||
errmsg, opal_strerror(rc), rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void command_log(int severity, int errcode, const char *msg, ...)
|
||||
static void command_log(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *msg, va_list ap)
|
||||
{
|
||||
char *output;
|
||||
va_list arglist;
|
||||
|
||||
/* is the severity value above the threshold - I know
|
||||
* this seems backward, but lower severity values are
|
||||
* considered "more severe"
|
||||
*/
|
||||
if (severity > orte_notifier_threshold_severity) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* If there was a message, output it */
|
||||
va_start(arglist, msg);
|
||||
vasprintf(&output, msg, arglist);
|
||||
va_end(arglist);
|
||||
vasprintf(&output, msg, ap);
|
||||
|
||||
if (NULL != output) {
|
||||
send_command(severity, errcode, output);
|
||||
@ -172,23 +171,11 @@ static void command_log(int severity, int errcode, const char *msg, ...)
|
||||
}
|
||||
}
|
||||
|
||||
static void command_help(int severity, int errcode, const char *filename,
|
||||
const char *topic, ...)
|
||||
static void command_help(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *filename,
|
||||
const char *topic, va_list ap)
|
||||
{
|
||||
va_list arglist;
|
||||
char *output;
|
||||
|
||||
/* is the severity value above the threshold - I know
|
||||
* this seems backward, but lower severity values are
|
||||
* considered "more severe"
|
||||
*/
|
||||
if (severity > orte_notifier_threshold_severity) {
|
||||
return;
|
||||
}
|
||||
|
||||
va_start(arglist, topic);
|
||||
output = opal_show_help_vstring(filename, topic, false, arglist);
|
||||
va_end(arglist);
|
||||
char *output = opal_show_help_vstring(filename, topic, false, ap);
|
||||
|
||||
if (NULL != output) {
|
||||
send_command(severity, errcode, output);
|
||||
@ -196,55 +183,14 @@ static void command_help(int severity, int errcode, const char *filename,
|
||||
}
|
||||
}
|
||||
|
||||
static void command_peer(int severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg, ...)
|
||||
static void command_peer(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg,
|
||||
va_list ap)
|
||||
{
|
||||
va_list arglist;
|
||||
char buf[ORTE_NOTIFIER_MAX_BUF + 1];
|
||||
char *peer_host = NULL, *peer_name = NULL;
|
||||
char *pos = buf;
|
||||
char *errstr = (char*)orte_err2str(errcode);
|
||||
int len, space = ORTE_NOTIFIER_MAX_BUF;
|
||||
char *buf = orte_notifier_base_peer_log(errcode, peer_proc, msg, ap);
|
||||
|
||||
/* is the severity value above the threshold - I know
|
||||
* this seems backward, but lower severity values are
|
||||
* considered "more severe"
|
||||
*/
|
||||
if (severity > orte_notifier_threshold_severity) {
|
||||
return;
|
||||
if (NULL != buf) {
|
||||
send_command(severity, errcode, buf);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
if (peer_proc) {
|
||||
peer_host = orte_ess.proc_get_hostname(peer_proc);
|
||||
peer_name = ORTE_NAME_PRINT(peer_proc);
|
||||
}
|
||||
|
||||
len = snprintf(pos, space,
|
||||
"While communicating to proc %s on node %s,"
|
||||
" proc %s on node %s encountered an error ",
|
||||
peer_name ? peer_name : "UNKNOWN",
|
||||
peer_host ? peer_host : "UNKNOWN",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_process_info.nodename);
|
||||
space -= len;
|
||||
pos += len;
|
||||
|
||||
if (0 < space) {
|
||||
if (errstr) {
|
||||
len = snprintf(pos, space, "'%s':", errstr);
|
||||
} else {
|
||||
len = snprintf(pos, space, "(%d):", errcode);
|
||||
}
|
||||
space -= len;
|
||||
pos += len;
|
||||
}
|
||||
|
||||
if (0 < space) {
|
||||
va_start(arglist, msg);
|
||||
vsnprintf(pos, space, msg, arglist);
|
||||
va_end(arglist);
|
||||
}
|
||||
|
||||
buf[ORTE_NOTIFIER_MAX_BUF] = '\0';
|
||||
send_command(severity, errcode, buf);
|
||||
}
|
||||
|
35
orte/mca/notifier/configure.m4
Обычный файл
35
orte/mca/notifier/configure.m4
Обычный файл
@ -0,0 +1,35 @@
|
||||
dnl -*- shell-script -*-
|
||||
dnl
|
||||
dnl Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
dnl University of Stuttgart. All rights reserved.
|
||||
dnl Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||
dnl Copyright (c) 2009 Bull SAS. All rights reserved.
|
||||
dnl $COPYRIGHT$
|
||||
dnl
|
||||
dnl Additional copyrights may follow
|
||||
dnl
|
||||
dnl $HEADER$
|
||||
dnl
|
||||
|
||||
dnl Framework-level configuration for the notifier framework.
dnl Handles the --enable-notifier-log-event option, publishes the result as
dnl the ORTE_WANT_NOTIFIER_LOG_EVENT define and automake conditional, then
dnl hands over to MCA_CONFIGURE_FRAMEWORK with the two macro arguments.
AC_DEFUN([MCA_notifier_CONFIG],[
|
||||
ompi_show_subsubtitle "Pre-configuring the framework notifier"
|
||||
|
||||
AC_MSG_CHECKING([if --enable-notifier-log-event was specified])
|
||||
AC_ARG_ENABLE(notifier-log-event,
|
||||
AC_HELP_STRING([--enable-notifier-log-event],
|
||||
[Enable unusual events notification. (default: disabled)]))
|
||||
dnl Only an explicit "yes" enables the feature; anything else disables it.
if test "$enable_notifier_log_event" = "yes"; then
|
||||
AC_MSG_RESULT([yes])
|
||||
WANT_NOTIFIER_LOG_EVENT=1
|
||||
else
|
||||
AC_MSG_RESULT([no (disabling "notifier-log-event")])
|
||||
WANT_NOTIFIER_LOG_EVENT=0
|
||||
fi
|
||||
dnl Export the 0/1 value as a preprocessor define.
AC_DEFINE_UNQUOTED([ORTE_WANT_NOTIFIER_LOG_EVENT],
|
||||
[$WANT_NOTIFIER_LOG_EVENT],
|
||||
[if the notifier_log_event should be enabled])
|
||||
dnl Same switch, made available to the Makefiles.
AM_CONDITIONAL([ORTE_WANT_NOTIFIER_LOG_EVENT],
|
||||
[test "$WANT_NOTIFIER_LOG_EVENT" = "1"])
|
||||
|
||||
MCA_CONFIGURE_FRAMEWORK($1, $2, 1)
|
||||
])
|
@ -18,6 +18,9 @@
|
||||
|
||||
AM_CPPFLAGS = $(notifier_ftb_CPPFLAGS)
|
||||
|
||||
dist_pkgdata_DATA = \
|
||||
help-orte-notifier-ftb.txt
|
||||
|
||||
sources = \
|
||||
notifier_ftb.h \
|
||||
notifier_ftb_module.c \
|
||||
|
@ -1,5 +1,6 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2009 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology Corporation.
|
||||
# All rights reserved.
|
||||
@ -11,6 +12,27 @@
|
||||
#
|
||||
# This is the US/English help file for Open MPI's FTB notifier support
|
||||
#
|
||||
[ftb connect failed]
|
||||
Open MPI's FTB notifier component failed to connect to the FTB server.
|
||||
Check if the FTB bootstrap server is running or not. For further help,
|
||||
refer to the FTB documentation (Section 4.0: RUNNING FTB).
|
||||
|
||||
Reason: %s (errno: %d)
|
||||
#
|
||||
[declare events failed]
|
||||
The Open MPI FTB notifier component failed to declare publishable events
|
||||
to the FTB.
|
||||
|
||||
Reason: %s (errno: %d)
|
||||
#
|
||||
[publish failed]
|
||||
Sorry, Open MPI's FTB component failed to publish the following event to
|
||||
the FTB.
|
||||
|
||||
Reason: %s (errno: %d)
|
||||
Event info: [%s] %s
|
||||
Event properties: %s (errno: %d)
|
||||
#
|
||||
[invalid subscription style]
|
||||
Error: the Open MPI FTB component tried to register with an invalid
|
||||
FTB client subscription style.
|
||||
@ -21,7 +43,3 @@ FTB client subscription style.
|
||||
Error: the Open MPI FTB notifier component tried to register with an
|
||||
invalid value in the FTB client information.
|
||||
#
|
||||
[unable to connect]
|
||||
Open MPI's FTB notifier component was unable to establish a connection
|
||||
with the FTB backplane.
|
||||
#
|
||||
|
@ -10,6 +10,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
|
@ -10,6 +10,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -21,7 +22,7 @@
|
||||
* This component proxies notification events to the Fault Tolerant
|
||||
* Backplane (See http://www.mcs.anl.gov/research/cifts/).
|
||||
* The ORTE notifier severity is translated to the corresponding
|
||||
* FTB severity before the event is published to the FTB.
|
||||
* FTB severity before the event is published to the FTB.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -48,7 +49,7 @@ orte_notifier_ftb_component_t mca_notifier_ftb_component = {
|
||||
{
|
||||
{
|
||||
ORTE_NOTIFIER_BASE_VERSION_1_0_0,
|
||||
|
||||
|
||||
"ftb", /* MCA module name */
|
||||
ORTE_MAJOR_VERSION, /* MCA module major version */
|
||||
ORTE_MINOR_VERSION, /* MCA module minor version */
|
||||
@ -87,7 +88,8 @@ static int orte_notifier_ftb_close(void)
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority)
|
||||
static int orte_notifier_ftb_component_query(mca_base_module_t **module,
|
||||
int *priority)
|
||||
{
|
||||
int ret;
|
||||
*priority = 0;
|
||||
@ -100,8 +102,7 @@ static int orte_notifier_ftb_component_query(mca_base_module_t **module, int *pr
|
||||
/* We represent each client with a client name of the form
|
||||
openmpi/<hostname>/<PID> as a unique identifier in the
|
||||
FTB client namespace */
|
||||
sprintf(ftb_client_info.client_name, "openmpi/%s/%u",
|
||||
orte_process_info.nodename, orte_process_info.pid);
|
||||
sprintf(ftb_client_info.client_name, "ompi%u", orte_process_info.pid);
|
||||
|
||||
sprintf(ftb_client_info.client_jobid, "%u", ORTE_PROC_MY_NAME->jobid);
|
||||
|
||||
@ -117,18 +118,20 @@ static int orte_notifier_ftb_component_query(mca_base_module_t **module, int *pr
|
||||
orte_show_help("help-orte-notifier-ftb.txt",
|
||||
"invalid subscription style",
|
||||
true, ftb_client_info.client_subscription_style);
|
||||
break;
|
||||
|
||||
case FTB_ERR_INVALID_VALUE:
|
||||
orte_show_help("help-orte-notifier-ftb.txt",
|
||||
"invalid value",
|
||||
true);
|
||||
break;
|
||||
|
||||
default:
|
||||
orte_show_help("help-orte-notifier-ftb.txt",
|
||||
"unable to connect",
|
||||
"ftb connect failed",
|
||||
true);
|
||||
}
|
||||
|
||||
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
@ -154,7 +157,7 @@ static int orte_notifier_ftb_register(void)
|
||||
mca_base_param_reg_int(&mca_notifier_ftb_component.super.base_version,
|
||||
"priority",
|
||||
"Priority of this component",
|
||||
false, false,
|
||||
false, false,
|
||||
mca_notifier_ftb_component.priority,
|
||||
&mca_notifier_ftb_component.priority);
|
||||
|
||||
|
@ -10,6 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -20,6 +21,7 @@
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
@ -32,6 +34,7 @@
|
||||
|
||||
#include "orte/util/error_strings.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
#include "notifier_ftb.h"
|
||||
|
||||
@ -39,17 +42,22 @@
|
||||
/* Static API's */
|
||||
static int init(void);
|
||||
static void finalize(void);
|
||||
static void mylog(int severity, int errcode, const char *msg, ...);
|
||||
static void myhelplog(int severity, int errcode, const char *filename, const char *topic, ...);
|
||||
static void mypeerlog(int severity, int errcode, orte_process_name_t *peer_proc, const char *msg, ...);
|
||||
static void ftb_log(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *msg, va_list ap);
|
||||
static void ftb_help(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *filename, const char *topic, va_list ap);
|
||||
static void ftb_peer(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg,
|
||||
va_list ap);
|
||||
|
||||
/* Module def */
|
||||
orte_notifier_base_module_t orte_notifier_ftb_module = {
|
||||
init,
|
||||
finalize,
|
||||
mylog,
|
||||
myhelplog,
|
||||
mypeerlog
|
||||
ftb_log,
|
||||
ftb_help,
|
||||
ftb_peer,
|
||||
NULL
|
||||
};
|
||||
|
||||
/* FTB client information */
|
||||
@ -71,7 +79,7 @@ static int orte_err2ftb(int errnum)
|
||||
{
|
||||
int retval;
|
||||
|
||||
switch (errnum) {
|
||||
switch (OPAL_SOS_GET_ERROR_CODE(errnum)) {
|
||||
case ORTE_ERR_OUT_OF_RESOURCE:
|
||||
case ORTE_ERR_TEMP_OUT_OF_RESOURCE:
|
||||
retval = 1;
|
||||
@ -102,8 +110,10 @@ static int init(void) {
|
||||
|
||||
ret = FTB_Declare_publishable_events(ftb_client_handle, 0, ftb_event_info, ftb_event_info_count);
|
||||
if (FTB_SUCCESS != ret) {
|
||||
opal_output(orte_notifier_base_output,
|
||||
"notifier:ftb:init FTB_Declare_publishable_events failed ret=%d\n", ret);
|
||||
orte_show_help("help-orte-notifier-ftb.txt",
|
||||
"declare events failed",
|
||||
true, "FTB_Declare_publishable_events() failed", ret);
|
||||
|
||||
FTB_Disconnect(ftb_client_handle);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
@ -115,7 +125,7 @@ static void finalize(void) {
|
||||
FTB_Disconnect(ftb_client_handle);
|
||||
}
|
||||
|
||||
static void convert2ftb(int errcode, char *payload)
|
||||
static void send_to_ftb(int errcode, char *payload)
|
||||
{
|
||||
int ret, event_id;
|
||||
FTB_event_handle_t ehandle;
|
||||
@ -126,73 +136,48 @@ static void convert2ftb(int errcode, char *payload)
|
||||
event_id = orte_err2ftb(errcode);
|
||||
ret = FTB_Publish(ftb_client_handle, ftb_event_info[event_id].event_name, &eprop, &ehandle);
|
||||
if (FTB_SUCCESS != ret) {
|
||||
opal_output(orte_notifier_base_output,
|
||||
"notifier:ftb:convert2ftb(%d,'%s') FTB_Publish failed ret=%d\n", errcode, eprop.event_payload, ret);
|
||||
orte_show_help("help-orte-notifier-ftb.txt",
|
||||
"publish failed",
|
||||
true, "FTB_Publish() failed", ret,
|
||||
ftb_event_info[event_id].severity,
|
||||
ftb_event_info[event_id].event_name,
|
||||
eprop.event_payload, errcode);
|
||||
}
|
||||
}
|
||||
|
||||
static void mylog(int severity, int errcode, const char *msg, ...)
|
||||
static void ftb_log(orte_notifier_base_severity_t severity, int errcode, const char *msg,
|
||||
va_list ap)
|
||||
{
|
||||
va_list arglist;
|
||||
char payload[FTB_MAX_PAYLOAD_DATA + 1];
|
||||
|
||||
/* is the severity value above the threshold - I know
|
||||
* this seems backward, but lower severity values are
|
||||
* considered "more severe"
|
||||
*/
|
||||
if (severity > orte_notifier_threshold_severity) {
|
||||
return;
|
||||
}
|
||||
char *payload;
|
||||
|
||||
/* If there was a message, output it */
|
||||
va_start(arglist, msg);
|
||||
vsnprintf(payload, FTB_MAX_PAYLOAD_DATA, msg, arglist);
|
||||
payload[FTB_MAX_PAYLOAD_DATA] = '\0'; /* not needed? */
|
||||
va_end(arglist);
|
||||
|
||||
convert2ftb(errcode, payload);
|
||||
vasprintf(&payload, msg, ap);
|
||||
if (NULL != payload) {
|
||||
send_to_ftb(errcode, payload);
|
||||
free(payload);
|
||||
}
|
||||
}
|
||||
|
||||
static void myhelplog(int severity, int errcode, const char *filename, const char *topic, ...)
|
||||
static void ftb_help(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *filename, const char *topic, va_list ap)
|
||||
{
|
||||
va_list arglist;
|
||||
char *output;
|
||||
char *output = opal_show_help_vstring(filename, topic, false, ap);
|
||||
|
||||
/* is the severity value above the threshold - I know
|
||||
* this seems backward, but lower severity values are
|
||||
* considered "more severe"
|
||||
*/
|
||||
if (severity > orte_notifier_threshold_severity) {
|
||||
return;
|
||||
}
|
||||
|
||||
va_start(arglist, topic);
|
||||
output = opal_show_help_vstring(filename, topic, false, arglist);
|
||||
va_end(arglist);
|
||||
|
||||
convert2ftb(errcode, output);
|
||||
|
||||
if (NULL != output) {
|
||||
send_to_ftb(errcode, output);
|
||||
free(output);
|
||||
}
|
||||
}
|
||||
|
||||
static void mypeerlog(int severity, int errcode, orte_process_name_t *peer_proc, const char *msg, ...)
|
||||
static void ftb_peer(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg,
|
||||
va_list ap)
|
||||
{
|
||||
va_list arglist;
|
||||
char payload[FTB_MAX_PAYLOAD_DATA + 1];
|
||||
char *peer_host = NULL;
|
||||
char *pos = payload;
|
||||
int len, space = FTB_MAX_PAYLOAD_DATA;
|
||||
|
||||
/* is the severity value above the threshold - I know
|
||||
* this seems backward, but lower severity values are
|
||||
* considered "more severe"
|
||||
*/
|
||||
if (severity > orte_notifier_threshold_severity) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (peer_proc) {
|
||||
peer_host = orte_ess.proc_get_hostname(peer_proc);
|
||||
}
|
||||
@ -202,11 +187,9 @@ static void mypeerlog(int severity, int errcode, orte_process_name_t *peer_proc,
|
||||
|
||||
/* If there was a message, and space left, output it */
|
||||
if (0 < space) {
|
||||
va_start(arglist, msg);
|
||||
vsnprintf(pos, space, msg, arglist);
|
||||
va_end(arglist);
|
||||
vsnprintf(pos, space, msg, ap);
|
||||
}
|
||||
|
||||
payload[FTB_MAX_PAYLOAD_DATA] = '\0'; /* not needed? */
|
||||
convert2ftb(errcode, payload);
|
||||
payload[FTB_MAX_PAYLOAD_DATA] = '\0';
|
||||
send_to_ftb(errcode, payload);
|
||||
}
|
||||
|
@ -9,6 +9,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All Rights Reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -33,14 +34,18 @@
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_STDARG_H
|
||||
#include <stdarg.h>
|
||||
#endif
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/util/opal_sos.h"
|
||||
|
||||
#include "orte/constants.h"
|
||||
#include "orte/types.h"
|
||||
|
||||
#ifdef HAVE_SYSLOG_H
|
||||
#include <syslog.h>
|
||||
#endif /* HAVE_SYSLOG_H */
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "notifier_event_types.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
@ -51,13 +56,17 @@ BEGIN_C_DECLS
|
||||
*/
|
||||
#define ORTE_NOTIFIER_MAX_BUF 512
|
||||
|
||||
/* define severities - this will eventually be replaced by OPAL_SOS
|
||||
priorities */
|
||||
enum {
|
||||
ORTE_NOTIFIER_INFRA = LOG_CRIT,
|
||||
ORTE_NOTIFIER_WARNING = LOG_WARNING,
|
||||
ORTE_NOTIFIER_NOTICE = LOG_NOTICE
|
||||
};
|
||||
/* Severities, based on OPAL SOS */
|
||||
typedef enum {
|
||||
ORTE_NOTIFIER_EMERG = OPAL_SOS_SEVERITY_EMERG,
|
||||
ORTE_NOTIFIER_ALERT = OPAL_SOS_SEVERITY_ALERT,
|
||||
ORTE_NOTIFIER_CRIT = OPAL_SOS_SEVERITY_CRIT,
|
||||
ORTE_NOTIFIER_ERROR = OPAL_SOS_SEVERITY_ERROR,
|
||||
ORTE_NOTIFIER_WARN = OPAL_SOS_SEVERITY_WARN,
|
||||
ORTE_NOTIFIER_NOTICE = OPAL_SOS_SEVERITY_NOTICE,
|
||||
ORTE_NOTIFIER_INFO = OPAL_SOS_SEVERITY_INFO,
|
||||
ORTE_NOTIFIER_DEBUG = OPAL_SOS_SEVERITY_DEBUG
|
||||
} orte_notifier_base_severity_t;
|
||||
|
||||
/*
|
||||
* Component functions - all MUST be provided!
|
||||
@ -70,22 +79,25 @@ typedef int (*orte_notifier_base_module_init_fn_t)(void);
|
||||
typedef void (*orte_notifier_base_module_finalize_fn_t)(void);
|
||||
|
||||
/* Log a failure message */
|
||||
typedef void (*orte_notifier_base_module_log_fn_t)(int severity, int errcode, const char *msg, ...)
|
||||
typedef void (*orte_notifier_base_module_log_fn_t)(orte_notifier_base_severity_t severity, int errcode, const char *msg, va_list ap)
|
||||
# if OPAL_HAVE_ATTRIBUTE_FORMAT_FUNCPTR
|
||||
__opal_attribute_format__(__printf__, 3, 4)
|
||||
__opal_attribute_format__(__printf__, 3, 0)
|
||||
# endif
|
||||
;
|
||||
|
||||
/* Log a failure that is based upon a show_help message */
|
||||
typedef void (*orte_notifier_base_module_log_show_help_fn_t)(int severity, int errcode, const char *file, const char *topic, ...);
|
||||
typedef void (*orte_notifier_base_module_log_show_help_fn_t)(orte_notifier_base_severity_t severity, int errcode, const char *file, const char *topic, va_list ap);
|
||||
|
||||
/* Log a failure related to a peer */
|
||||
typedef void (*orte_notifier_base_module_log_peer_fn_t)(int severity, int errcode, orte_process_name_t *peer_proc, const char *msg, ...)
|
||||
typedef void (*orte_notifier_base_module_log_peer_fn_t)(orte_notifier_base_severity_t severity, int errcode, orte_process_name_t *peer_proc, const char *msg, va_list ap)
|
||||
# if OPAL_HAVE_ATTRIBUTE_FORMAT_FUNCPTR
|
||||
__opal_attribute_format__(__printf__, 4, 5)
|
||||
__opal_attribute_format__(__printf__, 4, 0)
|
||||
# endif
|
||||
;
|
||||
|
||||
/* Log an unusual event message */
|
||||
typedef void (*orte_notifier_base_module_log_event_fn_t)(const char *msg);
|
||||
|
||||
/*
|
||||
* Ver 1.0
|
||||
*/
|
||||
@ -95,11 +107,37 @@ struct orte_notifier_base_module_1_0_0_t {
|
||||
orte_notifier_base_module_log_fn_t log;
|
||||
orte_notifier_base_module_log_show_help_fn_t help;
|
||||
orte_notifier_base_module_log_peer_fn_t peer;
|
||||
orte_notifier_base_module_log_event_fn_t log_event;
|
||||
};
|
||||
|
||||
typedef struct orte_notifier_base_module_1_0_0_t orte_notifier_base_module_1_0_0_t;
|
||||
typedef orte_notifier_base_module_1_0_0_t orte_notifier_base_module_t;
|
||||
|
||||
/*
|
||||
* API functions
|
||||
*/
|
||||
/* Log a failure message */
|
||||
typedef void (*orte_notifier_base_API_log_fn_t)(orte_notifier_base_severity_t severity, int errcode, const char *msg, ...);
|
||||
|
||||
/* Log a failure that is based upon a show_help message */
|
||||
typedef void (*orte_notifier_base_API_log_show_help_fn_t)(orte_notifier_base_severity_t severity, int errcode, const char *file, const char *topic, ...);
|
||||
|
||||
/* Log a failure related to a peer */
|
||||
typedef void (*orte_notifier_base_API_log_peer_fn_t)(orte_notifier_base_severity_t severity, int errcode, orte_process_name_t *peer_proc, const char *msg, ...);
|
||||
|
||||
/*
|
||||
* Define a struct to hold the API functions that users will call
|
||||
*/
|
||||
struct orte_notifier_API_module_1_0_0_t {
|
||||
orte_notifier_base_API_log_fn_t log;
|
||||
orte_notifier_base_API_log_show_help_fn_t show_help;
|
||||
orte_notifier_base_API_log_peer_fn_t log_peer;
|
||||
};
|
||||
typedef struct orte_notifier_API_module_1_0_0_t orte_notifier_API_module_1_0_0_t;
|
||||
typedef orte_notifier_API_module_1_0_0_t orte_notifier_API_module_t;
|
||||
|
||||
ORTE_DECLSPEC extern orte_notifier_API_module_t orte_notifier;
|
||||
|
||||
/*
|
||||
* the standard component data structure
|
||||
*/
|
||||
@ -111,7 +149,6 @@ typedef struct orte_notifier_base_component_1_0_0_t orte_notifier_base_component
|
||||
typedef orte_notifier_base_component_1_0_0_t orte_notifier_base_component_t;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Macro for use in components that are of type notifier v1.0.0
|
||||
*/
|
||||
@ -121,9 +158,21 @@ typedef orte_notifier_base_component_1_0_0_t orte_notifier_base_component_t;
|
||||
/* notifier v1.0 */ \
|
||||
"notifier", 1, 0, 0
|
||||
|
||||
/* Global structure for accessing notifier functions
|
||||
/*
|
||||
* To manage unusual events notifications
|
||||
* Set to noop if not wanted
|
||||
*/
|
||||
ORTE_DECLSPEC extern orte_notifier_base_module_t orte_notifier; /* holds selected module's function pointers */
|
||||
|
||||
#if ORTE_WANT_NOTIFIER_LOG_EVENT
|
||||
|
||||
#include "notifier_event_calls.h"
|
||||
|
||||
#else /* ORTE_WANT_NOTIFIER_LOG_EVENT */
|
||||
|
||||
#define ORTE_NOTIFIER_DEFINE_EVENT(i, m)
|
||||
#define ORTE_NOTIFIER_LOG_EVENT(i, c, t) do {} while (0)
|
||||
|
||||
#endif /* ORTE_WANT_NOTIFIER_LOG_EVENT */
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
|
170
orte/mca/notifier/notifier_event_calls.h
Обычный файл
170
orte/mca/notifier/notifier_event_calls.h
Обычный файл
@ -0,0 +1,170 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Bull SAS. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef ORTE_NOTIFIER_EVENTS_CALLS_H
|
||||
#define ORTE_NOTIFIER_EVENTS_CALLS_H
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_STDIO_H
|
||||
#include <stdio.h>
|
||||
#endif /* HAVE_STDIO_H */
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif /* HAVE_SYS_TIME_H */
|
||||
|
||||
#include "opal/class/opal_object.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
|
||||
|
||||
#define ORTE_NOTIFIER_LOG_0 0 /* Initial log format needed (no delay) */
|
||||
#define ORTE_NOTIFIER_LOG_1 1 /* Intermediate log format needed (delay) */
|
||||
#define ORTE_NOTIFIER_LOG_2 2 /* Final log format needed (at finalize) */
|
||||
|
||||
ORTE_DECLSPEC bool notifier_log_event_enabled(void);
|
||||
ORTE_DECLSPEC void notifier_event_store(orte_notifier_event_t *);
|
||||
ORTE_DECLSPEC void notifier_trace_event(int, int, int32_t, time_t, time_t,
|
||||
const char *);
|
||||
|
||||
|
||||
/*
|
||||
* Do not use this function directly: use ORTE_NOTIFIER_DEFINE_EVENT() instead
|
||||
*/
|
||||
static inline orte_notifier_event_t *notifier_alloc_event(int ev_id,
|
||||
const char *msg)
|
||||
{
|
||||
orte_notifier_event_t *ev;
|
||||
|
||||
ev = OBJ_NEW(orte_notifier_event_t);
|
||||
if (NULL == ev) {
|
||||
return NULL;
|
||||
}
|
||||
asprintf(&ev->ev_msg, msg);
|
||||
if (NULL == ev->ev_msg) {
|
||||
OBJ_RELEASE(ev);
|
||||
return NULL;
|
||||
}
|
||||
ev->ev_id = ev_id;
|
||||
/*
|
||||
* Store the allocated event into a list to be able to manage the
|
||||
* unconditional event tracing and freeing during finalize.
|
||||
*/
|
||||
notifier_event_store(ev);
|
||||
return ev;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Account for one more occurrence of an event and trace it when needed.
 *
 * The occurrence counter is incremented first; nothing else happens while
 * it is not above cnt_thresh.  Past the threshold, the very first trace is
 * emitted immediately (ORTE_NOTIFIER_LOG_0); later traces
 * (ORTE_NOTIFIER_LOG_1) are emitted only once more than time_thresh has
 * elapsed since the previous trace.  Every emitted trace resets the
 * counter and records the trace time.
 */
static inline void notifier_count_and_log_event(orte_notifier_event_t *ev,
                                                int ev_id,
                                                int cnt_thresh,
                                                int time_thresh)
{
    int32_t occurrences;
    time_t current, elapsed;

    opal_atomic_add_32(&ev->ev_cnt, 1);
    if (ev->ev_cnt <= cnt_thresh) {
        return;
    }

    occurrences = ev->ev_cnt;
    current = time(NULL);

    if (!ev->ev_already_traced) {
        ev->ev_already_traced = 1;
        ev->ev_cnt = 0;
        ev->ev_time_trc = current;
        /* The delay is meaningless for the very first trace: pass "now" */
        notifier_trace_event(ORTE_NOTIFIER_LOG_0, ev_id, occurrences,
                             current, current, ev->ev_msg);
        return;
    }

    /* Already traced once: stay quiet until the time threshold is passed */
    if (current <= ev->ev_time_trc + time_thresh) {
        return;
    }

    elapsed = current - ev->ev_time_trc;
    ev->ev_cnt = 0;
    ev->ev_time_trc = current;
    notifier_trace_event(ORTE_NOTIFIER_LOG_1, ev_id, occurrences,
                         current, elapsed, ev->ev_msg);
}
|
||||
|
||||
|
||||
#define notifier_event_fn_prefix(i) notifier_log_event_ ## i
|
||||
|
||||
/*
|
||||
* This macro should be called each time a new event will be traced.
|
||||
* It expands to a static inline function suffixed by the event number.
|
||||
*
* i is the event number and m the message attached to the event.
* The generated function takes the counter and time thresholds
* (c_thr, t_thr).  It returns without doing anything when
* notifier_log_event_enabled() is false or when the event descriptor
* cannot be allocated.  The descriptor is kept in a function-local
* static pointer and allocated on the first call that finds it NULL.
* Since the function is static inline, every translation unit that
* contains the definition has its own copy of that pointer.
*/
|
||||
#define ORTE_NOTIFIER_DEFINE_EVENT(i, m) \
|
||||
static inline void notifier_event_fn_prefix(i) (int c_thr, int t_thr) \
|
||||
{ \
|
||||
static orte_notifier_event_t *prefix_ ## i = NULL; \
|
||||
if (!notifier_log_event_enabled()) { \
|
||||
return; \
|
||||
} \
|
||||
if (NULL == prefix_ ## i) { \
|
||||
prefix_ ## i = notifier_alloc_event(i, m); \
|
||||
if (NULL == prefix_ ## i) { \
|
||||
return; \
|
||||
} \
|
||||
} \
|
||||
notifier_count_and_log_event(prefix_ ## i, i, c_thr, t_thr); \
|
||||
}
|
||||
|
||||
/*
|
||||
* This is the log interface that should be called whenever an unusual event
|
||||
* should be warned about.
|
||||
* The event should have been defined before, using
|
||||
* ORTE_NOTIFIER_DEFINE_EVENT():
|
||||
*
|
||||
* (1) Event definition:
|
||||
*
|
||||
* Typically in a header file call the following:
|
||||
* ORTE_NOTIFIER_DEFINE_EVENT(0, "message 0")
|
||||
* This macro expands to
|
||||
* static inline void notifier_log_event_0(int c_thr, int t_thr)
|
||||
* {
|
||||
* static orte_notifier_event_t *prefix_0 = NULL;
|
||||
* if (!notifier_log_event_enabled()) {
|
||||
* return;
|
||||
* }
|
||||
* if (NULL == prefix_0) {
|
||||
* prefix_0 = notifier_alloc_event(0, "message 0");
|
||||
* if (NULL == prefix_0) {
|
||||
* return;
|
||||
* }
|
||||
* }
|
||||
* notifier_count_and_log_event(prefix_0, 0, c_thr, t_thr);
|
||||
* }
|
||||
*
|
||||
* (2) Event accounting and tracing:
|
||||
*
|
||||
* Whenever you want to trace the unusual event whose id is 0, just call:
|
||||
* ORTE_NOTIFIER_LOG_EVENT(0, 100, 1);
|
||||
* 100 and 1 are respectively the counter and time thresholds.
|
||||
* This actually expands to
|
||||
* notifier_log_event_0(100, 1);
|
||||
*/
|
||||
#define ORTE_NOTIFIER_LOG_EVENT(i, c, t) notifier_event_fn_prefix(i) (c, t)
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* ORTE_NOTIFIER_EVENTS_CALLS_H */
|
49
orte/mca/notifier/notifier_event_types.h
Обычный файл
49
orte/mca/notifier/notifier_event_types.h
Обычный файл
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Bull SAS. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef ORTE_NOTIFIER_BASE_EVENTS_H
|
||||
#define ORTE_NOTIFIER_BASE_EVENTS_H
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif /* HAVE_SYS_TIME_H */
|
||||
|
||||
#include "opal/sys/atomic.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/class/opal_object.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
|
||||
/*
 * Bookkeeping record for one "unusual event".
 * Instances are created by notifier_alloc_event() and updated by
 * notifier_count_and_log_event().
 */
typedef struct {
|
||||
opal_list_item_t super; /* base list item; events are kept in a list */
|
||||
volatile int32_t ev_cnt; /* occurrences counted so far; incremented atomically, reset to 0 on each trace */
|
||||
int ev_id; /* event identifier given at allocation */
|
||||
int ev_already_traced; /* set to 1 once the first trace has been emitted */
|
||||
time_t ev_time_trc; /* time() of the most recent trace */
|
||||
char *ev_msg; /* message text, allocated with asprintf() at allocation */
|
||||
} orte_notifier_event_t;
|
||||
ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_notifier_event_t);
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* ORTE_NOTIFIER_BASE_EVENTS_H */
|
@ -31,7 +31,6 @@
|
||||
|
||||
#include "notifier_smtp.h"
|
||||
|
||||
static int smtp_open(void);
|
||||
static int smtp_component_query(mca_base_module_t **module, int *priority);
|
||||
static int smtp_close(void);
|
||||
static int smtp_register(void);
|
||||
@ -50,7 +49,7 @@ orte_notifier_smtp_component_t mca_notifier_smtp_component = {
|
||||
ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION,
|
||||
|
||||
smtp_open,
|
||||
NULL,
|
||||
smtp_close,
|
||||
smtp_component_query,
|
||||
smtp_register,
|
||||
@ -160,12 +159,6 @@ static int smtp_register(void)
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int smtp_open(void)
|
||||
{
|
||||
/* Nothing to do */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int smtp_close(void)
|
||||
{
|
||||
if (NULL != mca_notifier_smtp_component.server) {
|
||||
|
@ -51,11 +51,14 @@
|
||||
|
||||
|
||||
/* Static API's */
|
||||
static void mylog(int severity, int errcode, const char *msg, ...);
|
||||
static void myhelplog(int severity, int errcode, const char *filename,
|
||||
const char *topic, ...);
|
||||
static void mypeerlog(int severity, int errcode, orte_process_name_t *peer_proc,
|
||||
const char *msg, ...);
|
||||
static void mylog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *msg, va_list ap);
|
||||
static void myhelplog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *filename,
|
||||
const char *topic, va_list ap);
|
||||
static void mypeerlog(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc,
|
||||
const char *msg, va_list ap);
|
||||
|
||||
/* Module */
|
||||
orte_notifier_base_module_t orte_notifier_smtp_module = {
|
||||
@ -63,7 +66,8 @@ orte_notifier_base_module_t orte_notifier_smtp_module = {
|
||||
NULL,
|
||||
mylog,
|
||||
myhelplog,
|
||||
mypeerlog
|
||||
mypeerlog,
|
||||
NULL
|
||||
};
|
||||
|
||||
typedef enum {
|
||||
@ -311,23 +315,13 @@ static int send_email(char *msg)
|
||||
return err;
|
||||
}
|
||||
|
||||
static void mylog(int severity, int errcode, const char *msg, ...)
|
||||
static void mylog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *msg, va_list ap)
|
||||
{
|
||||
char *output;
|
||||
va_list arglist;
|
||||
|
||||
/* is the severity value above the threshold - I know
|
||||
* this seems backward, but lower severity values are
|
||||
* considered "more severe"
|
||||
*/
|
||||
if (severity > orte_notifier_threshold_severity) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* If there was a message, output it */
|
||||
va_start(arglist, msg);
|
||||
vasprintf(&output, msg, arglist);
|
||||
va_end(arglist);
|
||||
vasprintf(&output, msg, ap);
|
||||
|
||||
if (NULL != output) {
|
||||
send_email(output);
|
||||
@ -335,23 +329,11 @@ static void mylog(int severity, int errcode, const char *msg, ...)
|
||||
}
|
||||
}
|
||||
|
||||
static void myhelplog(int severity, int errcode, const char *filename,
|
||||
const char *topic, ...)
|
||||
static void myhelplog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *filename,
|
||||
const char *topic, va_list ap)
|
||||
{
|
||||
va_list arglist;
|
||||
char *output;
|
||||
|
||||
/* is the severity value above the threshold - I know
|
||||
* this seems backward, but lower severity values are
|
||||
* considered "more severe"
|
||||
*/
|
||||
if (severity > orte_notifier_threshold_severity) {
|
||||
return;
|
||||
}
|
||||
|
||||
va_start(arglist, topic);
|
||||
output = opal_show_help_vstring(filename, topic, false, arglist);
|
||||
va_end(arglist);
|
||||
char *output = opal_show_help_vstring(filename, topic, false, ap);
|
||||
|
||||
if (NULL != output) {
|
||||
send_email(output);
|
||||
@ -359,55 +341,14 @@ static void myhelplog(int severity, int errcode, const char *filename,
|
||||
}
|
||||
}
|
||||
|
||||
static void mypeerlog(int severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg, ...)
|
||||
static void mypeerlog(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg,
|
||||
va_list ap)
|
||||
{
|
||||
va_list arglist;
|
||||
char buf[ORTE_NOTIFIER_MAX_BUF + 1];
|
||||
char *peer_host = NULL, *peer_name = NULL;
|
||||
char *pos = buf;
|
||||
char *errstr = (char*)orte_err2str(errcode);
|
||||
int len, space = ORTE_NOTIFIER_MAX_BUF;
|
||||
char *buf = orte_notifier_base_peer_log(errcode, peer_proc, msg, ap);
|
||||
|
||||
/* is the severity value above the threshold - I know
|
||||
* this seems backward, but lower severity values are
|
||||
* considered "more severe"
|
||||
*/
|
||||
if (severity > orte_notifier_threshold_severity) {
|
||||
return;
|
||||
if (NULL != buf) {
|
||||
send_email(buf);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
if (peer_proc) {
|
||||
peer_host = orte_ess.proc_get_hostname(peer_proc);
|
||||
peer_name = ORTE_NAME_PRINT(peer_proc);
|
||||
}
|
||||
|
||||
len = snprintf(pos, space,
|
||||
"While communicating to proc %s on node %s,"
|
||||
" proc %s on node %s encountered an error ",
|
||||
peer_name ? peer_name : "UNKNOWN",
|
||||
peer_host ? peer_host : "UNKNOWN",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_process_info.nodename);
|
||||
space -= len;
|
||||
pos += len;
|
||||
|
||||
if (0 < space) {
|
||||
if (errstr) {
|
||||
len = snprintf(pos, space, "'%s':", errstr);
|
||||
} else {
|
||||
len = snprintf(pos, space, "(%d):", errcode);
|
||||
}
|
||||
space -= len;
|
||||
pos += len;
|
||||
}
|
||||
|
||||
if (0 < space) {
|
||||
va_start(arglist, msg);
|
||||
vsnprintf(pos, space, msg, arglist);
|
||||
va_end(arglist);
|
||||
}
|
||||
|
||||
buf[ORTE_NOTIFIER_MAX_BUF] = '\0';
|
||||
send_email(buf);
|
||||
}
|
||||
|
@ -10,6 +10,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -26,14 +27,6 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* Component open / close
|
||||
*/
|
||||
int orte_notifier_syslog_open(void);
|
||||
int orte_notifier_syslog_close(void);
|
||||
int orte_notifier_syslog_component_query(mca_base_module_t **module, int *priority);
|
||||
|
||||
|
||||
/*
|
||||
* Notifier interfaces
|
||||
*/
|
||||
|
@ -10,6 +10,7 @@
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -28,6 +29,10 @@
|
||||
|
||||
#include "notifier_syslog.h"
|
||||
|
||||
|
||||
static int orte_notifier_syslog_component_query(mca_base_module_t **module,
|
||||
int *priority);
|
||||
|
||||
/*
|
||||
* Struct of function pointers that need to be initialized
|
||||
*/
|
||||
@ -39,8 +44,8 @@ orte_notifier_base_component_t mca_notifier_syslog_component = {
|
||||
ORTE_MAJOR_VERSION, /* MCA module major version */
|
||||
ORTE_MINOR_VERSION, /* MCA module minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA module release version */
|
||||
orte_notifier_syslog_open, /* module open */
|
||||
orte_notifier_syslog_close, /* module close */
|
||||
NULL,
|
||||
NULL,
|
||||
orte_notifier_syslog_component_query /* module query */
|
||||
},
|
||||
{
|
||||
@ -49,20 +54,9 @@ orte_notifier_base_component_t mca_notifier_syslog_component = {
|
||||
}
|
||||
};
|
||||
|
||||
/* Open the component */
|
||||
int orte_notifier_syslog_open(void)
|
||||
static int orte_notifier_syslog_component_query(mca_base_module_t **module,
|
||||
int *priority)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
int orte_notifier_syslog_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
int orte_notifier_syslog_component_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
/* we are a lower-level default, so set a low priority so we can be overridden */
|
||||
*priority = 1;
|
||||
*module = (mca_base_module_t *)&orte_notifier_syslog_module;
|
||||
return ORTE_SUCCESS;
|
||||
|
@ -33,10 +33,9 @@
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/util/error_strings.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
#include "notifier_syslog.h"
|
||||
|
||||
@ -44,9 +43,14 @@
|
||||
/* Static API's */
|
||||
static int init(void);
|
||||
static void finalize(void);
|
||||
static void mylog(int severity, int errcode, const char *msg, ...);
|
||||
static void myhelplog(int severity, int errcode, const char *filename, const char *topic, ...);
|
||||
static void mypeerlog(int severity, int errcode, orte_process_name_t *peer_proc, const char *msg, ...);
|
||||
static void mylog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *msg, va_list ap);
|
||||
static void myhelplog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *filename, const char *topic, va_list ap);
|
||||
static void mypeerlog(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg,
|
||||
va_list ap);
|
||||
static void myeventlog(const char *msg);
|
||||
|
||||
/* Module def */
|
||||
orte_notifier_base_module_t orte_notifier_syslog_module = {
|
||||
@ -54,11 +58,13 @@ orte_notifier_base_module_t orte_notifier_syslog_module = {
|
||||
finalize,
|
||||
mylog,
|
||||
myhelplog,
|
||||
mypeerlog
|
||||
mypeerlog,
|
||||
myeventlog
|
||||
};
|
||||
|
||||
|
||||
static int init(void) {
|
||||
static int init(void)
|
||||
{
|
||||
int opts;
|
||||
|
||||
opts = LOG_CONS | LOG_PID | LOG_SYSLOG;
|
||||
@ -67,44 +73,22 @@ static int init(void) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static void finalize(void) {
|
||||
static void finalize(void)
|
||||
{
|
||||
closelog();
|
||||
}
|
||||
|
||||
static void mylog(int severity, int errcode, const char *msg, ...)
|
||||
static void mylog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *msg, va_list ap)
|
||||
{
|
||||
va_list arglist;
|
||||
|
||||
/* is the severity value above the threshold - I know
|
||||
* this seems backward, but lower severity values are
|
||||
* considered "more severe"
|
||||
*/
|
||||
if (severity > orte_notifier_threshold_severity) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* If there was a message, output it */
|
||||
va_start(arglist, msg);
|
||||
vsyslog(severity, msg, arglist);
|
||||
va_end(arglist);
|
||||
vsyslog(severity, msg, ap);
|
||||
}
|
||||
|
||||
static void myhelplog(int severity, int errcode, const char *filename, const char *topic, ...)
|
||||
static void myhelplog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *filename, const char *topic, va_list ap)
|
||||
{
|
||||
va_list arglist;
|
||||
char *output;
|
||||
|
||||
/* is the severity value above the threshold - I know
|
||||
* this seems backward, but lower severity values are
|
||||
* considered "more severe"
|
||||
*/
|
||||
if (severity > orte_notifier_threshold_severity) {
|
||||
return;
|
||||
}
|
||||
|
||||
va_start(arglist, topic);
|
||||
output = opal_show_help_vstring(filename, topic, false, arglist);
|
||||
va_end(arglist);
|
||||
char *output = opal_show_help_vstring(filename, topic, false, ap);
|
||||
|
||||
/* if nothing came back, then nothing to do */
|
||||
if (NULL == output) {
|
||||
@ -116,54 +100,21 @@ static void myhelplog(int severity, int errcode, const char *filename, const cha
|
||||
free(output);
|
||||
}
|
||||
|
||||
static void mypeerlog(int severity, int errcode, orte_process_name_t *peer_proc, const char *msg, ...)
|
||||
static void mypeerlog(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg,
|
||||
va_list ap)
|
||||
{
|
||||
va_list arglist;
|
||||
char buf[ORTE_NOTIFIER_MAX_BUF + 1];
|
||||
char *peer_host = NULL, *peer_name = NULL;
|
||||
char *pos = buf;
|
||||
char *errstr = (char*)orte_err2str(errcode);
|
||||
int len, space = ORTE_NOTIFIER_MAX_BUF;
|
||||
char *buf = orte_notifier_base_peer_log(errcode, peer_proc, msg, ap);
|
||||
|
||||
/* is the severity value above the threshold - I know
|
||||
* this seems backward, but lower severity values are
|
||||
* considered "more severe"
|
||||
*/
|
||||
if (severity > orte_notifier_threshold_severity) {
|
||||
return;
|
||||
if (NULL != buf) {
|
||||
syslog(severity, buf);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
if (peer_proc) {
|
||||
peer_host = orte_ess.proc_get_hostname(peer_proc);
|
||||
peer_name = ORTE_NAME_PRINT(peer_proc);
|
||||
}
|
||||
|
||||
len = snprintf(pos, space,
|
||||
"While communicating to proc %s on node %s,"
|
||||
" proc %s on node %s encountered an error ",
|
||||
peer_name ? peer_name : "UNKNOWN",
|
||||
peer_host ? peer_host : "UNKNOWN",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_process_info.nodename);
|
||||
space -= len;
|
||||
pos += len;
|
||||
|
||||
if (0 < space) {
|
||||
if (errstr) {
|
||||
len = snprintf(pos, space, "'%s':", errstr);
|
||||
} else {
|
||||
len = snprintf(pos, space, "(%d):", errcode);
|
||||
}
|
||||
space -= len;
|
||||
pos += len;
|
||||
}
|
||||
|
||||
if (0 < space) {
|
||||
va_start(arglist, msg);
|
||||
vsnprintf(pos, space, msg, arglist);
|
||||
va_end(arglist);
|
||||
}
|
||||
|
||||
buf[ORTE_NOTIFIER_MAX_BUF] = '\0'; /* not needed? */
|
||||
syslog(severity, buf);
|
||||
}
|
||||
|
||||
static void myeventlog(const char *msg)
|
||||
{
|
||||
/* If there was a message, output it */
|
||||
syslog(LOG_LOCAL0 | LOG_NOTICE, msg);
|
||||
}
|
||||
|
||||
|
@ -35,7 +35,6 @@
|
||||
|
||||
#include "notifier_twitter.h"
|
||||
|
||||
static int twitter_open(void);
|
||||
static int twitter_component_query(mca_base_module_t **module, int *priority);
|
||||
static int twitter_close(void);
|
||||
static int twitter_register(void);
|
||||
@ -54,7 +53,7 @@ orte_notifier_twitter_component_t mca_notifier_twitter_component = {
|
||||
ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION,
|
||||
|
||||
twitter_open,
|
||||
NULL,
|
||||
twitter_close,
|
||||
twitter_component_query,
|
||||
twitter_register,
|
||||
@ -108,12 +107,6 @@ static int twitter_register(void)
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int twitter_open(void)
|
||||
{
|
||||
/* Nothing to do */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int twitter_close(void)
|
||||
{
|
||||
if (NULL != mca_notifier_twitter_component.url) {
|
||||
@ -134,9 +127,6 @@ static int twitter_component_query(mca_base_module_t **module,
|
||||
{
|
||||
char *str;
|
||||
|
||||
*priority = 10;
|
||||
*module = (mca_base_module_t *)&orte_notifier_twitter_module;
|
||||
|
||||
/* If we have no username or password, there's no love */
|
||||
if (NULL == mca_notifier_twitter_component.username ||
|
||||
NULL == mca_notifier_twitter_component.password) {
|
||||
@ -195,5 +185,7 @@ static int twitter_component_query(mca_base_module_t **module,
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
*priority = 10;
|
||||
*module = (mca_base_module_t *)&orte_notifier_twitter_module;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
@ -56,11 +56,14 @@
|
||||
|
||||
|
||||
/* Static API's */
|
||||
static void mylog(int severity, int errcode, const char *msg, ...);
|
||||
static void myhelplog(int severity, int errcode, const char *filename,
|
||||
const char *topic, ...);
|
||||
static void mypeerlog(int severity, int errcode, orte_process_name_t *peer_proc,
|
||||
const char *msg, ...);
|
||||
static void mylog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *msg, va_list ap);
|
||||
static void myhelplog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *filename,
|
||||
const char *topic, va_list ap);
|
||||
static void mypeerlog(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc,
|
||||
const char *msg, va_list ap);
|
||||
|
||||
/* Module */
|
||||
orte_notifier_base_module_t orte_notifier_twitter_module = {
|
||||
@ -68,7 +71,8 @@ orte_notifier_base_module_t orte_notifier_twitter_module = {
|
||||
NULL,
|
||||
mylog,
|
||||
myhelplog,
|
||||
mypeerlog
|
||||
mypeerlog,
|
||||
NULL
|
||||
};
|
||||
|
||||
static char base64_convert(uint8_t i)
|
||||
@ -265,23 +269,13 @@ static void tweet(char *msg)
|
||||
close(fd);
|
||||
}
|
||||
|
||||
static void mylog(int severity, int errcode, const char *msg, ...)
|
||||
static void mylog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *msg, va_list ap)
|
||||
{
|
||||
char *output;
|
||||
va_list arglist;
|
||||
|
||||
/* is the severity value above the threshold - I know
|
||||
* this seems backward, but lower severity values are
|
||||
* considered "more severe"
|
||||
*/
|
||||
if (severity > orte_notifier_threshold_severity) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* If there was a message, output it */
|
||||
va_start(arglist, msg);
|
||||
vasprintf(&output, msg, arglist);
|
||||
va_end(arglist);
|
||||
vasprintf(&output, msg, ap);
|
||||
|
||||
if (NULL != output) {
|
||||
tweet(output);
|
||||
@ -289,23 +283,11 @@ static void mylog(int severity, int errcode, const char *msg, ...)
|
||||
}
|
||||
}
|
||||
|
||||
static void myhelplog(int severity, int errcode, const char *filename,
|
||||
const char *topic, ...)
|
||||
static void myhelplog(orte_notifier_base_severity_t severity, int errcode,
|
||||
const char *filename,
|
||||
const char *topic, va_list ap)
|
||||
{
|
||||
va_list arglist;
|
||||
char *output;
|
||||
|
||||
/* is the severity value above the threshold - I know
|
||||
* this seems backward, but lower severity values are
|
||||
* considered "more severe"
|
||||
*/
|
||||
if (severity > orte_notifier_threshold_severity) {
|
||||
return;
|
||||
}
|
||||
|
||||
va_start(arglist, topic);
|
||||
output = opal_show_help_vstring(filename, topic, false, arglist);
|
||||
va_end(arglist);
|
||||
char *output = opal_show_help_vstring(filename, topic, false, ap);
|
||||
|
||||
if (NULL != output) {
|
||||
tweet(output);
|
||||
@ -313,55 +295,15 @@ static void myhelplog(int severity, int errcode, const char *filename,
|
||||
}
|
||||
}
|
||||
|
||||
static void mypeerlog(int severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg, ...)
|
||||
static void mypeerlog(orte_notifier_base_severity_t severity, int errcode,
|
||||
orte_process_name_t *peer_proc, const char *msg,
|
||||
va_list ap)
|
||||
{
|
||||
va_list arglist;
|
||||
char buf[ORTE_NOTIFIER_MAX_BUF + 1];
|
||||
char *peer_host = NULL, *peer_name = NULL;
|
||||
char *pos = buf;
|
||||
char *errstr = (char*)orte_err2str(errcode);
|
||||
int len, space = ORTE_NOTIFIER_MAX_BUF;
|
||||
char *buf = orte_notifier_base_peer_log(errcode, peer_proc, msg, ap);
|
||||
|
||||
/* is the severity value above the threshold - I know
|
||||
* this seems backward, but lower severity values are
|
||||
* considered "more severe"
|
||||
*/
|
||||
if (severity > orte_notifier_threshold_severity) {
|
||||
return;
|
||||
if (NULL != buf) {
|
||||
tweet(buf);
|
||||
free(buf);
|
||||
}
|
||||
|
||||
if (peer_proc) {
|
||||
peer_host = orte_ess.proc_get_hostname(peer_proc);
|
||||
peer_name = ORTE_NAME_PRINT(peer_proc);
|
||||
}
|
||||
|
||||
len = snprintf(pos, space,
|
||||
"While communicating to proc %s on node %s,"
|
||||
" proc %s on node %s encountered an error ",
|
||||
peer_name ? peer_name : "UNKNOWN",
|
||||
peer_host ? peer_host : "UNKNOWN",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
orte_process_info.nodename);
|
||||
space -= len;
|
||||
pos += len;
|
||||
|
||||
if (0 < space) {
|
||||
if (errstr) {
|
||||
len = snprintf(pos, space, "'%s':", errstr);
|
||||
} else {
|
||||
len = snprintf(pos, space, "(%d):", errcode);
|
||||
}
|
||||
space -= len;
|
||||
pos += len;
|
||||
}
|
||||
|
||||
if (0 < space) {
|
||||
va_start(arglist, msg);
|
||||
vsnprintf(pos, space, msg, arglist);
|
||||
va_end(arglist);
|
||||
}
|
||||
|
||||
buf[ORTE_NOTIFIER_MAX_BUF] = '\0';
|
||||
tweet(buf);
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user