1
1

Updates to the notifier interfaces to support system events

Этот коммит содержится в:
Ralph Castain 2015-03-05 10:39:25 -08:00
родитель f758790d7a
Коммит 7ce0a9931c
4 изменённых файлов: 187 добавлений и 43 удалений

Просмотреть файл

@ -78,6 +78,7 @@ ORTE_DECLSPEC int orte_notifier_base_select(void);
/* base functions */
/* Event callbacks: the ORTE_NOTIFIER_* macros register these with
 * opal_event_set(), passing the orte_notifier_request_t they create
 * as cbdata. */
ORTE_DECLSPEC void orte_notifier_base_log(int sd, short args, void *cbdata);
ORTE_DECLSPEC void orte_notifier_base_event(int sd, short args, void *cbdata);
ORTE_DECLSPEC void orte_notifier_base_report(int sd, short args, void *cbdata);
/* severity to string */

Просмотреть файл

@ -27,6 +27,9 @@
#include "orte/mca/notifier/base/base.h"
static void orte_notifier_base_identify_modules(char ***modules,
orte_notifier_request_t *req);
void orte_notifier_base_log(int sd, short args, void *cbdata)
{
orte_notifier_request_t *req = (orte_notifier_request_t*)cbdata;
@ -46,34 +49,10 @@ void orte_notifier_base_log(int sd, short args, void *cbdata)
return;
}
if (ORTE_NOTIFIER_EMERG == req->severity &&
(NULL != orte_notifier_base.emerg_actions)) {
modules = opal_argv_split(orte_notifier_base.emerg_actions, ',');
} else if (ORTE_NOTIFIER_ALERT == req->severity &&
(NULL != orte_notifier_base.alert_actions)) {
modules = opal_argv_split(orte_notifier_base.alert_actions, ',');
} else if (ORTE_NOTIFIER_CRIT == req->severity &&
(NULL != orte_notifier_base.crit_actions)) {
modules = opal_argv_split(orte_notifier_base.crit_actions, ',');
} else if (ORTE_NOTIFIER_WARN == req->severity &&
(NULL != orte_notifier_base.warn_actions)) {
modules = opal_argv_split(orte_notifier_base.warn_actions, ',');
} else if (ORTE_NOTIFIER_NOTICE == req->severity &&
(NULL != orte_notifier_base.notice_actions)) {
modules = opal_argv_split(orte_notifier_base.notice_actions, ',');
} else if (ORTE_NOTIFIER_INFO == req->severity &&
(NULL != orte_notifier_base.info_actions)) {
modules = opal_argv_split(orte_notifier_base.info_actions, ',');
} else if (ORTE_NOTIFIER_DEBUG == req->severity &&
(NULL != orte_notifier_base.debug_actions)) {
modules = opal_argv_split(orte_notifier_base.debug_actions, ',');
} else if (ORTE_NOTIFIER_ERROR == req->severity &&
(NULL != orte_notifier_base.error_actions)) {
modules = opal_argv_split(orte_notifier_base.error_actions, ',');
} else if (NULL != orte_notifier_base.default_actions) {
modules = opal_argv_split(orte_notifier_base.default_actions, ',');
} else {
/* no modules selected */
orte_notifier_base_identify_modules(&modules, req);
/* no modules selected then nothing to do */
if (NULL == modules) {
return;
}
@ -87,10 +66,48 @@ void orte_notifier_base_log(int sd, short args, void *cbdata)
opal_argv_free(modules);
}
/*
 * Event callback that delivers a system-event request to the notifier
 * modules selected for it.
 *
 * sd, args - supplied by the event library; not used here
 * cbdata   - the orte_notifier_request_t describing the event
 *
 * The request is dropped without action when no notifier modules are
 * active, when its severity is below the configured reporting level, or
 * when no module names can be identified for it.
 */
void orte_notifier_base_event(int sd, short args, void *cbdata)
{
    orte_notifier_request_t *req = (orte_notifier_request_t*)cbdata;
    char **modules = NULL;
    orte_notifier_active_module_t *imod;
    int i;

    /* if no modules are active, then there is nothing to do */
    if (0 == opal_list_get_size(&orte_notifier_base.modules)) {
        return;
    }

    /* check if the severity is >= severity level set for
     * reporting - note that the severity enum value goes up
     * as severity goes down */
    if (orte_notifier_base.severity_level < req->severity) {
        return;
    }

    orte_notifier_base_identify_modules(&modules, req);
    /* no modules selected then nothing to do */
    if (NULL == modules) {
        return;
    }

    for (i = 0; NULL != modules[i]; i++) {
        OPAL_LIST_FOREACH(imod, &orte_notifier_base.modules, orte_notifier_active_module_t) {
            /* guard the entry point that is actually invoked: a module
             * may provide log() without providing event() */
            if (NULL != imod->module->event &&
                0 == strcmp(imod->component->base_version.mca_component_name, modules[i])) {
                imod->module->event(req);
            }
        }
    }
    opal_argv_free(modules);
}
void orte_notifier_base_report(int sd, short args, void *cbdata)
{
orte_notifier_request_t *req = (orte_notifier_request_t*)cbdata;
char *notifies = NULL;
char **modules = NULL;
orte_notifier_active_module_t *imod;
int i;
/* if no modules are active, then there is nothing to do */
if (0 == opal_list_get_size(&orte_notifier_base.modules)) {
@ -98,12 +115,28 @@ void orte_notifier_base_report(int sd, short args, void *cbdata)
}
/* see if the job requested any notifications */
if (!orte_get_attribute(&req->jdata->attributes, ORTE_JOB_NOTIFICATIONS, (void**)notifies, OPAL_STRING)) {
if (!orte_get_attribute(&req->jdata->attributes, ORTE_JOB_NOTIFICATIONS, (void**)modules, OPAL_STRING)) {
return;
}
/* need to process the notification string to get the names of the modules */
return;
if (NULL == modules) {
orte_notifier_base_identify_modules(&modules, req);
/* no modules selected then nothing to do */
if (NULL == modules) {
return;
}
}
for (i=0; NULL != modules[i]; i++) {
OPAL_LIST_FOREACH(imod, &orte_notifier_base.modules, orte_notifier_active_module_t) {
if (NULL != imod->module->log &&
0 == strcmp(imod->component->base_version.mca_component_name, modules[i]))
imod->module->report(req);
}
}
opal_argv_free(modules);
}
const char* orte_notifier_base_sev2str(orte_notifier_severity_t severity)
@ -121,3 +154,39 @@ const char* orte_notifier_base_sev2str(orte_notifier_severity_t severity)
}
}
/*
 * Work out which notifier modules should handle a request.
 *
 * modules - out: receives the argv-style array produced by splitting the
 *           chosen comma-separated action list; left untouched when no
 *           list applies
 * req     - the request being dispatched
 *
 * Order of preference: the action list carried by the request itself,
 * then the list configured for the request's severity, then the
 * configured default list.
 */
static void orte_notifier_base_identify_modules(char ***modules,
                                                orte_notifier_request_t *req)
{
    /* severity -> configured action list, in the order they are tried */
    const struct {
        orte_notifier_severity_t sev;
        const char *actions;
    } by_severity[] = {
        { ORTE_NOTIFIER_EMERG,  orte_notifier_base.emerg_actions  },
        { ORTE_NOTIFIER_ALERT,  orte_notifier_base.alert_actions  },
        { ORTE_NOTIFIER_CRIT,   orte_notifier_base.crit_actions   },
        { ORTE_NOTIFIER_WARN,   orte_notifier_base.warn_actions   },
        { ORTE_NOTIFIER_NOTICE, orte_notifier_base.notice_actions },
        { ORTE_NOTIFIER_INFO,   orte_notifier_base.info_actions   },
        { ORTE_NOTIFIER_DEBUG,  orte_notifier_base.debug_actions  },
        { ORTE_NOTIFIER_ERROR,  orte_notifier_base.error_actions  }
    };
    const size_t nentries = sizeof(by_severity) / sizeof(by_severity[0]);
    size_t k;

    /* an explicit action on the request overrides everything else */
    if (NULL != req->action) {
        *modules = opal_argv_split(req->action, ',');
        return;
    }

    /* first entry matching the severity that has a list configured wins */
    for (k = 0; k < nentries; k++) {
        if (by_severity[k].sev == req->severity &&
            NULL != by_severity[k].actions) {
            *modules = opal_argv_split(by_severity[k].actions, ',');
            return;
        }
    }

    /* nothing specific to this severity - use the default list, if any */
    if (NULL != orte_notifier_base.default_actions) {
        *modules = opal_argv_split(orte_notifier_base.default_actions, ',');
    }
}

Просмотреть файл

@ -57,6 +57,10 @@
BEGIN_C_DECLS
/* make the verbose channel visible here so everyone
* doesn't have to include notifier/base/base.h */
extern int orte_notifier_debug_output;
/* The maximum size of any on-stack buffers used in the notifier
* so we can try to avoid calling malloc in OUT_OF_RESOURCES conditions.
* The code has NOT been audited for use of malloc, so this still
@ -84,6 +88,7 @@ typedef struct {
orte_notifier_severity_t severity;
int errcode;
const char *msg;
const char *action;
time_t t;
} orte_notifier_request_t;
OBJ_CLASS_DECLARATION(orte_notifier_request_t);
@ -98,21 +103,27 @@ typedef int (*orte_notifier_base_module_init_fn_t)(void);
/* finalize the selected module */
typedef void (*orte_notifier_base_module_finalize_fn_t)(void);
/* Log an error */
/* Log an internal error - this will include the job that caused the
* error to occur */
typedef void (*orte_notifier_base_module_log_fn_t)(orte_notifier_request_t *req);
/* Report a system event - e.g., a temperature out-of-bound */
typedef void (*orte_notifier_base_module_event_fn_t)(orte_notifier_request_t *req);
/* Report a state */
/* Report a job state */
typedef void (*orte_notifier_base_module_report_fn_t)(orte_notifier_request_t *req);
#define ORTE_NOTIFIER_LOG_ERROR(j, st, s, e, m) \
#define ORTE_NOTIFIER_INTERNAL_ERROR(j, st, s, e, m) \
do { \
orte_notifier_request_t *_n; \
opal_output_verbose(2, orte_notifier_base_framework.framework_output, \
"%s notifier:log:error[%s:%d] for job %s error %s severity %s", \
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, \
ORTE_JOBID_PRINT((j)->jobid), ORTE_ERROR_NAME((e)), \
opal_output_verbose(2, orte_notifier_debug_output, \
"%s notifier:internal:error[%s:%d] " \
"job %s error %s severity %s", \
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
__FILE__, __LINE__, \
ORTE_JOBID_PRINT((j)->jobid), \
ORTE_ERROR_NAME((e)), \
orte_notifier_base_sev2str(s)); \
_n = OBJ_NEW(orte_notifier_request_t); \
_n->jdata = (j); \
@ -121,6 +132,7 @@ typedef void (*orte_notifier_base_module_report_fn_t)(orte_notifier_request_t *r
_n->errcode = (e); \
_n->msg = (m); \
_n->t = time(NULL); \
_n->action = (NULL); \
/* add the event */ \
opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1, \
OPAL_EV_WRITE, orte_notifier_base_log, (_n)); \
@ -128,18 +140,21 @@ typedef void (*orte_notifier_base_module_report_fn_t)(orte_notifier_request_t *r
opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1); \
} while(0);
#define ORTE_NOTIFIER_REPORT_STATE(j, st, m) \
#define ORTE_NOTIFIER_JOB_STATE(j, st, m) \
do { \
orte_notifier_request_t *_n; \
opal_output_verbose(2, orte_notifier_base_framework.framework_output, \
"%s notifier:report:event[%s:%d] for job %s state %s", \
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, \
opal_output_verbose(2, orte_notifier_debug_output, \
"%s notifier[%s:%d] job %s state %s", \
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
__FILE__, __LINE__, \
ORTE_JOBID_PRINT((j)->jobid), \
orte_job_state_to_str(st)); \
_n = OBJ_NEW(orte_notifier_request_t); \
_n->jdata = (j); \
_n->state = (st); \
_n->msg = (m); \
_n->t = time(NULL); \
_n->action = (NULL); \
/* add the event */ \
opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1, \
OPAL_EV_WRITE, orte_notifier_base_report, (_n)); \
@ -147,6 +162,29 @@ typedef void (*orte_notifier_base_module_report_fn_t)(orte_notifier_request_t *r
opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1); \
} while(0);
/* Queue a system-event notification that is not tied to any job.
 *
 *   s - severity of the event
 *   m - message text (stored by pointer, not copied)
 *   a - comma-separated list of notifier modules to use, or NULL
 *
 * A new orte_notifier_request_t is created, filled in, and activated on
 * the notifier event base with orte_notifier_base_event as its handler.
 * NOTE(review): state is zeroed here because the field holds a job state
 * value, not a pointer - confirm 0 is the intended "undefined" state.
 */
#define ORTE_NOTIFIER_SYSTEM_EVENT(s, m, a)                                 \
    do {                                                                    \
        orte_notifier_request_t *_n;                                        \
        opal_output_verbose(2, orte_notifier_debug_output,                  \
                            "%s notifier:sys:event[%s:%d] event %s",        \
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),             \
                            __FILE__, __LINE__,                             \
                            orte_notifier_base_sev2str(s));                 \
        _n = OBJ_NEW(orte_notifier_request_t);                              \
        /* a system event has no associated job */                          \
        _n->jdata = NULL;                                                   \
        _n->state = 0;                                                      \
        _n->msg = (m);                                                      \
        _n->t = time(NULL);                                                 \
        _n->severity = (s);                                                 \
        _n->action = (a);                                                   \
        /* add the event */                                                 \
        opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1,           \
                       OPAL_EV_WRITE, orte_notifier_base_event, (_n));      \
        opal_event_set_priority(&(_n)->ev, ORTE_ERROR_PRI);                 \
        opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1);                     \
    } while(0);
/*
* Ver 1.0
*/
@ -154,6 +192,7 @@ typedef struct {
orte_notifier_base_module_init_fn_t init;
orte_notifier_base_module_finalize_fn_t finalize;
orte_notifier_base_module_log_fn_t log;
orte_notifier_base_module_event_fn_t event;
orte_notifier_base_module_report_fn_t report;
} orte_notifier_base_module_t;

Просмотреть файл

@ -45,6 +45,7 @@
static int init(void);
static void finalize(void);
static void mylog(orte_notifier_request_t *req);
static void myevent(orte_notifier_request_t *req);
static void myreport(orte_notifier_request_t *req);
/* Module def */
@ -52,6 +53,7 @@ orte_notifier_base_module_t orte_notifier_syslog_module = {
init,
finalize,
mylog,
myevent,
myreport
};
@ -90,7 +92,40 @@ static void mylog(orte_notifier_request_t *req)
(NULL == req->msg) ? "<N/A>" : req->msg);
}
static void myreport(orte_notifier_request_t *req)
/*
 * Write a system-event request to syslog.
 *
 * req - the request to report; its severity is used as the syslog
 *       priority, its timestamp is rendered with ctime_r(), and a
 *       missing message is shown as "<N/A>".
 */
static void myevent(orte_notifier_request_t *req)
{
    char tod[48];
    size_t len;

    opal_output_verbose(5, orte_notifier_base_framework.framework_output,
                        "notifier:syslog:myevent function called with severity %d and messg %s",
                        (int)req->severity,
                        (NULL == req->msg) ? "<N/A>" : req->msg);

    /* render the request time; use an empty string if that fails so the
     * buffer is never read uninitialized */
    if (NULL == ctime_r(&req->t, tod)) {
        tod[0] = '\0';
    }
    /* trim the newline that ctime_r appends */
    len = strlen(tod);
    if (0 < len && '\n' == tod[len - 1]) {
        tod[len - 1] = '\0';
    }

    syslog(req->severity, "[%s]%s SET EVENT : %s", tod,
           ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
           (NULL == req->msg) ? "<N/A>" : req->msg);
}
/*
 * Write a job-state report to syslog.
 *
 * req - the request to report; it must carry a job (req->jdata is
 *       dereferenced for the job id). The timestamp is rendered with
 *       ctime_r() and a missing message is shown as "<N/A>".
 *
 * NOTE(review): req->severity is used as the syslog priority, but the
 * job-state macro does not appear to set it - confirm the request
 * constructor gives it a sensible default.
 */
static void myreport(orte_notifier_request_t *req)
{
    char tod[48];
    size_t len;

    opal_output_verbose(5, orte_notifier_base_framework.framework_output,
                        "notifier:syslog:myreport function called with severity %d state %s and messg %s",
                        (int)req->severity, orte_job_state_to_str(req->state),
                        (NULL == req->msg) ? "<N/A>" : req->msg);

    /* render the request time; use an empty string if that fails so the
     * buffer is never read uninitialized */
    if (NULL == ctime_r(&req->t, tod)) {
        tod[0] = '\0';
    }
    /* trim the newline that ctime_r appends */
    len = strlen(tod);
    if (0 < len && '\n' == tod[len - 1]) {
        tod[len - 1] = '\0';
    }

    syslog(req->severity, "[%s]%s JOBID %s REPORTS STATE %s: %s", tod,
           ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
           ORTE_JOBID_PRINT(req->jdata->jobid),
           orte_job_state_to_str(req->state),
           (NULL == req->msg) ? "<N/A>" : req->msg);
}