1
1

Updates to the notifier interfaces to support system events

Этот коммит содержится в:
Ralph Castain 2015-03-05 10:39:25 -08:00
родитель f758790d7a
Коммит 7ce0a9931c
4 изменённых файлов: 187 добавлений и 43 удалений

Просмотреть файл

@ -78,6 +78,7 @@ ORTE_DECLSPEC int orte_notifier_base_select(void);
/* base functions */ /* base functions */
ORTE_DECLSPEC void orte_notifier_base_log(int sd, short args, void *cbdata); ORTE_DECLSPEC void orte_notifier_base_log(int sd, short args, void *cbdata);
ORTE_DECLSPEC void orte_notifier_base_event(int sd, short args, void *cbdata);
ORTE_DECLSPEC void orte_notifier_base_report(int sd, short args, void *cbdata); ORTE_DECLSPEC void orte_notifier_base_report(int sd, short args, void *cbdata);
/* severity to string */ /* severity to string */

Просмотреть файл

@ -27,6 +27,9 @@
#include "orte/mca/notifier/base/base.h" #include "orte/mca/notifier/base/base.h"
static void orte_notifier_base_identify_modules(char ***modules,
orte_notifier_request_t *req);
void orte_notifier_base_log(int sd, short args, void *cbdata) void orte_notifier_base_log(int sd, short args, void *cbdata)
{ {
orte_notifier_request_t *req = (orte_notifier_request_t*)cbdata; orte_notifier_request_t *req = (orte_notifier_request_t*)cbdata;
@ -46,34 +49,10 @@ void orte_notifier_base_log(int sd, short args, void *cbdata)
return; return;
} }
if (ORTE_NOTIFIER_EMERG == req->severity && orte_notifier_base_identify_modules(&modules, req);
(NULL != orte_notifier_base.emerg_actions)) {
modules = opal_argv_split(orte_notifier_base.emerg_actions, ','); /* no modules selected then nothing to do */
} else if (ORTE_NOTIFIER_ALERT == req->severity && if (NULL == modules) {
(NULL != orte_notifier_base.alert_actions)) {
modules = opal_argv_split(orte_notifier_base.alert_actions, ',');
} else if (ORTE_NOTIFIER_CRIT == req->severity &&
(NULL != orte_notifier_base.crit_actions)) {
modules = opal_argv_split(orte_notifier_base.crit_actions, ',');
} else if (ORTE_NOTIFIER_WARN == req->severity &&
(NULL != orte_notifier_base.warn_actions)) {
modules = opal_argv_split(orte_notifier_base.warn_actions, ',');
} else if (ORTE_NOTIFIER_NOTICE == req->severity &&
(NULL != orte_notifier_base.notice_actions)) {
modules = opal_argv_split(orte_notifier_base.notice_actions, ',');
} else if (ORTE_NOTIFIER_INFO == req->severity &&
(NULL != orte_notifier_base.info_actions)) {
modules = opal_argv_split(orte_notifier_base.info_actions, ',');
} else if (ORTE_NOTIFIER_DEBUG == req->severity &&
(NULL != orte_notifier_base.debug_actions)) {
modules = opal_argv_split(orte_notifier_base.debug_actions, ',');
} else if (ORTE_NOTIFIER_ERROR == req->severity &&
(NULL != orte_notifier_base.error_actions)) {
modules = opal_argv_split(orte_notifier_base.error_actions, ',');
} else if (NULL != orte_notifier_base.default_actions) {
modules = opal_argv_split(orte_notifier_base.default_actions, ',');
} else {
/* no modules selected */
return; return;
} }
@ -87,10 +66,48 @@ void orte_notifier_base_log(int sd, short args, void *cbdata)
opal_argv_free(modules); opal_argv_free(modules);
} }
/*
 * Event-loop callback that delivers a system event to the notifier modules.
 *
 * This is the handler that ORTE_NOTIFIER_SYSTEM_EVENT registers with
 * opal_event_set().
 *
 * sd, args - callback arguments; not used by this function
 * cbdata   - the orte_notifier_request_t describing the event
 *
 * The list of module names to notify is produced by
 * orte_notifier_base_identify_modules(). Every active module whose
 * component name matches one of those names, and that provides an
 * "event" entry point, is invoked with the request.
 *
 * NOTE(review): the request object is not released here - confirm who
 * owns it once the callback has run.
 */
void orte_notifier_base_event(int sd, short args, void *cbdata)
{
    orte_notifier_request_t *req = (orte_notifier_request_t*)cbdata;
    char **modules = NULL;
    orte_notifier_active_module_t *imod;
    int i;

    /* if no modules are active, then there is nothing to do */
    if (0 == opal_list_get_size(&orte_notifier_base.modules)) {
        return;
    }

    /* check if the severity is >= severity level set for
     * reporting - note that the severity enum value goes up
     * as severity goes down */
    if (orte_notifier_base.severity_level < req->severity) {
        return;
    }

    orte_notifier_base_identify_modules(&modules, req);

    /* no modules selected then nothing to do */
    if (NULL == modules) {
        return;
    }

    for (i = 0; NULL != modules[i]; i++) {
        OPAL_LIST_FOREACH(imod, &orte_notifier_base.modules, orte_notifier_active_module_t) {
            /* guard the entry point that is actually called: a module
             * may provide "log" without providing "event" */
            if (NULL != imod->module->event &&
                0 == strcmp(imod->component->base_version.mca_component_name, modules[i])) {
                imod->module->event(req);
            }
        }
    }
    opal_argv_free(modules);
}
void orte_notifier_base_report(int sd, short args, void *cbdata) void orte_notifier_base_report(int sd, short args, void *cbdata)
{ {
orte_notifier_request_t *req = (orte_notifier_request_t*)cbdata; orte_notifier_request_t *req = (orte_notifier_request_t*)cbdata;
char *notifies = NULL; char **modules = NULL;
orte_notifier_active_module_t *imod;
int i;
/* if no modules are active, then there is nothing to do */ /* if no modules are active, then there is nothing to do */
if (0 == opal_list_get_size(&orte_notifier_base.modules)) { if (0 == opal_list_get_size(&orte_notifier_base.modules)) {
@ -98,12 +115,28 @@ void orte_notifier_base_report(int sd, short args, void *cbdata)
} }
/* see if the job requested any notifications */ /* see if the job requested any notifications */
if (!orte_get_attribute(&req->jdata->attributes, ORTE_JOB_NOTIFICATIONS, (void**)notifies, OPAL_STRING)) { if (!orte_get_attribute(&req->jdata->attributes, ORTE_JOB_NOTIFICATIONS, (void**)modules, OPAL_STRING)) {
return; return;
} }
/* need to process the notification string to get the names of the modules */ /* need to process the notification string to get the names of the modules */
return; if (NULL == modules) {
orte_notifier_base_identify_modules(&modules, req);
/* no modules selected then nothing to do */
if (NULL == modules) {
return;
}
}
for (i=0; NULL != modules[i]; i++) {
OPAL_LIST_FOREACH(imod, &orte_notifier_base.modules, orte_notifier_active_module_t) {
if (NULL != imod->module->log &&
0 == strcmp(imod->component->base_version.mca_component_name, modules[i]))
imod->module->report(req);
}
}
opal_argv_free(modules);
} }
const char* orte_notifier_base_sev2str(orte_notifier_severity_t severity) const char* orte_notifier_base_sev2str(orte_notifier_severity_t severity)
@ -121,3 +154,39 @@ const char* orte_notifier_base_sev2str(orte_notifier_severity_t severity)
} }
} }
/*
 * Work out which notifier modules should handle a request.
 *
 * modules - output: receives the argv-style array produced by
 *           opal_argv_split(); left untouched when nothing applies
 * req     - the request being dispatched
 *
 * Selection order:
 *   1. the action string carried by the request itself, if any
 *   2. the action string configured for the request's severity
 *   3. the configured default action string
 */
static void orte_notifier_base_identify_modules(char ***modules,
                                                orte_notifier_request_t *req)
{
    /* severity -> configured action string, in lookup order */
    struct {
        orte_notifier_severity_t level;
        const char *actions;
    } per_severity[] = {
        { ORTE_NOTIFIER_EMERG,  orte_notifier_base.emerg_actions  },
        { ORTE_NOTIFIER_ALERT,  orte_notifier_base.alert_actions  },
        { ORTE_NOTIFIER_CRIT,   orte_notifier_base.crit_actions   },
        { ORTE_NOTIFIER_WARN,   orte_notifier_base.warn_actions   },
        { ORTE_NOTIFIER_NOTICE, orte_notifier_base.notice_actions },
        { ORTE_NOTIFIER_INFO,   orte_notifier_base.info_actions   },
        { ORTE_NOTIFIER_DEBUG,  orte_notifier_base.debug_actions  },
        { ORTE_NOTIFIER_ERROR,  orte_notifier_base.error_actions  }
    };
    const size_t count = sizeof(per_severity) / sizeof(per_severity[0]);
    size_t k;

    /* an explicit action on the request overrides everything else */
    if (NULL != req->action) {
        *modules = opal_argv_split(req->action, ',');
        return;
    }

    /* first entry that matches the severity and has actions set wins */
    for (k = 0; k < count; k++) {
        if (per_severity[k].level == req->severity &&
            NULL != per_severity[k].actions) {
            *modules = opal_argv_split(per_severity[k].actions, ',');
            return;
        }
    }

    /* nothing specific to this severity - fall back to the default */
    if (NULL != orte_notifier_base.default_actions) {
        *modules = opal_argv_split(orte_notifier_base.default_actions, ',');
    }
}

Просмотреть файл

@ -57,6 +57,10 @@
BEGIN_C_DECLS BEGIN_C_DECLS
/* make the verbose channel visible here so everyone
* doesn't have to include notifier/base/base.h */
extern int orte_notifier_debug_output;
/* The maximum size of any on-stack buffers used in the notifier /* The maximum size of any on-stack buffers used in the notifier
* so we can try to avoid calling malloc in OUT_OF_RESOURCES conditions. * so we can try to avoid calling malloc in OUT_OF_RESOURCES conditions.
* The code has NOT been audited for use of malloc, so this still * The code has NOT been audited for use of malloc, so this still
@ -84,6 +88,7 @@ typedef struct {
orte_notifier_severity_t severity; orte_notifier_severity_t severity;
int errcode; int errcode;
const char *msg; const char *msg;
const char *action;
time_t t; time_t t;
} orte_notifier_request_t; } orte_notifier_request_t;
OBJ_CLASS_DECLARATION(orte_notifier_request_t); OBJ_CLASS_DECLARATION(orte_notifier_request_t);
@ -98,21 +103,27 @@ typedef int (*orte_notifier_base_module_init_fn_t)(void);
/* finalize the selected module */ /* finalize the selected module */
typedef void (*orte_notifier_base_module_finalize_fn_t)(void); typedef void (*orte_notifier_base_module_finalize_fn_t)(void);
/* Log an error */ /* Log an internal error - this will include the job that caused the
* error to occur */
typedef void (*orte_notifier_base_module_log_fn_t)(orte_notifier_request_t *req); typedef void (*orte_notifier_base_module_log_fn_t)(orte_notifier_request_t *req);
/* Report a system event - e.g., a temperature out-of-bound */
typedef void (*orte_notifier_base_module_event_fn_t)(orte_notifier_request_t *req);
/* Report a state */ /* Report a job state */
typedef void (*orte_notifier_base_module_report_fn_t)(orte_notifier_request_t *req); typedef void (*orte_notifier_base_module_report_fn_t)(orte_notifier_request_t *req);
#define ORTE_NOTIFIER_LOG_ERROR(j, st, s, e, m) \ #define ORTE_NOTIFIER_INTERNAL_ERROR(j, st, s, e, m) \
do { \ do { \
orte_notifier_request_t *_n; \ orte_notifier_request_t *_n; \
opal_output_verbose(2, orte_notifier_base_framework.framework_output, \ opal_output_verbose(2, orte_notifier_debug_output, \
"%s notifier:log:error[%s:%d] for job %s error %s severity %s", \ "%s notifier:internal:error[%s:%d] " \
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, \ "job %s error %s severity %s", \
ORTE_JOBID_PRINT((j)->jobid), ORTE_ERROR_NAME((e)), \ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
__FILE__, __LINE__, \
ORTE_JOBID_PRINT((j)->jobid), \
ORTE_ERROR_NAME((e)), \
orte_notifier_base_sev2str(s)); \ orte_notifier_base_sev2str(s)); \
_n = OBJ_NEW(orte_notifier_request_t); \ _n = OBJ_NEW(orte_notifier_request_t); \
_n->jdata = (j); \ _n->jdata = (j); \
@ -121,6 +132,7 @@ typedef void (*orte_notifier_base_module_report_fn_t)(orte_notifier_request_t *r
_n->errcode = (e); \ _n->errcode = (e); \
_n->msg = (m); \ _n->msg = (m); \
_n->t = time(NULL); \ _n->t = time(NULL); \
_n->action = (NULL); \
/* add the event */ \ /* add the event */ \
opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1, \ opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1, \
OPAL_EV_WRITE, orte_notifier_base_log, (_n)); \ OPAL_EV_WRITE, orte_notifier_base_log, (_n)); \
@ -128,18 +140,21 @@ typedef void (*orte_notifier_base_module_report_fn_t)(orte_notifier_request_t *r
opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1); \ opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1); \
} while(0); } while(0);
#define ORTE_NOTIFIER_REPORT_STATE(j, st, m) \ #define ORTE_NOTIFIER_JOB_STATE(j, st, m) \
do { \ do { \
orte_notifier_request_t *_n; \ orte_notifier_request_t *_n; \
opal_output_verbose(2, orte_notifier_base_framework.framework_output, \ opal_output_verbose(2, orte_notifier_debug_output, \
"%s notifier:report:event[%s:%d] for job %s state %s", \ "%s notifier[%s:%d] job %s state %s", \
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, \ ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
__FILE__, __LINE__, \
ORTE_JOBID_PRINT((j)->jobid), \ ORTE_JOBID_PRINT((j)->jobid), \
orte_job_state_to_str(st)); \ orte_job_state_to_str(st)); \
_n = OBJ_NEW(orte_notifier_request_t); \ _n = OBJ_NEW(orte_notifier_request_t); \
_n->jdata = (j); \ _n->jdata = (j); \
_n->state = (st); \
_n->msg = (m); \ _n->msg = (m); \
_n->t = time(NULL); \ _n->t = time(NULL); \
_n->action = (NULL); \
/* add the event */ \ /* add the event */ \
opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1, \ opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1, \
OPAL_EV_WRITE, orte_notifier_base_report, (_n)); \ OPAL_EV_WRITE, orte_notifier_base_report, (_n)); \
@ -147,6 +162,29 @@ typedef void (*orte_notifier_base_module_report_fn_t)(orte_notifier_request_t *r
opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1); \ opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1); \
} while(0); } while(0);
/* Queue a system event (one that is not tied to a job) for delivery
 * by orte_notifier_base_event.
 *
 *   s - severity of the event; note that it is evaluated twice
 *   m - message string, stored in the request as-is
 *   a - action string naming the modules to notify, or NULL to let
 *       the base pick them from the severity
 *
 * The request carries no job data. The expansion ends with a
 * semicolon, like the other notifier macros in this header. */
#define ORTE_NOTIFIER_SYSTEM_EVENT(s, m, a)                                 \
    do {                                                                    \
        orte_notifier_request_t *_n;                                        \
        opal_output_verbose(2, orte_notifier_debug_output,                  \
                            "%s notifier:sys:event[%s:%d] event %s",        \
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),             \
                            __FILE__, __LINE__,                             \
                            orte_notifier_base_sev2str(s));                 \
        _n = OBJ_NEW(orte_notifier_request_t);                              \
        /* no job is associated with a system event */                      \
        _n->jdata = NULL;                                                   \
        /* state is a job-state value, not a pointer */                     \
        _n->state = 0;                                                      \
        _n->msg = (m);                                                      \
        _n->t = time(NULL);                                                 \
        _n->severity = (s);                                                 \
        _n->action = (a);                                                   \
        /* add the event */                                                 \
        opal_event_set(orte_notifier_base.ev_base, &(_n)->ev, -1,           \
                       OPAL_EV_WRITE, orte_notifier_base_event, (_n));      \
        opal_event_set_priority(&(_n)->ev, ORTE_ERROR_PRI);                 \
        opal_event_active(&(_n)->ev, OPAL_EV_WRITE, 1);                     \
    } while(0);
/* /*
* Ver 1.0 * Ver 1.0
*/ */
@ -154,6 +192,7 @@ typedef struct {
orte_notifier_base_module_init_fn_t init; orte_notifier_base_module_init_fn_t init;
orte_notifier_base_module_finalize_fn_t finalize; orte_notifier_base_module_finalize_fn_t finalize;
orte_notifier_base_module_log_fn_t log; orte_notifier_base_module_log_fn_t log;
orte_notifier_base_module_event_fn_t event;
orte_notifier_base_module_report_fn_t report; orte_notifier_base_module_report_fn_t report;
} orte_notifier_base_module_t; } orte_notifier_base_module_t;

Просмотреть файл

@ -45,6 +45,7 @@
static int init(void); static int init(void);
static void finalize(void); static void finalize(void);
static void mylog(orte_notifier_request_t *req); static void mylog(orte_notifier_request_t *req);
static void myevent(orte_notifier_request_t *req);
static void myreport(orte_notifier_request_t *req); static void myreport(orte_notifier_request_t *req);
/* Module def */ /* Module def */
@ -52,6 +53,7 @@ orte_notifier_base_module_t orte_notifier_syslog_module = {
init, init,
finalize, finalize,
mylog, mylog,
myevent,
myreport myreport
}; };
@ -90,7 +92,40 @@ static void mylog(orte_notifier_request_t *req)
(NULL == req->msg) ? "<N/A>" : req->msg); (NULL == req->msg) ? "<N/A>" : req->msg);
} }
/*
 * Write a system event to syslog.
 *
 * req - the notifier request; its severity is used as the syslog
 *       priority, its timestamp is rendered with ctime_r(), and its
 *       message is printed ("<N/A>" when no message was supplied)
 */
static void myevent(orte_notifier_request_t *req)
{
    char tod[48];
    size_t len;
    const char *msg = (NULL == req->msg) ? "<N/A>" : req->msg;

    opal_output_verbose(5, orte_notifier_base_framework.framework_output,
                        "notifier:syslog:myevent function called with severity %d and messg %s",
                        (int)req->severity, msg);

    /* render the request timestamp; fall back to an empty string
     * if the time cannot be converted */
    if (NULL == ctime_r(&req->t, tod)) {
        tod[0] = '\0';
    }
    /* trim the newline that ctime_r appends */
    len = strlen(tod);
    if (0 < len && '\n' == tod[len - 1]) {
        tod[len - 1] = '\0';
    }

    syslog(req->severity, "[%s]%s SET EVENT : %s", tod,
           ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
           msg);
}
/*
 * Write a job state report to syslog.
 *
 * req - the notifier request; its severity is used as the syslog
 *       priority, its timestamp is rendered with ctime_r(), and the
 *       job id, job state and message are printed ("<N/A>" when no
 *       message was supplied). req->jdata is dereferenced and so
 *       must not be NULL.
 */
static void myreport(orte_notifier_request_t *req)
{
    char tod[48];
    size_t len;
    const char *msg = (NULL == req->msg) ? "<N/A>" : req->msg;

    opal_output_verbose(5, orte_notifier_base_framework.framework_output,
                        "notifier:syslog:myreport function called with severity %d state %s and messg %s",
                        (int)req->severity, orte_job_state_to_str(req->state),
                        msg);

    /* render the request timestamp; fall back to an empty string
     * if the time cannot be converted */
    if (NULL == ctime_r(&req->t, tod)) {
        tod[0] = '\0';
    }
    /* trim the newline that ctime_r appends */
    len = strlen(tod);
    if (0 < len && '\n' == tod[len - 1]) {
        tod[len - 1] = '\0';
    }

    syslog(req->severity, "[%s]%s JOBID %s REPORTS STATE %s: %s", tod,
           ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
           ORTE_JOBID_PRINT(req->jdata->jobid),
           orte_job_state_to_str(req->state),
           msg);
}