1
1
  * register FTB events from an event schema file
  * define more FTB events
  * minor fixes

This commit was SVN r23180.
Этот коммит содержится в:
Abhishek Kulkarni 2010-05-19 22:05:06 +00:00
родитель b56ab33ff6
Коммит 118ce0e166
4 изменённых файлов: 145 добавлений и 78 удалений

Просмотреть файл

@ -19,7 +19,8 @@
AM_CPPFLAGS = $(notifier_ftb_CPPFLAGS) AM_CPPFLAGS = $(notifier_ftb_CPPFLAGS)
dist_pkgdata_DATA = \ dist_pkgdata_DATA = \
help-orte-notifier-ftb.txt help-orte-notifier-ftb.txt \
help-ftb-event-schema.txt
sources = \ sources = \
notifier_ftb.h \ notifier_ftb.h \

Просмотреть файл

@ -0,0 +1,37 @@
# -*- text -*-
#
# Copyright (c) 2010 The Trustees of Indiana University and Indiana
# University Research and Technology Corporation.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the FTB event schema file for Open MPI's FTB notifier
#
# Layout of the block below, as it appears in this file: the line right
# after "start" is presumably the event namespace, and every following
# line up to "end" pairs an event name with its severity (INFO or ERROR).
#
# NOTE(review): MPI_IFACE_RESTORED is declared with severity ERROR while
# the other *_RESTORED events (MPI_NODE_RESTORED, MPI_RANK_RESTORED) are
# INFO -- confirm that this is intentional.
#
start
ftb.mpi.openmpi
MPI_INIT INFO
MPI_FINALIZE INFO
MPI_NODE_DEAD ERROR
MPI_NODE_RESTORED INFO
MPI_RANK_DEAD ERROR
MPI_RANK_RESTORED INFO
MPI_NODE_MIGRATE_DONE INFO
MPI_JOB_ABORT_CMD ERROR
MPI_JOB_RESUME_CMD INFO
MPI_JOB_ABORTED ERROR
MPI_JOB_RESUMED INFO
MPI_MSG_CORRUPT ERROR
MPI_IFACE_DEAD ERROR
MPI_IFACE_RESTORED ERROR
MPI_UNKNOWN_ERROR ERROR
MPI_OUT_OF_RESOURCE ERROR
MPI_NODE_UNREACHABLE ERROR
MPI_COMM_FAILURE ERROR
end

Просмотреть файл

@ -39,20 +39,23 @@ typedef struct {
int priority; int priority;
} orte_notifier_ftb_component_t; } orte_notifier_ftb_component_t;
/* /* Notifier interfaces */
* Notifier interfaces
*/
ORTE_MODULE_DECLSPEC extern orte_notifier_ftb_component_t mca_notifier_ftb_component; ORTE_MODULE_DECLSPEC extern orte_notifier_ftb_component_t mca_notifier_ftb_component;
extern orte_notifier_base_module_t orte_notifier_ftb_module; extern orte_notifier_base_module_t orte_notifier_ftb_module;
/* /* FTB client information */
* FTB client information
*/
extern FTB_client_t ftb_client_info; extern FTB_client_t ftb_client_info;
extern FTB_client_handle_t ftb_client_handle; extern FTB_client_handle_t ftb_client_handle;
/*
 * FTB event types.
 *
 * Each enumerator carries an explicit numeric value; the value is what
 * ends up in an integer event-type field when the enumerator is cast
 * to int.
 */
typedef enum {
    FTB_EVENT_NORMAL   = 1,  /* normal event */
    FTB_EVENT_RESPONSE = 2   /* response event */
} ftb_event_type_t;
/*
 * Macro that returns the FTB event name for an FTB event code by
 * stringifying its argument: FTB_ERROR(MPI_NODE_DEAD) expands to the
 * string literal "MPI_NODE_DEAD". Because '#' is applied to the parameter
 * directly, the argument is NOT macro-expanded before it is turned into
 * a string, so it must be spelled exactly as the event name.
 */
#define FTB_ERROR(errnum) #errnum
END_C_DECLS END_C_DECLS
#endif #endif

Просмотреть файл

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
* Corporation. All rights reserved. * Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University * Copyright (c) 2004-2005 The University of Tennessee and The University
@ -30,15 +30,15 @@
#include <stdarg.h> #include <stdarg.h>
#endif #endif
#include "opal/mca/installdirs/installdirs.h"
#include "opal/util/show_help.h" #include "opal/util/show_help.h"
#include "opal/util/os_path.h"
#include "orte/util/error_strings.h"
#include "orte/mca/ess/ess.h" #include "orte/mca/ess/ess.h"
#include "orte/util/show_help.h" #include "orte/util/show_help.h"
#include "orte/mca/notifier/base/base.h" #include "orte/mca/notifier/base/base.h"
#include "notifier_ftb.h" #include "notifier_ftb.h"
/* Static API's */ /* Static API's */
static int init(void); static int init(void);
static void finalize(void); static void finalize(void);
@ -66,53 +66,23 @@ FTB_client_t ftb_client_info;
/* FTB client handle */ /* FTB client handle */
FTB_client_handle_t ftb_client_handle; FTB_client_handle_t ftb_client_handle;
static FTB_event_info_t ftb_event_info[] = {
/* 0 */ {"UNKNOWN_ERROR", "error"},
/* 1 */ {"OUT_OF_RESOURCES", "error"},
/* 2 */ {"UNREACHABLE", "error"},
/* 3 */ {"COMM_FAILURE", "error"},
/* 4 */ {"FATAL", "fatal"},
};
static const int ftb_event_info_count = sizeof(ftb_event_info)/sizeof(FTB_event_info_t);
/*
 * Translate an error number into an index into ftb_event_info[].
 *
 * The error code is first extracted with OPAL_SOS_GET_ERROR_CODE() and
 * then mapped as follows:
 *   out-of-resource errors            -> 1
 *   connection / unreachable errors   -> 2
 *   communication failure             -> 3
 *   fatal error                       -> 4
 *   anything else                     -> 0
 *
 * Returns an index that is always within [0, ftb_event_info_count).
 */
static int orte_err2ftb(int errnum)
{
    int idx;

    switch (OPAL_SOS_GET_ERROR_CODE(errnum)) {
    case ORTE_ERR_OUT_OF_RESOURCE:
    case ORTE_ERR_TEMP_OUT_OF_RESOURCE:
        idx = 1;
        break;

    case ORTE_ERR_CONNECTION_REFUSED:
    case ORTE_ERR_CONNECTION_FAILED:
    case ORTE_ERR_UNREACH:
        idx = 2;
        break;

    case ORTE_ERR_COMM_FAILURE:
        idx = 3;
        break;

    case ORTE_ERR_FATAL:
        idx = 4;
        break;

    default:
        idx = 0;
        break;
    }

    /* Fall back to entry 0 if the index would be outside the table */
    return (0 <= idx && idx < ftb_event_info_count) ? idx : 0;
}
static int init(void) { static int init(void) {
int ret; int ret;
char *schema_file;
/* Locate the FTB events schema file */
if (NULL == (schema_file = opal_os_path(false, opal_install_dirs.pkgdatadir,
"help-ftb-event-schema.txt", NULL))) {
schema_file = strdup("help-ftb-event-schema.txt");
}
/* Declare the Open MPI publishable events to the FTB */
ret = FTB_Declare_publishable_events(ftb_client_handle, schema_file, NULL, 0);
free(schema_file);
ret = FTB_Declare_publishable_events(ftb_client_handle, 0, ftb_event_info, ftb_event_info_count);
if (FTB_SUCCESS != ret) { if (FTB_SUCCESS != ret) {
orte_show_help("help-orte-notifier-ftb.txt", orte_show_help("help-orte-notifier-ftb.txt", "declare events failed", true,
"declare events failed", "FTB_Declare_publishable_events() failed", ret);
true, "FTB_Declare_publishable_events() failed", ret);
FTB_Disconnect(ftb_client_handle); FTB_Disconnect(ftb_client_handle);
return ORTE_ERROR; return ORTE_ERROR;
@ -125,53 +95,109 @@ static void finalize(void) {
FTB_Disconnect(ftb_client_handle); FTB_Disconnect(ftb_client_handle);
} }
static void send_to_ftb(int errcode, char *payload) static const char* get_ftb_event_severity(orte_notifier_base_severity_t severity)
{ {
int ret, event_id; switch (severity) {
case ORTE_NOTIFIER_EMERG:
case ORTE_NOTIFIER_ALERT:
return "ALL";
case ORTE_NOTIFIER_CRIT:
return "FATAL";
case ORTE_NOTIFIER_ERROR:
return "ERROR";
case ORTE_NOTIFIER_WARN:
case ORTE_NOTIFIER_NOTICE:
return "WARNING";
case ORTE_NOTIFIER_INFO:
case ORTE_NOTIFIER_DEBUG:
return "INFO";
default:
return "UNKNOWN";
}
}
/*
 * Return the FTB event name that corresponds to an error number.
 *
 * Only values below OPAL_SUCCESS are treated as OMPI errors and
 * translated; every other value, and every error without a dedicated
 * event, maps to the MPI_UNKNOWN_ERROR event name.
 *
 * The returned pointer refers to a string literal produced by
 * FTB_ERROR() and must not be freed or modified.
 */
static const char* get_ftb_event_name(int errnum)
{
    /* Not an OMPI error: there is nothing to translate */
    if (errnum >= OPAL_SUCCESS) {
        return FTB_ERROR(MPI_UNKNOWN_ERROR);
    }

    /* It is an OMPI error, so translate it to an equivalent FTB event */
    switch (errnum) {
    case ORTE_ERR_OUT_OF_RESOURCE:
    case ORTE_ERR_TEMP_OUT_OF_RESOURCE:
        return FTB_ERROR(MPI_OUT_OF_RESOURCE);

    case ORTE_ERR_CONNECTION_REFUSED:
    case ORTE_ERR_CONNECTION_FAILED:
    case ORTE_ERR_UNREACH:
        return FTB_ERROR(MPI_NODE_DEAD);

    case ORTE_ERR_COMM_FAILURE:
        return FTB_ERROR(MPI_COMM_FAILURE);

    case ORTE_ERR_PROC_DEAD:
        return FTB_ERROR(MPI_RANK_DEAD);

    /* ORTE_ERR_FATAL has no dedicated event; it shares the catch-all */
    case ORTE_ERR_FATAL:
    default:
        return FTB_ERROR(MPI_UNKNOWN_ERROR);
    }
}
static void publish_ftb_event(orte_notifier_base_severity_t severity, int errcode, char *payload)
{
int ret;
const char *event_name;
FTB_event_handle_t ehandle; FTB_event_handle_t ehandle;
FTB_event_properties_t eprop; FTB_event_properties_t eprop;
eprop.event_type = 1;
snprintf(eprop.event_payload, FTB_MAX_PAYLOAD_DATA, "%s", (payload != NULL) ? payload : "");
event_id = orte_err2ftb(errcode); /* Only normal FTB events are supported currently. */
ret = FTB_Publish(ftb_client_handle, ftb_event_info[event_id].event_name, &eprop, &ehandle); eprop.event_type = (int) FTB_EVENT_NORMAL;
/* Copy the event payload, if we have one */
if (NULL != payload) {
strncpy(eprop.event_payload, payload, FTB_MAX_PAYLOAD_DATA);
}
/* Publish the event to the Fault Tolerant Backplane */
event_name = get_ftb_event_name(errcode);
ret = FTB_Publish(ftb_client_handle, event_name, &eprop, &ehandle);
if (FTB_SUCCESS != ret) { if (FTB_SUCCESS != ret) {
orte_show_help("help-orte-notifier-ftb.txt", orte_show_help("help-orte-notifier-ftb.txt", "publish failed", true,
"publish failed", "FTB_Publish() failed", ret, get_ftb_event_severity(severity),
true, "FTB_Publish() failed", ret, event_name, payload, errcode);
ftb_event_info[event_id].severity,
ftb_event_info[event_id].event_name,
eprop.event_payload, errcode);
} }
} }
static void ftb_log(orte_notifier_base_severity_t severity, int errcode, const char *msg, static void ftb_log(orte_notifier_base_severity_t severity, int errcode, const char *msg,
va_list ap) va_list ap)
{ {
char *payload; char *payload;
/* If there was a message, output it */ /* If there was a message, output it */
vasprintf(&payload, msg, ap); vasprintf(&payload, msg, ap);
if (NULL != payload) { if (NULL != payload) {
send_to_ftb(errcode, payload); publish_ftb_event(severity, errcode, payload);
free(payload); free(payload);
} }
} }
static void ftb_help(orte_notifier_base_severity_t severity, int errcode, static void ftb_help(orte_notifier_base_severity_t severity, int errcode,
const char *filename, const char *topic, va_list ap) const char *filename, const char *topic, va_list ap)
{ {
char *output = opal_show_help_vstring(filename, topic, false, ap); char *payload;
if (NULL != output) { payload = opal_show_help_vstring(filename, topic, false, ap);
send_to_ftb(errcode, output); if (NULL != payload) {
free(output); publish_ftb_event(severity, errcode, payload);
free(payload);
} }
} }
static void ftb_peer(orte_notifier_base_severity_t severity, int errcode, static void ftb_peer(orte_notifier_base_severity_t severity, int errcode,
orte_process_name_t *peer_proc, const char *msg, orte_process_name_t *peer_proc, const char *msg,
va_list ap) va_list ap)
{ {
char payload[FTB_MAX_PAYLOAD_DATA + 1]; char payload[FTB_MAX_PAYLOAD_DATA + 1];
char *peer_host = NULL; char *peer_host = NULL;
@ -191,5 +217,5 @@ static void ftb_peer(orte_notifier_base_severity_t severity, int errcode,
} }
payload[FTB_MAX_PAYLOAD_DATA] = '\0'; payload[FTB_MAX_PAYLOAD_DATA] = '\0';
send_to_ftb(errcode, payload); publish_ftb_event(severity, errcode, payload);
} }