From d1a4cc33ddcc1f05fca8df4e7f8d89d8f0b50f7a Mon Sep 17 00:00:00 2001 From: Abhishek Kulkarni Date: Sun, 7 Nov 2010 22:06:32 +0000 Subject: [PATCH] Update the FTB notifier wrt events decided by the CIFTS working group This commit was SVN r24001. --- .../notifier/ftb/help-ftb-event-schema.txt | 28 +++++--------- orte/mca/notifier/ftb/notifier_ftb.h | 4 +- orte/mca/notifier/ftb/notifier_ftb_module.c | 38 ++++++++++--------- 3 files changed, 32 insertions(+), 38 deletions(-) diff --git a/orte/mca/notifier/ftb/help-ftb-event-schema.txt b/orte/mca/notifier/ftb/help-ftb-event-schema.txt index 3e22f00c9a..77e0a574e1 100644 --- a/orte/mca/notifier/ftb/help-ftb-event-schema.txt +++ b/orte/mca/notifier/ftb/help-ftb-event-schema.txt @@ -15,23 +15,15 @@ start ftb.mpi.openmpi -MPI_INIT INFO -MPI_FINALIZE INFO -MPI_NODE_DEAD ERROR -MPI_NODE_RESTORED INFO -MPI_RANK_DEAD ERROR -MPI_RANK_RESTORED INFO -MPI_NODE_MIGRATE_DONE INFO -MPI_JOB_ABORT_CMD ERROR -MPI_JOB_RESUME_CMD INFO -MPI_JOB_ABORTED ERROR -MPI_JOB_RESUMED INFO -MPI_MSG_CORRUPT ERROR -MPI_IFACE_DEAD ERROR -MPI_IFACE_RESTORED ERROR -MPI_UNKNOWN_ERROR ERROR -MPI_OUT_OF_RESOURCE ERROR -MPI_NODE_UNREACHABLE ERROR -MPI_COMM_FAILURE ERROR +FTB_MPI_PROCS_DEAD ERROR +FTB_MPI_PROCS_UNREACHABLE ERROR +FTB_MPI_PROCS_COMM_ERROR WARN +FTB_MPI_PROCS_MIGRATED INFO +FTB_MPI_PROCS_MIGRATE_FAIL ERROR +FTB_MPI_PROCS_CKPTED INFO +FTB_MPI_PROCS_CKPT_FAIL ERROR +FTB_MPI_PROCS_RESTARTED INFO +FTB_MPI_PROCS_RESTART_FAIL ERROR +FTB_MPI_PROCS_ABORTED ERROR end \ No newline at end of file diff --git a/orte/mca/notifier/ftb/notifier_ftb.h b/orte/mca/notifier/ftb/notifier_ftb.h index 5d97040881..42facaf447 100644 --- a/orte/mca/notifier/ftb/notifier_ftb.h +++ b/orte/mca/notifier/ftb/notifier_ftb.h @@ -53,8 +53,8 @@ typedef enum { FTB_EVENT_RESPONSE = 2 } ftb_event_type_t; -/* Macro that returns FTB event name given the FTB event code */ -#define FTB_ERROR(errnum) #errnum +/* Returns the FTB event name (as a string) given the event code */ +#define FTB_EVENT(errnum) #errnum END_C_DECLS diff --git a/orte/mca/notifier/ftb/notifier_ftb_module.c b/orte/mca/notifier/ftb/notifier_ftb_module.c index fd4cc4d22e..6f1d1b1b9c 100644 --- a/orte/mca/notifier/ftb/notifier_ftb_module.c +++ b/orte/mca/notifier/ftb/notifier_ftb_module.c @@ -119,30 +119,31 @@ static const char* get_ftb_event_severity(orte_notifier_base_severity_t severity static const char* get_ftb_event_name(int errnum) { /* If it an OMPI error, translate it to an equivalent FTB event */ - if (OPAL_SUCCESS > errnum) { + if (ORTE_SUCCESS > errnum) { switch (errnum) { - case ORTE_ERR_OUT_OF_RESOURCE: - case ORTE_ERR_TEMP_OUT_OF_RESOURCE: - return FTB_ERROR(MPI_OUT_OF_RESOURCE); + + case ORTE_SNAPC_CKPT_STATE_ESTABLISHED: + case ORTE_SNAPC_CKPT_STATE_RECOVERED: + return FTB_EVENT(FTB_MPI_PROCS_CKPTED); + + case ORTE_SNAPC_CKPT_STATE_NO_CKPT: + case ORTE_SNAPC_CKPT_STATE_ERROR: + return FTB_EVENT(FTB_MPI_PROCS_CKPT_FAIL); case ORTE_ERR_CONNECTION_REFUSED: case ORTE_ERR_CONNECTION_FAILED: case ORTE_ERR_UNREACH: - return FTB_ERROR(MPI_NODE_DEAD); + return FTB_EVENT(FTB_MPI_PROCS_UNREACHABLE); case ORTE_ERR_COMM_FAILURE: - return FTB_ERROR(MPI_COMM_FAILURE); + return FTB_EVENT(FTB_MPI_PROCS_COMM_ERROR); - case ORTE_ERR_PROC_DEAD: - return FTB_ERROR(MPI_RANK_DEAD); - - case ORTE_ERR_FATAL: default: - return FTB_ERROR(MPI_UNKNOWN_ERROR); + return NULL; } } - return FTB_ERROR(MPI_UNKNOWN_ERROR); + return NULL; } static void publish_ftb_event(orte_notifier_base_severity_t severity, int errcode, char *payload) @@ -162,11 +163,13 @@ static void publish_ftb_event(orte_notifier_base_severity_t severity, int errcod /* Publish the event to the Fault Tolerant Backplane */ event_name = get_ftb_event_name(errcode); - ret = FTB_Publish(ftb_client_handle, event_name, &eprop, &ehandle); - if (FTB_SUCCESS != ret) { - orte_show_help("help-orte-notifier-ftb.txt", "publish failed", true, - "FTB_Publish() failed", ret, get_ftb_event_severity(severity), - event_name, payload, errcode); + if (NULL != event_name) { + ret = FTB_Publish(ftb_client_handle, event_name, &eprop, &ehandle); + if (FTB_SUCCESS != ret) { + orte_show_help("help-orte-notifier-ftb.txt", "publish failed", true, + "FTB_Publish() failed", ret, get_ftb_event_severity(severity), + event_name, payload, errcode); + } } } @@ -175,7 +178,6 @@ static void ftb_log(orte_notifier_base_severity_t severity, int errcode, const c { char *payload; - /* If there was a message, output it */ vasprintf(&payload, msg, ap); if (NULL != payload) { publish_ftb_event(severity, errcode, payload);