diff --git a/orte/mca/notifier/ftb/help-orte-notifier-ftb.txt b/orte/mca/notifier/ftb/help-orte-notifier-ftb.txt
new file mode 100644
index 0000000000..b18968dd54
--- /dev/null
+++ b/orte/mca/notifier/ftb/help-orte-notifier-ftb.txt
@@ -0,0 +1,27 @@
+# -*- text -*-
+#
+# Copyright (c) 2009 The Trustees of Indiana University and Indiana
+#                    University Research and Technology Corporation.
+#                    All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+# This is the US/English help file for Open MPI's FTB notifier support
+#
+[invalid subscription style]
+Error: the Open MPI FTB component tried to register with an invalid
+FTB client subscription style.
+
+  Subscription style: %s
+#
+[invalid value]
+Error: the Open MPI FTB notifier component tried to register with an
+invalid value in the FTB client information.
+#
+[unable to connect]
+Open MPI's FTB notifier component was unable to establish a connection
+with the FTB backplane.
+#
diff --git a/orte/mca/notifier/ftb/notifier_ftb.h b/orte/mca/notifier/ftb/notifier_ftb.h
index a46c99259a..1bcdc51eb9 100644
--- a/orte/mca/notifier/ftb/notifier_ftb.h
+++ b/orte/mca/notifier/ftb/notifier_ftb.h
@@ -28,21 +28,30 @@
 
 BEGIN_C_DECLS
 
-/*
- * Component open / close
- */
-int orte_notifier_ftb_open(void);
-int orte_notifier_ftb_close(void);
-int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority);
+typedef struct {
+    orte_notifier_base_component_t super;
 
+    /* FTB client subscription style */
+    char *subscription_style;
+
+    /* Priority of this component */
+    int priority;
+} orte_notifier_ftb_component_t;
 
 /*
  * Notifier interfaces
  */
 
-ORTE_MODULE_DECLSPEC extern orte_notifier_base_component_t mca_notifier_ftb_component;
+ORTE_MODULE_DECLSPEC extern orte_notifier_ftb_component_t mca_notifier_ftb_component;
 extern orte_notifier_base_module_t orte_notifier_ftb_module;
 
+/*
+ * FTB client information
+ */
+
+extern FTB_client_t ftb_client_info;
+extern FTB_client_handle_t ftb_client_handle;
+
 END_C_DECLS
 
 #endif
diff --git a/orte/mca/notifier/ftb/notifier_ftb_component.c b/orte/mca/notifier/ftb/notifier_ftb_component.c
index da815d7344..44ed8ad4a3 100644
--- a/orte/mca/notifier/ftb/notifier_ftb_component.c
+++ b/orte/mca/notifier/ftb/notifier_ftb_component.c
@@ -18,6 +18,10 @@
  */
 /** @file:
  *
+ * This component proxies notification events to the Fault Tolerant
+ * Backplane (See http://www.mcs.anl.gov/research/cifts/).
+ * The ORTE notifier severity is translated to the corresponding
+ * FTB severity before the event is published to the FTB.
  */
 
 /*
@@ -26,44 +30,156 @@
 #include "orte_config.h"
 #include "orte/constants.h"
 
+#include <string.h>
+
+#include "orte/util/show_help.h"
+#include "orte/runtime/orte_globals.h"
+#include "opal/mca/base/mca_base_param.h"
 #include "notifier_ftb.h"
 
+static int orte_notifier_ftb_close(void);
+static int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority);
+static int orte_notifier_ftb_register(void);
+
 /*
  * Struct of function pointers that need to be initialized
  */
-orte_notifier_base_component_t mca_notifier_ftb_component = {
+orte_notifier_ftb_component_t mca_notifier_ftb_component = {
     {
-        ORTE_NOTIFIER_BASE_VERSION_1_0_0,
-
-        "ftb", /* MCA module name */
-        ORTE_MAJOR_VERSION,  /* MCA module major version */
-        ORTE_MINOR_VERSION,  /* MCA module minor version */
-        ORTE_RELEASE_VERSION,  /* MCA module release version */
-        orte_notifier_ftb_open,  /* module open */
-        orte_notifier_ftb_close, /* module close */
-        orte_notifier_ftb_component_query /* module query */
+        {
+            ORTE_NOTIFIER_BASE_VERSION_1_0_0,
+
+            "ftb", /* MCA module name */
+            ORTE_MAJOR_VERSION,  /* MCA module major version */
+            ORTE_MINOR_VERSION,  /* MCA module minor version */
+            ORTE_RELEASE_VERSION,  /* MCA module release version */
+
+            NULL, /* module open: nothing to do */
+            orte_notifier_ftb_close, /* module close */
+            orte_notifier_ftb_component_query, /* module query */
+            orte_notifier_ftb_register, /* module register */
+        },
+        {
+            /* The component is checkpoint ready */
+            MCA_BASE_METADATA_PARAM_CHECKPOINT
+        }
     },
-    {
-        /* The component is checkpoint ready */
-        MCA_BASE_METADATA_PARAM_CHECKPOINT
-    }
+
+    /* FTB client subscription style */
+    "FTB_SUBSCRIPTION_NONE",
+
+    /* Priority */
+    10,
 };
 
-/* Open the component */
-int orte_notifier_ftb_open(void)
+/* Close the component: release the subscription style string and
+   disconnect from the FTB backplane if we are still connected */
+static int orte_notifier_ftb_close(void)
 {
+    /* NOTE(review): assumes mca_base_param_reg_string() replaced the
+       static default with a heap copy -- confirm.  Clear the pointer
+       so that a repeated close cannot free it twice. */
+    free(mca_notifier_ftb_component.subscription_style);
+    mca_notifier_ftb_component.subscription_style = NULL;
+
+    /* If the FTB client handle is valid, disconnect the client and
+       mark the handle invalid so we never disconnect twice */
+    if (1 == ftb_client_handle.valid) {
+        FTB_Disconnect(ftb_client_handle);
+        ftb_client_handle.valid = 0;
+    }
+
     return ORTE_SUCCESS;
 }
 
-int orte_notifier_ftb_close(void)
+/* Query: connect to the FTB backplane.  On success return the module
+   at the configured priority; otherwise show why and return
+   ORTE_ERR_NOT_FOUND with *module set to NULL. */
+static int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority)
 {
-    return ORTE_SUCCESS;
-}
+    int ret;
+    const char *style = mca_notifier_ftb_component.subscription_style;
+
+    *priority = 0;
+    *module = NULL;
 
-int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority)
-{
-    /* we are a lower-level default, so set a low priority so we can be overridden */
-    *priority = 1;
+    /* Fill the FTB client information structure */
+    memset(&ftb_client_info, 0, sizeof(ftb_client_info));
+    strcpy(ftb_client_info.event_space, "ftb.mpi.openmpi");
+
+    /* We represent each client with a client name of the form
+       openmpi/<hostname>/<pid> as a unique identifier in the
+       FTB client namespace.  The copies below are bounded with
+       sizeof(), which assumes the FTB_client_t string fields are
+       char arrays -- TODO confirm against libftb.h */
+    snprintf(ftb_client_info.client_name, sizeof(ftb_client_info.client_name),
+             "openmpi/%s/%lu", orte_process_info.nodename,
+             (unsigned long) orte_process_info.pid);
+
+    snprintf(ftb_client_info.client_jobid, sizeof(ftb_client_info.client_jobid),
+             "%u", (unsigned int) ORTE_PROC_MY_NAME->jobid);
+
+    /* snprintf() always NUL-terminates and truncates an over-long
+       user-supplied style; a NULL style falls back to the default */
+    snprintf(ftb_client_info.client_subscription_style,
+             sizeof(ftb_client_info.client_subscription_style), "%s",
+             (NULL != style) ? style : "FTB_SUBSCRIPTION_NONE");
+
+    /* We try to connect to the FTB backplane now, and we abort
+       if we cannot connect for some reason. */
+    if (FTB_SUCCESS != (ret = FTB_Connect(&ftb_client_info, &ftb_client_handle))) {
+        /* Each case ends in a break so that exactly one help
+           message is shown per failure */
+        switch (ret) {
+        case FTB_ERR_SUBSCRIPTION_STYLE:
+            orte_show_help("help-orte-notifier-ftb.txt",
+                           "invalid subscription style",
+                           true, ftb_client_info.client_subscription_style);
+            break;
+
+        case FTB_ERR_INVALID_VALUE:
+            orte_show_help("help-orte-notifier-ftb.txt",
+                           "invalid value",
+                           true);
+            break;
+
+        default:
+            orte_show_help("help-orte-notifier-ftb.txt",
+                           "unable to connect",
+                           true);
+            break;
+        }
+
+        return ORTE_ERR_NOT_FOUND;
+    }
+
+    /* Report the priority registered as the "priority" MCA parameter */
+    *priority = mca_notifier_ftb_component.priority;
     *module = (mca_base_module_t *)&orte_notifier_ftb_module;
-    return ORTE_SUCCESS;
+
+    return ORTE_SUCCESS;
+}
+
+/* Register the component's MCA parameters */
+static int orte_notifier_ftb_register(void)
+{
+
+    /* FTB client subscription style */
+    mca_base_param_reg_string(&mca_notifier_ftb_component.super.base_version,
+                              "subscription_style",
+                              "FTB client subscription style. "
+                              "Possible values are none, polling, notify and both (polling and notify).",
+                              false, false,
+                              mca_notifier_ftb_component.subscription_style,
+                              &mca_notifier_ftb_component.subscription_style);
+
+    /* Priority */
+    mca_base_param_reg_int(&mca_notifier_ftb_component.super.base_version,
+                           "priority",
+                           "Priority of this component",
+                           false, false,
+                           mca_notifier_ftb_component.priority,
+                           &mca_notifier_ftb_component.priority);
+
+    return ORTE_SUCCESS;
 }
diff --git a/orte/mca/notifier/ftb/notifier_ftb_module.c b/orte/mca/notifier/ftb/notifier_ftb_module.c
index 969de40ced..5779f31eb5 100644
--- a/orte/mca/notifier/ftb/notifier_ftb_module.c
+++ b/orte/mca/notifier/ftb/notifier_ftb_module.c
@@ -51,14 +51,11 @@ orte_notifier_base_module_t orte_notifier_ftb_module = {
     mypeerlog
 };
 
-/* Module "global" variables */
-static FTB_client_t cinfo = {
-    .event_space = "ftb.mpi.openmpi",
-    .client_name = "",
-    .client_jobid = "",
-    .client_subscription_style = "FTB_SUBSCRIPTION_NONE"
-};
-static FTB_client_handle_t chandle;
+/* FTB client information */
+FTB_client_t ftb_client_info;
+
+/* FTB client handle */
+FTB_client_handle_t ftb_client_handle;
 
 static FTB_event_info_t ftb_event_info[] = {
     /* 0 */ {"UNKNOWN_ERROR", "error"},
@@ -102,23 +99,13 @@ static int orte_err2ftb(int errnum)
 static int init(void) {
     int ret;
 
-/* snprintf(cinfo.client_name, FTB_MAX_CLIENT_NAME, "%s", argv[0]);
- * How to obtain argv[0] at this point? I don't know...
- * similarly, how do we obtain client_jobid?
- * snprintf(cinfo.client_jobid, FTB_MAX_CLIENT_JOBID, "%s", orte_jobid???);
- */
-
-    if (FTB_SUCCESS != (ret = FTB_Connect(&cinfo, &chandle))) {
-        opal_output(orte_notifier_base_output,
-            "notifier:ftb:init FTB_Connect failed ret=%d\n", ret);
-        return ORTE_ERROR;
-    }
-
-    ret = FTB_Declare_publishable_events(chandle, 0, ftb_event_info, ftb_event_info_count);
+    ret = FTB_Declare_publishable_events(ftb_client_handle, 0, ftb_event_info, ftb_event_info_count);
     if (FTB_SUCCESS != ret) {
         opal_output(orte_notifier_base_output,
             "notifier:ftb:init FTB_Declare_publishable_events failed ret=%d\n", ret);
-        FTB_Disconnect(chandle);
+        FTB_Disconnect(ftb_client_handle);
+        /* Keep component close from disconnecting a second time */
+        ftb_client_handle.valid = 0;
         return ORTE_ERROR;
     }
 
@@ -126,7 +113,9 @@ static int init(void) {
 }
 
 static void finalize(void) {
-    FTB_Disconnect(chandle);
+    FTB_Disconnect(ftb_client_handle);
+    /* Keep component close from disconnecting a second time */
+    ftb_client_handle.valid = 0;
 }
 
 static void convert2ftb(int errcode, char *payload)
@@ -138,7 +127,7 @@ static void convert2ftb(int errcode, char *payload)
     snprintf(eprop.event_payload, FTB_MAX_PAYLOAD_DATA, "%s", (payload != NULL) ? payload : "");
 
     event_id = orte_err2ftb(errcode);
-    ret = FTB_Publish(chandle, ftb_event_info[event_id].event_name, &eprop, &ehandle);
+    ret = FTB_Publish(ftb_client_handle, ftb_event_info[event_id].event_name, &eprop, &ehandle);
     if (FTB_SUCCESS != ret) {
         opal_output(orte_notifier_base_output,
             "notifier:ftb:convert2ftb(%d,'%s') FTB_Publish failed ret=%d\n", errcode, eprop.event_payload, ret);