1
1

A few changes to the FTB notifier interface:

- add an orte ftb notifier help file for more verbose error messages
- check whether we can connect to the FTB during component->query, and
  close the component if we cannot.
- make the ftb component interface methods static.
- add mca parameters to override the default subscription style and
  priority.

This commit was SVN r22011.
Этот коммит содержится в:
Abhishek Kulkarni 2009-09-24 23:56:41 +00:00
родитель 3340f62e5f
Коммит 2af7657db1
4 изменённых файлов: 169 добавлений и 55 удалений

Просмотреть файл

@ -0,0 +1,27 @@
# -*- text -*-
#
# Copyright (c) 2009 The Trustees of Indiana University and Indiana
# University Research and Technology Corporation.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English help file for Open MPI's FTB notifier support
#
[invalid subscription style]
Error: the Open MPI FTB component tried to register with an invalid
FTB client subscription style.
Subscription style: %s
#
[invalid value]
Error: the Open MPI FTB notifier component tried to register with an
invalid value in the FTB client information.
#
[unable to connect]
Open MPI's FTB notifier component was unable to establish a connection
with the FTB backplane.
#

Просмотреть файл

@ -28,21 +28,30 @@
BEGIN_C_DECLS BEGIN_C_DECLS
/* typedef struct {
* Component open / close orte_notifier_base_component_t super;
*/
int orte_notifier_ftb_open(void);
int orte_notifier_ftb_close(void);
int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority);
/* FTB client subscription style */
char *subscription_style;
/* Priority of this component */
int priority;
} orte_notifier_ftb_component_t;
/* /*
* Notifier interfaces * Notifier interfaces
*/ */
ORTE_MODULE_DECLSPEC extern orte_notifier_base_component_t mca_notifier_ftb_component; ORTE_MODULE_DECLSPEC extern orte_notifier_ftb_component_t mca_notifier_ftb_component;
extern orte_notifier_base_module_t orte_notifier_ftb_module; extern orte_notifier_base_module_t orte_notifier_ftb_module;
/*
* FTB client information
*/
extern FTB_client_t ftb_client_info;
extern FTB_client_handle_t ftb_client_handle;
END_C_DECLS END_C_DECLS
#endif #endif

Просмотреть файл

@ -18,6 +18,10 @@
*/ */
/** @file: /** @file:
* *
* This component proxies notification events to the Fault Tolerant
* Backplane (See http://www.mcs.anl.gov/research/cifts/).
* The ORTE notifier severity is translated to the corresponding
* FTB severity before the event is published to the FTB.
*/ */
/* /*
@ -26,44 +30,133 @@
#include "orte_config.h" #include "orte_config.h"
#include "orte/constants.h" #include "orte/constants.h"
#include <string.h>
#include "orte/util/show_help.h"
#include "orte/runtime/orte_globals.h"
#include "opal/mca/base/mca_base_param.h"
#include "notifier_ftb.h" #include "notifier_ftb.h"
static int orte_notifier_ftb_close(void);
static int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority);
static int orte_notifier_ftb_register(void);
/* /*
* Struct of function pointers that need to be initialized * Struct of function pointers that need to be initialized
*/ */
orte_notifier_base_component_t mca_notifier_ftb_component = { orte_notifier_ftb_component_t mca_notifier_ftb_component = {
{ {
ORTE_NOTIFIER_BASE_VERSION_1_0_0, {
ORTE_NOTIFIER_BASE_VERSION_1_0_0,
"ftb", /* MCA module name */
ORTE_MAJOR_VERSION, /* MCA module major version */ "ftb", /* MCA module name */
ORTE_MINOR_VERSION, /* MCA module minor version */ ORTE_MAJOR_VERSION, /* MCA module major version */
ORTE_RELEASE_VERSION, /* MCA module release version */ ORTE_MINOR_VERSION, /* MCA module minor version */
orte_notifier_ftb_open, /* module open */ ORTE_RELEASE_VERSION, /* MCA module release version */
orte_notifier_ftb_close, /* module close */
orte_notifier_ftb_component_query /* module query */ NULL,
orte_notifier_ftb_close, /* module close */
orte_notifier_ftb_component_query, /* module query */
orte_notifier_ftb_register, /* module register */
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
}, },
{
/* The component is checkpoint ready */ /* FTB client subscription style */
MCA_BASE_METADATA_PARAM_CHECKPOINT "FTB_SUBSCRIPTION_NONE",
}
/* Priority */
10,
}; };
/* Open the component */ static int orte_notifier_ftb_close(void)
int orte_notifier_ftb_open(void)
{ {
if (NULL != mca_notifier_ftb_component.subscription_style) {
free(mca_notifier_ftb_component.subscription_style);
}
/* If the FTB client handle is valid, disconnect the client */
if (1 == ftb_client_handle.valid) {
FTB_Disconnect(ftb_client_handle);
}
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
int orte_notifier_ftb_close(void) static int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority)
{ {
return ORTE_SUCCESS; int ret;
} *priority = 0;
*module = NULL;
int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority) /* Fill the FTB client information structure */
{ memset(&ftb_client_info, 0, sizeof(ftb_client_info));
/* we are a lower-level default, so set a low priority so we can be overridden */ strcpy(ftb_client_info.event_space, "ftb.mpi.openmpi");
*priority = 1;
/* We represent each client with a client name of the form
openmpi/<hostname>/<PID> as a unique identifier in the
FTB client namespace */
sprintf(ftb_client_info.client_name, "openmpi/%s/%u",
orte_process_info.nodename, orte_process_info.pid);
sprintf(ftb_client_info.client_jobid, "%u", ORTE_PROC_MY_NAME->jobid);
strncpy(ftb_client_info.client_subscription_style,
mca_notifier_ftb_component.subscription_style,
strlen(mca_notifier_ftb_component.subscription_style));
/* We try to connect to the FTB backplane now, and we abort
if we cannot connect for some reason. */
if (FTB_SUCCESS != (ret = FTB_Connect(&ftb_client_info, &ftb_client_handle))) {
switch (ret) {
case FTB_ERR_SUBSCRIPTION_STYLE:
orte_show_help("help-orte-notifier-ftb.txt",
"invalid subscription style",
true, ftb_client_info.client_subscription_style);
case FTB_ERR_INVALID_VALUE:
orte_show_help("help-orte-notifier-ftb.txt",
"invalid value",
true);
default:
orte_show_help("help-orte-notifier-ftb.txt",
"unable to connect",
true);
}
return ORTE_ERR_NOT_FOUND;
}
*priority = 10;
*module = (mca_base_module_t *)&orte_notifier_ftb_module; *module = (mca_base_module_t *)&orte_notifier_ftb_module;
return ORTE_SUCCESS;
return ORTE_SUCCESS;
}
static int orte_notifier_ftb_register(void)
{
/* FTB client subscription style */
mca_base_param_reg_string(&mca_notifier_ftb_component.super.base_version,
"subscription_style",
"FTB client subscription style. "
"Possible values are none, polling, notify and both (polling and notify).",
false, false,
mca_notifier_ftb_component.subscription_style,
&mca_notifier_ftb_component.subscription_style);
/* Priority */
mca_base_param_reg_int(&mca_notifier_ftb_component.super.base_version,
"priority",
"Priority of this component",
false, false,
mca_notifier_ftb_component.priority,
&mca_notifier_ftb_component.priority);
return ORTE_SUCCESS;
} }

Просмотреть файл

@ -51,14 +51,11 @@ orte_notifier_base_module_t orte_notifier_ftb_module = {
mypeerlog mypeerlog
}; };
/* Module "global" variables */ /* FTB client information */
static FTB_client_t cinfo = { FTB_client_t ftb_client_info;
.event_space = "ftb.mpi.openmpi",
.client_name = "", /* FTB client handle */
.client_jobid = "", FTB_client_handle_t ftb_client_handle;
.client_subscription_style = "FTB_SUBSCRIPTION_NONE"
};
static FTB_client_handle_t chandle;
static FTB_event_info_t ftb_event_info[] = { static FTB_event_info_t ftb_event_info[] = {
/* 0 */ {"UNKNOWN_ERROR", "error"}, /* 0 */ {"UNKNOWN_ERROR", "error"},
@ -102,23 +99,11 @@ static int orte_err2ftb(int errnum)
static int init(void) { static int init(void) {
int ret; int ret;
/* snprintf(cinfo.client_name, FTB_MAX_CLIENT_NAME, "%s", argv[0]); ret = FTB_Declare_publishable_events(ftb_client_handle, 0, ftb_event_info, ftb_event_info_count);
* How to obtain argv[0] at this point? I don't know...
* similarly, how do we obtain client_jobid?
* snprintf(cinfo.client_jobid, FTB_MAX_CLIENT_JOBID, "%s", orte_jobid???);
*/
if (FTB_SUCCESS != (ret = FTB_Connect(&cinfo, &chandle))) {
opal_output(orte_notifier_base_output,
"notifier:ftb:init FTB_Connect failed ret=%d\n", ret);
return ORTE_ERROR;
}
ret = FTB_Declare_publishable_events(chandle, 0, ftb_event_info, ftb_event_info_count);
if (FTB_SUCCESS != ret) { if (FTB_SUCCESS != ret) {
opal_output(orte_notifier_base_output, opal_output(orte_notifier_base_output,
"notifier:ftb:init FTB_Declare_publishable_events failed ret=%d\n", ret); "notifier:ftb:init FTB_Declare_publishable_events failed ret=%d\n", ret);
FTB_Disconnect(chandle); FTB_Disconnect(ftb_client_handle);
return ORTE_ERROR; return ORTE_ERROR;
} }
@ -126,7 +111,7 @@ static int init(void) {
} }
static void finalize(void) { static void finalize(void) {
FTB_Disconnect(chandle); FTB_Disconnect(ftb_client_handle);
} }
static void convert2ftb(int errcode, char *payload) static void convert2ftb(int errcode, char *payload)
@ -138,7 +123,7 @@ static void convert2ftb(int errcode, char *payload)
snprintf(eprop.event_payload, FTB_MAX_PAYLOAD_DATA, "%s", (payload != NULL) ? payload : ""); snprintf(eprop.event_payload, FTB_MAX_PAYLOAD_DATA, "%s", (payload != NULL) ? payload : "");
event_id = orte_err2ftb(errcode); event_id = orte_err2ftb(errcode);
ret = FTB_Publish(chandle, ftb_event_info[event_id].event_name, &eprop, &ehandle); ret = FTB_Publish(ftb_client_handle, ftb_event_info[event_id].event_name, &eprop, &ehandle);
if (FTB_SUCCESS != ret) { if (FTB_SUCCESS != ret) {
opal_output(orte_notifier_base_output, opal_output(orte_notifier_base_output,
"notifier:ftb:convert2ftb(%d,'%s') FTB_Publish failed ret=%d\n", errcode, eprop.event_payload, ret); "notifier:ftb:convert2ftb(%d,'%s') FTB_Publish failed ret=%d\n", errcode, eprop.event_payload, ret);