1
1

A few changes to the FTB notifier interface:

- add an orte ftb notifier help file for more verbose error messages
- check if we can connect to the FTB during component->query and close
  the component, if we cannot.
- make the ftb component interface methods static.
- add MCA parameters to override the default subscription style and
  priority.

This commit was SVN r22011.
Этот коммит содержится в:
Abhishek Kulkarni 2009-09-24 23:56:41 +00:00
родитель 3340f62e5f
Коммит 2af7657db1
4 изменённых файлов: 169 добавлений и 55 удалений

Просмотреть файл

@ -0,0 +1,27 @@
# -*- text -*-
#
# Copyright (c) 2009 The Trustees of Indiana University and Indiana
# University Research and Technology Corporation.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English help file for Open MPI's FTB notifier support
#
[invalid subscription style]
Error: the Open MPI FTB component tried to register with an invalid
FTB client subscription style.
Subscription style: %s
#
[invalid value]
Error: the Open MPI FTB notifier component tried to register with an
invalid value in the FTB client information.
#
[unable to connect]
Open MPI's FTB notifier component was unable to establish a connection
with the FTB backplane.
#

Просмотреть файл

@ -28,21 +28,30 @@
BEGIN_C_DECLS
/*
* Component open / close
*/
int orte_notifier_ftb_open(void);
int orte_notifier_ftb_close(void);
int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority);
/*
 * FTB notifier component type: the generic notifier base component
 * followed by the FTB-specific settings of this component.
 */
typedef struct {
/* Generic notifier component; kept as the first member */
orte_notifier_base_component_t super;
/* FTB client subscription style (copied into the FTB client
 * information before connecting; released when the component closes) */
char *subscription_style;
/* Priority of this component */
int priority;
} orte_notifier_ftb_component_t;
/*
* Notifier interfaces
*/
ORTE_MODULE_DECLSPEC extern orte_notifier_base_component_t mca_notifier_ftb_component;
ORTE_MODULE_DECLSPEC extern orte_notifier_ftb_component_t mca_notifier_ftb_component;
extern orte_notifier_base_module_t orte_notifier_ftb_module;
/*
* FTB client information
*/
extern FTB_client_t ftb_client_info;
extern FTB_client_handle_t ftb_client_handle;
END_C_DECLS
#endif

Просмотреть файл

@ -18,6 +18,10 @@
*/
/** @file:
*
* This component proxies notification events to the Fault Tolerant
* Backplane (See http://www.mcs.anl.gov/research/cifts/).
* The ORTE notifier severity is translated to the corresponding
* FTB severity before the event is published to the FTB.
*/
/*
@ -26,44 +30,133 @@
#include "orte_config.h"
#include "orte/constants.h"
#include <string.h>
#include "orte/util/show_help.h"
#include "orte/runtime/orte_globals.h"
#include "opal/mca/base/mca_base_param.h"
#include "notifier_ftb.h"
static int orte_notifier_ftb_close(void);
static int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority);
static int orte_notifier_ftb_register(void);
/*
* Struct of function pointers that need to be initialized
*/
/* The single instance of the FTB notifier component */
orte_notifier_base_component_t mca_notifier_ftb_component = {
orte_notifier_ftb_component_t mca_notifier_ftb_component = {
{
ORTE_NOTIFIER_BASE_VERSION_1_0_0,
"ftb", /* MCA module name */
ORTE_MAJOR_VERSION, /* MCA module major version */
ORTE_MINOR_VERSION, /* MCA module minor version */
ORTE_RELEASE_VERSION, /* MCA module release version */
orte_notifier_ftb_open, /* module open */
orte_notifier_ftb_close, /* module close */
orte_notifier_ftb_component_query /* module query */
{
ORTE_NOTIFIER_BASE_VERSION_1_0_0,
"ftb", /* MCA module name */
ORTE_MAJOR_VERSION, /* MCA module major version */
ORTE_MINOR_VERSION, /* MCA module minor version */
ORTE_RELEASE_VERSION, /* MCA module release version */
NULL, /* module open (no open function is provided) */
orte_notifier_ftb_close, /* module close */
orte_notifier_ftb_component_query, /* module query */
orte_notifier_ftb_register, /* module register */
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
/* FTB client subscription style */
/* (initial value of the "subscription_style" MCA parameter) */
"FTB_SUBSCRIPTION_NONE",
/* Priority */
/* (initial value of the "priority" MCA parameter) */
10,
};
/* Open the component */
int orte_notifier_ftb_open(void)
/*
 * Close the component.
 *
 * Releases the subscription style string and, if the FTB client handle
 * is marked valid, disconnects the client from the FTB.  Both pieces of
 * state are reset afterwards so that calling this function more than
 * once neither double-frees the string nor disconnects twice.
 *
 * @return ORTE_SUCCESS always.
 */
static int orte_notifier_ftb_close(void)
{
    /* NOTE(review): this assumes subscription_style holds heap memory by
     * the time close runs (i.e. the MCA parameter registration replaced
     * the string-literal initializer) -- confirm. */
    free(mca_notifier_ftb_component.subscription_style);  /* free(NULL) is a no-op */
    mca_notifier_ftb_component.subscription_style = NULL;

    /* If the FTB client handle is valid, disconnect the client */
    if (1 == ftb_client_handle.valid) {
        FTB_Disconnect(ftb_client_handle);
        /* The handle is passed by value, so mark it invalid ourselves */
        ftb_client_handle.valid = 0;
    }

    return ORTE_SUCCESS;
}
int orte_notifier_ftb_close(void)
/*
 * Component query: decide whether the FTB notifier can be selected.
 *
 * Fills in the global FTB client information (event space, client name,
 * job id and subscription style) and tries to connect to the FTB
 * backplane.  The component only offers its module if that connection
 * succeeds.
 *
 * @param module    [out] the FTB notifier module on success, NULL otherwise.
 * @param priority  [out] the configured component priority on success,
 *                  0 otherwise.
 *
 * @return ORTE_SUCCESS if the connection was established,
 *         ORTE_ERR_NOT_FOUND if FTB_Connect() failed (a help message
 *         describing the failure is shown first).
 */
static int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority)
{
    int ret;
    const char *style = mca_notifier_ftb_component.subscription_style;

    /* Report "not selectable" unless the connection below succeeds */
    *priority = 0;
    *module = NULL;

    /* Fall back to the component's built-in default if no style is set */
    if (NULL == style) {
        style = "FTB_SUBSCRIPTION_NONE";
    }

    /* Fill the FTB client information structure.  Every field is written
       with a copy bounded by the destination size, so an over-long value
       is truncated instead of overflowing the field. */
    memset(&ftb_client_info, 0, sizeof(ftb_client_info));
    snprintf(ftb_client_info.event_space, sizeof(ftb_client_info.event_space),
             "%s", "ftb.mpi.openmpi");

    /* We represent each client with a client name of the form
       openmpi/<hostname>/<PID> as a unique identifier in the
       FTB client namespace */
    snprintf(ftb_client_info.client_name, sizeof(ftb_client_info.client_name),
             "openmpi/%s/%u",
             orte_process_info.nodename, (unsigned int) orte_process_info.pid);

    snprintf(ftb_client_info.client_jobid, sizeof(ftb_client_info.client_jobid),
             "%u", (unsigned int) ORTE_PROC_MY_NAME->jobid);

    snprintf(ftb_client_info.client_subscription_style,
             sizeof(ftb_client_info.client_subscription_style),
             "%s", style);

    /* We try to connect to the FTB backplane now, and we give up on
       this component if we cannot connect for some reason. */
    if (FTB_SUCCESS != (ret = FTB_Connect(&ftb_client_info, &ftb_client_handle))) {
        /* Show exactly one help message, matching the failure reason */
        switch (ret) {
        case FTB_ERR_SUBSCRIPTION_STYLE:
            orte_show_help("help-orte-notifier-ftb.txt",
                           "invalid subscription style",
                           true, ftb_client_info.client_subscription_style);
            break;

        case FTB_ERR_INVALID_VALUE:
            orte_show_help("help-orte-notifier-ftb.txt",
                           "invalid value",
                           true);
            break;

        default:
            orte_show_help("help-orte-notifier-ftb.txt",
                           "unable to connect",
                           true);
            break;
        }

        return ORTE_ERR_NOT_FOUND;
    }

    /* Honor the "priority" MCA parameter (its default is 10) */
    *priority = mca_notifier_ftb_component.priority;
    *module = (mca_base_module_t *)&orte_notifier_ftb_module;

    return ORTE_SUCCESS;
}
/*
 * Register the MCA parameters of the FTB notifier component:
 * "subscription_style" (string) and "priority" (integer).  For each
 * parameter the component field's current value and the field's
 * address are handed to the registration call.
 *
 * @return ORTE_SUCCESS always.
 */
static int orte_notifier_ftb_register(void)
{
    char **style_field = &mca_notifier_ftb_component.subscription_style;
    int *priority_field = &mca_notifier_ftb_component.priority;

    /* Subscription style used when connecting as an FTB client */
    mca_base_param_reg_string(&mca_notifier_ftb_component.super.base_version,
                              "subscription_style",
                              "FTB client subscription style. "
                              "Possible values are none, polling, notify and both (polling and notify).",
                              false, false,
                              *style_field,
                              style_field);

    /* Selection priority of this component */
    mca_base_param_reg_int(&mca_notifier_ftb_component.super.base_version,
                           "priority",
                           "Priority of this component",
                           false, false,
                           *priority_field,
                           priority_field);

    return ORTE_SUCCESS;
}

Просмотреть файл

@ -51,14 +51,11 @@ orte_notifier_base_module_t orte_notifier_ftb_module = {
mypeerlog
};
/* Module "global" variables */
static FTB_client_t cinfo = {
.event_space = "ftb.mpi.openmpi",
.client_name = "",
.client_jobid = "",
.client_subscription_style = "FTB_SUBSCRIPTION_NONE"
};
static FTB_client_handle_t chandle;
/* FTB client information */
FTB_client_t ftb_client_info;
/* FTB client handle */
FTB_client_handle_t ftb_client_handle;
static FTB_event_info_t ftb_event_info[] = {
/* 0 */ {"UNKNOWN_ERROR", "error"},
@ -102,23 +99,11 @@ static int orte_err2ftb(int errnum)
static int init(void) {
int ret;
/* snprintf(cinfo.client_name, FTB_MAX_CLIENT_NAME, "%s", argv[0]);
* How to obtain argv[0] at this point? I don't know...
* similarly, how do we obtain client_jobid?
* snprintf(cinfo.client_jobid, FTB_MAX_CLIENT_JOBID, "%s", orte_jobid???);
*/
if (FTB_SUCCESS != (ret = FTB_Connect(&cinfo, &chandle))) {
opal_output(orte_notifier_base_output,
"notifier:ftb:init FTB_Connect failed ret=%d\n", ret);
return ORTE_ERROR;
}
ret = FTB_Declare_publishable_events(chandle, 0, ftb_event_info, ftb_event_info_count);
ret = FTB_Declare_publishable_events(ftb_client_handle, 0, ftb_event_info, ftb_event_info_count);
if (FTB_SUCCESS != ret) {
opal_output(orte_notifier_base_output,
"notifier:ftb:init FTB_Declare_publishable_events failed ret=%d\n", ret);
FTB_Disconnect(chandle);
FTB_Disconnect(ftb_client_handle);
return ORTE_ERROR;
}
@ -126,7 +111,7 @@ static int init(void) {
}
/*
 * Module finalize: disconnect the client from the FTB.
 *
 * The disconnect is only attempted while the shared client handle is
 * marked valid, and the handle is marked invalid afterwards, so that a
 * later disconnect attempt on the same handle is skipped.
 */
static void finalize(void) {
    if (1 == ftb_client_handle.valid) {
        FTB_Disconnect(ftb_client_handle);
        /* The handle is passed by value, so mark it invalid ourselves */
        ftb_client_handle.valid = 0;
    }
}
static void convert2ftb(int errcode, char *payload)
@ -138,7 +123,7 @@ static void convert2ftb(int errcode, char *payload)
snprintf(eprop.event_payload, FTB_MAX_PAYLOAD_DATA, "%s", (payload != NULL) ? payload : "");
event_id = orte_err2ftb(errcode);
ret = FTB_Publish(chandle, ftb_event_info[event_id].event_name, &eprop, &ehandle);
ret = FTB_Publish(ftb_client_handle, ftb_event_info[event_id].event_name, &eprop, &ehandle);
if (FTB_SUCCESS != ret) {
opal_output(orte_notifier_base_output,
"notifier:ftb:convert2ftb(%d,'%s') FTB_Publish failed ret=%d\n", errcode, eprop.event_payload, ret);