A few changes to the FTB notifier interface:
- add an ORTE FTB notifier help file for more verbose error messages - check whether we can connect to the FTB during component->query, and close the component if we cannot - make the FTB component interface methods static - add MCA parameters to override the default subscription style and priority. This commit was SVN r22011.
Этот коммит содержится в:
родитель
3340f62e5f
Коммит
2af7657db1
27
orte/mca/notifier/ftb/help-orte-notifier-ftb.txt
Обычный файл
27
orte/mca/notifier/ftb/help-orte-notifier-ftb.txt
Обычный файл
@ -0,0 +1,27 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2009 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology Corporation.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the US/English help file for Open MPI's FTB notifier support
|
||||
#
|
||||
[invalid subscription style]
|
||||
Error: the Open MPI FTB component tried to register with an invalid
|
||||
FTB client subscription style.
|
||||
|
||||
Subscription style: %s
|
||||
#
|
||||
[invalid value]
|
||||
Error: the Open MPI FTB notifier component tried to register with an
|
||||
invalid value in the FTB client information.
|
||||
#
|
||||
[unable to connect]
|
||||
Open MPI's FTB notifier component was unable to establish a connection
|
||||
with the FTB backplane.
|
||||
#
|
@ -28,21 +28,30 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* Component open / close
|
||||
*/
|
||||
int orte_notifier_ftb_open(void);
|
||||
int orte_notifier_ftb_close(void);
|
||||
int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority);
|
||||
/* FTB notifier component: the base notifier component plus FTB-specific settings */
typedef struct {
|
||||
/* Base notifier component (referenced elsewhere as .super, e.g. .super.base_version) */
orte_notifier_base_component_t super;
|
||||
|
||||
/* FTB client subscription style; registered as the "subscription_style"
   MCA parameter and copied into the FTB client information at query time */
|
||||
char *subscription_style;
|
||||
|
||||
/* Priority of this component; registered as the "priority" MCA parameter */
|
||||
int priority;
|
||||
} orte_notifier_ftb_component_t;
|
||||
|
||||
/*
|
||||
* Notifier interfaces
|
||||
*/
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_notifier_base_component_t mca_notifier_ftb_component;
|
||||
ORTE_MODULE_DECLSPEC extern orte_notifier_ftb_component_t mca_notifier_ftb_component;
|
||||
extern orte_notifier_base_module_t orte_notifier_ftb_module;
|
||||
|
||||
/*
|
||||
* FTB client information
|
||||
*/
|
||||
|
||||
extern FTB_client_t ftb_client_info;
|
||||
extern FTB_client_handle_t ftb_client_handle;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
@ -18,6 +18,10 @@
|
||||
*/
|
||||
/** @file:
|
||||
*
|
||||
* This component proxies notification events to the Fault Tolerant
|
||||
* Backplane (See http://www.mcs.anl.gov/research/cifts/).
|
||||
* The ORTE notifier severity is translated to the corresponding
|
||||
* FTB severity before the event is published to the FTB.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -26,44 +30,133 @@
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "notifier_ftb.h"
|
||||
|
||||
static int orte_notifier_ftb_close(void);
|
||||
static int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority);
|
||||
static int orte_notifier_ftb_register(void);
|
||||
|
||||
/*
|
||||
* Struct of function pointers that need to be initialized
|
||||
*/
|
||||
orte_notifier_base_component_t mca_notifier_ftb_component = {
|
||||
orte_notifier_ftb_component_t mca_notifier_ftb_component = {
|
||||
{
|
||||
ORTE_NOTIFIER_BASE_VERSION_1_0_0,
|
||||
|
||||
"ftb", /* MCA module name */
|
||||
ORTE_MAJOR_VERSION, /* MCA module major version */
|
||||
ORTE_MINOR_VERSION, /* MCA module minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA module release version */
|
||||
orte_notifier_ftb_open, /* module open */
|
||||
orte_notifier_ftb_close, /* module close */
|
||||
orte_notifier_ftb_component_query /* module query */
|
||||
{
|
||||
ORTE_NOTIFIER_BASE_VERSION_1_0_0,
|
||||
|
||||
"ftb", /* MCA module name */
|
||||
ORTE_MAJOR_VERSION, /* MCA module major version */
|
||||
ORTE_MINOR_VERSION, /* MCA module minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA module release version */
|
||||
|
||||
NULL,
|
||||
orte_notifier_ftb_close, /* module close */
|
||||
orte_notifier_ftb_component_query, /* module query */
|
||||
orte_notifier_ftb_register, /* module register */
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
}
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
}
|
||||
|
||||
/* FTB client subscription style */
|
||||
"FTB_SUBSCRIPTION_NONE",
|
||||
|
||||
/* Priority */
|
||||
10,
|
||||
};
|
||||
|
||||
/* Open the component */
|
||||
int orte_notifier_ftb_open(void)
|
||||
static int orte_notifier_ftb_close(void)
|
||||
{
|
||||
|
||||
if (NULL != mca_notifier_ftb_component.subscription_style) {
|
||||
free(mca_notifier_ftb_component.subscription_style);
|
||||
}
|
||||
|
||||
/* If the FTB client handle is valid, disconnect the client */
|
||||
if (1 == ftb_client_handle.valid) {
|
||||
FTB_Disconnect(ftb_client_handle);
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
int orte_notifier_ftb_close(void)
|
||||
/*
 * Query the component: try to connect to the FTB backplane and, on success,
 * offer the FTB notifier module.
 *
 * Fills the global ftb_client_info (event space, client name, job id and
 * subscription style) and calls FTB_Connect().  If the connection fails,
 * exactly one help message matching the FTB error code is shown and the
 * component declines to be selected.
 *
 * module   - (OUT) &orte_notifier_ftb_module on success, NULL otherwise
 * priority - (OUT) value of the component's "priority" MCA parameter on
 *            success, 0 otherwise
 *
 * Returns ORTE_SUCCESS when connected, ORTE_ERR_NOT_FOUND otherwise.
 */
static int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority)
{
    int ret;
    const char *style = mca_notifier_ftb_component.subscription_style;

    *priority = 0;
    *module = NULL;

    /* Fill the FTB client information structure.  Every field is written
       with a bounded snprintf() so that an over-long host name or MCA
       value is truncated instead of overflowing the fixed-size field. */
    memset(&ftb_client_info, 0, sizeof(ftb_client_info));
    snprintf(ftb_client_info.event_space, sizeof(ftb_client_info.event_space),
             "%s", "ftb.mpi.openmpi");

    /* We represent each client with a client name of the form
       openmpi/<hostname>/<PID> as a unique identifier in the
       FTB client namespace */
    snprintf(ftb_client_info.client_name, sizeof(ftb_client_info.client_name),
             "openmpi/%s/%u",
             orte_process_info.nodename, (unsigned int) orte_process_info.pid);

    snprintf(ftb_client_info.client_jobid, sizeof(ftb_client_info.client_jobid),
             "%u", ORTE_PROC_MY_NAME->jobid);

    /* A NULL style is left as the empty string set by memset() above;
       FTB_Connect() is then expected to reject it (assumption -- confirm
       against the FTB client API) */
    if (NULL != style) {
        snprintf(ftb_client_info.client_subscription_style,
                 sizeof(ftb_client_info.client_subscription_style),
                 "%s", style);
    }

    /* We try to connect to the FTB backplane now, and we decline
       selection if we cannot connect for some reason. */
    if (FTB_SUCCESS != (ret = FTB_Connect(&ftb_client_info, &ftb_client_handle))) {
        switch (ret) {
        case FTB_ERR_SUBSCRIPTION_STYLE:
            orte_show_help("help-orte-notifier-ftb.txt",
                           "invalid subscription style",
                           true, ftb_client_info.client_subscription_style);
            break;

        case FTB_ERR_INVALID_VALUE:
            orte_show_help("help-orte-notifier-ftb.txt",
                           "invalid value",
                           true);
            break;

        default:
            orte_show_help("help-orte-notifier-ftb.txt",
                           "unable to connect",
                           true);
            break;
        }

        return ORTE_ERR_NOT_FOUND;
    }

    /* Honor the "priority" MCA parameter (its registered default is 10) */
    *priority = mca_notifier_ftb_component.priority;
    *module = (mca_base_module_t *)&orte_notifier_ftb_module;

    return ORTE_SUCCESS;
}
|
||||
|
||||
static int orte_notifier_ftb_register(void)
|
||||
{
|
||||
|
||||
/* FTB client subscription style */
|
||||
mca_base_param_reg_string(&mca_notifier_ftb_component.super.base_version,
|
||||
"subscription_style",
|
||||
"FTB client subscription style. "
|
||||
"Possible values are none, polling, notify and both (polling and notify).",
|
||||
false, false,
|
||||
mca_notifier_ftb_component.subscription_style,
|
||||
&mca_notifier_ftb_component.subscription_style);
|
||||
|
||||
/* Priority */
|
||||
mca_base_param_reg_int(&mca_notifier_ftb_component.super.base_version,
|
||||
"priority",
|
||||
"Priority of this component",
|
||||
false, false,
|
||||
mca_notifier_ftb_component.priority,
|
||||
&mca_notifier_ftb_component.priority);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
@ -51,14 +51,11 @@ orte_notifier_base_module_t orte_notifier_ftb_module = {
|
||||
mypeerlog
|
||||
};
|
||||
|
||||
/* Module "global" variables */
|
||||
static FTB_client_t cinfo = {
|
||||
.event_space = "ftb.mpi.openmpi",
|
||||
.client_name = "",
|
||||
.client_jobid = "",
|
||||
.client_subscription_style = "FTB_SUBSCRIPTION_NONE"
|
||||
};
|
||||
static FTB_client_handle_t chandle;
|
||||
/* FTB client information */
|
||||
FTB_client_t ftb_client_info;
|
||||
|
||||
/* FTB client handle */
|
||||
FTB_client_handle_t ftb_client_handle;
|
||||
|
||||
static FTB_event_info_t ftb_event_info[] = {
|
||||
/* 0 */ {"UNKNOWN_ERROR", "error"},
|
||||
@ -102,23 +99,11 @@ static int orte_err2ftb(int errnum)
|
||||
static int init(void) {
|
||||
int ret;
|
||||
|
||||
/* snprintf(cinfo.client_name, FTB_MAX_CLIENT_NAME, "%s", argv[0]);
|
||||
* How to obtain argv[0] at this point? I don't know...
|
||||
* similarly, how do we obtain client_jobid?
|
||||
* snprintf(cinfo.client_jobid, FTB_MAX_CLIENT_JOBID, "%s", orte_jobid???);
|
||||
*/
|
||||
|
||||
if (FTB_SUCCESS != (ret = FTB_Connect(&cinfo, &chandle))) {
|
||||
opal_output(orte_notifier_base_output,
|
||||
"notifier:ftb:init FTB_Connect failed ret=%d\n", ret);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
ret = FTB_Declare_publishable_events(chandle, 0, ftb_event_info, ftb_event_info_count);
|
||||
ret = FTB_Declare_publishable_events(ftb_client_handle, 0, ftb_event_info, ftb_event_info_count);
|
||||
if (FTB_SUCCESS != ret) {
|
||||
opal_output(orte_notifier_base_output,
|
||||
"notifier:ftb:init FTB_Declare_publishable_events failed ret=%d\n", ret);
|
||||
FTB_Disconnect(chandle);
|
||||
FTB_Disconnect(ftb_client_handle);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
@ -126,7 +111,7 @@ static int init(void) {
|
||||
}
|
||||
|
||||
static void finalize(void) {
|
||||
FTB_Disconnect(chandle);
|
||||
FTB_Disconnect(ftb_client_handle);
|
||||
}
|
||||
|
||||
static void convert2ftb(int errcode, char *payload)
|
||||
@ -138,7 +123,7 @@ static void convert2ftb(int errcode, char *payload)
|
||||
snprintf(eprop.event_payload, FTB_MAX_PAYLOAD_DATA, "%s", (payload != NULL) ? payload : "");
|
||||
|
||||
event_id = orte_err2ftb(errcode);
|
||||
ret = FTB_Publish(chandle, ftb_event_info[event_id].event_name, &eprop, &ehandle);
|
||||
ret = FTB_Publish(ftb_client_handle, ftb_event_info[event_id].event_name, &eprop, &ehandle);
|
||||
if (FTB_SUCCESS != ret) {
|
||||
opal_output(orte_notifier_base_output,
|
||||
"notifier:ftb:convert2ftb(%d,'%s') FTB_Publish failed ret=%d\n", errcode, eprop.event_payload, ret);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user