A few changes to the FTB notifier interface:
- Add an ORTE FTB notifier help file for more verbose error messages.
- Check whether we can connect to the FTB during component->query, and close the component if we cannot.
- Make the FTB component interface methods static.
- Add MCA parameters to override the default subscription style and priority.

This commit was SVN r22011.
Этот коммит содержится в:
родитель
3340f62e5f
Коммит
2af7657db1
27
orte/mca/notifier/ftb/help-orte-notifier-ftb.txt
Обычный файл
27
orte/mca/notifier/ftb/help-orte-notifier-ftb.txt
Обычный файл
@ -0,0 +1,27 @@
|
|||||||
|
# -*- text -*-
|
||||||
|
#
|
||||||
|
# Copyright (c) 2009 The Trustees of Indiana University and Indiana
|
||||||
|
# University Research and Technology Corporation.
|
||||||
|
# All rights reserved.
|
||||||
|
# $COPYRIGHT$
|
||||||
|
#
|
||||||
|
# Additional copyrights may follow
|
||||||
|
#
|
||||||
|
# $HEADER$
|
||||||
|
#
|
||||||
|
# This is the US/English help file for Open MPI's FTB notifier support
|
||||||
|
#
|
||||||
|
[invalid subscription style]
|
||||||
|
Error: the Open MPI FTB component tried to register with an invalid
|
||||||
|
FTB client subscription style.
|
||||||
|
|
||||||
|
Subscription style: %s
|
||||||
|
#
|
||||||
|
[invalid value]
|
||||||
|
Error: the Open MPI FTB notifier component tried to register with an
|
||||||
|
invalid value in the FTB client information.
|
||||||
|
#
|
||||||
|
[unable to connect]
|
||||||
|
Open MPI's FTB notifier component was unable to establish a connection
|
||||||
|
with the FTB backplane.
|
||||||
|
#
|
@ -28,21 +28,30 @@
|
|||||||
|
|
||||||
BEGIN_C_DECLS
|
BEGIN_C_DECLS
|
||||||
|
|
||||||
/*
|
typedef struct {
|
||||||
* Component open / close
|
orte_notifier_base_component_t super;
|
||||||
*/
|
|
||||||
int orte_notifier_ftb_open(void);
|
|
||||||
int orte_notifier_ftb_close(void);
|
|
||||||
int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority);
|
|
||||||
|
|
||||||
|
/* FTB client subscription style */
|
||||||
|
char *subscription_style;
|
||||||
|
|
||||||
|
/* Priority of this component */
|
||||||
|
int priority;
|
||||||
|
} orte_notifier_ftb_component_t;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Notifier interfaces
|
* Notifier interfaces
|
||||||
*/
|
*/
|
||||||
|
|
||||||
ORTE_MODULE_DECLSPEC extern orte_notifier_base_component_t mca_notifier_ftb_component;
|
ORTE_MODULE_DECLSPEC extern orte_notifier_ftb_component_t mca_notifier_ftb_component;
|
||||||
extern orte_notifier_base_module_t orte_notifier_ftb_module;
|
extern orte_notifier_base_module_t orte_notifier_ftb_module;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* FTB client information
|
||||||
|
*/
|
||||||
|
|
||||||
|
extern FTB_client_t ftb_client_info;
|
||||||
|
extern FTB_client_handle_t ftb_client_handle;
|
||||||
|
|
||||||
END_C_DECLS
|
END_C_DECLS
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -18,6 +18,10 @@
|
|||||||
*/
|
*/
|
||||||
/** @file:
|
/** @file:
|
||||||
*
|
*
|
||||||
|
* This component proxies notification events to the Fault Tolerant
|
||||||
|
* Backplane (See http://www.mcs.anl.gov/research/cifts/).
|
||||||
|
* The ORTE notifier severity is translated to the corresponding
|
||||||
|
* FTB severity before the event is published to the FTB.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -26,44 +30,133 @@
|
|||||||
#include "orte_config.h"
|
#include "orte_config.h"
|
||||||
#include "orte/constants.h"
|
#include "orte/constants.h"
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "orte/util/show_help.h"
|
||||||
|
#include "orte/runtime/orte_globals.h"
|
||||||
|
#include "opal/mca/base/mca_base_param.h"
|
||||||
#include "notifier_ftb.h"
|
#include "notifier_ftb.h"
|
||||||
|
|
||||||
|
static int orte_notifier_ftb_close(void);
|
||||||
|
static int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority);
|
||||||
|
static int orte_notifier_ftb_register(void);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Struct of function pointers that need to be initialized
|
* Struct of function pointers that need to be initialized
|
||||||
*/
|
*/
|
||||||
orte_notifier_base_component_t mca_notifier_ftb_component = {
|
orte_notifier_ftb_component_t mca_notifier_ftb_component = {
|
||||||
{
|
{
|
||||||
ORTE_NOTIFIER_BASE_VERSION_1_0_0,
|
{
|
||||||
|
ORTE_NOTIFIER_BASE_VERSION_1_0_0,
|
||||||
"ftb", /* MCA module name */
|
|
||||||
ORTE_MAJOR_VERSION, /* MCA module major version */
|
"ftb", /* MCA module name */
|
||||||
ORTE_MINOR_VERSION, /* MCA module minor version */
|
ORTE_MAJOR_VERSION, /* MCA module major version */
|
||||||
ORTE_RELEASE_VERSION, /* MCA module release version */
|
ORTE_MINOR_VERSION, /* MCA module minor version */
|
||||||
orte_notifier_ftb_open, /* module open */
|
ORTE_RELEASE_VERSION, /* MCA module release version */
|
||||||
orte_notifier_ftb_close, /* module close */
|
|
||||||
orte_notifier_ftb_component_query /* module query */
|
NULL,
|
||||||
|
orte_notifier_ftb_close, /* module close */
|
||||||
|
orte_notifier_ftb_component_query, /* module query */
|
||||||
|
orte_notifier_ftb_register, /* module register */
|
||||||
|
},
|
||||||
|
{
|
||||||
|
/* The component is checkpoint ready */
|
||||||
|
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
|
||||||
/* The component is checkpoint ready */
|
/* FTB client subscription style */
|
||||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
"FTB_SUBSCRIPTION_NONE",
|
||||||
}
|
|
||||||
|
/* Priority */
|
||||||
|
10,
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Open the component */
|
static int orte_notifier_ftb_close(void)
|
||||||
int orte_notifier_ftb_open(void)
|
|
||||||
{
|
{
|
||||||
|
|
||||||
|
if (NULL != mca_notifier_ftb_component.subscription_style) {
|
||||||
|
free(mca_notifier_ftb_component.subscription_style);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the FTB client handle is valid, disconnect the client */
|
||||||
|
if (1 == ftb_client_handle.valid) {
|
||||||
|
FTB_Disconnect(ftb_client_handle);
|
||||||
|
}
|
||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
int orte_notifier_ftb_close(void)
|
static int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority)
|
||||||
{
|
{
|
||||||
return ORTE_SUCCESS;
|
int ret;
|
||||||
}
|
*priority = 0;
|
||||||
|
*module = NULL;
|
||||||
|
|
||||||
int orte_notifier_ftb_component_query(mca_base_module_t **module, int *priority)
|
/* Fill the FTB client information structure */
|
||||||
{
|
memset(&ftb_client_info, 0, sizeof(ftb_client_info));
|
||||||
/* we are a lower-level default, so set a low priority so we can be overridden */
|
strcpy(ftb_client_info.event_space, "ftb.mpi.openmpi");
|
||||||
*priority = 1;
|
|
||||||
|
/* We represent each client with a client name of the form
|
||||||
|
openmpi/<hostname>/<PID> as a unique identifier in the
|
||||||
|
FTB client namespace */
|
||||||
|
sprintf(ftb_client_info.client_name, "openmpi/%s/%u",
|
||||||
|
orte_process_info.nodename, orte_process_info.pid);
|
||||||
|
|
||||||
|
sprintf(ftb_client_info.client_jobid, "%u", ORTE_PROC_MY_NAME->jobid);
|
||||||
|
|
||||||
|
strncpy(ftb_client_info.client_subscription_style,
|
||||||
|
mca_notifier_ftb_component.subscription_style,
|
||||||
|
strlen(mca_notifier_ftb_component.subscription_style));
|
||||||
|
|
||||||
|
/* We try to connect to the FTB backplane now, and we abort
|
||||||
|
if we cannot connect for some reason. */
|
||||||
|
if (FTB_SUCCESS != (ret = FTB_Connect(&ftb_client_info, &ftb_client_handle))) {
|
||||||
|
switch (ret) {
|
||||||
|
case FTB_ERR_SUBSCRIPTION_STYLE:
|
||||||
|
orte_show_help("help-orte-notifier-ftb.txt",
|
||||||
|
"invalid subscription style",
|
||||||
|
true, ftb_client_info.client_subscription_style);
|
||||||
|
|
||||||
|
case FTB_ERR_INVALID_VALUE:
|
||||||
|
orte_show_help("help-orte-notifier-ftb.txt",
|
||||||
|
"invalid value",
|
||||||
|
true);
|
||||||
|
|
||||||
|
default:
|
||||||
|
orte_show_help("help-orte-notifier-ftb.txt",
|
||||||
|
"unable to connect",
|
||||||
|
true);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ORTE_ERR_NOT_FOUND;
|
||||||
|
}
|
||||||
|
|
||||||
|
*priority = 10;
|
||||||
*module = (mca_base_module_t *)&orte_notifier_ftb_module;
|
*module = (mca_base_module_t *)&orte_notifier_ftb_module;
|
||||||
return ORTE_SUCCESS;
|
|
||||||
|
return ORTE_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int orte_notifier_ftb_register(void)
|
||||||
|
{
|
||||||
|
|
||||||
|
/* FTB client subscription style */
|
||||||
|
mca_base_param_reg_string(&mca_notifier_ftb_component.super.base_version,
|
||||||
|
"subscription_style",
|
||||||
|
"FTB client subscription style. "
|
||||||
|
"Possible values are none, polling, notify and both (polling and notify).",
|
||||||
|
false, false,
|
||||||
|
mca_notifier_ftb_component.subscription_style,
|
||||||
|
&mca_notifier_ftb_component.subscription_style);
|
||||||
|
|
||||||
|
/* Priority */
|
||||||
|
mca_base_param_reg_int(&mca_notifier_ftb_component.super.base_version,
|
||||||
|
"priority",
|
||||||
|
"Priority of this component",
|
||||||
|
false, false,
|
||||||
|
mca_notifier_ftb_component.priority,
|
||||||
|
&mca_notifier_ftb_component.priority);
|
||||||
|
|
||||||
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -51,14 +51,11 @@ orte_notifier_base_module_t orte_notifier_ftb_module = {
|
|||||||
mypeerlog
|
mypeerlog
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Module "global" variables */
|
/* FTB client information */
|
||||||
static FTB_client_t cinfo = {
|
FTB_client_t ftb_client_info;
|
||||||
.event_space = "ftb.mpi.openmpi",
|
|
||||||
.client_name = "",
|
/* FTB client handle */
|
||||||
.client_jobid = "",
|
FTB_client_handle_t ftb_client_handle;
|
||||||
.client_subscription_style = "FTB_SUBSCRIPTION_NONE"
|
|
||||||
};
|
|
||||||
static FTB_client_handle_t chandle;
|
|
||||||
|
|
||||||
static FTB_event_info_t ftb_event_info[] = {
|
static FTB_event_info_t ftb_event_info[] = {
|
||||||
/* 0 */ {"UNKNOWN_ERROR", "error"},
|
/* 0 */ {"UNKNOWN_ERROR", "error"},
|
||||||
@ -102,23 +99,11 @@ static int orte_err2ftb(int errnum)
|
|||||||
static int init(void) {
|
static int init(void) {
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
/* snprintf(cinfo.client_name, FTB_MAX_CLIENT_NAME, "%s", argv[0]);
|
ret = FTB_Declare_publishable_events(ftb_client_handle, 0, ftb_event_info, ftb_event_info_count);
|
||||||
* How to obtain argv[0] at this point? I don't know...
|
|
||||||
* similarly, how do we obtain client_jobid?
|
|
||||||
* snprintf(cinfo.client_jobid, FTB_MAX_CLIENT_JOBID, "%s", orte_jobid???);
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (FTB_SUCCESS != (ret = FTB_Connect(&cinfo, &chandle))) {
|
|
||||||
opal_output(orte_notifier_base_output,
|
|
||||||
"notifier:ftb:init FTB_Connect failed ret=%d\n", ret);
|
|
||||||
return ORTE_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = FTB_Declare_publishable_events(chandle, 0, ftb_event_info, ftb_event_info_count);
|
|
||||||
if (FTB_SUCCESS != ret) {
|
if (FTB_SUCCESS != ret) {
|
||||||
opal_output(orte_notifier_base_output,
|
opal_output(orte_notifier_base_output,
|
||||||
"notifier:ftb:init FTB_Declare_publishable_events failed ret=%d\n", ret);
|
"notifier:ftb:init FTB_Declare_publishable_events failed ret=%d\n", ret);
|
||||||
FTB_Disconnect(chandle);
|
FTB_Disconnect(ftb_client_handle);
|
||||||
return ORTE_ERROR;
|
return ORTE_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -126,7 +111,7 @@ static int init(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void finalize(void) {
|
static void finalize(void) {
|
||||||
FTB_Disconnect(chandle);
|
FTB_Disconnect(ftb_client_handle);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void convert2ftb(int errcode, char *payload)
|
static void convert2ftb(int errcode, char *payload)
|
||||||
@ -138,7 +123,7 @@ static void convert2ftb(int errcode, char *payload)
|
|||||||
snprintf(eprop.event_payload, FTB_MAX_PAYLOAD_DATA, "%s", (payload != NULL) ? payload : "");
|
snprintf(eprop.event_payload, FTB_MAX_PAYLOAD_DATA, "%s", (payload != NULL) ? payload : "");
|
||||||
|
|
||||||
event_id = orte_err2ftb(errcode);
|
event_id = orte_err2ftb(errcode);
|
||||||
ret = FTB_Publish(chandle, ftb_event_info[event_id].event_name, &eprop, &ehandle);
|
ret = FTB_Publish(ftb_client_handle, ftb_event_info[event_id].event_name, &eprop, &ehandle);
|
||||||
if (FTB_SUCCESS != ret) {
|
if (FTB_SUCCESS != ret) {
|
||||||
opal_output(orte_notifier_base_output,
|
opal_output(orte_notifier_base_output,
|
||||||
"notifier:ftb:convert2ftb(%d,'%s') FTB_Publish failed ret=%d\n", errcode, eprop.event_payload, ret);
|
"notifier:ftb:convert2ftb(%d,'%s') FTB_Publish failed ret=%d\n", errcode, eprop.event_payload, ret);
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user