Add a new framework to ORTE for saving and recovering state information. Two components are included that use the db or dbm library for storing the data, with a distributed hash table component coming later.
Note that each of these components will only be selected if specifically requested - otherwise, a "NULL" component will be used. The framework is only opened by the HNP and orteds, though neither is currently coded to save/restore state. This commit was SVN r22839.
Этот коммит содержится в:
родитель
7b3ac4fb73
Коммит
ffd5be6aa1
@ -525,7 +525,7 @@ AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h dirent.h \
|
||||
sys/types.h sys/uio.h net/uio.h sys/utsname.h sys/vfs.h sys/wait.h syslog.h \
|
||||
time.h termios.h ulimit.h unistd.h util.h utmp.h malloc.h \
|
||||
ifaddrs.h sys/sysctl.h crt_externs.h regex.h \
|
||||
ioLib.h sockLib.h hostLib.h shlwapi.h sys/synch.h])
|
||||
ioLib.h sockLib.h hostLib.h shlwapi.h sys/synch.h limits.h db.h ndbm.h])
|
||||
|
||||
# Needed to work around Darwin requiring sys/socket.h for
|
||||
# net/if.h
|
||||
@ -656,7 +656,7 @@ OMPI_CHECK_FUNC_LIB([dirname], [gen])
|
||||
# Darwin doesn't need -lm, as it's a symlink to libSystem.dylib
|
||||
OMPI_CHECK_FUNC_LIB([ceil], [m])
|
||||
|
||||
AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf openpty isatty getpwuid fork waitpid execve pipe ptsname setsid mmap tcgetpgrp posix_memalign strsignal sysconf syslog regcmp regexec regfree _NSGetEnviron socketpair strncpy_s _strdup usleep mkfifo])
|
||||
AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf openpty isatty getpwuid fork waitpid execve pipe ptsname setsid mmap tcgetpgrp posix_memalign strsignal sysconf syslog regcmp regexec regfree _NSGetEnviron socketpair strncpy_s _strdup usleep mkfifo dbopen dbm_open])
|
||||
|
||||
# On some hosts, htonl is a define, so the AC_CHECK_FUNC will get
|
||||
# confused. On others, it's in the standard library, but stubbed with
|
||||
|
@ -59,6 +59,7 @@
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
#include "orte/mca/rmcast/base/base.h"
|
||||
#include "orte/mca/state/base/base.h"
|
||||
|
||||
#include "orte/runtime/orte_cr.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
@ -388,6 +389,18 @@ int orte_ess_base_orted_setup(char **hosts)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* setup the state framework */
|
||||
if (ORTE_SUCCESS != (ret = orte_state_base_open())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_state_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = orte_state_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_state_select";
|
||||
goto error;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
||||
error:
|
||||
@ -415,6 +428,7 @@ int orte_ess_base_orted_finalize(void)
|
||||
orte_grpcomm.onesided_barrier();
|
||||
}
|
||||
|
||||
orte_state_base_close();
|
||||
orte_notifier_base_close();
|
||||
|
||||
orte_cr_finalize();
|
||||
|
@ -54,6 +54,7 @@
|
||||
#include "orte/mca/odls/base/base.h"
|
||||
#include "orte/mca/notifier/base/base.h"
|
||||
#include "orte/mca/rmcast/base/base.h"
|
||||
#include "orte/mca/state/base/base.h"
|
||||
|
||||
#include "orte/mca/rmaps/base/base.h"
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
@ -547,6 +548,18 @@ static int rte_init(void)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* setup the state framework */
|
||||
if (ORTE_SUCCESS != (ret = orte_state_base_open())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_state_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = orte_state_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_state_select";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* if a tool has launched us and is requesting event reports,
|
||||
* then set its contact info into the comm system
|
||||
*/
|
||||
@ -607,6 +620,7 @@ static int rte_finalize(void)
|
||||
unlink(contact_path);
|
||||
free(contact_path);
|
||||
|
||||
orte_state_base_close();
|
||||
orte_notifier_base_close();
|
||||
|
||||
orte_cr_finalize();
|
||||
|
35
orte/mca/state/Makefile.am
Обычный файл
35
orte/mca/state/Makefile.am
Обычный файл
@ -0,0 +1,35 @@
|
||||
#
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
AM_CPPFLAGS = $(LTDLINCL)
|
||||
|
||||
# main library setup
|
||||
noinst_LTLIBRARIES = libmca_state.la
|
||||
libmca_state_la_SOURCES =
|
||||
|
||||
# header setup
|
||||
nobase_orte_HEADERS =
|
||||
dist_pkgdata_DATA =
|
||||
|
||||
# local files
|
||||
headers = state.h
|
||||
libmca_state_la_SOURCES += $(headers)
|
||||
|
||||
# Conditionally install the header files
|
||||
if WANT_INSTALL_HEADERS
|
||||
nobase_orte_HEADERS += $(headers)
|
||||
ortedir = $(includedir)/openmpi/orte/mca/state
|
||||
else
|
||||
ortedir = $(includedir)
|
||||
endif
|
||||
|
||||
include base/Makefile.am
|
||||
|
||||
distclean-local:
|
||||
rm -f base/static-components.h
|
16
orte/mca/state/base/Makefile.am
Обычный файл
16
orte/mca/state/base/Makefile.am
Обычный файл
@ -0,0 +1,16 @@
|
||||
#
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
headers += \
|
||||
base/base.h
|
||||
|
||||
libmca_state_la_SOURCES += \
|
||||
base/state_base_open.c \
|
||||
base/state_base_close.c \
|
||||
base/state_base_select.c
|
54
orte/mca/state/base/base.h
Обычный файл
54
orte/mca/state/base/base.h
Обычный файл
@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/** @file:
|
||||
*/
|
||||
|
||||
#ifndef MCA_STATE_BASE_H
|
||||
#define MCA_STATE_BASE_H
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/types.h"
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
|
||||
|
||||
#include "orte/mca/state/state.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* Global functions for MCA overall collective open and close
|
||||
*/
|
||||
|
||||
/**
|
||||
* Open the state framework
|
||||
*/
|
||||
ORTE_DECLSPEC int orte_state_base_open(void);
|
||||
|
||||
/**
|
||||
* Select a state module
|
||||
*/
|
||||
ORTE_DECLSPEC int orte_state_base_select(void);
|
||||
|
||||
/**
|
||||
* Close the state framework
|
||||
*/
|
||||
ORTE_DECLSPEC int orte_state_base_close(void);
|
||||
|
||||
/*
|
||||
* The verbose channel for debug output
|
||||
*/
|
||||
ORTE_DECLSPEC extern int orte_state_base_output;
|
||||
|
||||
ORTE_DECLSPEC extern opal_list_t orte_state_base_components_available;
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
50
orte/mca/state/base/state_base_close.c
Обычный файл
50
orte/mca/state/base/state_base_close.c
Обычный файл
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_component_repository.h"
|
||||
|
||||
#include "orte/mca/state/base/base.h"
|
||||
|
||||
extern opal_list_t orte_state_base_components_available;
|
||||
|
||||
int
|
||||
orte_state_base_close(void)
|
||||
{
|
||||
opal_list_item_t *item;
|
||||
mca_base_component_list_item_t *cli;
|
||||
|
||||
if (NULL != orte_state.finalize) {
|
||||
orte_state.finalize();
|
||||
}
|
||||
|
||||
/* unload all remaining components */
|
||||
while (NULL != (item = opal_list_remove_first(&orte_state_base_components_available))) {
|
||||
orte_state_base_component_t* component;
|
||||
cli = (mca_base_component_list_item_t *) item;
|
||||
component = (orte_state_base_component_t*) cli->cli_component;
|
||||
opal_output_verbose(10, 0,
|
||||
"orte_state_base_close: module %s unloaded",
|
||||
component->base_version.mca_component_name);
|
||||
mca_base_component_repository_release((mca_base_component_t *) component);
|
||||
OBJ_RELEASE(item);
|
||||
}
|
||||
|
||||
OBJ_DESTRUCT(&orte_state_base_components_available);
|
||||
opal_output_close(orte_state_base_output);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
85
orte/mca/state/base/state_base_open.c
Обычный файл
85
orte/mca/state/base/state_base_open.c
Обычный файл
@ -0,0 +1,85 @@
|
||||
/*
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/dss/dss_types.h"
|
||||
|
||||
#include "orte/mca/state/base/base.h"
|
||||
|
||||
|
||||
/*
|
||||
* The following file was created by configure. It contains extern
|
||||
* statements and the definition of an array of pointers to each
|
||||
* module's public mca_base_module_t struct.
|
||||
*/
|
||||
|
||||
#include "orte/mca/state/base/static-components.h"
|
||||
|
||||
opal_list_t orte_state_base_components_available;
|
||||
|
||||
/* provide "NULL" functions */
|
||||
static int init(void);
|
||||
static int finalize(void);
|
||||
static int save(void *object, opal_data_type_t type);
|
||||
static int set_recover_source(orte_process_name_t *name);
|
||||
static int recover(void *object, opal_data_type_t type);
|
||||
|
||||
orte_state_base_module_t orte_state = {
|
||||
init,
|
||||
finalize,
|
||||
save,
|
||||
set_recover_source,
|
||||
recover
|
||||
};
|
||||
int orte_state_base_output;
|
||||
|
||||
int
|
||||
orte_state_base_open(void)
|
||||
{
|
||||
orte_state_base_output = opal_output_open(NULL);
|
||||
|
||||
OBJ_CONSTRUCT(&orte_state_base_components_available, opal_list_t);
|
||||
|
||||
/* Open up all available components */
|
||||
if (ORTE_SUCCESS !=
|
||||
mca_base_components_open("state", orte_state_base_output, mca_state_base_static_components,
|
||||
&orte_state_base_components_available,
|
||||
true)) {
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int init(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
static int finalize(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
/* NULL module: the object is not stored anywhere; always reports success. */
static int save(void *object, opal_data_type_t type)
{
    /* both arguments are intentionally ignored */
    (void) object;
    (void) type;

    return ORTE_SUCCESS;
}
|
||||
/* NULL module: no recovery source is recorded; always reports success. */
static int set_recover_source(orte_process_name_t *name)
{
    /* the argument is intentionally ignored */
    (void) name;

    return ORTE_SUCCESS;
}
|
||||
/* NULL module: the object is left untouched; always reports success. */
static int recover(void *object, opal_data_type_t type)
{
    /* both arguments are intentionally ignored */
    (void) object;
    (void) type;

    return ORTE_SUCCESS;
}
|
48
orte/mca/state/base/state_base_select.c
Обычный файл
48
orte/mca/state/base/state_base_select.c
Обычный файл
@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_component_repository.h"
|
||||
|
||||
#include "orte/mca/state/base/base.h"
|
||||
|
||||
extern opal_list_t orte_state_base_components_available;
|
||||
|
||||
int
|
||||
orte_state_base_select(void)
|
||||
{
|
||||
orte_state_base_component_t *best_component = NULL;
|
||||
orte_state_base_module_t *best_module = NULL;
|
||||
|
||||
/*
|
||||
* Select the best component
|
||||
*/
|
||||
if( OPAL_SUCCESS != mca_base_select("state", orte_state_base_output,
|
||||
&orte_state_base_components_available,
|
||||
(mca_base_module_t **) &best_module,
|
||||
(mca_base_component_t **) &best_component) ) {
|
||||
/* It is okay to not select a component - default
|
||||
* to using the base NULL component
|
||||
*/
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* Save and init the winner */
|
||||
orte_state = *best_module;
|
||||
if (NULL != orte_state.init) {
|
||||
orte_state.init();
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
34
orte/mca/state/db/Makefile.am
Обычный файл
34
orte/mca/state/db/Makefile.am
Обычный файл
@ -0,0 +1,34 @@
|
||||
#
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
sources = \
|
||||
state_db.h \
|
||||
state_db_component.c \
|
||||
state_db.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if OMPI_BUILD_state_db_DSO
|
||||
component_noinst =
|
||||
component_install = mca_state_db.la
|
||||
else
|
||||
component_noinst = libmca_state_db.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_state_db_la_SOURCES = $(sources)
|
||||
mca_state_db_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_state_db_la_SOURCES =$(sources)
|
||||
libmca_state_db_la_LDFLAGS = -module -avoid-version
|
16
orte/mca/state/db/configure.m4
Обычный файл
16
orte/mca/state/db/configure.m4
Обычный файл
@ -0,0 +1,16 @@
|
||||
dnl -*- shell-script -*-
|
||||
dnl
|
||||
dnl Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
dnl $COPYRIGHT$
|
||||
dnl
|
||||
dnl Additional copyrights may follow
|
||||
dnl
|
||||
dnl $HEADER$
|
||||
dnl
|
||||
|
||||
# MCA_state_db_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_state_db_CONFIG], [
|
||||
# only build if db.h is found
|
||||
AC_CHECK_HEADERS([db.h], [$1], [$2], [AC_INCLUDES_DEFAULT])
|
||||
])dnl
|
11
orte/mca/state/db/configure.params
Обычный файл
11
orte/mca/state/db/configure.params
Обычный файл
@ -0,0 +1,11 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
PARAM_CONFIG_FILES="Makefile"
|
248
orte/mca/state/db/state_db.c
Обычный файл
248
orte/mca/state/db/state_db.c
Обычный файл
@ -0,0 +1,248 @@
|
||||
/*
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#ifdef HAVE_LIMITS_H
|
||||
#include <limits.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#ifdef HAVE_FCNTL_H
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <db.h>
|
||||
|
||||
#include "opal/dss/dss_types.h"
|
||||
#include "opal/util/os_dirpath.h"
|
||||
#include "opal/util/os_path.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/malloc.h"
|
||||
#include "opal/util/basename.h"
|
||||
#include "opal/mca/pstat/base/base.h"
|
||||
#include "opal/mca/paffinity/base/base.h"
|
||||
#include "opal/mca/sysinfo/base/base.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/errmgr/base/base.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "state_db.h"
|
||||
|
||||
static int init(void);
|
||||
static int finalize(void);
|
||||
static int save(void *object, opal_data_type_t type);
|
||||
static int set_recover_source(orte_process_name_t *name);
|
||||
static int recover(void *object, opal_data_type_t type);
|
||||
|
||||
orte_state_base_module_t orte_state_db_module = {
|
||||
init,
|
||||
finalize,
|
||||
save,
|
||||
set_recover_source,
|
||||
recover
|
||||
};
|
||||
|
||||
/* local variables */
|
||||
static DB *save_db=NULL, *recover_db=NULL;
|
||||
|
||||
static int init(void)
|
||||
{
|
||||
char *path, *name;
|
||||
|
||||
/* setup the database */
|
||||
if (ORTE_SUCCESS != opal_os_dirpath_create(orte_state_db_directory, S_IRWXU)) {
|
||||
orte_show_help("help-state-db.txt", "cannot-create-dir", true,
|
||||
orte_state_db_directory);
|
||||
return ORTE_ERR_FILE_OPEN_FAILURE;
|
||||
}
|
||||
orte_util_convert_process_name_to_string(&name, ORTE_PROC_MY_NAME);
|
||||
path = opal_os_path(false, orte_state_db_directory, name, NULL);
|
||||
free(name);
|
||||
if (NULL == (save_db = dbopen(path, O_CREAT | O_RDWR | O_TRUNC, S_IRWXU, DB_HASH, NULL))) {
|
||||
orte_show_help("help-state-db.txt", "cannot-create-db", true, path);
|
||||
free(path);
|
||||
return ORTE_ERR_FILE_OPEN_FAILURE;
|
||||
}
|
||||
free(path);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int finalize(void)
|
||||
{
|
||||
/* if we are normally terminating, remove the recovery file */
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Save the state of an object in the database opened by init().
 *
 * object - an orte_job_t* when type is ORTE_JOB, an orte_proc_t* when type
 *          is ORTE_PROC
 * type   - selects how object is interpreted; any other value is rejected
 *
 * The job/proc state is packed into a buffer and stored under the key
 * "JOB:<jobid>" or "PROC:<process name>" (the key length excludes the
 * terminating NUL).  The database is synced after a successful write.
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_NOT_FOUND when the database is not open,
 * ORTE_ERR_BAD_PARAM for an unrecognized type, ORTE_ERR_OUT_OF_RESOURCE
 * when the key cannot be built, the pack/unload error code when packing
 * fails, or ORTE_ERR_FILE_WRITE_FAILURE when the write or sync fails.
 */
static int save(void *object, opal_data_type_t type)
{
    DBT key, data;
    opal_buffer_t buf;
    orte_job_t *jdata;
    orte_proc_t *proc;
    char *name = NULL;
    char *keystr = NULL;
    void *payload = NULL;
    int rc = ORTE_SUCCESS;
    int32_t size = 0;

    /* nothing can be stored if init() did not open the database */
    if (NULL == save_db) {
        return ORTE_ERR_NOT_FOUND;
    }

    /* construct the buffer we will use for packing the data */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);
    key.data = NULL;
    key.size = 0;
    data.data = NULL;
    data.size = 0;

    switch (type) {
    case ORTE_JOB:
        jdata = (orte_job_t *) object;
        rc = opal_dss.pack(&buf, &jdata->state, 1, ORTE_JOB_STATE_T);
        if (ORTE_SUCCESS != rc) {
            goto cleanup;
        }
        if (0 > asprintf(&keystr, "JOB:%s", ORTE_JOBID_PRINT(jdata->jobid))) {
            keystr = NULL;   /* contents are undefined after a failure */
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        break;
    case ORTE_PROC:
        proc = (orte_proc_t *) object;
        rc = opal_dss.pack(&buf, &proc->state, 1, ORTE_PROC_STATE_T);
        if (ORTE_SUCCESS != rc) {
            goto cleanup;
        }
        orte_util_convert_process_name_to_string(&name, &proc->name);
        if (NULL == name) {
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        if (0 > asprintf(&keystr, "PROC:%s", name)) {
            keystr = NULL;   /* contents are undefined after a failure */
            free(name);
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        free(name);
        break;
    default:
        orte_show_help("help-state-db.txt", "unrecognized-type", true, type);
        rc = ORTE_ERR_BAD_PARAM;
        goto cleanup;
    }

    key.data = keystr;
    key.size = strlen(keystr);

    /* unload the data - the payload now belongs to us */
    rc = opal_dss.unload(&buf, &payload, &size);
    if (ORTE_SUCCESS != rc) {
        goto cleanup;
    }
    data.data = payload;
    data.size = size;

    /* put the info into the db */
    if (0 > save_db->put(save_db, &key, &data, 0)) {
        orte_show_help("help-state-db.txt", "error-writing-db", true, keystr, strerror(errno));
        rc = ORTE_ERR_FILE_WRITE_FAILURE;
        goto cleanup;   /* nothing new to sync */
    }
    /* sync it to force it to disk */
    if (0 > save_db->sync(save_db, 0)) {
        orte_show_help("help-state-db.txt", "error-syncing-db", true, keystr, strerror(errno));
        rc = ORTE_ERR_FILE_WRITE_FAILURE;
    }

 cleanup:
    /* release everything on every path, including the early exits */
    free(keystr);
    free(payload);
    OBJ_DESTRUCT(&buf);
    return rc;
}
|
||||
|
||||
/*
 * Select the database that recover() will read from.
 *
 * name - process whose saved state is to be recovered; its string form is
 *        the database file name inside orte_state_db_directory
 *
 * Any recover database opened by an earlier call is closed first so its
 * handle is not leaked.  The database is opened read-only.
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_BAD_PARAM when no directory is
 * configured, ORTE_ERR_OUT_OF_RESOURCE when the file name or path cannot
 * be built, or ORTE_ERR_FILE_OPEN_FAILURE when the database cannot be
 * opened.
 */
static int set_recover_source(orte_process_name_t *name)
{
    char *path = NULL;
    char *pname = NULL;

    if (NULL == orte_state_db_directory) {
        return ORTE_ERR_BAD_PARAM;
    }

    /* drop the handle from a previous call, if any */
    if (NULL != recover_db) {
        recover_db->close(recover_db);
        recover_db = NULL;
    }

    /* setup the database */
    orte_util_convert_process_name_to_string(&pname, name);
    if (NULL == pname) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    path = opal_os_path(false, orte_state_db_directory, pname, NULL);
    free(pname);
    if (NULL == path) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    recover_db = dbopen(path, O_RDONLY, S_IRWXU, DB_HASH, NULL);
    if (NULL == recover_db) {
        orte_show_help("help-state-db.txt", "cannot-open-db", true, path);
        free(path);
        return ORTE_ERR_FILE_OPEN_FAILURE;
    }
    free(path);

    return ORTE_SUCCESS;
}
|
||||
|
||||
/*
 * Recover the saved state of an object from the database selected with
 * set_recover_source().
 *
 * object - an orte_job_t* when type is ORTE_JOB, an orte_proc_t* when type
 *          is ORTE_PROC; its jobid / name selects the record and its state
 *          field receives the recovered value
 * type   - selects how object is interpreted; any other value is rejected
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_NOT_FOUND when no recover source is set
 * or the key has no record, ORTE_ERR_BAD_PARAM for an unrecognized type,
 * ORTE_ERR_OUT_OF_RESOURCE on allocation failure,
 * ORTE_ERR_FILE_READ_FAILURE when the read fails, or the load/unpack error
 * code when the record cannot be decoded.
 */
static int recover(void *object, opal_data_type_t type)
{
    DBT key, data;
    opal_buffer_t buf;
    orte_job_t *jdata = NULL;
    orte_proc_t *proc = NULL;
    char *name = NULL;
    char *keystr = NULL;
    void *payload = NULL;
    int rc = ORTE_SUCCESS;
    int dbrc;
    int32_t n;
    orte_job_state_t jstate;
    orte_proc_state_t pstate;

    /* without a source there is nothing to read - bail out before the
     * handle is dereferenced */
    if (NULL == recover_db) {
        orte_show_help("help-state-db.txt", "recover-source-undef", true);
        return ORTE_ERR_NOT_FOUND;
    }

    /* construct the buffer we will use for unpacking the data */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);
    key.data = NULL;
    key.size = 0;
    data.data = NULL;
    data.size = 0;

    switch (type) {
    case ORTE_JOB:
        jdata = (orte_job_t *) object;
        if (0 > asprintf(&keystr, "JOB:%s", ORTE_JOBID_PRINT(jdata->jobid))) {
            keystr = NULL;   /* contents are undefined after a failure */
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        break;
    case ORTE_PROC:
        proc = (orte_proc_t *) object;
        orte_util_convert_process_name_to_string(&name, &proc->name);
        if (NULL == name) {
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        if (0 > asprintf(&keystr, "PROC:%s", name)) {
            keystr = NULL;   /* contents are undefined after a failure */
            free(name);
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        free(name);
        break;
    default:
        orte_show_help("help-state-db.txt", "unrecognized-type", true, type);
        rc = ORTE_ERR_BAD_PARAM;
        goto cleanup;
    }

    key.data = keystr;
    key.size = strlen(keystr);

    /* get the specified data: <0 is an error, >0 means no such key */
    dbrc = recover_db->get(recover_db, &key, &data, 0);
    if (0 > dbrc) {
        orte_show_help("help-state-db.txt", "error-reading-db", true, keystr, strerror(errno));
        rc = ORTE_ERR_FILE_READ_FAILURE;
        goto cleanup;
    }
    if (0 < dbrc || NULL == data.data || 0 == data.size) {
        rc = ORTE_ERR_NOT_FOUND;
        goto cleanup;
    }

    /* The record memory belongs to the database library, while the buffer
     * frees whatever it is loaded with.  Hand the buffer a private copy so
     * the library's memory is never freed by us. */
    payload = malloc(data.size);
    if (NULL == payload) {
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        goto cleanup;
    }
    memcpy(payload, data.data, data.size);

    /* populate the recovered info */
    rc = opal_dss.load(&buf, payload, (int32_t) data.size);
    if (ORTE_SUCCESS != rc) {
        free(payload);   /* the buffer did not take it */
        goto cleanup;
    }

    n = 1;
    if (ORTE_JOB == type) {
        /* unpack into a real variable, not through an unset pointer */
        rc = opal_dss.unpack(&buf, &jstate, &n, ORTE_JOB_STATE_T);
        if (ORTE_SUCCESS == rc) {
            jdata->state = jstate;
        }
    } else {
        rc = opal_dss.unpack(&buf, &pstate, &n, ORTE_PROC_STATE_T);
        if (ORTE_SUCCESS == rc) {
            proc->state = pstate;
        }
    }

 cleanup:
    free(keystr);
    /* destructing the buffer releases the loaded copy, if any */
    OBJ_DESTRUCT(&buf);
    return rc;
}
|
31
orte/mca/state/db/state_db.h
Обычный файл
31
orte/mca/state/db/state_db.h
Обычный файл
@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef ORTE_STATE_DB_H
|
||||
#define ORTE_STATE_DB_H
|
||||
|
||||
#include "orte/mca/state/state.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* Module open / close
|
||||
*/
|
||||
int orte_state_db_component_open(void);
|
||||
int orte_state_db_component_close(void);
|
||||
int orte_state_db_component_query(mca_base_module_t **module, int *priority);
|
||||
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_state_base_component_t mca_state_db_component;
|
||||
ORTE_DECLSPEC extern orte_state_base_module_t orte_state_db_module;
|
||||
extern char *orte_state_db_directory;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* ORTE_STATE_DB_H */
|
101
orte/mca/state/db/state_db_component.c
Обычный файл
101
orte/mca/state/db/state_db_component.c
Обычный файл
@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
* These symbols are in a file by themselves to provide nice linker
|
||||
* semantics. Since linkers generally pull in symbols by object
|
||||
* files, keeping these symbols as the only symbols in this file
|
||||
* prevents utility programs such as "ompi_info" from having to import
|
||||
* entire components just to query their version and parameters.
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/mca/state/base/base.h"
|
||||
#include "state_db.h"
|
||||
|
||||
extern orte_state_base_module_t orte_state_db_module;
|
||||
/* Directory holding the state databases.  Declared extern in state_db.h
 * and filled in from the component's "dir" parameter. */
char *orte_state_db_directory = NULL;
|
||||
|
||||
/*
|
||||
* Instantiate the public struct with all of our public information
|
||||
* and pointers to our public functions in it
|
||||
*/
|
||||
orte_state_base_component_t mca_state_db_component = {
|
||||
{
|
||||
ORTE_STATE_BASE_VERSION_1_0_0,
|
||||
|
||||
/* Component name and version */
|
||||
"db",
|
||||
ORTE_MAJOR_VERSION,
|
||||
ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION,
|
||||
|
||||
/* Component open and close functions */
|
||||
orte_state_db_component_open,
|
||||
orte_state_db_component_close,
|
||||
orte_state_db_component_query
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
int
|
||||
orte_state_db_component_open(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Component query.
 *
 * Registers the "dir" string parameter (stored in orte_state_db_directory)
 * and offers the db module, at priority 1000, only when this component was
 * explicitly required or a directory was given.  Otherwise no module is
 * offered, the priority is 0 and ORTE_ERROR is returned.
 */
int orte_state_db_component_query(mca_base_module_t **module, int *priority)
{
    mca_base_component_t *self = &mca_state_db_component.base_version;
    bool requested = false;

    /* retrieve the name of the directory to be used */
    mca_base_param_reg_string(self, "dir",
                              "Name of directory to be used for storing and recovering state information",
                              false, false, NULL, &orte_state_db_directory);

    /* find out whether we were specifically asked for */
    mca_base_is_component_required(&orte_state_base_components_available,
                                   &mca_state_db_component.base_version,
                                   true,
                                   &requested);

    /* we are only to be selected IFF we are requested */
    if (!requested && NULL == orte_state_db_directory) {
        *priority = 0;
        *module = NULL;
        return ORTE_ERROR;
    }

    *priority = 1000;
    *module = (mca_base_module_t *) &orte_state_db_module;
    return ORTE_SUCCESS;
}
|
||||
|
||||
|
||||
int
|
||||
orte_state_db_component_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
34
orte/mca/state/dbm/Makefile.am
Обычный файл
34
orte/mca/state/dbm/Makefile.am
Обычный файл
@ -0,0 +1,34 @@
|
||||
#
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
sources = \
|
||||
state_dbm.h \
|
||||
state_dbm_component.c \
|
||||
state_dbm.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if OMPI_BUILD_state_dbm_DSO
|
||||
component_noinst =
|
||||
component_install = mca_state_dbm.la
|
||||
else
|
||||
component_noinst = libmca_state_dbm.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_state_dbm_la_SOURCES = $(sources)
|
||||
mca_state_dbm_la_LDFLAGS = -module -avoid-version -ldbm
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_state_dbm_la_SOURCES =$(sources)
|
||||
libmca_state_dbm_la_LDFLAGS = -module -avoid-version -ldbm
|
16
orte/mca/state/dbm/configure.m4
Обычный файл
16
orte/mca/state/dbm/configure.m4
Обычный файл
@ -0,0 +1,16 @@
|
||||
dnl -*- shell-script -*-
|
||||
dnl
|
||||
dnl Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
dnl $COPYRIGHT$
|
||||
dnl
|
||||
dnl Additional copyrights may follow
|
||||
dnl
|
||||
dnl $HEADER$
|
||||
dnl
|
||||
|
||||
# MCA_state_dbm_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_state_dbm_CONFIG], [
|
||||
# only build if ndbm.h is found
|
||||
AC_CHECK_HEADERS([ndbm.h], [$1], [$2], [AC_INCLUDES_DEFAULT])
|
||||
])dnl
|
11
orte/mca/state/dbm/configure.params
Обычный файл
11
orte/mca/state/dbm/configure.params
Обычный файл
@ -0,0 +1,11 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
PARAM_CONFIG_FILES="Makefile"
|
244
orte/mca/state/dbm/state_dbm.c
Обычный файл
244
orte/mca/state/dbm/state_dbm.c
Обычный файл
@ -0,0 +1,244 @@
|
||||
/*
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#ifdef HAVE_LIMITS_H
|
||||
#include <limits.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#ifdef HAVE_FCNTL_H
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <ndbm.h>
|
||||
|
||||
#include "opal/dss/dss_types.h"
|
||||
#include "opal/util/os_dirpath.h"
|
||||
#include "opal/util/os_path.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/malloc.h"
|
||||
#include "opal/util/basename.h"
|
||||
#include "opal/mca/pstat/base/base.h"
|
||||
#include "opal/mca/paffinity/base/base.h"
|
||||
#include "opal/mca/sysinfo/base/base.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/errmgr/base/base.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "state_dbm.h"
|
||||
|
||||
static int init(void);
|
||||
static int finalize(void);
|
||||
static int save(void *object, opal_data_type_t type);
|
||||
static int set_recover_source(orte_process_name_t *name);
|
||||
static int recover(void *object, opal_data_type_t type);
|
||||
|
||||
orte_state_base_module_t orte_state_dbm_module = {
|
||||
init,
|
||||
finalize,
|
||||
save,
|
||||
set_recover_source,
|
||||
recover
|
||||
};
|
||||
|
||||
/* local variables */
|
||||
static DBM *save_dbm=NULL, *recover_dbm=NULL;
|
||||
|
||||
static int init(void)
|
||||
{
|
||||
char *path, *name;
|
||||
|
||||
/* setup the database */
|
||||
if (ORTE_SUCCESS != opal_os_dirpath_create(orte_state_dbm_directory, S_IRWXU)) {
|
||||
orte_show_help("help-state-dbm.txt", "cannot-create-dir", true,
|
||||
orte_state_dbm_directory);
|
||||
return ORTE_ERR_FILE_OPEN_FAILURE;
|
||||
}
|
||||
orte_util_convert_process_name_to_string(&name, ORTE_PROC_MY_NAME);
|
||||
path = opal_os_path(false, orte_state_dbm_directory, name, NULL);
|
||||
free(name);
|
||||
if (NULL == (save_dbm = dbm_open(path, O_CREAT | O_RDWR | O_TRUNC, S_IRWXU))) {
|
||||
orte_show_help("help-state-dbm.txt", "cannot-create-dbm", true, path);
|
||||
free(path);
|
||||
return ORTE_ERR_FILE_OPEN_FAILURE;
|
||||
}
|
||||
free(path);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static int finalize(void)
|
||||
{
|
||||
/* if we are normally terminating, remove the recovery file */
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Save the state of an object in the dbm database opened by init().
 *
 * object - an orte_job_t* when type is ORTE_JOB, an orte_proc_t* when type
 *          is ORTE_PROC
 * type   - selects how object is interpreted; any other value is rejected
 *
 * The job/proc state is packed into a buffer and stored, replacing any
 * existing record, under the key "JOB:<jobid>" or "PROC:<process name>"
 * (the key length excludes the terminating NUL).
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_NOT_FOUND when the database is not open,
 * ORTE_ERR_BAD_PARAM for an unrecognized type, ORTE_ERR_OUT_OF_RESOURCE
 * when the key cannot be built, the pack/unload error code when packing
 * fails, or ORTE_ERR_FILE_WRITE_FAILURE when the store fails.
 */
static int save(void *object, opal_data_type_t type)
{
    datum key, data;
    opal_buffer_t buf;
    orte_job_t *jdata;
    orte_proc_t *proc;
    char *name = NULL;
    char *keystr = NULL;
    void *payload = NULL;
    int rc = ORTE_SUCCESS;
    int32_t size = 0;

    /* nothing can be stored if init() did not open the database */
    if (NULL == save_dbm) {
        return ORTE_ERR_NOT_FOUND;
    }

    /* construct the buffer we will use for packing the data */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);
    key.dptr = NULL;
    key.dsize = 0;
    data.dptr = NULL;
    data.dsize = 0;

    switch (type) {
    case ORTE_JOB:
        jdata = (orte_job_t *) object;
        rc = opal_dss.pack(&buf, &jdata->state, 1, ORTE_JOB_STATE_T);
        if (ORTE_SUCCESS != rc) {
            goto cleanup;
        }
        if (0 > asprintf(&keystr, "JOB:%s", ORTE_JOBID_PRINT(jdata->jobid))) {
            keystr = NULL;   /* contents are undefined after a failure */
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        break;
    case ORTE_PROC:
        proc = (orte_proc_t *) object;
        rc = opal_dss.pack(&buf, &proc->state, 1, ORTE_PROC_STATE_T);
        if (ORTE_SUCCESS != rc) {
            goto cleanup;
        }
        orte_util_convert_process_name_to_string(&name, &proc->name);
        if (NULL == name) {
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        if (0 > asprintf(&keystr, "PROC:%s", name)) {
            keystr = NULL;   /* contents are undefined after a failure */
            free(name);
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        free(name);
        break;
    default:
        orte_show_help("help-state-dbm.txt", "unrecognized-type", true, type);
        rc = ORTE_ERR_BAD_PARAM;
        goto cleanup;
    }

    key.dptr = keystr;
    key.dsize = strlen(keystr);

    /* unload the data - the payload now belongs to us */
    rc = opal_dss.unload(&buf, &payload, &size);
    if (ORTE_SUCCESS != rc) {
        goto cleanup;
    }
    data.dptr = payload;
    data.dsize = size;

    /* put the info into the dbm */
    if (0 > dbm_store(save_dbm, key, data, DBM_REPLACE)) {
        orte_show_help("help-state-dbm.txt", "error-writing-dbm", true, keystr, strerror(errno));
        rc = ORTE_ERR_FILE_WRITE_FAILURE;
    }

 cleanup:
    /* release everything on every path, including the early exits */
    free(keystr);
    free(payload);
    OBJ_DESTRUCT(&buf);
    return rc;
}
|
||||
|
||||
/*
 * Select the database from which state will be recovered.
 *
 * name - process whose database (the file named after that process
 *        inside orte_state_dbm_directory) is opened read-only
 *
 * Any source selected by an earlier call is closed first, so repeated
 * calls do not leak database handles.  On failure no source is selected.
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_FILE_OPEN_FAILURE if the database
 * cannot be opened, or the error reported while converting the name.
 */
static int set_recover_source(orte_process_name_t *name)
{
    char *path, *pname;
    int rc;

    /* release the previously selected source, if any */
    if (NULL != recover_dbm) {
        dbm_close(recover_dbm);
        recover_dbm = NULL;
    }

    /* setup the database */
    if (ORTE_SUCCESS != (rc = orte_util_convert_process_name_to_string(&pname, name))) {
        return rc;
    }
    path = opal_os_path(false, orte_state_dbm_directory, pname, NULL);
    free(pname);
    if (NULL == path) {
        /* no usable path could be built - never hand NULL to dbm_open */
        return ORTE_ERR_FILE_OPEN_FAILURE;
    }

    if (NULL == (recover_dbm = dbm_open(path, O_RDONLY, S_IRWXU))) {
        orte_show_help("help-state-dbm.txt", "cannot-open-dbm", true, path);
        free(path);
        return ORTE_ERR_FILE_OPEN_FAILURE;
    }
    free(path);

    return ORTE_SUCCESS;
}
|
||||
|
||||
/*
 * Recover the state of an object from the selected recovery database.
 *
 * object - pointer to an orte_job_t (type ORTE_JOB) or an orte_proc_t
 *          (type ORTE_PROC); its jobid / name selects the record and its
 *          "state" field receives the recovered value
 * type   - ORTE_JOB or ORTE_PROC; anything else is rejected
 *
 * set_recover_source() must have been called successfully beforehand.
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_NOT_FOUND if no recovery source is set,
 * ORTE_ERR_BAD_PARAM for an unsupported type, ORTE_ERR_FILE_READ_FAILURE
 * if no record can be fetched, ORTE_ERR_OUT_OF_RESOURCE on allocation
 * failure, or the error returned by a failing load/unpack/name-conversion
 * call.  The object is left untouched unless the call succeeds.
 */
static int recover(void *object, opal_data_type_t type)
{
    datum key, data;
    opal_buffer_t buf;
    orte_job_t *jdata = NULL;
    orte_proc_t *proc = NULL;
    char *name;
    void *payload;
    int rc = ORTE_SUCCESS;
    int32_t n;
    orte_job_state_t jstate;
    orte_proc_state_t pstate;

    /* without a source there is nothing to read from */
    if (NULL == recover_dbm) {
        orte_show_help("help-state-dbm.txt", "recover-source-undef", true);
        return ORTE_ERR_NOT_FOUND;
    }

    /* construct the buffer we will use for unpacking the data */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);
    key.dptr = NULL;

    /* build the key under which save() stored the record */
    switch (type) {
    case ORTE_JOB:
        jdata = (orte_job_t*)object;
        if (0 > asprintf((char**)&key.dptr, "JOB:%s", ORTE_JOBID_PRINT(jdata->jobid))) {
            /* the pointer is undefined after a failed asprintf */
            key.dptr = NULL;
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        key.dsize = strlen(key.dptr);
        break;

    case ORTE_PROC:
        proc = (orte_proc_t*)object;
        if (ORTE_SUCCESS != (rc = orte_util_convert_process_name_to_string(&name, &proc->name))) {
            goto cleanup;
        }
        if (0 > asprintf((char**)&key.dptr, "PROC:%s", name)) {
            key.dptr = NULL;
            free(name);
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        free(name);
        key.dsize = strlen(key.dptr);
        break;

    default:
        orte_show_help("help-state-dbm.txt", "unrecognized-type", true, type);
        rc = ORTE_ERR_BAD_PARAM;
        goto cleanup;
    }

    /* get the specified data */
    data = dbm_fetch(recover_dbm, key);
    if (NULL == data.dptr) {
        orte_show_help("help-state-dbm.txt", "error-reading-dbm", true, (char*)key.dptr, strerror(errno));
        rc = ORTE_ERR_FILE_READ_FAILURE;
        goto cleanup;
    }

    /* The record returned by dbm_fetch lives in storage owned by the dbm
     * library and must not be freed by us, while a loaded buffer releases
     * its payload when destructed.  Hand the buffer a private copy.
     */
    payload = malloc(data.dsize);
    if (NULL == payload) {
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        goto cleanup;
    }
    memcpy(payload, data.dptr, data.dsize);
    if (ORTE_SUCCESS != (rc = opal_dss.load(&buf, payload, data.dsize))) {
        /* the buffer did not take the copy, so it is still ours */
        free(payload);
        goto cleanup;
    }

    /* populate the recovered info - unpack into real storage and only
     * update the object when the unpack worked
     */
    n = 1;
    if (ORTE_JOB == type) {
        if (ORTE_SUCCESS == (rc = opal_dss.unpack(&buf, &jstate, &n, ORTE_JOB_STATE_T))) {
            jdata->state = jstate;
        }
    } else {
        if (ORTE_SUCCESS == (rc = opal_dss.unpack(&buf, &pstate, &n, ORTE_PROC_STATE_T))) {
            proc->state = pstate;
        }
    }

cleanup:
    free(key.dptr);
    /* releases the payload copy, if one was loaded */
    OBJ_DESTRUCT(&buf);
    return rc;
}
|
31
orte/mca/state/dbm/state_dbm.h
Обычный файл
31
orte/mca/state/dbm/state_dbm.h
Обычный файл
@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef ORTE_STATE_DBM_H
|
||||
#define ORTE_STATE_DBM_H
|
||||
|
||||
#include "orte/mca/state/state.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* Module open / close
|
||||
*/
|
||||
int orte_state_dbm_component_open(void);
|
||||
int orte_state_dbm_component_close(void);
|
||||
int orte_state_dbm_component_query(mca_base_module_t **module, int *priority);
|
||||
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_state_base_component_t mca_state_dbm_component;
|
||||
ORTE_DECLSPEC extern orte_state_base_module_t orte_state_dbm_module;
|
||||
extern char *orte_state_dbm_directory;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* ORTE_STATE_DBM_H */
|
101
orte/mca/state/dbm/state_dbm_component.c
Обычный файл
101
orte/mca/state/dbm/state_dbm_component.c
Обычный файл
@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
* These symbols are in a file by themselves to provide nice linker
|
||||
* semantics. Since linkers generally pull in symbols by object
|
||||
* files, keeping these symbols as the only symbols in this file
|
||||
* prevents utility programs such as "ompi_info" from having to import
|
||||
* entire components just to query their version and parameters.
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/mca/state/base/base.h"
|
||||
#include "state_dbm.h"
|
||||
|
||||
extern orte_state_base_module_t orte_state_dbm_module;

/*
 * Directory in which the database files are kept.  This is the variable
 * that state_dbm.h declares and that the query function fills in from
 * the "dir" MCA parameter, so it has to be defined here; NULL means no
 * directory was given.
 */
char *orte_state_dbm_directory = NULL;

/* Not referenced by the code in this component; kept only so that the
 * symbol does not disappear for anything outside that may link to it.
 */
char *orte_state_dbm_filename = NULL;
|
||||
|
||||
/*
|
||||
* Instantiate the public struct with all of our public information
|
||||
* and pointers to our public functions in it
|
||||
*/
|
||||
orte_state_base_component_t mca_state_dbm_component = {
|
||||
{
|
||||
ORTE_STATE_BASE_VERSION_1_0_0,
|
||||
|
||||
/* Component name and version */
|
||||
"dbm",
|
||||
ORTE_MAJOR_VERSION,
|
||||
ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION,
|
||||
|
||||
/* Component open and close functions */
|
||||
orte_state_dbm_component_open,
|
||||
orte_state_dbm_component_close,
|
||||
orte_state_dbm_component_query
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
int
|
||||
orte_state_dbm_component_open(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Component query.
 *
 * Registers the "dir" string parameter (stored in
 * orte_state_dbm_directory) and offers the dbm module only if this
 * component was specifically requested or a directory was supplied.
 *
 * module   - receives the dbm module on selection, NULL otherwise
 * priority - receives 1000 on selection, 0 otherwise
 *
 * Returns ORTE_SUCCESS when the module is offered, ORTE_ERROR when not.
 */
int orte_state_dbm_component_query(mca_base_module_t **module, int *priority)
{
    bool requested = false;
    mca_base_component_t *self = &mca_state_dbm_component.base_version;

    /* retrieve the name of the directory to be used */
    mca_base_param_reg_string(self, "dir",
                              "Name of directory to be used for storing and recovering state information",
                              false, false, NULL, &orte_state_dbm_directory);

    /* was this component explicitly asked for? */
    mca_base_is_component_required(&orte_state_base_components_available,
                                   self, true, &requested);

    /* neither requested nor configured: stay out of the selection */
    if (!requested && NULL == orte_state_dbm_directory) {
        *priority = 0;
        *module = NULL;
        return ORTE_ERROR;
    }

    *priority = 1000;
    *module = (mca_base_module_t*)&orte_state_dbm_module;
    return ORTE_SUCCESS;
}
|
||||
|
||||
|
||||
int
|
||||
orte_state_dbm_component_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
91
orte/mca/state/state.h
Обычный файл
91
orte/mca/state/state.h
Обычный файл
@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/** @file:
|
||||
*
|
||||
* The OpenRTE State Save/Recovery Service
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef ORTE_STATE_H
|
||||
#define ORTE_STATE_H
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/types.h"
|
||||
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/dss/dss_types.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* API functions
|
||||
*/
|
||||
|
||||
/*
|
||||
* Initialize the module
|
||||
*/
|
||||
typedef int (*orte_state_base_module_init_fn_t)(void);
|
||||
|
||||
/*
|
||||
* Finalize the module
|
||||
*/
|
||||
typedef int (*orte_state_base_module_finalize_fn_t)(void);
|
||||
|
||||
/*
|
||||
* Save the state of the provided object
|
||||
*/
|
||||
typedef int (*orte_state_base_module_save_fn_t)(void *object, opal_data_type_t type);
|
||||
|
||||
/*
|
||||
* Set the source for recovering state info
|
||||
*/
|
||||
typedef int (*orte_state_base_module_set_recover_source_fn_t)(orte_process_name_t *name);
|
||||
|
||||
/*
|
||||
* Recover the state of an object
|
||||
*/
|
||||
typedef int (*orte_state_base_module_recover_fn_t)(void *object, opal_data_type_t type);
|
||||
|
||||
|
||||
/*
|
||||
* the standard module data structure
|
||||
*/
|
||||
struct orte_state_base_module_1_0_0_t {
|
||||
orte_state_base_module_init_fn_t init;
|
||||
orte_state_base_module_finalize_fn_t finalize;
|
||||
orte_state_base_module_save_fn_t save;
|
||||
orte_state_base_module_set_recover_source_fn_t set_recover_source;
|
||||
orte_state_base_module_recover_fn_t recover;
|
||||
};
|
||||
typedef struct orte_state_base_module_1_0_0_t orte_state_base_module_1_0_0_t;
|
||||
typedef struct orte_state_base_module_1_0_0_t orte_state_base_module_t;
|
||||
|
||||
/*
|
||||
* the standard component data structure
|
||||
*/
|
||||
struct orte_state_base_component_1_0_0_t {
|
||||
mca_base_component_t base_version;
|
||||
mca_base_component_data_t base_data;
|
||||
};
|
||||
typedef struct orte_state_base_component_1_0_0_t orte_state_base_component_1_0_0_t;
|
||||
typedef struct orte_state_base_component_1_0_0_t orte_state_base_component_t;
|
||||
|
||||
/*
 * Macro for use in components that are of type state: expands to the
 * leading initializers of the component header (MCA base version, then
 * the framework name "state" and its version 1.0.0).
 */
#define ORTE_STATE_BASE_VERSION_1_0_0 \
    MCA_BASE_VERSION_2_0_0,           \
    "state", 1, 0, 0
|
||||
|
||||
/* Global structure for accessing STATE functions */
|
||||
ORTE_DECLSPEC extern orte_state_base_module_t orte_state; /* holds selected module's function pointers */
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
Загрузка…
Ссылка в новой задаче
Block a user