Add a new ras module to support bootstrap operations. Additional functionality may eventually be required in the component, but for now all it does is provide a mechanism for ensuring that other allocations don't confuse the system.
Only active if specifically directed to use it. This commit was SVN r22040.
Этот коммит содержится в:
родитель
358528309e
Коммит
51f64aaf96
@ -99,7 +99,8 @@ enum {
|
||||
ORTE_ERR_FILE_NOT_EXECUTABLE = (ORTE_ERR_BASE - 27),
|
||||
ORTE_ERR_HNP_COULD_NOT_START = (ORTE_ERR_BASE - 28),
|
||||
ORTE_ERR_SYS_LIMITS_SOCKETS = (ORTE_ERR_BASE - 29),
|
||||
ORTE_ERR_SOCKET_NOT_AVAILABLE = (ORTE_ERR_BASE - 30)
|
||||
ORTE_ERR_SOCKET_NOT_AVAILABLE = (ORTE_ERR_BASE - 30),
|
||||
ORTE_ERR_SYSTEM_WILL_BOOTSTRAP = (ORTE_ERR_BASE - 31)
|
||||
};
|
||||
|
||||
#define ORTE_ERR_MAX (ORTE_ERR_BASE - 100)
|
||||
|
@ -137,6 +137,15 @@ int orte_ras_base_allocate(orte_job_t *jdata)
|
||||
if (NULL != orte_ras_base.active_module) {
|
||||
/* read the allocation */
|
||||
if (ORTE_SUCCESS != (rc = orte_ras_base.active_module->allocate(&nodes))) {
|
||||
if (ORTE_ERR_SYSTEM_WILL_BOOTSTRAP == rc) {
|
||||
/* this module indicates that nodes will be discovered
|
||||
* on a bootstrap basis, so there is nothing more
|
||||
* for us to do
|
||||
*/
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
rc = ORTE_SUCCESS;
|
||||
goto DISPLAY;
|
||||
}
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&nodes);
|
||||
return rc;
|
||||
|
39
orte/mca/ras/cm/Makefile.am
Обычный файл
39
orte/mca/ras/cm/Makefile.am
Обычный файл
@ -0,0 +1,39 @@
|
||||
#
|
||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
sources = \
|
||||
ras_cm.h \
|
||||
ras_cm_component.c \
|
||||
ras_cm_module.c
|
||||
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if OMPI_BUILD_ras_cm_DSO
|
||||
lib =
|
||||
lib_sources =
|
||||
component = mca_ras_cm.la
|
||||
component_sources = $(sources)
|
||||
else
|
||||
lib = libmca_ras_cm.la
|
||||
lib_sources = $(sources)
|
||||
component =
|
||||
component_sources =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component)
|
||||
mca_ras_cm_la_SOURCES = $(component_sources)
|
||||
mca_ras_cm_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(lib)
|
||||
libmca_ras_cm_la_SOURCES = $(lib_sources)
|
||||
libmca_ras_cm_la_LDFLAGS = -module -avoid-version
|
11
orte/mca/ras/cm/configure.params
Обычный файл
11
orte/mca/ras/cm/configure.params
Обычный файл
@ -0,0 +1,11 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
PARAM_CONFIG_FILES="Makefile"
|
27
orte/mca/ras/cm/ras_cm.h
Обычный файл
27
orte/mca/ras/cm/ras_cm.h
Обычный файл
@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file
|
||||
*
|
||||
*/
|
||||
#ifndef ORTE_RAS_CM_H
|
||||
#define ORTE_RAS_CM_H
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/mca/ras/ras.h"
|
||||
#include "orte/mca/ras/base/base.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
ORTE_DECLSPEC extern orte_ras_base_component_t mca_ras_cm_component;
|
||||
ORTE_DECLSPEC extern orte_ras_base_module_t orte_ras_cm_module;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
73
orte/mca/ras/cm/ras_cm_component.c
Обычный файл
73
orte/mca/ras/cm/ras_cm_component.c
Обычный файл
@ -0,0 +1,73 @@
|
||||
/*
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include "opal/mca/base/base.h"
|
||||
|
||||
#include "ras_cm.h"
|
||||
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
static int ras_cm_open(void);
|
||||
static int orte_ras_cm_component_query(mca_base_module_t **module, int *priority);
|
||||
|
||||
|
||||
orte_ras_base_component_t mca_ras_cm_component = {
|
||||
/* First, the mca_base_component_t struct containing meta
|
||||
information about the component itself */
|
||||
|
||||
{
|
||||
ORTE_RAS_BASE_VERSION_2_0_0,
|
||||
|
||||
/* Component name and version */
|
||||
"cm",
|
||||
ORTE_MAJOR_VERSION,
|
||||
ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION,
|
||||
|
||||
/* Component open and close functions */
|
||||
ras_cm_open,
|
||||
NULL,
|
||||
orte_ras_cm_component_query
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
static int ras_cm_open(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Decide whether this component should be used.
 *
 * The component offers its module only when the OMPI_MCA_ras
 * environment variable is set to exactly "cm"; in every other case
 * (variable unset, or set to anything else) it declines.
 *
 * @param module    (OUT) address of the cm module when selected,
 *                  NULL otherwise
 * @param priority  (OUT) 100 when selected, 0 otherwise
 *
 * @return ORTE_SUCCESS when selected, ORTE_ERROR when declining
 *
 * NOTE(review): getenv() and strcmp() are declared in <stdlib.h> and
 * <string.h>; confirm those headers reach this file through its
 * existing includes.
 */
static int orte_ras_cm_component_query(mca_base_module_t **module, int *priority)
{
    const char *requested = getenv("OMPI_MCA_ras");

    if (NULL != requested && 0 == strcmp("cm", requested)) {
        /* we were explicitly asked for */
        *priority = 100;
        *module = (mca_base_module_t *) &orte_ras_cm_module;
        return ORTE_SUCCESS;
    }

    /* not specified: hand back nothing and report failure */
    *priority = 0;
    *module = NULL;
    return ORTE_ERROR;
}
|
60
orte/mca/ras/cm/ras_cm_module.c
Обычный файл
60
orte/mca/ras/cm/ras_cm_module.c
Обычный файл
@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
#include "orte/types.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/util/output.h"
|
||||
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "ras_cm.h"
|
||||
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
static int allocate(opal_list_t *nodes);
|
||||
static int finalize(void);
|
||||
|
||||
|
||||
/*
|
||||
* Module APIs
|
||||
*/
|
||||
orte_ras_base_module_t orte_ras_cm_module = {
|
||||
allocate,
|
||||
finalize
|
||||
};
|
||||
|
||||
/**
|
||||
* Since the system will be bootstrapping, there is
|
||||
* nothing to do here
|
||||
*/
|
||||
static int allocate(opal_list_t *nodes)
|
||||
{
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:cm:allocate: success",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
/* indicate that nodes will be discovered via bootstrap */
|
||||
return ORTE_ERR_SYSTEM_WILL_BOOTSTRAP;
|
||||
}
|
||||
|
||||
/*
|
||||
* There's really nothing to do here
|
||||
*/
|
||||
static int finalize(void)
|
||||
{
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
|
||||
"%s ras:cm:finalize: success (nothing to do)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
return ORTE_SUCCESS;
|
||||
}
|
@ -267,10 +267,11 @@ PROCESS:
|
||||
* in the mapper
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
|
||||
"%s rmaps:base: mapping proc for job %s to node %s",
|
||||
"%s rmaps:base: mapping proc for job %s to node %s whose daemon is %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(proc->name.jobid),
|
||||
(NULL == node->name) ? "NULL" : node->name));
|
||||
(NULL == node->name) ? "NULL" : node->name,
|
||||
(NULL == node->daemon) ? "NULL" : ORTE_NAME_PRINT(&(node->daemon->name))));
|
||||
|
||||
if (0 > (rc = opal_pointer_array_add(node->procs, (void*)proc))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -121,6 +121,9 @@ orte_err2str(int errnum)
|
||||
case ORTE_ERR_SOCKET_NOT_AVAILABLE:
|
||||
retval = "Unable to open a TCP socket for out-of-band communications";
|
||||
break;
|
||||
case ORTE_ERR_SYSTEM_WILL_BOOTSTRAP:
|
||||
retval = "System will determine resources during bootstrap of daemons";
|
||||
break;
|
||||
default:
|
||||
retval = NULL;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user