1
1

Add a new ras module to support bootstrap operations. Additional functionality may eventually be required in the component, but for now all it does is provide a mechanism for ensuring that other allocations don't confuse the system.

The module is only active if the user specifically directs the system to use it.

This commit was SVN r22040.
Этот коммит содержится в:
Ralph Castain 2009-09-30 23:30:24 +00:00
родитель 358528309e
Коммит 51f64aaf96
9 изменённых файлов: 227 добавлений и 3 удалений

Просмотреть файл

@ -99,7 +99,8 @@ enum {
ORTE_ERR_FILE_NOT_EXECUTABLE = (ORTE_ERR_BASE - 27),
ORTE_ERR_HNP_COULD_NOT_START = (ORTE_ERR_BASE - 28),
ORTE_ERR_SYS_LIMITS_SOCKETS = (ORTE_ERR_BASE - 29),
ORTE_ERR_SOCKET_NOT_AVAILABLE = (ORTE_ERR_BASE - 30)
ORTE_ERR_SOCKET_NOT_AVAILABLE = (ORTE_ERR_BASE - 30),
ORTE_ERR_SYSTEM_WILL_BOOTSTRAP = (ORTE_ERR_BASE - 31)
};
#define ORTE_ERR_MAX (ORTE_ERR_BASE - 100)

Просмотреть файл

@ -137,6 +137,15 @@ int orte_ras_base_allocate(orte_job_t *jdata)
if (NULL != orte_ras_base.active_module) {
/* read the allocation */
if (ORTE_SUCCESS != (rc = orte_ras_base.active_module->allocate(&nodes))) {
if (ORTE_ERR_SYSTEM_WILL_BOOTSTRAP == rc) {
/* this module indicates that nodes will be discovered
* on a bootstrap basis, so there is nothing more
* for us to do
*/
OBJ_DESTRUCT(&nodes);
rc = ORTE_SUCCESS;
goto DISPLAY;
}
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&nodes);
return rc;

39
orte/mca/ras/cm/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,39 @@
#
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Files that make up the ras "cm" component.
sources = \
ras_cm.h \
ras_cm_component.c \
ras_cm_module.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_ras_cm_DSO
# DSO build: only the mca_ras_cm.la target is named and given the sources.
lib =
lib_sources =
component = mca_ras_cm.la
component_sources = $(sources)
else
# Static build: only the libmca_ras_cm.la target is named and given the sources.
lib = libmca_ras_cm.la
lib_sources = $(sources)
component =
component_sources =
endif
# The component library (if any) is installed into $(pkglibdir).
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component)
mca_ras_cm_la_SOURCES = $(component_sources)
mca_ras_cm_la_LDFLAGS = -module -avoid-version
# The static library (if any) is built but not installed (noinst_ prefix).
noinst_LTLIBRARIES = $(lib)
libmca_ras_cm_la_SOURCES = $(lib_sources)
libmca_ras_cm_la_LDFLAGS = -module -avoid-version

11
orte/mca/ras/cm/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,11 @@
# -*- shell-script -*-
#
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
PARAM_CONFIG_FILES="Makefile"

27
orte/mca/ras/cm/ras_cm.h Обычный файл
Просмотреть файл

@ -0,0 +1,27 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Public declarations of the ras "cm" component structure and its
* module structure.
*/
#ifndef ORTE_RAS_CM_H
#define ORTE_RAS_CM_H
#include "orte_config.h"
#include "orte/mca/ras/ras.h"
#include "orte/mca/ras/base/base.h"
BEGIN_C_DECLS
ORTE_DECLSPEC extern orte_ras_base_component_t mca_ras_cm_component;
ORTE_DECLSPEC extern orte_ras_base_module_t orte_ras_cm_module;
END_C_DECLS
#endif

73
orte/mca/ras/cm/ras_cm_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,73 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"

#include <stdlib.h>
#include <string.h>

#include "opal/mca/base/base.h"
#include "ras_cm.h"
/*
* Local functions
*/
static int ras_cm_open(void);
static int orte_ras_cm_component_query(mca_base_module_t **module, int *priority);
/*
 * Public descriptor of the "cm" RAS component: framework version, component
 * name and version numbers, the component entry points, and the component
 * metadata.
 */
orte_ras_base_component_t mca_ras_cm_component = {
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
ORTE_RAS_BASE_VERSION_2_0_0,
/* Component name and version */
"cm",
ORTE_MAJOR_VERSION,
ORTE_MINOR_VERSION,
ORTE_RELEASE_VERSION,
/* Component open and close functions (NULL: no close function is
 * supplied), followed by the query function used to select the module */
ras_cm_open,
NULL,
orte_ras_cm_component_query
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
};
/*
 * Component open hook.  There is nothing to initialize, so this
 * simply reports success.
 *
 * @return ORTE_SUCCESS, always
 */
static int ras_cm_open(void)
{
    return ORTE_SUCCESS;
}
/**
 * Decide whether the "cm" RAS module should be offered for selection.
 *
 * The module is offered only when the OMPI_MCA_ras environment variable
 * is set to exactly "cm"; in every other case the component declines.
 *
 * @param module   (OUT) &orte_ras_cm_module when selected, NULL otherwise
 * @param priority (OUT) 100 when selected, 0 otherwise
 *
 * @return ORTE_SUCCESS when the module is offered, ORTE_ERROR when the
 *         component declines
 */
static int orte_ras_cm_component_query(mca_base_module_t **module, int *priority)
{
    /* the string returned by getenv() must not be modified, hence const */
    const char *directive = getenv("OMPI_MCA_ras");

    /* we were specified */
    if (NULL != directive && 0 == strcmp("cm", directive)) {
        *priority = 100;
        *module = (mca_base_module_t *) &orte_ras_cm_module;
        return ORTE_SUCCESS;
    }

    /* not requested (unset, or set to something else): decline */
    *priority = 0;
    *module = NULL;
    return ORTE_ERROR;
}

60
orte/mca/ras/cm/ras_cm_module.c Обычный файл
Просмотреть файл

@ -0,0 +1,60 @@
/*
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"
#include "opal/class/opal_list.h"
#include "opal/util/output.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "ras_cm.h"
/*
* Local functions
*/
static int allocate(opal_list_t *nodes);
static int finalize(void);
/*
* Module APIs
*/
/* Function table exported by this module: the file-local allocate and
 * finalize entry points, in that order. */
orte_ras_base_module_t orte_ras_cm_module = {
allocate,
finalize
};
/**
 * Allocation entry point of the "cm" module.
 *
 * Since the system will be bootstrapping, no nodes are added to the
 * list here; the function only emits a verbose log line and reports
 * that nodes will be discovered via bootstrap.
 *
 * @param nodes  node list supplied by the caller (not touched here)
 *
 * @return ORTE_ERR_SYSTEM_WILL_BOOTSTRAP, always
 */
static int allocate(opal_list_t *nodes)
{
    const int rc = ORTE_ERR_SYSTEM_WILL_BOOTSTRAP;

    OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                         "%s ras:cm:allocate: success",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* tell the caller that nodes will be discovered via bootstrap */
    return rc;
}
/*
 * Module finalize entry point.  There is nothing to release here, so
 * the function only emits a verbose log line and reports success.
 *
 * @return ORTE_SUCCESS, always
 */
static int finalize(void)
{
    const int rc = ORTE_SUCCESS;

    OPAL_OUTPUT_VERBOSE((1, orte_ras_base.ras_output,
                         "%s ras:cm:finalize: success (nothing to do)",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    return rc;
}

Просмотреть файл

@ -267,10 +267,11 @@ PROCESS:
* in the mapper
*/
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
"%s rmaps:base: mapping proc for job %s to node %s",
"%s rmaps:base: mapping proc for job %s to node %s whose daemon is %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(proc->name.jobid),
(NULL == node->name) ? "NULL" : node->name));
(NULL == node->name) ? "NULL" : node->name,
(NULL == node->daemon) ? "NULL" : ORTE_NAME_PRINT(&(node->daemon->name))));
if (0 > (rc = opal_pointer_array_add(node->procs, (void*)proc))) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -121,6 +121,9 @@ orte_err2str(int errnum)
case ORTE_ERR_SOCKET_NOT_AVAILABLE:
retval = "Unable to open a TCP socket for out-of-band communications";
break;
case ORTE_ERR_SYSTEM_WILL_BOOTSTRAP:
retval = "System will determine resources during bootstrap of daemons";
break;
default:
retval = NULL;
}