1
1
openmpi/opal/mca/crs/base/crs_base_select.c
Josh Hursey dadca7da88 Merging in the jjhursey-ft-cr-stable branch (r13912 : HEAD).
This merge adds Checkpoint/Restart support to Open MPI. The initial
frameworks and components support a LAM/MPI-like implementation.

This commit follows the risk assessment presented to the Open MPI core
development group on Feb. 22, 2007.

This commit closes trac:158

More details to follow.

This commit was SVN r14051.

The following SVN revisions from the original message are invalid or
inconsistent and therefore were not cross-referenced:
  r13912

The following Trac tickets were found above:
  Ticket 158 --> https://svn.open-mpi.org/trac/ompi/ticket/158
2007-03-16 23:11:45 +00:00

239 строки
8.2 KiB
C

/*
* Copyright (c) 2004-2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/include/opal/constants.h"
#include "opal/util/output.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/crs/crs.h"
#include "opal/mca/crs/base/base.h"
#include "opal/runtime/opal_cr.h"
/*
 * Built-in "none" CRS component.
 *
 * This object is file-local (static). opal_crs_base_select() falls back to
 * it when the "crs" MCA parameter requests "none" or when no component in
 * opal_crs_base_components_available returns a module from its query
 * function. It is assigned directly by the selection routine; it is never
 * queried, which is why its query function slot is NULL.
 *
 * The initializer is positional, so the order of the entries below must
 * match the field order of opal_crs_base_component_t.
 */
static opal_crs_base_component_t none_component = {
/* Generic MCA component descriptor (accessed elsewhere in this file as
 * crs_version): framework version, component name/version, open/close.
 */
{
OPAL_CRS_BASE_VERSION_1_0_0,
/* Component name and version */
"none",
OMPI_MAJOR_VERSION,
OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION,
/* Component open and close functions (defined outside this file) */
opal_crs_base_none_open,
opal_crs_base_none_close
},
/* Next the MCA v1.0.0 component meta data */
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* Query Function: none; this component is picked explicitly as the fallback */
NULL,
/* Verbosity level */
0,
/* opal_output handler (-1: presumably "no output stream" -- TODO confirm) */
-1,
/* Default priority */
0
};
/*
 * Module (function table) paired with none_component.
 *
 * opal_crs_base_select() copies this table into the global opal_crs when it
 * falls back to the "none" component, and then calls opal_crs.crs_init().
 * All opal_crs_base_none_* functions referenced here are defined outside
 * this file.
 *
 * The initializer is positional, so the order of the entries below must
 * match the field order of opal_crs_base_module_t.
 */
static opal_crs_base_module_t none_module = {
/** Initialization Function */
opal_crs_base_none_module_init,
/** Finalization Function */
opal_crs_base_none_module_finalize,
/** Checkpoint interface */
opal_crs_base_none_checkpoint,
/** Restart Command Access */
opal_crs_base_none_restart,
/** Disable checkpoints */
opal_crs_base_none_disable_checkpoint,
/** Enable checkpoints */
opal_crs_base_none_enable_checkpoint
};
/**
 * Select the CRS component/module to use and initialize it.
 *
 * Steps:
 *  1. If the MCA parameter "crs_base_do_not_select" is non-zero, skip the
 *     selection entirely and return OPAL_SUCCESS.
 *  2. Read the "crs" MCA parameter. Unset/empty means auto-select. A value
 *     starting with "none" goes straight to the built-in "none" component.
 *     Any other value restricts the candidates to matching components and
 *     makes "nothing selected" an error.
 *  3. Call crs_query() on every candidate in
 *     opal_crs_base_components_available and keep the one reporting the
 *     highest priority (the first one wins on ties).
 *  4. If nothing was selected and no specific component was requested, fall
 *     back to the built-in "none" component.
 *  5. Close the non-selected components, copy the winner into
 *     opal_crs_base_selected_component / opal_crs, and call crs_init().
 *
 * The string obtained for the "crs" parameter is released on every exit
 * path taken after it has been looked up.
 *
 * @retval OPAL_SUCCESS  A component was selected and initialized, or the
 *                       selection was skipped on request.
 * @retval OPAL_ERROR    A specific component was requested but none was
 *                       selected, or the winner's crs_init() failed.
 */
int opal_crs_base_select(void)
{
    int ret = OPAL_SUCCESS;
    int priority = 0, best_priority = -1;
    opal_list_item_t *item = NULL;
    mca_base_component_list_item_t *cli = NULL;
    opal_crs_base_component_t *component = NULL, *best_component = NULL;
    opal_crs_base_module_t *module = NULL, *best_module = NULL;
    char *crs_include_list = NULL;
    bool component_requested = false;
    int int_value = 0;

    /*
     * Note: If we are a tool, then we will manually run the selection routine
     *       for the checkpointer. The tool will set the MCA parameter
     *       'crs_base_do_not_select' before opal_init and then reset it after
     *       to disable the selection logic.
     *       This is useful for opal_restart because it reads the metadata
     *       file that indicates the checkpointer to be used after calling
     *       opal_init. Therefore it would need to select a specific module,
     *       but it doesn't know which one until later. It will set the MCA
     *       parameter 'crs' before calling this function.
     */
    mca_base_param_reg_int_name("crs_base",
                                "do_not_select",
                                "Do not do the selection of the CRS component",
                                true, false,
                                false,
                                &int_value);
    if (0 != int_value) {
        /* Nothing has been allocated yet, so a direct return is safe. */
        opal_output_verbose(10, opal_crs_base_output,
                            "crs:select: Not selecting at this time!");
        return OPAL_SUCCESS;
    }

    /* Register the framework MCA param and look it up.
     * From here on crs_include_list may own memory: leave via 'cleanup'.
     */
    mca_base_param_reg_string_name("crs", NULL,
                                   "Which CRS component to use (empty = auto-select)",
                                   false, false,
                                   NULL, &crs_include_list);

    if (NULL == crs_include_list || 0 == strlen(crs_include_list)) {
        opal_output_verbose(10, opal_crs_base_output,
                            "crs:select: auto-selecting");
    } else {
        opal_output_verbose(10, opal_crs_base_output,
                            "crs:select: looking for %s component", crs_include_list);

        /* NOTE(review): only the first strlen("none") characters are
         * compared, so any value that merely starts with "none" takes this
         * path -- confirm that is intended.
         */
        if (0 == strncmp(crs_include_list, "none", strlen("none"))) {
            goto do_none_comp;
        }

        /* If we can't find the requested component, then fail */
        component_requested = true;
    }

    /* Traverse the list of available components;
     * calling their query functions
     */
    for (item  = opal_list_get_first(&opal_crs_base_components_available);
         item != opal_list_get_end(&opal_crs_base_components_available);
         item  = opal_list_get_next(item)) {
        cli = (mca_base_component_list_item_t *) item;
        component = (opal_crs_base_component_t *) cli->cli_component;

        /* If a specific component was requested, skip everything else.
         * component_requested is only true for a non-NULL, non-empty list.
         * NOTE(review): the comparison is limited to strlen(crs_include_list)
         * characters, i.e. the request matches as a prefix of the component
         * name -- confirm that is intended.
         */
        if (component_requested &&
            0 != strncmp(component->crs_version.mca_component_name,
                         crs_include_list, strlen(crs_include_list))) {
            opal_output_verbose(10, opal_crs_base_output,
                                "crs:select: Skipping %s component",
                                component->crs_version.mca_component_name);
            continue;
        }

        if (NULL == component->crs_query) {
            opal_output_verbose(10, opal_crs_base_output,
                                "crs:select: No init function! Ignoring component %s",
                                component->crs_version.mca_component_name);
            continue;
        }

        opal_output_verbose(10, opal_crs_base_output,
                            "crs:select: Initializing component %s",
                            component->crs_version.mca_component_name);

        module = component->crs_query(&priority);
        if (NULL == module) {
            opal_output_verbose(10, opal_crs_base_output,
                                "crs:select: Init returned failure for component %s",
                                component->crs_version.mca_component_name);
            continue;
        }

        opal_output_verbose(10, opal_crs_base_output,
                            "crs:select: Init returned priority %d",
                            priority);

        /* Strict '>' keeps the earlier component when priorities tie. */
        if (priority > best_priority) {
            best_priority  = priority;
            best_component = component;
            best_module    = module;
        }
    }

    /* Finished querying all components.
     * Check for the bozo case.
     */
 do_none_comp:
    if (NULL == best_component) {
        if (component_requested) {
            /* The requested component could not be selected. */
            ret = OPAL_ERROR;
            goto cleanup;
        }

        /* NOTE(review): verbosity 19 differs from the 10 used elsewhere in
         * this function -- confirm it is not a typo.
         */
        opal_output_verbose(19, opal_crs_base_output,
                            "crs:select: No component found, using the base component.  ;(");
        best_component = &none_component;
        best_module    = &none_module;
    }

    /* Go through the list and close
     * the non-selected components
     */
    mca_base_components_close(0, /* We must pass it 0, to keep it from closing it */
                              &opal_crs_base_components_available,
                              (mca_base_component_t *) best_component);

    /* Save the winner. best_module is always set together with
     * best_component, so it is non-NULL here.
     */
    opal_crs_base_selected_component = *best_component;
    opal_crs = *best_module;
    opal_output_verbose(5, opal_crs_base_output,
                        "crs:select: Component %s selected",
                        best_component->crs_version.mca_component_name);

    /* Initialize the winner */
    if (OPAL_SUCCESS != opal_crs.crs_init()) {
        ret = OPAL_ERROR;
    }

 cleanup:
    /* Single exit point so the parameter string is released on the error
     * paths as well; free(NULL) is a no-op.
     */
    free(crs_include_list);

    return ret;
}