2004-08-02 00:24:22 +00:00
|
|
|
/*
|
2008-06-25 14:55:09 +00:00
|
|
|
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
|
2005-11-05 19:57:48 +00:00
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
2012-08-08 12:43:13 +00:00
|
|
|
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
2005-11-05 19:57:48 +00:00
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2004-11-28 20:09:25 +00:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
2005-03-24 12:43:37 +00:00
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
Refs trac:3275.
We ran into a case where the OMPI SVN trunk grew a new acceptable MCA
parameter value, but this new value was not accepted on the v1.6
branch (hwloc_base_mem_bind_failure_action -- on the trunk it accepts
the value "silent", but on the older v1.6 branch, it doesn't). If you
set "hwloc_base_mem_bind_failure_action=silent" in the default MCA
params file and then accidentally ran with the v1.6 branch, every OMPI
executable (including ompi_info) just failed because hwloc_base_open()
would say "hey, 'silent' is not a valid value for
hwloc_base_mem_bind_failure_action!". Kaboom.
The only problem is that it didn't give you any indication of where
this value was being set. Quite maddening, from a user perspective.
So we changed the ompi_info handles this case. If any framework open
function return OMPI_ERR_BAD_PARAM (either because its base MCA params
got a bad value or because one of its component register/open
functions return OMPI_ERR_BAD_PARAM), ompi_info will stop, print out
a warning that it received and error, and then dump out the parameters
that it has received so far in the framework that had a problem.
At a minimum, this will show the user the MCA param that had an error
(it's usually the last one), and ''where it was set from'' (so that
they can go fix it).
We updated ompi_info to check for O???_ERR_BAD_PARAM from each from
the framework opens. Also updated the doxygen docs in mca.h for this
O???_BAD_PARAM behavior. And we noticed that mca.h had MCA_SUCCESS
and MCA_ERR_??? codes. Why? I think we used them in exactly one
place in the code base (mca_base_components_open.c). So we deleted
those and just used the normal OPAL_* codes instead.
While we were doing this, we also cleaned up a little memory
management during ompi_info/orte-info/opal-info finalization.
Valgrind still reports a truckload of memory still in use at ompi_info
termination, but they mostly look to be components not freeing
memory/resources properly (and outside the scope of this fix).
This commit was SVN r27306.
The following Trac tickets were found above:
Ticket 3275 --> https://svn.open-mpi.org/trac/ompi/ticket/3275
2012-09-11 20:47:24 +00:00
|
|
|
* Copyright (c) 2008-2012 Cisco Systems, Inc. All rights reserved.
|
2012-04-06 14:23:13 +00:00
|
|
|
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
|
|
|
* All rights reserved.
|
2004-11-22 01:38:40 +00:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
2004-08-02 00:24:22 +00:00
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
2006-02-12 01:33:29 +00:00
|
|
|
#include "opal_config.h"
|
2004-08-02 00:24:22 +00:00
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
2005-07-03 16:22:16 +00:00
|
|
|
#include "opal/class/opal_list.h"
|
2005-07-04 00:13:44 +00:00
|
|
|
#include "opal/util/argv.h"
|
2005-07-03 23:31:27 +00:00
|
|
|
#include "opal/util/output.h"
|
2005-07-12 18:06:31 +00:00
|
|
|
#include "opal/util/show_help.h"
|
2005-08-12 20:46:25 +00:00
|
|
|
#include "opal/mca/mca.h"
|
|
|
|
#include "opal/mca/base/base.h"
|
2005-09-01 12:16:36 +00:00
|
|
|
#include "opal/mca/base/mca_base_component_repository.h"
|
2006-02-12 01:33:29 +00:00
|
|
|
#include "opal/constants.h"
|
2004-08-02 00:24:22 +00:00
|
|
|
|
|
|
|
struct component_name_t {
|
2005-07-03 16:22:16 +00:00
|
|
|
opal_list_item_t super;
|
2004-08-02 00:24:22 +00:00
|
|
|
|
|
|
|
char mn_name[MCA_BASE_MAX_COMPONENT_NAME_LEN];
|
|
|
|
};
|
|
|
|
typedef struct component_name_t component_name_t;
|
|
|
|
|
2007-07-13 14:54:01 +00:00
|
|
|
/*
|
|
|
|
* Dummy structure for casting for open_only logic
|
|
|
|
*/
|
|
|
|
struct mca_base_open_only_dummy_component_t {
|
|
|
|
/** MCA base component */
|
|
|
|
mca_base_component_t version;
|
|
|
|
/** MCA base data */
|
2008-07-28 22:40:57 +00:00
|
|
|
mca_base_component_data_t data;
|
2007-07-13 14:54:01 +00:00
|
|
|
};
|
|
|
|
typedef struct mca_base_open_only_dummy_component_t mca_base_open_only_dummy_component_t;
|
|
|
|
|
2004-10-15 10:54:39 +00:00
|
|
|
/*
 * File-scope state
 */

/* When true, register/open failures are also reported on output
   stream 0.  Refreshed by mca_base_components_open() from the
   "component_show_load_errors" parameter of type "mca". */
static bool show_errors = false;

/* Character that, when leading the component list, turns the list from
   "include these" into "exclude these". */
static const char negate = '^';
|
2004-10-15 10:54:39 +00:00
|
|
|
|
|
|
|
|
2004-08-02 00:24:22 +00:00
|
|
|
/*
|
|
|
|
* Local functions
|
|
|
|
*/
|
2005-09-26 21:55:32 +00:00
|
|
|
static int parse_requested(int mca_param, bool *include_mode,
|
|
|
|
char ***requested_component_names);
|
2004-08-02 00:24:22 +00:00
|
|
|
static int open_components(const char *type_name, int output_id,
|
2005-09-26 21:55:32 +00:00
|
|
|
opal_list_t *src, opal_list_t *dest);
|
2004-08-02 00:24:22 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Function for finding and opening either all MCA components, or the
|
|
|
|
* one that was specifically requested via a MCA parameter.
|
|
|
|
*/
|
|
|
|
int mca_base_components_open(const char *type_name, int output_id,
|
|
|
|
const mca_base_component_t **static_components,
|
2005-07-03 16:22:16 +00:00
|
|
|
opal_list_t *components_available,
|
2005-04-13 03:19:48 +00:00
|
|
|
bool open_dso_components)
|
2004-08-02 00:24:22 +00:00
|
|
|
{
|
2007-07-13 14:54:01 +00:00
|
|
|
int ret, param;
|
2008-02-13 19:26:25 +00:00
|
|
|
opal_list_item_t *item;
|
2007-07-13 14:54:01 +00:00
|
|
|
opal_list_t components_found;
|
|
|
|
char **requested_component_names;
|
|
|
|
int param_verbose = -1;
|
|
|
|
int param_type = -1;
|
|
|
|
int verbose_level;
|
|
|
|
char *str;
|
|
|
|
bool include_mode;
|
2008-02-13 19:26:25 +00:00
|
|
|
#if (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1)
|
|
|
|
opal_list_item_t *next;
|
2007-07-13 14:54:01 +00:00
|
|
|
uint32_t open_only_flags = MCA_BASE_METADATA_PARAM_NONE;
|
|
|
|
const mca_base_component_t *component;
|
2008-02-13 19:26:25 +00:00
|
|
|
#endif
|
2007-07-13 14:54:01 +00:00
|
|
|
|
|
|
|
/* Register MCA parameters */
|
|
|
|
/* Check to see if it exists first */
|
|
|
|
if( 0 > (param_type = mca_base_param_find(type_name, NULL, NULL) ) ) {
|
|
|
|
asprintf(&str, "Default selection set of components for the %s framework (<none>"
|
|
|
|
" means use all components that can be found)", type_name);
|
|
|
|
param_type =
|
|
|
|
mca_base_param_reg_string_name(type_name, NULL, str,
|
|
|
|
false, false, NULL, NULL);
|
|
|
|
free(str);
|
|
|
|
}
|
2004-08-02 00:24:22 +00:00
|
|
|
|
2007-07-13 14:54:01 +00:00
|
|
|
param = mca_base_param_find("mca", NULL, "component_show_load_errors");
|
|
|
|
mca_base_param_lookup_int(param, &ret);
|
|
|
|
show_errors = OPAL_INT_TO_BOOL(ret);
|
|
|
|
|
|
|
|
/* Setup verbosity for this MCA type */
|
|
|
|
asprintf(&str, "Verbosity level for the %s framework (0 = no verbosity)", type_name);
|
|
|
|
param_verbose =
|
|
|
|
mca_base_param_reg_int_name(type_name, "base_verbose",
|
|
|
|
str, false, false, 0, NULL);
|
|
|
|
free(str);
|
|
|
|
mca_base_param_lookup_int(param_verbose, &verbose_level);
|
|
|
|
if (output_id != 0) {
|
|
|
|
opal_output_set_verbosity(output_id, verbose_level);
|
|
|
|
}
|
|
|
|
opal_output_verbose(10, output_id,
|
|
|
|
"mca: base: components_open: Looking for %s components",
|
|
|
|
type_name);
|
2004-08-02 00:24:22 +00:00
|
|
|
|
2007-07-13 14:54:01 +00:00
|
|
|
ret = parse_requested(param_type, &include_mode, &requested_component_names);
|
|
|
|
if( OPAL_SUCCESS != ret ) {
|
|
|
|
return ret;
|
|
|
|
}
|
2004-08-02 00:24:22 +00:00
|
|
|
|
2007-07-13 14:54:01 +00:00
|
|
|
/* Find and load requested components */
|
2008-06-25 14:55:09 +00:00
|
|
|
if (OPAL_SUCCESS != (ret =
|
2007-07-13 14:54:01 +00:00
|
|
|
mca_base_component_find(NULL, type_name, static_components,
|
|
|
|
requested_component_names, include_mode,
|
2008-06-25 14:55:09 +00:00
|
|
|
&components_found, open_dso_components)) ) {
|
|
|
|
return ret;
|
2007-07-13 14:54:01 +00:00
|
|
|
}
|
2005-09-26 21:55:32 +00:00
|
|
|
|
2007-07-13 14:54:01 +00:00
|
|
|
#if (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1)
|
|
|
|
{
|
|
|
|
int param_id = -1;
|
|
|
|
int param_val = 0;
|
|
|
|
/*
|
|
|
|
* Extract supported mca parameters for selection contraints
|
|
|
|
* Supported Options:
|
|
|
|
* - mca_base_component_distill_checkpoint_ready = Checkpoint Ready
|
|
|
|
*/
|
|
|
|
param_id = mca_base_param_reg_int_name("mca", "base_component_distill_checkpoint_ready",
|
|
|
|
"Distill only those components that are Checkpoint Ready",
|
|
|
|
false, false,
|
|
|
|
0, ¶m_val);
|
|
|
|
if( 0 != param_val ) { /* Select Checkpoint Ready */
|
|
|
|
open_only_flags |= MCA_BASE_METADATA_PARAM_CHECKPOINT;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif /* (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1) */
|
2005-09-26 21:55:32 +00:00
|
|
|
|
2007-07-13 14:54:01 +00:00
|
|
|
/*
|
|
|
|
* Pre-process the list with parameter constraints
|
|
|
|
* e.g., If requested to select only CR enabled components
|
|
|
|
* then only make available those components.
|
2008-02-13 19:26:25 +00:00
|
|
|
*
|
|
|
|
* JJH Note: Currently checkpoint/restart is the only user of this
|
|
|
|
* functionality. If other component constraint options are
|
|
|
|
* added, then this logic can be used for all contraint
|
|
|
|
* options.
|
2007-07-13 14:54:01 +00:00
|
|
|
*/
|
|
|
|
#if (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1)
|
2008-02-13 19:26:25 +00:00
|
|
|
if( !(MCA_BASE_METADATA_PARAM_NONE & open_only_flags) ) {
|
2007-07-13 14:54:01 +00:00
|
|
|
if( MCA_BASE_METADATA_PARAM_CHECKPOINT & open_only_flags) {
|
|
|
|
opal_output_verbose(10, output_id,
|
|
|
|
"mca: base: components_open: "
|
|
|
|
"including only %s components that are checkpoint enabled", type_name);
|
|
|
|
}
|
2008-02-13 19:26:25 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Check all the components to make sure they adhere to the user
|
|
|
|
* expressed requirements.
|
|
|
|
*/
|
2007-07-13 14:54:01 +00:00
|
|
|
for(item = opal_list_get_first(&components_found);
|
|
|
|
item != opal_list_get_end(&components_found);
|
|
|
|
item = next ) {
|
|
|
|
mca_base_open_only_dummy_component_t *dummy;
|
|
|
|
mca_base_component_list_item_t *cli = (mca_base_component_list_item_t *) item;
|
|
|
|
dummy = (mca_base_open_only_dummy_component_t*) cli->cli_component;
|
|
|
|
component = cli->cli_component;
|
|
|
|
|
|
|
|
next = opal_list_get_next(item);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the user asked for a checkpoint enabled run
|
|
|
|
* then only load checkpoint enabled components.
|
|
|
|
*/
|
|
|
|
if( MCA_BASE_METADATA_PARAM_CHECKPOINT & open_only_flags) {
|
|
|
|
if( MCA_BASE_METADATA_PARAM_CHECKPOINT & dummy->data.param_field) {
|
|
|
|
opal_output_verbose(10, output_id,
|
|
|
|
"mca: base: components_open: "
|
|
|
|
"(%s) Component %s is Checkpointable",
|
|
|
|
type_name,
|
|
|
|
dummy->version.mca_component_name);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
opal_output_verbose(10, output_id,
|
|
|
|
"mca: base: components_open: "
|
|
|
|
"(%s) Component %s is *NOT* Checkpointable - Disabled",
|
|
|
|
type_name,
|
|
|
|
dummy->version.mca_component_name);
|
|
|
|
opal_list_remove_item(&components_found, item);
|
2009-12-09 15:45:53 +00:00
|
|
|
/* Make sure to release the component since we are not
|
|
|
|
* opening it */
|
|
|
|
mca_base_component_repository_release(component);
|
2007-07-13 14:54:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2008-02-13 19:26:25 +00:00
|
|
|
#endif /* (OPAL_ENABLE_FT == 1) && (OPAL_ENABLE_FT_CR == 1) */
|
2004-08-02 00:24:22 +00:00
|
|
|
|
2007-07-13 14:54:01 +00:00
|
|
|
/* Open all remaining components */
|
|
|
|
ret = open_components(type_name, output_id,
|
|
|
|
&components_found, components_available);
|
2004-08-02 00:24:22 +00:00
|
|
|
|
2007-07-13 14:54:01 +00:00
|
|
|
/* Free resources */
|
|
|
|
for (item = opal_list_remove_first(&components_found); NULL != item;
|
|
|
|
item = opal_list_remove_first(&components_found)) {
|
|
|
|
OBJ_RELEASE(item);
|
|
|
|
}
|
|
|
|
OBJ_DESTRUCT(&components_found);
|
2004-08-02 00:24:22 +00:00
|
|
|
|
2007-07-13 14:54:01 +00:00
|
|
|
if (NULL != requested_component_names) {
|
|
|
|
opal_argv_free(requested_component_names);
|
|
|
|
}
|
2004-08-02 00:24:22 +00:00
|
|
|
|
2007-07-13 14:54:01 +00:00
|
|
|
/* All done */
|
|
|
|
return ret;
|
2004-08-02 00:24:22 +00:00
|
|
|
}
|
|
|
|
|
2009-12-17 15:12:26 +00:00
|
|
|
int mca_base_is_component_required(opal_list_t *components_available,
|
|
|
|
mca_base_component_t *component,
|
|
|
|
bool exclusive,
|
|
|
|
bool *is_required)
|
|
|
|
{
|
|
|
|
opal_list_item_t *item = NULL;
|
|
|
|
mca_base_component_list_item_t *cli = NULL;
|
|
|
|
mca_base_component_t *comp = NULL;
|
|
|
|
|
|
|
|
/* Sanity check */
|
|
|
|
if( NULL == components_available ||
|
|
|
|
NULL == component) {
|
|
|
|
return OPAL_ERR_BAD_PARAM;
|
|
|
|
}
|
|
|
|
|
|
|
|
*is_required = false;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Look through the components available for opening
|
|
|
|
*/
|
|
|
|
if( exclusive ) {
|
|
|
|
/* Must be the -only- component in the list */
|
|
|
|
if( 1 == opal_list_get_size(components_available) ) {
|
|
|
|
item = opal_list_get_first(components_available);
|
|
|
|
cli = (mca_base_component_list_item_t *) item;
|
|
|
|
comp = (mca_base_component_t *) cli->cli_component;
|
|
|
|
|
|
|
|
if( 0 == strncmp(comp->mca_component_name,
|
|
|
|
component->mca_component_name,
|
|
|
|
strlen(component->mca_component_name)) ) {
|
|
|
|
*is_required = true;
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* Must be one of the components in the list */
|
|
|
|
for (item = opal_list_get_first(components_available);
|
|
|
|
item != opal_list_get_end(components_available);
|
|
|
|
item = opal_list_get_next(item) ) {
|
|
|
|
cli = (mca_base_component_list_item_t *) item;
|
|
|
|
comp = (mca_base_component_t *) cli->cli_component;
|
|
|
|
|
|
|
|
if( 0 == strncmp(comp->mca_component_name,
|
|
|
|
component->mca_component_name,
|
|
|
|
strlen(component->mca_component_name)) ) {
|
|
|
|
*is_required = true;
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
2004-08-02 00:24:22 +00:00
|
|
|
|
2005-09-26 21:55:32 +00:00
|
|
|
static int parse_requested(int mca_param, bool *include_mode,
|
|
|
|
char ***requested_component_names)
|
2004-08-02 00:24:22 +00:00
|
|
|
{
|
2005-09-26 21:55:32 +00:00
|
|
|
int i;
|
2006-07-19 14:19:44 +00:00
|
|
|
char *requested, *requested_orig;
|
2004-08-02 00:24:22 +00:00
|
|
|
|
|
|
|
*requested_component_names = NULL;
|
2005-12-06 18:44:52 +00:00
|
|
|
*include_mode = true;
|
2004-08-02 00:24:22 +00:00
|
|
|
|
|
|
|
/* See if the user requested anything */
|
|
|
|
|
2010-05-18 20:54:11 +00:00
|
|
|
if (0 > mca_base_param_lookup_string(mca_param, &requested)) {
|
|
|
|
return OPAL_ERROR;
|
2004-08-02 00:24:22 +00:00
|
|
|
}
|
2005-12-06 20:06:40 +00:00
|
|
|
if (NULL == requested || 0 == strlen(requested)) {
|
2006-02-12 01:33:29 +00:00
|
|
|
return OPAL_SUCCESS;
|
2004-08-02 00:24:22 +00:00
|
|
|
}
|
2006-07-19 14:19:44 +00:00
|
|
|
requested_orig = requested;
|
2004-08-02 00:24:22 +00:00
|
|
|
|
2006-07-19 14:19:44 +00:00
|
|
|
/* Are we including or excluding? We only allow the negate
|
|
|
|
character to be the *first* character of the value (but be nice
|
|
|
|
and allow any number of negate characters in the beginning). */
|
2005-09-26 21:55:32 +00:00
|
|
|
|
2006-07-19 14:19:44 +00:00
|
|
|
while (negate == requested[0] && '\0' != requested[0]) {
|
|
|
|
*include_mode = false;
|
|
|
|
++requested;
|
|
|
|
}
|
2005-09-26 21:55:32 +00:00
|
|
|
|
2006-07-19 14:19:44 +00:00
|
|
|
/* Double check to ensure that the user did not specify the negate
|
|
|
|
character anywhere else in the value. */
|
|
|
|
|
|
|
|
i = 0;
|
|
|
|
while ('\0' != requested[i]) {
|
|
|
|
if (negate == requested[i]) {
|
|
|
|
opal_show_help("help-mca-base.txt",
|
|
|
|
"framework-param:too-many-negates",
|
|
|
|
true, requested_orig);
|
|
|
|
free(requested_orig);
|
|
|
|
return OPAL_ERROR;
|
2005-09-26 21:55:32 +00:00
|
|
|
}
|
2006-07-19 14:19:44 +00:00
|
|
|
++i;
|
2005-09-26 21:55:32 +00:00
|
|
|
}
|
|
|
|
|
2006-07-19 14:19:44 +00:00
|
|
|
/* Split up the value into individual component names */
|
|
|
|
|
|
|
|
*requested_component_names = opal_argv_split(requested, ',');
|
|
|
|
|
2004-08-02 00:24:22 +00:00
|
|
|
/* All done */
|
|
|
|
|
2006-07-19 14:19:44 +00:00
|
|
|
free(requested_orig);
|
2006-02-12 01:33:29 +00:00
|
|
|
return OPAL_SUCCESS;
|
2004-08-02 00:24:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2005-09-26 21:55:32 +00:00
|
|
|
/*
|
|
|
|
* Traverse the entire list of found components (a list of
|
|
|
|
* mca_base_component_t instances). If the requested_component_names
|
|
|
|
* array is empty, or the name of each component in the list of found
|
|
|
|
* components is in the requested_components_array, try to open it.
|
|
|
|
* If it opens, add it to the components_available list.
|
|
|
|
*/
|
|
|
|
static int open_components(const char *type_name, int output_id,
|
|
|
|
opal_list_t *src, opal_list_t *dest)
|
|
|
|
{
|
2010-01-12 19:29:12 +00:00
|
|
|
int ret;
|
2005-09-26 21:55:32 +00:00
|
|
|
opal_list_item_t *item;
|
|
|
|
const mca_base_component_t *component;
|
|
|
|
mca_base_component_list_item_t *cli;
|
|
|
|
bool called_open;
|
2008-07-28 22:40:57 +00:00
|
|
|
bool opened, registered;
|
2005-09-26 21:55:32 +00:00
|
|
|
|
|
|
|
/* Announce */
|
|
|
|
|
|
|
|
opal_output_verbose(10, output_id,
|
|
|
|
"mca: base: components_open: opening %s components",
|
|
|
|
type_name);
|
|
|
|
|
|
|
|
/* Traverse the list of found components */
|
|
|
|
|
|
|
|
OBJ_CONSTRUCT(dest, opal_list_t);
|
|
|
|
for (item = opal_list_get_first(src);
|
|
|
|
opal_list_get_end(src) != item;
|
|
|
|
item = opal_list_get_next(item)) {
|
|
|
|
cli = (mca_base_component_list_item_t *) item;
|
|
|
|
component = cli->cli_component;
|
|
|
|
|
2008-07-28 22:40:57 +00:00
|
|
|
registered = opened = called_open = false;
|
2005-07-03 23:31:27 +00:00
|
|
|
opal_output_verbose(10, output_id,
|
2005-09-26 21:55:32 +00:00
|
|
|
"mca: base: components_open: found loaded component %s",
|
2004-10-15 10:54:39 +00:00
|
|
|
component->mca_component_name);
|
2008-07-28 22:40:57 +00:00
|
|
|
|
|
|
|
/* Call the component's MCA parameter registration function */
|
|
|
|
if (NULL == component->mca_register_component_params) {
|
|
|
|
registered = true;
|
|
|
|
opal_output_verbose(10, output_id,
|
|
|
|
"mca: base: components_open: "
|
|
|
|
"component %s has no register function",
|
|
|
|
component->mca_component_name);
|
|
|
|
} else {
|
2010-01-12 19:29:12 +00:00
|
|
|
ret = component->mca_register_component_params();
|
Refs trac:3275.
We ran into a case where the OMPI SVN trunk grew a new acceptable MCA
parameter value, but this new value was not accepted on the v1.6
branch (hwloc_base_mem_bind_failure_action -- on the trunk it accepts
the value "silent", but on the older v1.6 branch, it doesn't). If you
set "hwloc_base_mem_bind_failure_action=silent" in the default MCA
params file and then accidentally ran with the v1.6 branch, every OMPI
executable (including ompi_info) just failed because hwloc_base_open()
would say "hey, 'silent' is not a valid value for
hwloc_base_mem_bind_failure_action!". Kaboom.
The only problem is that it didn't give you any indication of where
this value was being set. Quite maddening, from a user perspective.
So we changed the ompi_info handles this case. If any framework open
function return OMPI_ERR_BAD_PARAM (either because its base MCA params
got a bad value or because one of its component register/open
functions return OMPI_ERR_BAD_PARAM), ompi_info will stop, print out
a warning that it received and error, and then dump out the parameters
that it has received so far in the framework that had a problem.
At a minimum, this will show the user the MCA param that had an error
(it's usually the last one), and ''where it was set from'' (so that
they can go fix it).
We updated ompi_info to check for O???_ERR_BAD_PARAM from each from
the framework opens. Also updated the doxygen docs in mca.h for this
O???_BAD_PARAM behavior. And we noticed that mca.h had MCA_SUCCESS
and MCA_ERR_??? codes. Why? I think we used them in exactly one
place in the code base (mca_base_components_open.c). So we deleted
those and just used the normal OPAL_* codes instead.
While we were doing this, we also cleaned up a little memory
management during ompi_info/orte-info/opal-info finalization.
Valgrind still reports a truckload of memory still in use at ompi_info
termination, but they mostly look to be components not freeing
memory/resources properly (and outside the scope of this fix).
This commit was SVN r27306.
The following Trac tickets were found above:
Ticket 3275 --> https://svn.open-mpi.org/trac/ompi/ticket/3275
2012-09-11 20:47:24 +00:00
|
|
|
if (OPAL_SUCCESS == ret) {
|
2008-07-28 22:40:57 +00:00
|
|
|
registered = true;
|
2012-08-08 12:43:13 +00:00
|
|
|
opal_output_verbose(10, output_id,
|
|
|
|
"mca: base: components_open: "
|
|
|
|
"component %s register function successful",
|
|
|
|
component->mca_component_name);
|
2012-04-06 14:23:13 +00:00
|
|
|
} else if (OPAL_ERR_NOT_AVAILABLE != ret) {
|
2010-01-12 19:29:12 +00:00
|
|
|
/* If the component returns OPAL_ERR_NOT_AVAILABLE,
|
|
|
|
it's a cue to "silently ignore me" -- it's not a
|
|
|
|
failure, it's just a way for the component to say
|
|
|
|
"nope!".
|
|
|
|
|
|
|
|
Otherwise, however, display an error. We may end
|
|
|
|
up displaying this twice, but it may go to separate
|
|
|
|
streams. So better to be redundant than to not
|
|
|
|
display the error in the stream where it was
|
|
|
|
expected. */
|
2008-07-28 22:40:57 +00:00
|
|
|
|
|
|
|
if (show_errors) {
|
|
|
|
opal_output(0, "mca: base: components_open: "
|
|
|
|
"component %s / %s register function failed",
|
|
|
|
component->mca_type_name,
|
|
|
|
component->mca_component_name);
|
|
|
|
}
|
2012-08-08 12:43:13 +00:00
|
|
|
opal_output_verbose(10, output_id,
|
|
|
|
"mca: base: components_open: "
|
|
|
|
"component %s register function failed",
|
|
|
|
component->mca_component_name);
|
2008-07-28 22:40:57 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-09-26 21:55:32 +00:00
|
|
|
if (NULL == component->mca_open_component) {
|
|
|
|
opened = true;
|
2005-07-03 23:31:27 +00:00
|
|
|
opal_output_verbose(10, output_id,
|
2004-10-15 10:54:39 +00:00
|
|
|
"mca: base: components_open: "
|
2005-09-26 21:55:32 +00:00
|
|
|
"component %s has no open function",
|
2004-10-15 10:54:39 +00:00
|
|
|
component->mca_component_name);
|
2005-09-26 21:55:32 +00:00
|
|
|
} else {
|
|
|
|
called_open = true;
|
2010-01-12 19:29:12 +00:00
|
|
|
ret = component->mca_open_component();
|
Refs trac:3275.
We ran into a case where the OMPI SVN trunk grew a new acceptable MCA
parameter value, but this new value was not accepted on the v1.6
branch (hwloc_base_mem_bind_failure_action -- on the trunk it accepts
the value "silent", but on the older v1.6 branch, it doesn't). If you
set "hwloc_base_mem_bind_failure_action=silent" in the default MCA
params file and then accidentally ran with the v1.6 branch, every OMPI
executable (including ompi_info) just failed because hwloc_base_open()
would say "hey, 'silent' is not a valid value for
hwloc_base_mem_bind_failure_action!". Kaboom.
The only problem is that it didn't give you any indication of where
this value was being set. Quite maddening, from a user perspective.
So we changed the ompi_info handles this case. If any framework open
function return OMPI_ERR_BAD_PARAM (either because its base MCA params
got a bad value or because one of its component register/open
functions return OMPI_ERR_BAD_PARAM), ompi_info will stop, print out
a warning that it received and error, and then dump out the parameters
that it has received so far in the framework that had a problem.
At a minimum, this will show the user the MCA param that had an error
(it's usually the last one), and ''where it was set from'' (so that
they can go fix it).
We updated ompi_info to check for O???_ERR_BAD_PARAM from each from
the framework opens. Also updated the doxygen docs in mca.h for this
O???_BAD_PARAM behavior. And we noticed that mca.h had MCA_SUCCESS
and MCA_ERR_??? codes. Why? I think we used them in exactly one
place in the code base (mca_base_components_open.c). So we deleted
those and just used the normal OPAL_* codes instead.
While we were doing this, we also cleaned up a little memory
management during ompi_info/orte-info/opal-info finalization.
Valgrind still reports a truckload of memory still in use at ompi_info
termination, but they mostly look to be components not freeing
memory/resources properly (and outside the scope of this fix).
This commit was SVN r27306.
The following Trac tickets were found above:
Ticket 3275 --> https://svn.open-mpi.org/trac/ompi/ticket/3275
2012-09-11 20:47:24 +00:00
|
|
|
if (OPAL_SUCCESS == ret) {
|
2005-09-26 21:55:32 +00:00
|
|
|
opened = true;
|
|
|
|
opal_output_verbose(10, output_id,
|
|
|
|
"mca: base: components_open: "
|
|
|
|
"component %s open function successful",
|
|
|
|
component->mca_component_name);
|
2012-04-06 14:23:13 +00:00
|
|
|
} else if (OPAL_ERR_NOT_AVAILABLE != ret) {
|
2010-01-12 19:29:12 +00:00
|
|
|
/* If the component returns OPAL_ERR_NOT_AVAILABLE,
|
|
|
|
it's a cue to "silently ignore me" -- it's not a
|
|
|
|
failure, it's just a way for the component to say
|
|
|
|
"nope!".
|
|
|
|
|
|
|
|
Otherwise, however, display an error. We may end
|
|
|
|
up displaying this twice, but it may go to separate
|
|
|
|
streams. So better to be redundant than to not
|
|
|
|
display the error in the stream where it was
|
|
|
|
expected. */
|
2005-09-26 21:55:32 +00:00
|
|
|
|
|
|
|
if (show_errors) {
|
|
|
|
opal_output(0, "mca: base: components_open: "
|
|
|
|
"component %s / %s open function failed",
|
|
|
|
component->mca_type_name,
|
|
|
|
component->mca_component_name);
|
|
|
|
}
|
|
|
|
opal_output_verbose(10, output_id,
|
|
|
|
"mca: base: components_open: "
|
|
|
|
"component %s open function failed",
|
|
|
|
component->mca_component_name);
|
|
|
|
}
|
2004-08-02 00:24:22 +00:00
|
|
|
}
|
2005-09-26 21:55:32 +00:00
|
|
|
|
|
|
|
/* If it didn't open, close it out and get rid of it */
|
|
|
|
|
|
|
|
if (!opened) {
|
2007-02-27 02:07:33 +00:00
|
|
|
char *name;
|
2005-09-26 21:55:32 +00:00
|
|
|
if (called_open) {
|
|
|
|
if (NULL != component->mca_close_component) {
|
|
|
|
component->mca_close_component();
|
|
|
|
}
|
|
|
|
opal_output_verbose(10, output_id,
|
|
|
|
"mca: base: components_open: component %s closed",
|
|
|
|
component->mca_component_name);
|
|
|
|
called_open = false;
|
|
|
|
}
|
2007-02-27 02:07:33 +00:00
|
|
|
name = strdup(component->mca_component_name);
|
2005-09-26 21:55:32 +00:00
|
|
|
mca_base_component_repository_release(component);
|
|
|
|
opal_output_verbose(10, output_id,
|
|
|
|
"mca: base: components_open: component %s unloaded",
|
2007-02-27 02:07:33 +00:00
|
|
|
name);
|
|
|
|
free(name);
|
2004-08-02 00:24:22 +00:00
|
|
|
}
|
2005-09-26 21:55:32 +00:00
|
|
|
|
|
|
|
/* If it did open, register its "priority" MCA parameter (if
|
|
|
|
it doesn't already have one) and save it in the
|
|
|
|
opened_components list */
|
|
|
|
|
|
|
|
else {
|
2010-05-18 20:54:11 +00:00
|
|
|
if (0 > mca_base_param_find(type_name,
|
|
|
|
component->mca_component_name,
|
|
|
|
"priority")) {
|
2012-10-30 19:45:18 +00:00
|
|
|
char *tmp_name;
|
|
|
|
asprintf (&tmp_name, "%s_priority", component->mca_component_name);
|
|
|
|
|
|
|
|
if (NULL != tmp_name) {
|
|
|
|
(void) mca_base_param_reg_int_name (type_name, tmp_name, NULL,
|
|
|
|
false, false, 0, NULL);
|
|
|
|
free (tmp_name);
|
|
|
|
}
|
2005-09-26 21:55:32 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
cli = OBJ_NEW(mca_base_component_list_item_t);
|
|
|
|
if (NULL == cli) {
|
2006-02-12 01:33:29 +00:00
|
|
|
return OPAL_ERR_OUT_OF_RESOURCE;
|
2005-09-26 21:55:32 +00:00
|
|
|
}
|
|
|
|
cli->cli_component = component;
|
|
|
|
opal_list_append(dest, (opal_list_item_t *) cli);
|
2004-08-02 00:24:22 +00:00
|
|
|
}
|
|
|
|
}
|
2005-09-26 21:55:32 +00:00
|
|
|
|
|
|
|
/* All done */
|
|
|
|
|
2006-02-12 01:33:29 +00:00
|
|
|
return OPAL_SUCCESS;
|
2004-08-02 00:24:22 +00:00
|
|
|
}
|