1
1

mtl: add query method to mtl components

Switch to using the query/priority method for selecting
MTLs.  This switch was motivated by the fact that now
on some platforms, it's possible for multiple MTLs to
be initializable, but only one MTL should be selected.

In addition, there is a complication with the PSM and
OFI (with PSM provider) MTLs owing to the fact that
they cannot both initialize the underlying PSM context,
i.e. only one call to psm_init is allowed per process.

The mxm component has not been compiled as the author
doesn't currently have access to a system with a recent
enough mxm installed to allow for a compile.

The portals4, ofi, and psm components have been checked
for compilation.  The ofi and psm components have been
checked for runtime correctness on an Intel/QLogic system
with up to date PSM installed.
Этот коммит содержится в:
Howard Pritchard 2015-01-28 10:06:13 -07:00
родитель 2d79963cb3
Коммит eb977de5e9
5 изменённых файлов: 124 добавлений и 50 удалений

Просмотреть файл

@ -49,64 +49,50 @@ int
ompi_mtl_base_select(bool enable_progress_threads,
bool enable_mpi_threads)
{
opal_list_item_t *item = NULL;
mca_base_component_list_item_t *cli = NULL;
mca_mtl_base_component_t *component = NULL;
mca_mtl_base_module_t *module = NULL;
int ret = OMPI_ERR_NOT_FOUND;
mca_mtl_base_component_t *best_component = NULL;
mca_mtl_base_module_t *best_module = NULL;
/* Traverse the list of available components; call their init
functions. */
for (item = opal_list_get_first(&ompi_mtl_base_framework.framework_components);
opal_list_get_end(&ompi_mtl_base_framework.framework_components) != item;
item = opal_list_get_next(item) ) {
cli = (mca_base_component_list_item_t *) item;
component = (mca_mtl_base_component_t *) cli->cli_component;
if (NULL == component->mtl_init) {
opal_output_verbose( 10, ompi_mtl_base_framework.framework_output,
"select: no init function; ignoring component %s",
component->mtl_version.mca_component_name );
continue;
}
opal_output_verbose( 10, ompi_mtl_base_framework.framework_output,
"select: initializing %s component %s",
component->mtl_version.mca_type_name,
component->mtl_version.mca_component_name );
module = component->mtl_init(enable_progress_threads,
enable_mpi_threads);
if (NULL == module) {
opal_output_verbose( 10, ompi_mtl_base_framework.framework_output,
"select: init returned failure for component %s",
component->mtl_version.mca_component_name );
continue;
}
opal_output_verbose( 10, ompi_mtl_base_framework.framework_output,
"select: init returned success");
ompi_mtl_base_selected_component = component;
ompi_mtl = module;
/*
* Select the best component
*/
if( OPAL_SUCCESS != mca_base_select("mtl", ompi_mtl_base_framework.framework_output,
&ompi_mtl_base_framework.framework_components,
(mca_base_module_t **) &best_module,
(mca_base_component_t **) &best_component) ) {
/* notify caller that no available component found */
return ret;
}
/* This base function closes, unloads, and removes from the
available list all unselected components. The available list will
contain only the selected component. */
if (ompi_mtl_base_selected_component) {
(void) mca_base_framework_components_close(&ompi_mtl_base_framework,
(mca_base_component_t *) ompi_mtl_base_selected_component);
opal_output_verbose( 10, ompi_mtl_base_framework.framework_output,
"select: initializing %s component %s",
best_component->mtl_version.mca_type_name,
best_component->mtl_version.mca_component_name );
if (NULL == best_component->mtl_init(enable_progress_threads,
enable_mpi_threads)) {
opal_output_verbose( 10, ompi_mtl_base_framework.framework_output,
"select: init returned failure for component %s",
best_component->mtl_version.mca_component_name );
} else {
opal_output_verbose( 10, ompi_mtl_base_framework.framework_output,
"select: init returned success");
ompi_mtl_base_selected_component = best_component;
ompi_mtl = best_module;
ret = OMPI_SUCCESS;
}
/* All done */
if (NULL == module) {
if (NULL == ompi_mtl) {
opal_output_verbose( 10, ompi_mtl_base_framework.framework_output,
"select: no component selected");
return OMPI_ERR_NOT_FOUND;
} else {
opal_output_verbose( 10, ompi_mtl_base_framework.framework_output,
"select: component %s selected",
ompi_mtl_base_selected_component->
mtl_version.mca_component_name );
return OMPI_SUCCESS;
}
return ret;
}

Просмотреть файл

@ -24,9 +24,12 @@
#include <unistd.h>
static int ompi_mtl_mxm_component_open(void);
static int ompi_mtl_mxm_component_query(mca_base_module_t **module, int *priority);
static int ompi_mtl_mxm_component_close(void);
static int ompi_mtl_mxm_component_register(void);
static int param_priority;
int mca_mtl_mxm_output = -1;
@ -48,7 +51,7 @@ mca_mtl_mxm_component_t mca_mtl_mxm_component = {
OMPI_RELEASE_VERSION, /* MCA component release version */
ompi_mtl_mxm_component_open, /* component open */
ompi_mtl_mxm_component_close, /* component close */
NULL,
ompi_mtl_mxm_component_query, /* component query */
ompi_mtl_mxm_component_register
},
{
@ -125,6 +128,15 @@ static int ompi_mtl_mxm_component_register(void)
free(runtime_version);
#endif
param_priority = 100;
(void) mca_base_component_var_register (c,
"priority", "Priority of the MXM MTL component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&param_priority);
#if MXM_API >= MXM_VERSION(3,1)
{
unsigned long cur_ver = mxm_get_version();
@ -245,6 +257,18 @@ static int ompi_mtl_mxm_component_open(void)
return OMPI_SUCCESS;
}
/**
 * MCA query hook for the MXM MTL component.
 *
 * Publishes the component's selection priority and its module.  Reaching
 * this function is taken to mean that MXM is usable, so the (by default
 * high) value of the "priority" MCA variable is reported unchanged.
 *
 * @param[out] module    receives the address of ompi_mtl_mxm.super
 * @param[out] priority  receives the current value of param_priority
 *
 * @return OMPI_SUCCESS unconditionally.
 */
static int ompi_mtl_mxm_component_query(mca_base_module_t **module, int *priority)
{
    *priority = param_priority;

    /* The query interface is expressed in terms of the generic MCA module
     * type, so convert explicitly rather than relying on an implicit
     * pointer conversion.
     * NOTE(review): assumes ompi_mtl_mxm.super is the MTL base module
     * embedded in the MXM module struct -- confirm against its declaration. */
    *module = (mca_base_module_t *) &ompi_mtl_mxm.super;

    return OMPI_SUCCESS;
}
static int ompi_mtl_mxm_component_close(void)
{
if (ompi_mtl_mxm.mxm_context != NULL) {

Просмотреть файл

@ -21,6 +21,7 @@
#include "mtl_ofi_message.h"
static int ompi_mtl_ofi_component_open(void);
static int ompi_mtl_ofi_component_query(mca_base_module_t **module, int *priority);
static int ompi_mtl_ofi_component_close(void);
static int ompi_mtl_ofi_component_register(void);
@ -28,6 +29,7 @@ static mca_mtl_base_module_t*
ompi_mtl_ofi_component_init(bool enable_progress_threads,
bool enable_mpi_threads);
static int param_priority;
mca_mtl_ofi_component_t mca_mtl_ofi_component = {
{
@ -44,7 +46,7 @@ mca_mtl_ofi_component_t mca_mtl_ofi_component = {
OMPI_RELEASE_VERSION, /* MCA component release version */
ompi_mtl_ofi_component_open, /* component open */
ompi_mtl_ofi_component_close, /* component close */
NULL,
ompi_mtl_ofi_component_query,
ompi_mtl_ofi_component_register
},
{
@ -59,7 +61,6 @@ mca_mtl_ofi_component_t mca_mtl_ofi_component = {
static int
ompi_mtl_ofi_component_register(void)
{
ompi_mtl_ofi.provider_name = NULL;
(void) mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version,
"provider",
@ -68,10 +69,18 @@ ompi_mtl_ofi_component_register(void)
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_mtl_ofi.provider_name);
param_priority = 10; /* for now give a lower priority than the psm mtl */
mca_base_component_var_register (&mca_mtl_ofi_component.super.mtl_version,
"priority", "Priority of the OFI MTL component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&param_priority);
return OMPI_SUCCESS;
}
static int
ompi_mtl_ofi_component_open(void)
{
@ -93,6 +102,13 @@ ompi_mtl_ofi_component_open(void)
return OMPI_SUCCESS;
}
/**
 * MCA query hook for the OFI MTL component.
 *
 * Publishes the component's selection priority (the value of the
 * "priority" MCA variable) together with its module.
 *
 * @param[out] module    receives the address of ompi_mtl_ofi.base
 * @param[out] priority  receives the current value of param_priority
 *
 * @return OMPI_SUCCESS unconditionally.
 */
static int
ompi_mtl_ofi_component_query(mca_base_module_t **module, int *priority)
{
    *priority = param_priority;

    /* The query interface is expressed in terms of the generic MCA module
     * type, so convert explicitly rather than relying on an implicit
     * pointer conversion.
     * NOTE(review): assumes ompi_mtl_ofi.base is the MTL base module
     * embedded in the OFI module struct -- confirm against its declaration. */
    *module = (mca_base_module_t *) &ompi_mtl_ofi.base;

    return OMPI_SUCCESS;
}
static int
ompi_mtl_ofi_component_close(void)

Просмотреть файл

@ -29,9 +29,12 @@
#include "mtl_portals4_recv_short.h"
#include "mtl_portals4_message.h"
static int param_priority;
static int ompi_mtl_portals4_component_register(void);
static int ompi_mtl_portals4_component_open(void);
static int ompi_mtl_portals4_component_close(void);
static int ompi_mtl_portals4_component_query(mca_base_module_t **module, int *priority);
static mca_mtl_base_module_t*
ompi_mtl_portals4_component_init(bool enable_progress_threads,
bool enable_mpi_threads);
@ -51,8 +54,8 @@ mca_mtl_base_component_2_0_0_t mca_mtl_portals4_component = {
OMPI_MINOR_VERSION, /* MCA component minor version */
OMPI_RELEASE_VERSION, /* MCA component release version */
ompi_mtl_portals4_component_open, /* component open */
ompi_mtl_portals4_component_query, /* component close */
ompi_mtl_portals4_component_close, /* component close */
NULL,
ompi_mtl_portals4_component_register
},
{
@ -75,6 +78,14 @@ ompi_mtl_portals4_component_register(void)
mca_base_var_enum_t *new_enum;
int ret;
param_priority = 10;
(void) mca_base_component_var_register (&mca_mtl_portals4_component.mtl_version,
"priority", "Priority of the Portals4 MTL component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&param_priority);
ompi_mtl_portals4.eager_limit = 2 * 1024;
(void) mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version,
"eager_limit",
@ -211,6 +222,18 @@ ompi_mtl_portals4_component_open(void)
return OMPI_SUCCESS;
}
/**
 * MCA query hook for the Portals4 MTL component.
 *
 * Publishes the component's selection priority and its module.  The
 * working assumption is that if the Portals4 MTL was compiled in, the
 * user wants it, so the "priority" MCA variable is reported unchanged.
 *
 * @param[out] module    receives the address of ompi_mtl_portals4.base
 * @param[out] priority  receives the current value of param_priority
 *
 * @return OMPI_SUCCESS unconditionally.
 */
static int
ompi_mtl_portals4_component_query(mca_base_module_t **module, int *priority)
{
    *priority = param_priority;

    /* The query interface is expressed in terms of the generic MCA module
     * type, so convert explicitly rather than relying on an implicit
     * pointer conversion.
     * NOTE(review): assumes ompi_mtl_portals4.base is the MTL base module
     * embedded in the Portals4 module struct -- confirm against its
     * declaration. */
    *module = (mca_base_module_t *) &ompi_mtl_portals4.base;

    return OMPI_SUCCESS;
}
static int
ompi_mtl_portals4_component_close(void)

Просмотреть файл

@ -37,8 +37,11 @@
#include <sys/stat.h>
#include <unistd.h>
static int param_priority;
static int ompi_mtl_psm_component_open(void);
static int ompi_mtl_psm_component_close(void);
static int ompi_mtl_psm_component_query(mca_base_module_t **module, int *priority);
static int ompi_mtl_psm_component_register(void);
static mca_mtl_base_module_t* ompi_mtl_psm_component_init( bool enable_progress_threads,
@ -59,8 +62,8 @@ mca_mtl_psm_component_t mca_mtl_psm_component = {
OMPI_RELEASE_VERSION, /* MCA component release version */
ompi_mtl_psm_component_open, /* component open */
ompi_mtl_psm_component_close, /* component close */
NULL,
ompi_mtl_psm_component_register
ompi_mtl_psm_component_query, /* component close */
ompi_mtl_psm_component_register
},
{
/* The component is not checkpoint ready */
@ -86,6 +89,15 @@ ompi_mtl_psm_component_register(void)
mca_base_var_enum_t *new_enum;
#endif
param_priority = 100;
(void) mca_base_component_var_register (&mca_mtl_psm_component.super.mtl_version,
"priority", "Priority of the PSM MTL component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&param_priority);
ompi_mtl_psm.connect_timeout = 180;
(void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version,
"connect_timeout",
@ -180,6 +192,19 @@ ompi_mtl_psm_component_open(void)
}
}
/**
 * MCA query hook for the PSM MTL component.
 *
 * Publishes the component's selection priority and its module.  Reaching
 * this function is taken to mean that PSM is usable, so the (by default
 * high) value of the "priority" MCA variable is reported unchanged.
 *
 * @param[out] module    receives the address of ompi_mtl_psm.super
 * @param[out] priority  receives the current value of param_priority
 *
 * @return OMPI_SUCCESS unconditionally.
 */
static int
ompi_mtl_psm_component_query(mca_base_module_t **module, int *priority)
{
    *priority = param_priority;

    /* The query interface is expressed in terms of the generic MCA module
     * type, so convert explicitly rather than relying on an implicit
     * pointer conversion.
     * NOTE(review): assumes ompi_mtl_psm.super is the MTL base module
     * embedded in the PSM module struct -- confirm against its declaration. */
    *module = (mca_base_module_t *) &ompi_mtl_psm.super;

    return OMPI_SUCCESS;
}
static int
ompi_mtl_psm_component_close(void)
{