From eb977de5e911c88b28760fb4afcc513caea3055e Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Wed, 28 Jan 2015 10:06:13 -0700 Subject: [PATCH] mtl: add query method to mtl components Switch to using the query/priority method for selecting MTLs. This switch was motivated by the fact that now on some platforms, it's possible for multiple MTLs to be initializable, but only one MTL should be selected. In addition, there is a complication with the PSM and OFI (with PSM provider) MTLs owing to the fact that they cannot both initialize the underlying PSM context, i.e. only one call to psm_init is allowed per process. The mxm component has not been compiled as the author doesn't currently have access to a system with a recent enough mxm installed to allow for a compile. The portals4, ofi, and psm components have been checked for compilation. The ofi and psm components have been checked for runtime correctness on an intel/qlogic system with up-to-date PSM installed. --- ompi/mca/mtl/base/mtl_base_frame.c | 74 ++++++++----------- ompi/mca/mtl/mxm/mtl_mxm_component.c | 26 ++++++- ompi/mca/mtl/ofi/mtl_ofi_component.c | 20 ++++- .../mca/mtl/portals4/mtl_portals4_component.c | 25 ++++++- ompi/mca/mtl/psm/mtl_psm_component.c | 29 +++++++- 5 files changed, 124 insertions(+), 50 deletions(-) diff --git a/ompi/mca/mtl/base/mtl_base_frame.c b/ompi/mca/mtl/base/mtl_base_frame.c index 1d15625e71..3ab250da3f 100644 --- a/ompi/mca/mtl/base/mtl_base_frame.c +++ b/ompi/mca/mtl/base/mtl_base_frame.c @@ -49,64 +49,50 @@ int ompi_mtl_base_select(bool enable_progress_threads, bool enable_mpi_threads) { - opal_list_item_t *item = NULL; - mca_base_component_list_item_t *cli = NULL; - mca_mtl_base_component_t *component = NULL; - mca_mtl_base_module_t *module = NULL; + int ret = OMPI_ERR_NOT_FOUND; + mca_mtl_base_component_t *best_component = NULL; + mca_mtl_base_module_t *best_module = NULL; - /* Traverse the list of available components; call their init - functions. 
*/ - for (item = opal_list_get_first(&ompi_mtl_base_framework.framework_components); - opal_list_get_end(&ompi_mtl_base_framework.framework_components) != item; - item = opal_list_get_next(item) ) { - cli = (mca_base_component_list_item_t *) item; - component = (mca_mtl_base_component_t *) cli->cli_component; - - if (NULL == component->mtl_init) { - opal_output_verbose( 10, ompi_mtl_base_framework.framework_output, - "select: no init function; ignoring component %s", - component->mtl_version.mca_component_name ); - continue; - } - opal_output_verbose( 10, ompi_mtl_base_framework.framework_output, - "select: initializing %s component %s", - component->mtl_version.mca_type_name, - component->mtl_version.mca_component_name ); - module = component->mtl_init(enable_progress_threads, - enable_mpi_threads); - if (NULL == module) { - opal_output_verbose( 10, ompi_mtl_base_framework.framework_output, - "select: init returned failure for component %s", - component->mtl_version.mca_component_name ); - continue; - } - opal_output_verbose( 10, ompi_mtl_base_framework.framework_output, - "select: init returned success"); - - ompi_mtl_base_selected_component = component; - ompi_mtl = module; + /* + * Select the best component + */ + if( OPAL_SUCCESS != mca_base_select("mtl", ompi_mtl_base_framework.framework_output, + &ompi_mtl_base_framework.framework_components, + (mca_base_module_t **) &best_module, + (mca_base_component_t **) &best_component) ) { + /* notify caller that no available component found */ + return ret; } - /* This base function closes, unloads, and removes from the - available list all unselected components. The available list will - contain only the selected component. 
*/ - if (ompi_mtl_base_selected_component) { - (void) mca_base_framework_components_close(&ompi_mtl_base_framework, - (mca_base_component_t *) ompi_mtl_base_selected_component); + opal_output_verbose( 10, ompi_mtl_base_framework.framework_output, + "select: initializing %s component %s", + best_component->mtl_version.mca_type_name, + best_component->mtl_version.mca_component_name ); + + if (NULL == best_component->mtl_init(enable_progress_threads, + enable_mpi_threads)) { + opal_output_verbose( 10, ompi_mtl_base_framework.framework_output, + "select: init returned failure for component %s", + best_component->mtl_version.mca_component_name ); + } else { + opal_output_verbose( 10, ompi_mtl_base_framework.framework_output, + "select: init returned success"); + ompi_mtl_base_selected_component = best_component; + ompi_mtl = best_module; + ret = OMPI_SUCCESS; } /* All done */ - if (NULL == module) { + if (NULL == ompi_mtl) { opal_output_verbose( 10, ompi_mtl_base_framework.framework_output, "select: no component selected"); - return OMPI_ERR_NOT_FOUND; } else { opal_output_verbose( 10, ompi_mtl_base_framework.framework_output, "select: component %s selected", ompi_mtl_base_selected_component-> mtl_version.mca_component_name ); - return OMPI_SUCCESS; } + return ret; } diff --git a/ompi/mca/mtl/mxm/mtl_mxm_component.c b/ompi/mca/mtl/mxm/mtl_mxm_component.c index 0d7291ef70..49a5d4ec98 100644 --- a/ompi/mca/mtl/mxm/mtl_mxm_component.c +++ b/ompi/mca/mtl/mxm/mtl_mxm_component.c @@ -24,9 +24,12 @@ #include static int ompi_mtl_mxm_component_open(void); +static int ompi_mtl_mxm_component_query(mca_base_module_t **module, int *priority); static int ompi_mtl_mxm_component_close(void); static int ompi_mtl_mxm_component_register(void); +static int param_priority; + int mca_mtl_mxm_output = -1; @@ -48,7 +51,7 @@ mca_mtl_mxm_component_t mca_mtl_mxm_component = { OMPI_RELEASE_VERSION, /* MCA component release version */ ompi_mtl_mxm_component_open, /* component open */ 
ompi_mtl_mxm_component_close, /* component close */ - NULL, + ompi_mtl_mxm_component_query, /* component query */ ompi_mtl_mxm_component_register }, { @@ -125,6 +128,15 @@ static int ompi_mtl_mxm_component_register(void) free(runtime_version); #endif + param_priority = 100; + (void) mca_base_component_var_register (c, + "priority", "Priority of the MXM MTL component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &param_priority); + + #if MXM_API >= MXM_VERSION(3,1) { unsigned long cur_ver = mxm_get_version(); @@ -245,6 +257,18 @@ static int ompi_mtl_mxm_component_open(void) return OMPI_SUCCESS; } +static int ompi_mtl_mxm_component_query(mca_base_module_t **module, int *priority) +{ + + /* + * if we get here it means that mxm is available so give high priority + */ + + *priority = param_priority; + *module = &ompi_mtl_mxm.super; + return OMPI_SUCCESS; +} + static int ompi_mtl_mxm_component_close(void) { if (ompi_mtl_mxm.mxm_context != NULL) { diff --git a/ompi/mca/mtl/ofi/mtl_ofi_component.c b/ompi/mca/mtl/ofi/mtl_ofi_component.c index c71f88a804..1545afcc58 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_component.c +++ b/ompi/mca/mtl/ofi/mtl_ofi_component.c @@ -21,6 +21,7 @@ #include "mtl_ofi_message.h" static int ompi_mtl_ofi_component_open(void); +static int ompi_mtl_ofi_component_query(mca_base_module_t **module, int *priority); static int ompi_mtl_ofi_component_close(void); static int ompi_mtl_ofi_component_register(void); @@ -28,6 +29,7 @@ static mca_mtl_base_module_t* ompi_mtl_ofi_component_init(bool enable_progress_threads, bool enable_mpi_threads); +static int param_priority; mca_mtl_ofi_component_t mca_mtl_ofi_component = { { @@ -44,7 +46,7 @@ mca_mtl_ofi_component_t mca_mtl_ofi_component = { OMPI_RELEASE_VERSION, /* MCA component release version */ ompi_mtl_ofi_component_open, /* component open */ ompi_mtl_ofi_component_close, /* component close */ - NULL, + ompi_mtl_ofi_component_query, ompi_mtl_ofi_component_register }, { 
@@ -59,7 +61,6 @@ mca_mtl_ofi_component_t mca_mtl_ofi_component = { static int ompi_mtl_ofi_component_register(void) { - ompi_mtl_ofi.provider_name = NULL; (void) mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version, "provider", @@ -68,10 +69,18 @@ ompi_mtl_ofi_component_register(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &ompi_mtl_ofi.provider_name); + param_priority = 10; /* for now give a lower priority than the psm mtl */ + mca_base_component_var_register (&mca_mtl_ofi_component.super.mtl_version, + "priority", "Priority of the OFI MTL component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &param_priority); return OMPI_SUCCESS; } + static int ompi_mtl_ofi_component_open(void) { @@ -93,6 +102,13 @@ ompi_mtl_ofi_component_open(void) return OMPI_SUCCESS; } +static int +ompi_mtl_ofi_component_query(mca_base_module_t **module, int *priority) +{ + *priority = param_priority; + *module = &ompi_mtl_ofi.base; + return OMPI_SUCCESS; +} static int ompi_mtl_ofi_component_close(void) diff --git a/ompi/mca/mtl/portals4/mtl_portals4_component.c b/ompi/mca/mtl/portals4/mtl_portals4_component.c index 5b08e477ce..bccd4ec746 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_component.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_component.c @@ -29,9 +29,12 @@ #include "mtl_portals4_recv_short.h" #include "mtl_portals4_message.h" +static int param_priority; + static int ompi_mtl_portals4_component_register(void); static int ompi_mtl_portals4_component_open(void); static int ompi_mtl_portals4_component_close(void); +static int ompi_mtl_portals4_component_query(mca_base_module_t **module, int *priority); static mca_mtl_base_module_t* ompi_mtl_portals4_component_init(bool enable_progress_threads, bool enable_mpi_threads); @@ -51,8 +54,8 @@ mca_mtl_base_component_2_0_0_t mca_mtl_portals4_component = { OMPI_MINOR_VERSION, /* MCA component minor version */ OMPI_RELEASE_VERSION, /* MCA component release version */ 
ompi_mtl_portals4_component_open, /* component open */ + ompi_mtl_portals4_component_query, /* component close */ ompi_mtl_portals4_component_close, /* component close */ - NULL, ompi_mtl_portals4_component_register }, { @@ -75,6 +78,14 @@ ompi_mtl_portals4_component_register(void) mca_base_var_enum_t *new_enum; int ret; + param_priority = 10; + (void) mca_base_component_var_register (&mca_mtl_portals4_component.mtl_version, + "priority", "Priority of the Portals4 MTL component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &param_priority); + ompi_mtl_portals4.eager_limit = 2 * 1024; (void) mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version, "eager_limit", @@ -211,6 +222,18 @@ ompi_mtl_portals4_component_open(void) return OMPI_SUCCESS; } +static int +ompi_mtl_portals4_component_query(mca_base_module_t **module, int *priority) +{ + /* + * assume if portals4 MTL was compiled, the user wants it + */ + + *priority = param_priority; + *module = &ompi_mtl_portals4.base; + return OMPI_SUCCESS; +} + static int ompi_mtl_portals4_component_close(void) diff --git a/ompi/mca/mtl/psm/mtl_psm_component.c b/ompi/mca/mtl/psm/mtl_psm_component.c index 1b6f77c9e1..2dfe4ac395 100644 --- a/ompi/mca/mtl/psm/mtl_psm_component.c +++ b/ompi/mca/mtl/psm/mtl_psm_component.c @@ -37,8 +37,11 @@ #include #include +static int param_priority; + static int ompi_mtl_psm_component_open(void); static int ompi_mtl_psm_component_close(void); +static int ompi_mtl_psm_component_query(mca_base_module_t **module, int *priority); static int ompi_mtl_psm_component_register(void); static mca_mtl_base_module_t* ompi_mtl_psm_component_init( bool enable_progress_threads, @@ -59,8 +62,8 @@ mca_mtl_psm_component_t mca_mtl_psm_component = { OMPI_RELEASE_VERSION, /* MCA component release version */ ompi_mtl_psm_component_open, /* component open */ ompi_mtl_psm_component_close, /* component close */ - NULL, - ompi_mtl_psm_component_register + 
ompi_mtl_psm_component_query, /* component query */ + ompi_mtl_psm_component_register }, { /* The component is not checkpoint ready */ @@ -86,6 +89,15 @@ ompi_mtl_psm_component_register(void) mca_base_var_enum_t *new_enum; #endif + + param_priority = 100; + (void) mca_base_component_var_register (&mca_mtl_psm_component.super.mtl_version, + "priority", "Priority of the PSM MTL component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &param_priority); + ompi_mtl_psm.connect_timeout = 180; (void) mca_base_component_var_register(&mca_mtl_psm_component.super.mtl_version, "connect_timeout", @@ -180,6 +192,19 @@ ompi_mtl_psm_component_open(void) } } +static int +ompi_mtl_psm_component_query(mca_base_module_t **module, int *priority) +{ + /* + * if we get here it means that PSM is available so give high priority + */ + + *priority = param_priority; + *module = &ompi_mtl_psm.super; + return OMPI_SUCCESS; +} + + static int ompi_mtl_psm_component_close(void) {