1
1

osc/rdma: add an mca parameter to list MTLs for which osc pt2pt should have

higher priority than rdma and default to psm2.

Context: the Intel Omni-path driver (hfi1) has verbs support, so the openib
btl is available to use. However, at a bad performance. Without this
change osc rdma using btl openib is the default choice when running on Intel
Omni-path, with a lower performance than osc pt2pt over mtl psm2.

Signed-off-by: Matias A Cabral <matias.a.cabral@intel.com>
Этот коммит содержится в:
Matias A Cabral 2017-11-08 16:26:34 -08:00
родитель 6d9bd1746f
Коммит 80c8858c5a

Просмотреть файл

@ -15,7 +15,7 @@
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012-2015 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2015 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2015 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2015 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2016-2017 IBM Corporation. All rights reserved. * Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -55,6 +55,7 @@
#include "opal/mca/btl/base/base.h" #include "opal/mca/btl/base/base.h"
#include "opal/mca/base/mca_base_pvar.h" #include "opal/mca/base/mca_base_pvar.h"
#include "ompi/mca/bml/base/base.h" #include "ompi/mca/bml/base/base.h"
#include "ompi/mca/mtl/base/base.h"
static int ompi_osc_rdma_component_register (void); static int ompi_osc_rdma_component_register (void);
static int ompi_osc_rdma_component_init (bool enable_progress_threads, bool enable_mpi_threads); static int ompi_osc_rdma_component_init (bool enable_progress_threads, bool enable_mpi_threads);
@ -70,10 +71,12 @@ static int ompi_osc_rdma_set_info (struct ompi_win_t *win, struct opal_info_t *i
static int ompi_osc_rdma_get_info (struct ompi_win_t *win, struct opal_info_t **info_used); static int ompi_osc_rdma_get_info (struct ompi_win_t *win, struct opal_info_t **info_used);
static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_base_module_t **btl); static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_base_module_t **btl);
static int ompi_osc_rdma_query_mtls (void);
static char* ompi_osc_rdma_set_no_lock_info(opal_infosubscriber_t *obj, char *key, char *value); static char* ompi_osc_rdma_set_no_lock_info(opal_infosubscriber_t *obj, char *key, char *value);
static char *ompi_osc_rdma_btl_names; static char *ompi_osc_rdma_btl_names;
static char *ompi_osc_rdma_mtl_names;
ompi_osc_rdma_component_t mca_osc_rdma_component = { ompi_osc_rdma_component_t mca_osc_rdma_component = {
.super = { .super = {
@ -223,6 +226,13 @@ static int ompi_osc_rdma_component_register (void)
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_GROUP, &ompi_osc_rdma_btl_names); MCA_BASE_VAR_SCOPE_GROUP, &ompi_osc_rdma_btl_names);
ompi_osc_rdma_mtl_names = "psm2";
(void) mca_base_component_var_register (&mca_osc_rdma_component.super.osc_version, "mtls",
"Comma-delimited list of MTL component names to lower the priority of rdma "
"osc component favoring pt2pt osc (default: psm2)",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_GROUP, &ompi_osc_rdma_mtl_names);
/* register performance variables */ /* register performance variables */
@ -340,6 +350,10 @@ static int ompi_osc_rdma_component_query (struct ompi_win_t *win, void **base, s
} }
#endif /* OPAL_CUDA_SUPPORT */ #endif /* OPAL_CUDA_SUPPORT */
if (OMPI_SUCCESS == ompi_osc_rdma_query_mtls ()) {
return 5; /* this has to be lower that osc pt2pt default priority */
}
if (OMPI_SUCCESS != ompi_osc_rdma_query_btls (comm, NULL)) { if (OMPI_SUCCESS != ompi_osc_rdma_query_btls (comm, NULL)) {
return -1; return -1;
} }
@ -709,6 +723,21 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
return ret; return ret;
} }
static int ompi_osc_rdma_query_mtls (void)
{
char **mtls_to_use;
mtls_to_use = opal_argv_split (ompi_osc_rdma_mtl_names, ',');
if (mtls_to_use && ompi_mtl_base_selected_component) {
for (int i = 0 ; mtls_to_use[i] ; ++i) {
if (0 == strcmp (mtls_to_use[i], ompi_mtl_base_selected_component->mtl_version.mca_component_name)) {
return OMPI_SUCCESS;
}
}
}
return -1;
}
static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_base_module_t **btl) static int ompi_osc_rdma_query_btls (ompi_communicator_t *comm, struct mca_btl_base_module_t **btl)
{ {
struct mca_btl_base_module_t **possible_btls = NULL; struct mca_btl_base_module_t **possible_btls = NULL;