From 8fcf9cee3ba5da993cbd32894ad6c4c963b1e1c5 Mon Sep 17 00:00:00 2001 From: Howard Pritchard Date: Tue, 14 Apr 2020 09:19:56 -0600 Subject: [PATCH] add a common ofi whitelist/blacklist also add common verbose variable. Note the verbosity thing is a little tricky owing to the way the MCA frameworks and components are registered and initialized. The BTLs are registered/initialized prior to the MTL components even getting registered. Here's the change in ofi mtl mca parameters. Before commit: MCA mtl ofi: parameter "mtl_ofi_provider_include" (current value: "psm2", data source: environment, level: 1 user/basic, type: string) Comma-delimited list of OFI providers that are considered for use (e.g., "psm,psm2"; an empty value means that all providers will be considered). Mutually exclusive with mtl_ofi_provider_exclude. MCA mtl ofi: parameter "mtl_ofi_provider_exclude" (current value: "shm,sockets,tcp,udp,rstream", data source: default, level: 1 user/basic, type: string) Comma-delimited list of OFI providers that are not considered for use (default: "sockets,mxm"; empty value means that all providers will be considered). Mutually exclusive with mtl_ofi_provider_include. After commit: MCA btl ofi: parameter "btl_ofi_provider_include" (current value: "", data source: default, level: 1 user/basic, type: string, synonym of: opal_common_ofi_provider_include) Comma-delimited list of OFI providers that are considered for use (e.g., "psm,psm2"; an empty value means that all providers will be considered). Mutually exclusive with mtl_ofi_provider_exclude. MCA btl ofi: parameter "btl_ofi_provider_exclude" (current value: "shm,sockets,tcp,udp,rstream", data source: default, level: 1 user/basic, type: string, synonym of: opal_common_ofi_provider_exclude) Comma-delimited list of OFI providers that are not considered for use (default: "sockets,mxm"; empty value means that all providers will be considered). Mutually exclusive with mtl_ofi_provider_include. 
MCA mtl ofi: parameter "mtl_ofi_provider_exclude" (current value: "shm,sockets,tcp,udp,rstream", data source: default, level: 1 user/basic, type: string, synonym of: opal_common_ofi_provider_exclude) Comma-delimited list of OFI providers that are not considered for use (default: "sockets,mxm"; empty value means that all providers will be considered). Mutually exclusive with mtl_ofi_provider_include. MCA mtl ofi: parameter "mtl_ofi_verbose" (current value: "0", data source: default, level: 3 user/all, type: int, synonym of: opal_common_ofi_verbose) related to #7755 Signed-off-by: Howard Pritchard (cherry picked from commit 9f1081a07ac3c7b7277a27277ed970ed713207c9) (cherry picked from commit 45b643d0cfa46f1abb9a5f43cf0ff304cf6a5fea) --- ompi/mca/mtl/ofi/Makefile.am | 2 + ompi/mca/mtl/ofi/mtl_ofi.c | 6 +- ompi/mca/mtl/ofi/mtl_ofi.h | 13 ++-- ompi/mca/mtl/ofi/mtl_ofi_component.c | 73 ++++++++----------- opal/mca/btl/ofi/Makefile.am | 2 + opal/mca/btl/ofi/btl_ofi_component.c | 41 ++++++----- opal/mca/common/ofi/common_ofi.c | 104 +++++++++++++++++++++++++-- opal/mca/common/ofi/common_ofi.h | 27 ++++++- 8 files changed, 191 insertions(+), 77 deletions(-) diff --git a/ompi/mca/mtl/ofi/Makefile.am b/ompi/mca/mtl/ofi/Makefile.am index 3ad6ea6ede..832e2b8e6e 100644 --- a/ompi/mca/mtl/ofi/Makefile.am +++ b/ompi/mca/mtl/ofi/Makefile.am @@ -7,6 +7,8 @@ # Copyright (c) 2017 IBM Corporation. All rights reserved. # Copyright (c) 2019 Research Organization for Information Science # and Technology (RIST). All rights reserved. +# Copyright (c) 2020 Triad National Security, LLC. All rights +# reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow diff --git a/ompi/mca/mtl/ofi/mtl_ofi.c b/ompi/mca/mtl/ofi/mtl_ofi.c index 6c679b88b9..9d4a0d8274 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi.c +++ b/ompi/mca/mtl/ofi/mtl_ofi.c @@ -112,7 +112,7 @@ ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl, */ count = fi_av_insert(ompi_mtl_ofi.av, ep_names, nprocs, fi_addrs, 0, NULL); if ((count < 0) || (nprocs != (size_t)count)) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, opal_common_ofi.output, "%s:%d: fi_av_insert failed: %d\n", __FILE__, __LINE__, count); ret = OMPI_ERROR; @@ -125,7 +125,7 @@ ompi_mtl_ofi_add_procs(struct mca_mtl_base_module_t *mtl, for (i = 0; i < nprocs; ++i) { endpoint = OBJ_NEW(mca_mtl_ofi_endpoint_t); if (NULL == endpoint) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, opal_common_ofi.output, "%s:%d: mtl/ofi: could not allocate endpoint" " structure\n", __FILE__, __LINE__); @@ -170,7 +170,7 @@ ompi_mtl_ofi_del_procs(struct mca_mtl_base_module_t *mtl, endpoint = procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]; ret = fi_av_remove(ompi_mtl_ofi.av, &endpoint->peer_fiaddr, 1, 0); if (ret) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, opal_common_ofi.output, "%s:%d: fi_av_remove failed: %s\n", __FILE__, __LINE__, fi_strerror(errno)); return ret; } diff --git a/ompi/mca/mtl/ofi/mtl_ofi.h b/ompi/mca/mtl/ofi/mtl_ofi.h index 183d80108f..44e107692c 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi.h +++ b/ompi/mca/mtl/ofi/mtl_ofi.h @@ -2,7 +2,7 @@ * Copyright (c) 2013-2018 Intel, Inc. All rights reserved * Copyright (c) 2017 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2019 Triad National Security, LLC. All rights + * Copyright (c) 2019-2020 Triad National Security, LLC. All rights * reserved. * Copyright (c) 2018-2020 Amazon.com, Inc. or its affiliates. All rights * reserved. 
@@ -38,6 +38,7 @@ #include "ompi/mca/mtl/base/base.h" #include "ompi/mca/mtl/base/mtl_base_datatype.h" #include "ompi/message/message.h" +#include "opal/mca/common/ofi/common_ofi.h" #include "mtl_ofi_opt.h" #include "mtl_ofi_types.h" @@ -235,7 +236,7 @@ ompi_mtl_ofi_progress(void) #define MTL_OFI_LOG_FI_ERR(err, string) \ do { \ - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, \ + opal_output_verbose(1, opal_common_ofi.output, \ "%s:%d:%s: %s\n", \ __FILE__, __LINE__, string, fi_strerror(-err)); \ } while(0); @@ -377,7 +378,7 @@ ompi_mtl_ofi_ssend_recv(ompi_mtl_ofi_request_t *ack_req, 0, /* Exact match, no ignore bits */ (void *) &ack_req->ctx), ret); if (OPAL_UNLIKELY(0 > ret)) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, opal_common_ofi.output, "%s:%d: fi_trecv failed: %s(%zd)", __FILE__, __LINE__, fi_strerror(-ret), ret); free(ack_req); @@ -663,7 +664,7 @@ ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc, status->_ucount = wc->len; if (OPAL_UNLIKELY(wc->len > ofi_req->length)) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, opal_common_ofi.output, "truncate expected: %ld %ld", wc->len, ofi_req->length); status->MPI_ERROR = MPI_ERR_TRUNCATE; @@ -677,7 +678,7 @@ ompi_mtl_ofi_recv_callback(struct fi_cq_tagged_entry *wc, ofi_req->buffer, wc->len); if (OPAL_UNLIKELY(OMPI_SUCCESS != ompi_ret)) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, opal_common_ofi.output, "%s:%d: ompi_mtl_datatype_unpack failed: %d", __FILE__, __LINE__, ompi_ret); status->MPI_ERROR = ompi_ret; @@ -1330,7 +1331,7 @@ init_regular_ep: if (MPI_COMM_WORLD == comm) { ret = opal_progress_register(ompi_mtl_ofi_progress_no_inline); if (OMPI_SUCCESS != ret) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, opal_common_ofi.output, "%s:%d: opal_progress_register failed: %d\n", __FILE__, __LINE__, 
ret); goto init_error; diff --git a/ompi/mca/mtl/ofi/mtl_ofi_component.c b/ompi/mca/mtl/ofi/mtl_ofi_component.c index 80cf17dbb2..1e32d1af66 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_component.c +++ b/ompi/mca/mtl/ofi/mtl_ofi_component.c @@ -5,6 +5,9 @@ * Copyright (c) 2014-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2020 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -27,8 +30,6 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, bool enable_mpi_threads); static int param_priority; -static char *prov_include; -static char *prov_exclude; static int control_progress; static int data_progress; static int av_type; @@ -130,24 +131,6 @@ ompi_mtl_ofi_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, ¶m_priority); - prov_include = NULL; - mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version, - "provider_include", - "Comma-delimited list of OFI providers that are considered for use (e.g., \"psm,psm2\"; an empty value means that all providers will be considered). Mutually exclusive with mtl_ofi_provider_exclude.", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_1, - MCA_BASE_VAR_SCOPE_READONLY, - &prov_include); - - prov_exclude = "shm,sockets,tcp,udp,rstream"; - mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version, - "provider_exclude", - "Comma-delimited list of OFI providers that are not considered for use (default: \"sockets,mxm\"; empty value means that all providers will be considered). 
Mutually exclusive with mtl_ofi_provider_include.", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_1, - MCA_BASE_VAR_SCOPE_READONLY, - &prov_exclude); - ompi_mtl_ofi.ofi_progress_event_count = MTL_OFI_MAX_PROG_EVENT_COUNT; opal_asprintf(&desc, "Max number of events to read each call to OFI progress (default: %d events will be read per OFI progress call)", ompi_mtl_ofi.ofi_progress_event_count); mca_base_component_var_register(&mca_mtl_ofi_component.super.mtl_version, @@ -267,6 +250,8 @@ ompi_mtl_ofi_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &ompi_mtl_ofi.num_ofi_contexts); + opal_common_ofi_register_mca_variables(&mca_mtl_ofi_component.super.mtl_version); + return OMPI_SUCCESS; } @@ -311,6 +296,7 @@ ompi_mtl_ofi_component_query(mca_base_module_t **module, int *priority) static int ompi_mtl_ofi_component_close(void) { + opal_common_ofi_mca_deregister(); return OMPI_SUCCESS; } @@ -349,7 +335,7 @@ select_ofi_provider(struct fi_info *providers, if (NULL != include_list) { while ((NULL != prov) && (!is_in_list(include_list, prov->fabric_attr->prov_name))) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, opal_common_ofi.output, "%s:%d: mtl:ofi: \"%s\" not in include list\n", __FILE__, __LINE__, prov->fabric_attr->prov_name); @@ -358,7 +344,7 @@ select_ofi_provider(struct fi_info *providers, } else if (NULL != exclude_list) { while ((NULL != prov) && (is_in_list(exclude_list, prov->fabric_attr->prov_name))) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, opal_common_ofi.output, "%s:%d: mtl:ofi: \"%s\" in exclude list\n", __FILE__, __LINE__, prov->fabric_attr->prov_name); @@ -366,7 +352,7 @@ select_ofi_provider(struct fi_info *providers, } } - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, opal_common_ofi.output, "%s:%d: mtl:ofi:prov: %s\n", __FILE__, __LINE__, (prov ? 
prov->fabric_attr->prov_name : "none")); @@ -396,6 +382,7 @@ select_ofi_provider(struct fi_info *providers, return prov; } + /* Check if FI_REMOTE_CQ_DATA is supported, if so send the source rank there * FI_DIRECTED_RECV is also needed so receives can discrimate the source */ @@ -481,7 +468,7 @@ ompi_mtl_ofi_define_tag_mode(int ofi_tag_mode, int *bits_for_cid) { do { \ ompi_mtl_ofi.comm_to_context = calloc(arr_size, sizeof(int)); \ if (OPAL_UNLIKELY(!ompi_mtl_ofi.comm_to_context)) { \ - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, \ + opal_output_verbose(1, opal_common_ofi.output, \ "%s:%d: alloc of comm_to_context array failed: %s\n",\ __FILE__, __LINE__, strerror(errno)); \ return ret; \ @@ -493,7 +480,7 @@ ompi_mtl_ofi_define_tag_mode(int ofi_tag_mode, int *bits_for_cid) { ompi_mtl_ofi.ofi_ctxt = (mca_mtl_ofi_context_t *) malloc(ompi_mtl_ofi.num_ofi_contexts * \ sizeof(mca_mtl_ofi_context_t)); \ if (OPAL_UNLIKELY(!ompi_mtl_ofi.ofi_ctxt)) { \ - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, \ + opal_output_verbose(1, opal_common_ofi.output, \ "%s:%d: alloc of ofi_ctxt array failed: %s\n", \ __FILE__, __LINE__, strerror(errno)); \ return ret; \ @@ -641,17 +628,19 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, int universe_size; char *univ_size_str; - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: mtl:ofi:provider_include = \"%s\"\n", - __FILE__, __LINE__, prov_include); - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: mtl:ofi:provider_exclude = \"%s\"\n", - __FILE__, __LINE__, prov_exclude); + opal_common_ofi_mca_register(); - if (NULL != prov_include) { - include_list = opal_argv_split(prov_include, ','); - } else if (NULL != prov_exclude) { - exclude_list = opal_argv_split(prov_exclude, ','); + opal_output_verbose(1, opal_common_ofi.output, + "%s:%d: mtl:ofi:provider_include = \"%s\"\n", + __FILE__, __LINE__, *opal_common_ofi.prov_include); + 
opal_output_verbose(1, opal_common_ofi.output, + "%s:%d: mtl:ofi:provider_exclude = \"%s\"\n", + __FILE__, __LINE__, *opal_common_ofi.prov_exclude); + + if (NULL != *opal_common_ofi.prov_include) { + include_list = opal_argv_split(*opal_common_ofi.prov_include, ','); + } else if (NULL != *opal_common_ofi.prov_exclude) { + exclude_list = opal_argv_split(*opal_common_ofi.prov_exclude, ','); } /** @@ -666,7 +655,7 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, */ hints = fi_allocinfo(); if (!hints) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, opal_common_ofi.output, "%s:%d: Could not allocate fi_info\n", __FILE__, __LINE__); goto error; @@ -752,7 +741,7 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, ret = fi_getinfo(fi_version, NULL, NULL, 0ULL, hints_dup, &providers); - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, opal_common_ofi.output, "%s:%d: EFA specific fi_getinfo(): %s\n", __FILE__, __LINE__, fi_strerror(-ret)); @@ -789,7 +778,7 @@ ompi_mtl_ofi_component_init(bool enable_progress_threads, hints, /* In: Hints to filter providers */ &providers); /* Out: List of matching providers */ - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, opal_common_ofi.output, "%s:%d: fi_getinfo(): %s\n", __FILE__, __LINE__, fi_strerror(-ret)); @@ -810,7 +799,7 @@ select_prov: */ prov = select_ofi_provider(providers, include_list, exclude_list); if (!prov) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, opal_common_ofi.output, "%s:%d: select_ofi_provider: no provider found\n", __FILE__, __LINE__); goto error; @@ -839,7 +828,7 @@ select_prov: /* Fallback to MTL_OFI_TAG_1 */ ompi_mtl_ofi_define_tag_mode(MTL_OFI_TAG_1, &ofi_tag_bits_for_cid); } else { /* MTL_OFI_TAG_FULL */ - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, 
opal_common_ofi.output, "%s:%d: OFI provider %s does not support FI_REMOTE_CQ_DATA\n", __FILE__, __LINE__, prov->fabric_attr->prov_name); goto error; @@ -919,7 +908,7 @@ select_prov: ompi_process_info.nodename, __FILE__, __LINE__); goto error; } else if (1 == sep_support_in_provider) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, opal_common_ofi.output, "%s:%d: Scalable EP supported in %s provider. Enabling in MTL.\n", __FILE__, __LINE__, prov->fabric_attr->prov_name); } @@ -1078,7 +1067,7 @@ select_prov: &ep_name, namelen); if (OMPI_SUCCESS != ret) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + opal_output_verbose(1, opal_common_ofi.output, "%s:%d: modex_send failed: %d\n", __FILE__, __LINE__, ret); goto error; diff --git a/opal/mca/btl/ofi/Makefile.am b/opal/mca/btl/ofi/Makefile.am index 65e7e3edf6..dd0c7caae9 100644 --- a/opal/mca/btl/ofi/Makefile.am +++ b/opal/mca/btl/ofi/Makefile.am @@ -13,6 +13,8 @@ # Copyright (c) 2013 NVIDIA Corporation. All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. # Copyright (c) 2018 Intel, inc. All rights reserved +# Copyright (c) 2020 Triad National Security, LLC. All rights +# reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/opal/mca/btl/ofi/btl_ofi_component.c b/opal/mca/btl/ofi/btl_ofi_component.c index c1675775c2..470f0d76ac 100644 --- a/opal/mca/btl/ofi/btl_ofi_component.c +++ b/opal/mca/btl/ofi/btl_ofi_component.c @@ -14,6 +14,9 @@ * reserved. * Copyright (c) 2018 Intel, Inc, All rights reserved * + * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2020 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -41,7 +44,6 @@ #define MCA_BTL_OFI_REQUESTED_MR_MODE (FI_MR_ALLOCATED | FI_MR_PROV_KEY | FI_MR_VIRT_ADDR) -static char *prov_include; static char *ofi_progress_mode; static bool disable_sep; static int mca_btl_ofi_init_device(struct fi_info *info); @@ -107,20 +109,6 @@ static int mca_btl_ofi_component_register(void) MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_ofi_component.mode); - /* fi_getinfo with prov_name == NULL means ALL provider. - * Since now we are using the first valid info returned, I'm not sure - * if we need to provide the support for comma limited provider list. */ - prov_include = NULL; - (void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version, - "provider_include", - "OFI provider that ofi btl will query for. This parameter only " - "accept ONE provider name. " - "(e.g., \"psm2\"; an empty value means that all providers will " - "be considered.", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_4, - MCA_BASE_VAR_SCOPE_READONLY, - &prov_include); mca_btl_ofi_component.num_cqe_read = MCA_BTL_OFI_NUM_CQE_READ; (void) mca_base_component_var_register(&mca_btl_ofi_component.super.btl_version, @@ -185,6 +173,8 @@ static int mca_btl_ofi_component_register(void) /* for now we want this component to lose to the MTL. */ module->super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 50; + opal_common_ofi_register_mca_variables(&mca_btl_ofi_component.super.btl_version); + return mca_btl_base_param_register (&mca_btl_ofi_component.super.btl_version, &module->super); } @@ -200,7 +190,8 @@ static int mca_btl_ofi_component_open(void) */ static int mca_btl_ofi_component_close(void) { - /* If we don't sleep, sockets provider freaks out. */ + opal_common_ofi_mca_deregister(); + /* If we don't sleep, sockets provider freaks out. 
Ummm this is a scary comment */ sleep(1); return OPAL_SUCCESS; } @@ -225,6 +216,7 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init (int *num_btl_modules, uint64_t progress_mode; unsigned resource_count = 0; struct mca_btl_base_module_t **base_modules; + char **include_list = NULL; BTL_VERBOSE(("initializing ofi btl")); @@ -247,6 +239,8 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init (int *num_btl_modules, struct fi_domain_attr domain_attr = {0}; uint64_t required_caps; + opal_common_ofi_mca_register(); + switch (mca_btl_ofi_component.mode) { case MCA_BTL_OFI_MODE_TWO_SIDED: @@ -266,8 +260,12 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init (int *num_btl_modules, break; } - /* Select the provider */ - fabric_attr.prov_name = prov_include; + fabric_attr.prov_name = NULL; + /* Select the provider - sort of. we just take first element in list for now */ + if (NULL != *opal_common_ofi.prov_include) { + include_list = opal_argv_split(*opal_common_ofi.prov_include, ','); + fabric_attr.prov_name = include_list[0]; + } domain_attr.mr_mode = MCA_BTL_OFI_REQUESTED_MR_MODE; @@ -312,9 +310,13 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init (int *num_btl_modules, rc = fi_getinfo(FI_VERSION(1, 5), NULL, NULL, 0, &hints, &info_list); if (0 != rc) { BTL_VERBOSE(("fi_getinfo failed with code %d: %s",rc, fi_strerror(-rc))); + if (NULL != include_list) { + opal_argv_free(include_list); + } return NULL; } + /* count the number of resources/ */ info = info_list; while(info) { @@ -356,6 +358,9 @@ static mca_btl_base_module_t **mca_btl_ofi_component_init (int *num_btl_modules, /* We are done with the returned info. 
*/ fi_freeinfo(info_list); + if (NULL != include_list) { + opal_argv_free(include_list); + } /* pass module array back to caller */ base_modules = calloc (mca_btl_ofi_component.module_count, sizeof (*base_modules)); diff --git a/opal/mca/common/ofi/common_ofi.c b/opal/mca/common/ofi/common_ofi.c index 074fb880ca..9d6cc8ade2 100644 --- a/opal/mca/common/ofi/common_ofi.c +++ b/opal/mca/common/ofi/common_ofi.c @@ -2,6 +2,8 @@ * Copyright (c) 2015 Intel, Inc. All rights reserved. * Copyright (c) 2017 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2020 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -9,22 +11,110 @@ * $HEADER$ */ -#include "opal_config.h" -#include "opal/constants.h" -#include "opal/mca/hwloc/base/base.h" #include #include #include "common_ofi.h" +#include "opal_config.h" +#include "opal/constants.h" +#include "opal/util/argv.h" +#include "opal/mca/base/mca_base_var.h" +#include "opal/mca/base/mca_base_framework.h" +#include "opal/mca/hwloc/base/base.h" -int mca_common_ofi_register_mca_variables(void) +OPAL_DECLSPEC opal_common_ofi_module_t opal_common_ofi = { + .prov_include = NULL, + .prov_exclude = NULL, + .registered = 0, + .verbose = 0 +}; + +static const char default_prov_exclude_list[] = "shm,sockets,tcp,udp,rstream"; + +OPAL_DECLSPEC int opal_common_ofi_register_mca_variables(const mca_base_component_t *component) { - if (fi_version() >= FI_VERSION(1,0)) { - return OPAL_SUCCESS; - } else { + static int registered = 0; + static int include_index; + static int exclude_index; + static int verbose_index; + + if (fi_version() < FI_VERSION(1,0)) { return OPAL_ERROR; } + + if (!registered) { + /* + * this monkey business is needed because of the way the MCA VARs stuff tries to handle pointers to strings when + * when destructing the MCA var database. 
If you don't do something like this,the MCA var framework will try + * to dereference a pointer which itself is no longer a valid address owing to having been previously dlclosed. + */ + opal_common_ofi.prov_include = (char **)malloc(sizeof(char *)); + *opal_common_ofi.prov_include = NULL; + include_index = mca_base_var_register("opal", "opal_common", "ofi", + "provider_include", + "Comma-delimited list of OFI providers that are considered for use (e.g., \"psm,psm2\"; an empty value means that all providers will be considered). Mutually exclusive with mtl_ofi_provider_exclude.", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, + OPAL_INFO_LVL_1, + MCA_BASE_VAR_SCOPE_READONLY, + opal_common_ofi.prov_include); + opal_common_ofi.prov_exclude = (char **)malloc(sizeof(char *)); + *opal_common_ofi.prov_exclude = strdup(default_prov_exclude_list); + exclude_index = mca_base_var_register("opal", "opal_common", "ofi", + "provider_exclude", + "Comma-delimited list of OFI providers that are not considered for use (default: \"sockets,mxm\"; empty value means that all providers will be considered). 
Mutually exclusive with mtl_ofi_provider_include.", + MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, + OPAL_INFO_LVL_1, + MCA_BASE_VAR_SCOPE_READONLY, + opal_common_ofi.prov_exclude); + verbose_index = mca_base_var_register("opal", "opal_common", "ofi", "verbose", + "Verbose level of the OFI components", + MCA_BASE_VAR_TYPE_INT, NULL, 0, + MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, + &opal_common_ofi.verbose); + registered = 1; + } + + if (component) { + mca_base_var_register_synonym(include_index, component->mca_project_name, + component->mca_type_name, + component->mca_component_name, + "provider_include", 0); + mca_base_var_register_synonym(exclude_index, component->mca_project_name, + component->mca_type_name, + component->mca_component_name, + "provider_exclude", 0); + mca_base_var_register_synonym(verbose_index, component->mca_project_name, + component->mca_type_name, + component->mca_component_name, + "verbose", 0); + } + + return OPAL_SUCCESS; +} + +OPAL_DECLSPEC void opal_common_ofi_mca_register(void) +{ + opal_common_ofi.registered++; + if (opal_common_ofi.registered > 1) { + opal_output_set_verbosity(opal_common_ofi.output, opal_common_ofi.verbose); + return; + } + + opal_common_ofi.output = opal_output_open(NULL); + opal_output_set_verbosity(opal_common_ofi.output, opal_common_ofi.verbose); +} + +OPAL_DECLSPEC void opal_common_ofi_mca_deregister(void) +{ + /* unregister only on last deregister */ + opal_common_ofi.registered--; + assert(opal_common_ofi.registered >= 0); + if (opal_common_ofi.registered) { + return; + } + opal_output_close(opal_common_ofi.output); } /* check that the tx attributes match */ diff --git a/opal/mca/common/ofi/common_ofi.h b/opal/mca/common/ofi/common_ofi.h index 30f9f4f88a..2745515dc6 100644 --- a/opal/mca/common/ofi/common_ofi.h +++ b/opal/mca/common/ofi/common_ofi.h @@ -3,6 +3,9 @@ * Copyright (c) 2015 Intel, Inc. All rights reserved. * Copyright (c) 2017 Los Alamos National Security, LLC. 
All rights * reserved. + * Copyright (c) 2020 Triad National Security, LLC. All rights + * reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -12,9 +15,31 @@ #ifndef OPAL_MCA_COMMON_OFI_H #define OPAL_MCA_COMMON_OFI_H + +#include "opal_config.h" +#include "opal/mca/base/mca_base_var.h" +#include "opal/mca/base/mca_base_framework.h" #include -OPAL_DECLSPEC int mca_common_ofi_register_mca_variables(void); +BEGIN_C_DECLS + +typedef struct opal_common_ofi_module { + char **prov_include; + char **prov_exclude; + int verbose; + int registered; + int output; +} opal_common_ofi_module_t; + +extern opal_common_ofi_module_t opal_common_ofi; + +OPAL_DECLSPEC int opal_common_ofi_register_mca_variables(const mca_base_component_t *component); +OPAL_DECLSPEC void opal_common_ofi_mca_register(void); +OPAL_DECLSPEC void opal_common_ofi_mca_deregister(void); +OPAL_DECLSPEC struct fi_info* opal_common_ofi_select_ofi_provider(struct fi_info *providers, + char *framework_name); + +END_C_DECLS struct fi_info* opal_mca_common_ofi_select_provider(struct fi_info *provider_list, int rank);