From 526775dfd7ad75c308532784de4fb3ffed25458f Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Thu, 26 Sep 2019 08:57:00 -0700 Subject: [PATCH 1/2] btl/uct: add support for OpenUCX v1.8 API changes OpenUCX broke the UCT API again in v1.8. This commit updates btl/uct to fix compilation with current OpenUCX master (future v1.8). Further changes will likely be needed for the final release. Signed-off-by: Nathan Hjelm --- opal/mca/btl/uct/btl_uct.h | 4 ++ opal/mca/btl/uct/btl_uct_amo.c | 4 +- opal/mca/btl/uct/btl_uct_component.c | 82 +++++++++++++++++++++++++++- opal/mca/btl/uct/btl_uct_rdma.c | 4 +- opal/mca/btl/uct/btl_uct_rdma.h | 14 +++++ opal/mca/btl/uct/btl_uct_tl.c | 6 ++ 6 files changed, 107 insertions(+), 7 deletions(-) diff --git a/opal/mca/btl/uct/btl_uct.h b/opal/mca/btl/uct/btl_uct.h index 3875679443..73640103c0 100644 --- a/opal/mca/btl/uct/btl_uct.h +++ b/opal/mca/btl/uct/btl_uct.h @@ -85,6 +85,10 @@ struct mca_btl_uct_module_t { /** array containing the am_tl and rdma_tl */ mca_btl_uct_tl_t *comm_tls[2]; +#if UCT_API > UCT_VERSION(1, 7) + uct_component_h uct_component; +#endif + /** registration cache */ mca_rcache_base_module_t *rcache; diff --git a/opal/mca/btl/uct/btl_uct_amo.c b/opal/mca/btl/uct/btl_uct_amo.c index f7d0232688..72398ce736 100644 --- a/opal/mca/btl/uct/btl_uct_amo.c +++ b/opal/mca/btl/uct/btl_uct_amo.c @@ -110,7 +110,7 @@ int mca_btl_uct_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end mca_btl_uct_uct_completion_release (comp); } - uct_rkey_release (&rkey); + mca_btl_uct_rkey_release (uct_btl, &rkey); return rc; } @@ -184,7 +184,7 @@ int mca_btl_uct_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_e mca_btl_uct_uct_completion_release (comp); } - uct_rkey_release (&rkey); + mca_btl_uct_rkey_release (uct_btl, &rkey); return rc; } diff --git a/opal/mca/btl/uct/btl_uct_component.c b/opal/mca/btl/uct/btl_uct_component.c index 830c262325..a4073208f8 100644 --- a/opal/mca/btl/uct/btl_uct_component.c +++ 
b/opal/mca/btl/uct/btl_uct_component.c @@ -316,7 +316,12 @@ ucs_status_t mca_btl_uct_am_handler (void *arg, void *data, size_t length, unsig return UCS_OK; } +#if UCT_API > UCT_VERSION(1, 7) +static int mca_btl_uct_component_process_uct_md (uct_component_h component, uct_md_resource_desc_t *md_desc, + char **allowed_ifaces) +#else static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc, char **allowed_ifaces) +#endif { mca_rcache_base_resources_t rcache_resources; uct_tl_resource_desc_t *tl_desc; @@ -350,8 +355,14 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc md = OBJ_NEW(mca_btl_uct_md_t); + +#if UCT_API > UCT_VERSION(1, 7) + uct_md_config_read (component, NULL, NULL, &uct_config); + uct_md_open (component, md_desc->md_name, uct_config, &md->uct_md); +#else uct_md_config_read (md_desc->md_name, NULL, NULL, &uct_config); uct_md_open (md_desc->md_name, uct_config, &md->uct_md); +#endif uct_config_release (uct_config); uct_md_query (md->uct_md, &md_attr); @@ -377,6 +388,10 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc return OPAL_ERR_NOT_AVAILABLE; } +#if UCT_API > UCT_VERSION(1, 7) + module->uct_component = component; +#endif + mca_btl_uct_component.modules[mca_btl_uct_component.module_count++] = module; /* NTH: a registration cache shouldn't be necessary when using UCT but there are measurable @@ -402,6 +417,42 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc return OPAL_SUCCESS; } +#if UCT_API > UCT_VERSION(1, 7) +static int mca_btl_uct_component_process_uct_component (uct_component_h component, char **allowed_ifaces) +{ + uct_component_attr_t attr = {.field_mask = UCT_COMPONENT_ATTR_FIELD_NAME | + UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT}; + ucs_status_t ucs_status; + int rc; + + ucs_status = uct_component_query (component, &attr); + if (UCS_OK != ucs_status) { + return OPAL_ERROR; + } + + BTL_VERBOSE(("processing uct component 
%s", attr.name)); + + attr.md_resources = calloc (attr.md_resource_count, sizeof (*attr.md_resources)); + attr.field_mask |= UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES; + ucs_status = uct_component_query (component, &attr); + if (UCS_OK != ucs_status) { + return OPAL_ERROR; + } + + for (int i = 0 ; i < attr.md_resource_count ; ++i) { + rc = mca_btl_uct_component_process_uct_md (component, attr.md_resources + i, + allowed_ifaces); + if (OPAL_SUCCESS != rc) { + break; + } + } + + free (attr.md_resources); + + return OPAL_SUCCESS; +} +#endif /* UCT_API > UCT_VERSION(1, 7) */ + /* * UCT component initialization: * (1) read interface list from kernel and compare against component parameters @@ -417,6 +468,7 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules, struct mca_btl_base_module_t **base_modules; uct_md_resource_desc_t *resources; unsigned resource_count; + ucs_status_t ucs_status; char **allowed_ifaces; int rc; @@ -433,10 +485,32 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules, return NULL; } - uct_query_md_resources (&resources, &resource_count); - mca_btl_uct_component.module_count = 0; +#if UCT_API > UCT_VERSION(1, 7) + uct_component_h *components; + unsigned num_components; + + ucs_status = uct_query_components(&components, &num_components); + if (UCS_OK != ucs_status) { + BTL_ERROR(("could not query UCT components")); + return NULL; + } + + /* generate all suitable btl modules */ + for (unsigned i = 0 ; i < num_components ; ++i) { + rc = mca_btl_uct_component_process_uct_component (components[i], allowed_ifaces); + if (OPAL_SUCCESS != rc) { + break; + } + } + + uct_release_component_list (components); + +#else /* UCT 1.6 and older */ + + uct_query_md_resources (&resources, &resource_count); + /* generate all suitable btl modules */ for (unsigned i = 0 ; i < resource_count ; ++i) { rc = mca_btl_uct_component_process_uct_md (resources + i, allowed_ifaces); @@ -445,9 +519,11 @@ static 
mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules, } } - opal_argv_free (allowed_ifaces); uct_release_md_resource_list (resources); +#endif /* UCT_API > UCT_VERSION(1, 7) */ + + opal_argv_free (allowed_ifaces); mca_btl_uct_modex_send (); /* pass module array back to caller */ diff --git a/opal/mca/btl/uct/btl_uct_rdma.c b/opal/mca/btl/uct/btl_uct_rdma.c index 2d2d1c3f04..9ee9530f26 100644 --- a/opal/mca/btl/uct/btl_uct_rdma.c +++ b/opal/mca/btl/uct/btl_uct_rdma.c @@ -132,7 +132,7 @@ int mca_btl_uct_get (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi BTL_VERBOSE(("get issued. status = %d", ucs_status)); - uct_rkey_release (&rkey); + mca_btl_uct_rkey_release (uct_btl, &rkey); return OPAL_LIKELY(UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERR_RESOURCE_BUSY; } @@ -237,7 +237,7 @@ int mca_btl_uct_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi mca_btl_uct_uct_completion_release (comp); } - uct_rkey_release (&rkey); + mca_btl_uct_rkey_release (uct_btl, &rkey); return OPAL_LIKELY(UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERR_RESOURCE_BUSY; } diff --git a/opal/mca/btl/uct/btl_uct_rdma.h b/opal/mca/btl/uct/btl_uct_rdma.h index e9b0d6b19d..609fec91f5 100644 --- a/opal/mca/btl/uct/btl_uct_rdma.h +++ b/opal/mca/btl/uct/btl_uct_rdma.h @@ -55,8 +55,22 @@ static inline int mca_btl_uct_get_rkey (mca_btl_uct_module_t *module, return rc; } +#if UCT_API > UCT_VERSION(1, 7) + ucs_status = uct_rkey_unpack (module->uct_component, (void *) remote_handle, rkey); +#else ucs_status = uct_rkey_unpack ((void *) remote_handle, rkey); +#endif return (UCS_OK == ucs_status) ? 
OPAL_SUCCESS : OPAL_ERROR; } +static inline void mca_btl_uct_rkey_release (mca_btl_uct_module_t *uct_btl, uct_rkey_bundle_t *rkey) +{ +#if UCT_API > UCT_VERSION(1, 7) + uct_rkey_release (uct_btl->uct_component, rkey); +#else + (void) uct_btl; + uct_rkey_release (rkey); +#endif +} + #endif /* !defined(BTL_UCT_RDMA_H) */ diff --git a/opal/mca/btl/uct/btl_uct_tl.c b/opal/mca/btl/uct/btl_uct_tl.c index a711a41ce9..dcf00f2352 100644 --- a/opal/mca/btl/uct/btl_uct_tl.c +++ b/opal/mca/btl/uct/btl_uct_tl.c @@ -516,7 +516,13 @@ static int mca_btl_uct_evaluate_tl (mca_btl_uct_module_t *module, mca_btl_uct_tl * come up with a better estimate. */ /* UCT bandwidth is in bytes/sec, BTL is in MB/sec */ +#if UCT_API > UCT_VERSION(1, 7) + module->super.btl_bandwidth = (uint32_t) ((MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth.dedicated + + MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth.shared / + (opal_process_info.num_local_peers + 1)) / 1048576.0); +#else module->super.btl_bandwidth = (uint32_t) (MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth / 1048576.0); +#endif /* TODO -- figure out how to translate UCT latency to us */ module->super.btl_latency = 1; } From 8473a66466e28f5b6e36f6debeac6e536d9e132b Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Fri, 27 Sep 2019 12:30:25 -0700 Subject: [PATCH 2/2] btl/uct: fix bug when using a transport without zero-copy This commit fixes a crash that can occur if a transport is usable but doesn't have zero-copy support. In this case, do not attempt to use zero-copy, and base the max send size on the bcopy limit. 
Signed-off-by: Nathan Hjelm --- opal/mca/btl/uct/btl_uct_am.c | 3 ++- opal/mca/btl/uct/btl_uct_tl.c | 10 ++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/opal/mca/btl/uct/btl_uct_am.c b/opal/mca/btl/uct/btl_uct_am.c index 90ea28eed5..8f0d29bdd3 100644 --- a/opal/mca/btl/uct/btl_uct_am.c +++ b/opal/mca/btl/uct/btl_uct_am.c @@ -175,7 +175,8 @@ int mca_btl_uct_send_frag (mca_btl_uct_module_t *uct_btl, mca_btl_uct_base_frag_ if (!context->in_am_callback) { mca_btl_uct_context_lock (context); /* attempt to post the fragment */ - if (NULL != frag->base.super.registration) { + if (NULL != frag->base.super.registration && + (context->uct_iface_attr.cap.flags & UCT_IFACE_FLAG_AM_ZCOPY)) { frag->comp.dev_context = context; ucs_status = uct_ep_am_zcopy (ep_handle, MCA_BTL_UCT_FRAG, &frag->header, sizeof (frag->header), &frag->uct_iov, 1, 0, &frag->comp.uct_comp); diff --git a/opal/mca/btl/uct/btl_uct_tl.c b/opal/mca/btl/uct/btl_uct_tl.c index dcf00f2352..7d5d519abb 100644 --- a/opal/mca/btl/uct/btl_uct_tl.c +++ b/opal/mca/btl/uct/btl_uct_tl.c @@ -461,8 +461,14 @@ static void mca_btl_uct_set_tl_am (mca_btl_uct_module_t *module, mca_btl_uct_tl_ tl->max_device_contexts = mca_btl_uct_component.num_contexts_per_module; } - module->super.btl_max_send_size = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.am.max_zcopy - sizeof (mca_btl_uct_am_header_t); - module->super.btl_eager_limit = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.am.max_bcopy - sizeof (mca_btl_uct_am_header_t); + module->super.btl_eager_limit = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.am.max_bcopy - + sizeof (mca_btl_uct_am_header_t); + if (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & UCT_IFACE_FLAG_AM_ZCOPY) { + module->super.btl_max_send_size = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.am.max_zcopy - + sizeof (mca_btl_uct_am_header_t); + } else { + module->super.btl_max_send_size = module->super.btl_eager_limit; + } } static int mca_btl_uct_set_tl_conn (mca_btl_uct_module_t *module, mca_btl_uct_tl_t *tl)