1
1

btl/uct: add support for OpenUCX v1.8 API changes

OpenUCX broke the UCT API again in v1.8. This commit updates
btl/uct to fix compilation with current OpenUCX master
(future v1.8). Further changes will likely be needed for
the final release.

Signed-off-by: Nathan Hjelm <hjelmn@google.com>
(cherry picked from commit 526775dfd7ad75c308532784de4fb3ffed25458f)
Этот коммит содержится в:
Nathan Hjelm 2019-09-26 08:57:00 -07:00 коммит произвёл Howard Pritchard
родитель 524960dcdd
Коммит 47ec3e4d2b
6 изменённых файлов: 107 добавлений и 7 удалений

Просмотреть файл

@ -85,6 +85,10 @@ struct mca_btl_uct_module_t {
/** array containing the am_tl and rdma_tl */
mca_btl_uct_tl_t *comm_tls[2];
#if UCT_API > UCT_VERSION(1, 7)
uct_component_h uct_component;
#endif
/** registration cache */
mca_rcache_base_module_t *rcache;

Просмотреть файл

@ -110,7 +110,7 @@ int mca_btl_uct_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
mca_btl_uct_uct_completion_release (comp);
}
uct_rkey_release (&rkey);
mca_btl_uct_rkey_release (uct_btl, &rkey);
return rc;
}
@ -184,7 +184,7 @@ int mca_btl_uct_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_e
mca_btl_uct_uct_completion_release (comp);
}
uct_rkey_release (&rkey);
mca_btl_uct_rkey_release (uct_btl, &rkey);
return rc;
}

Просмотреть файл

@ -314,7 +314,12 @@ ucs_status_t mca_btl_uct_am_handler (void *arg, void *data, size_t length, unsig
return UCS_OK;
}
#if UCT_API > UCT_VERSION(1, 7)
static int mca_btl_uct_component_process_uct_md (uct_component_h component, uct_md_resource_desc_t *md_desc,
char **allowed_ifaces)
#else
static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc, char **allowed_ifaces)
#endif
{
mca_rcache_base_resources_t rcache_resources;
uct_tl_resource_desc_t *tl_desc;
@ -348,8 +353,14 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc
md = OBJ_NEW(mca_btl_uct_md_t);
#if UCT_API > UCT_VERSION(1, 7)
uct_md_config_read (component, NULL, NULL, &uct_config);
uct_md_open (component, md_desc->md_name, uct_config, &md->uct_md);
#else
uct_md_config_read (md_desc->md_name, NULL, NULL, &uct_config);
uct_md_open (md_desc->md_name, uct_config, &md->uct_md);
#endif
uct_config_release (uct_config);
uct_md_query (md->uct_md, &md_attr);
@ -375,6 +386,10 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc
return OPAL_ERR_NOT_AVAILABLE;
}
#if UCT_API > UCT_VERSION(1, 7)
module->uct_component = component;
#endif
mca_btl_uct_component.modules[mca_btl_uct_component.module_count++] = module;
/* NTH: a registration cache shouldn't be necessary when using UCT but there are measurable
@ -400,6 +415,42 @@ static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc
return OPAL_SUCCESS;
}
#if UCT_API > UCT_VERSION(1, 7)
/**
 * Query a UCT component for its memory domain (MD) resources and pass each
 * one to mca_btl_uct_component_process_uct_md().
 *
 * @param[in] component       UCT component handle to query
 * @param[in] allowed_ifaces  forwarded unchanged to
 *                            mca_btl_uct_component_process_uct_md()
 *
 * @returns OPAL_ERROR if either uct_component_query() call fails or the MD
 *          resource array cannot be allocated
 * @returns OPAL_SUCCESS otherwise. A failure while processing an individual
 *          MD stops the iteration but is not reported to the caller.
 */
static int mca_btl_uct_component_process_uct_component (uct_component_h component, char **allowed_ifaces)
{
    uct_component_attr_t attr = {.field_mask = UCT_COMPONENT_ATTR_FIELD_NAME |
                                 UCT_COMPONENT_ATTR_FIELD_MD_RESOURCE_COUNT};
    ucs_status_t ucs_status;
    int rc;

    /* first query: fetch the component name and the number of MD resources */
    ucs_status = uct_component_query (component, &attr);
    if (UCS_OK != ucs_status) {
        return OPAL_ERROR;
    }

    BTL_VERBOSE(("processing uct component %s", attr.name));

    attr.md_resources = calloc (attr.md_resource_count, sizeof (*attr.md_resources));
    if (NULL == attr.md_resources && attr.md_resource_count > 0) {
        /* calloc(0, ...) may legitimately return NULL, so only treat NULL as
         * an allocation failure when resources were actually requested */
        return OPAL_ERROR;
    }

    /* second query: fill in the MD resource descriptors */
    attr.field_mask |= UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES;
    ucs_status = uct_component_query (component, &attr);
    if (UCS_OK != ucs_status) {
        /* do not leak the descriptor array on the error path */
        free (attr.md_resources);
        return OPAL_ERROR;
    }

    for (unsigned i = 0 ; i < attr.md_resource_count ; ++i) {
        rc = mca_btl_uct_component_process_uct_md (component, attr.md_resources + i,
                                                   allowed_ifaces);
        if (OPAL_SUCCESS != rc) {
            /* stop at the first MD that could not be processed */
            break;
        }
    }

    free (attr.md_resources);

    return OPAL_SUCCESS;
}
#endif /* UCT_API > UCT_VERSION(1, 7) */
/*
* UCT component initialization:
* (1) read interface list from kernel and compare against component parameters
@ -415,6 +466,7 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules,
struct mca_btl_base_module_t **base_modules;
uct_md_resource_desc_t *resources;
unsigned resource_count;
ucs_status_t ucs_status;
char **allowed_ifaces;
int rc;
@ -431,10 +483,32 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules,
return NULL;
}
uct_query_md_resources (&resources, &resource_count);
mca_btl_uct_component.module_count = 0;
#if UCT_API > UCT_VERSION(1, 7)
uct_component_h *components;
unsigned num_components;
ucs_status = uct_query_components(&components, &num_components);
if (UCS_OK != ucs_status) {
BTL_ERROR(("could not query UCT components"));
return NULL;
}
/* generate all suitable btl modules */
for (unsigned i = 0 ; i < num_components ; ++i) {
rc = mca_btl_uct_component_process_uct_component (components[i], allowed_ifaces);
if (OPAL_SUCCESS != rc) {
break;
}
}
uct_release_component_list (components);
#else /* UCT 1.7 and older */
uct_query_md_resources (&resources, &resource_count);
/* generate all suitable btl modules */
for (unsigned i = 0 ; i < resource_count ; ++i) {
rc = mca_btl_uct_component_process_uct_md (resources + i, allowed_ifaces);
@ -443,9 +517,11 @@ static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules,
}
}
opal_argv_free (allowed_ifaces);
uct_release_md_resource_list (resources);
#endif /* UCT_API > UCT_VERSION(1, 7) */
opal_argv_free (allowed_ifaces);
mca_btl_uct_modex_send ();
/* pass module array back to caller */

Просмотреть файл

@ -132,7 +132,7 @@ int mca_btl_uct_get (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
BTL_VERBOSE(("get issued. status = %d", ucs_status));
uct_rkey_release (&rkey);
mca_btl_uct_rkey_release (uct_btl, &rkey);
return OPAL_LIKELY(UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERR_RESOURCE_BUSY;
}
@ -237,7 +237,7 @@ int mca_btl_uct_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi
mca_btl_uct_uct_completion_release (comp);
}
uct_rkey_release (&rkey);
mca_btl_uct_rkey_release (uct_btl, &rkey);
return OPAL_LIKELY(UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERR_RESOURCE_BUSY;
}

Просмотреть файл

@ -55,8 +55,22 @@ static inline int mca_btl_uct_get_rkey (mca_btl_uct_module_t *module,
return rc;
}
#if UCT_API > UCT_VERSION(1, 7)
ucs_status = uct_rkey_unpack (module->uct_component, (void *) remote_handle, rkey);
#else
ucs_status = uct_rkey_unpack ((void *) remote_handle, rkey);
#endif
return (UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERROR;
}
/**
 * Release an unpacked remote key bundle.
 *
 * Wraps uct_rkey_release() so callers do not need to care which UCT API
 * version is in use: for UCT_API newer than 1.7 the module's uct_component
 * is passed as the first argument, otherwise only the rkey is passed.
 *
 * @param[in] uct_btl  btl/uct module; only its uct_component field is read,
 *                     and only when UCT_API > UCT_VERSION(1, 7)
 * @param[in] rkey     rkey bundle to release
 */
static inline void mca_btl_uct_rkey_release (mca_btl_uct_module_t *uct_btl, uct_rkey_bundle_t *rkey)
{
#if UCT_API <= UCT_VERSION(1, 7)
    /* this form of the call takes no component; silence the unused-parameter warning */
    (void) uct_btl;
    uct_rkey_release (rkey);
#else
    uct_rkey_release (uct_btl->uct_component, rkey);
#endif
}
#endif /* !defined(BTL_UCT_RDMA_H) */

Просмотреть файл

@ -516,7 +516,13 @@ static int mca_btl_uct_evaluate_tl (mca_btl_uct_module_t *module, mca_btl_uct_tl
* come up with a better estimate. */
/* UCT bandwidth is in bytes/sec, BTL is in MB/sec */
#if UCT_API > UCT_VERSION(1, 7)
module->super.btl_bandwidth = (uint32_t) ((MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth.dedicated +
MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth.shared /
(opal_process_info.num_local_peers + 1)) / 1048576.0);
#else
module->super.btl_bandwidth = (uint32_t) (MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth / 1048576.0);
#endif
/* TODO -- figure out how to translate UCT latency to us */
module->super.btl_latency = 1;
}