/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation. All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation. All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
 *                         reserved.
 * Copyright (c) 2018      Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
|
|
|
|
|
|
|
|
|
|
|
|
#include "opal_config.h"
|
|
|
|
|
|
|
|
#include "opal/mca/btl/btl.h"
|
|
|
|
#include "opal/mca/btl/base/base.h"
|
|
|
|
#include "opal/mca/hwloc/base/base.h"
|
2018-06-26 14:53:02 +09:00
|
|
|
#include "opal/util/argv.h"
|
2018-03-15 12:50:10 -06:00
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include "btl_uct_device_context.h"
|
|
|
|
#include "btl_uct_am.h"
|
|
|
|
|
|
|
|
static int mca_btl_uct_component_register(void)
|
|
|
|
{
|
|
|
|
mca_btl_uct_module_t *module = &mca_btl_uct_module_template;
|
|
|
|
|
|
|
|
mca_btl_uct_component.memory_domains = "none";
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_uct_component.super.btl_version,
|
|
|
|
"memory_domains", "Comma-delimited list of memory domains of the form "
|
|
|
|
"to use for communication. Memory domains MUST provide transports that "
|
|
|
|
"support put, get, and amos. Special values: all (all available), none."
|
|
|
|
" (default: none)", MCA_BASE_VAR_TYPE_STRING, NULL, 0,
|
|
|
|
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
|
|
|
|
&mca_btl_uct_component.memory_domains);
|
|
|
|
|
|
|
|
mca_btl_uct_component.allowed_transports = "any";
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_uct_component.super.btl_version,
|
|
|
|
"transports", "Comma-delimited list of transports of the form to use."
|
|
|
|
" The list of transports available can be queried using ucx_info. Special"
|
|
|
|
"values: any (any available) (default: any)", MCA_BASE_VAR_TYPE_STRING,
|
|
|
|
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
|
|
|
|
&mca_btl_uct_component.allowed_transports);
|
|
|
|
|
|
|
|
mca_btl_uct_component.num_contexts_per_module = 0;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_uct_component.super.btl_version,
|
|
|
|
"num_contexts_per_module", "Number of UCT worker contexts "
|
|
|
|
"to create for each BTL module. Larger numbers will improve "
|
|
|
|
"multi-threaded performance but may increase memory usage. "
|
|
|
|
"A good rule of thumb is one context per application thread "
|
|
|
|
"that will be calling into MPI. (default: 0 -- autoselect "
|
|
|
|
"based on the number of cores)", MCA_BASE_VAR_TYPE_INT,
|
|
|
|
NULL, 0 ,MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
|
|
|
MCA_BASE_VAR_SCOPE_ALL, &mca_btl_uct_component.num_contexts_per_module);
|
|
|
|
|
|
|
|
mca_btl_uct_component.disable_ucx_memory_hooks = true;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_uct_component.super.btl_version,
|
|
|
|
"disable_ucx_memory_hooks", "Disable the munmap memory hook "
|
|
|
|
"inside UCX. These hooks are not necessary when using the "
|
|
|
|
"uct btl and tend to cause performance problems when using "
|
|
|
|
"multiple threads (default: true)", MCA_BASE_VAR_TYPE_BOOL,
|
|
|
|
NULL, 0 ,MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
|
|
|
MCA_BASE_VAR_SCOPE_ALL, &mca_btl_uct_component.disable_ucx_memory_hooks);
|
|
|
|
|
|
|
|
|
|
|
|
#if OPAL_C_HAVE__THREAD_LOCAL
|
|
|
|
mca_btl_uct_component.bind_threads_to_contexts = true;
|
|
|
|
(void) mca_base_component_var_register(&mca_btl_uct_component.super.btl_version,
|
|
|
|
"bind_threads_to_contexts", "Bind threads to device contexts. "
|
|
|
|
"In general this should improve the multi-threaded performance "
|
|
|
|
"when threads are used. (default: true)", MCA_BASE_VAR_TYPE_BOOL,
|
|
|
|
NULL, 0 ,MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
|
|
|
MCA_BASE_VAR_SCOPE_ALL, &mca_btl_uct_component.bind_threads_to_contexts);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* for now we want this component to lose to btl/ugni and btl/vader */
|
|
|
|
module->super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
|
|
|
|
|
|
|
|
return mca_btl_base_param_register (&mca_btl_uct_component.super.btl_version,
|
|
|
|
&module->super);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int mca_btl_uct_component_open(void)
|
|
|
|
{
|
|
|
|
if (0 == mca_btl_uct_component.num_contexts_per_module) {
|
|
|
|
/* use the core count and the number of local processes to determine
|
|
|
|
* how many UCT workers to create */
|
|
|
|
int core_count = 36;
|
|
|
|
|
|
|
|
(void) opal_hwloc_base_get_topology ();
|
|
|
|
core_count = hwloc_get_nbobjs_by_type (opal_hwloc_topology, HWLOC_OBJ_CORE);
|
|
|
|
|
|
|
|
if (core_count <= opal_process_info.num_local_peers || !opal_using_threads()) {
|
|
|
|
/* there is probably no benefit to using multiple device contexts when not
|
|
|
|
* using threads or oversubscribing the node with mpi processes. */
|
|
|
|
mca_btl_uct_component.num_contexts_per_module = 1;
|
|
|
|
} else {
|
|
|
|
mca_btl_uct_component.num_contexts_per_module = core_count / (opal_process_info.num_local_peers + 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* component cleanup - sanity checking of queue lengths
|
|
|
|
*/
|
|
|
|
static int mca_btl_uct_component_close(void)
|
|
|
|
{
|
|
|
|
return OPAL_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * @brief Compute the number of modex bytes used to describe a transport layer
 *
 * The total is made up of 4 bytes, the NUL-terminated transport name, the device address
 * and -- only when the interface reports UCT_IFACE_FLAG_CONNECT_TO_IFACE -- the interface
 * address. The result is rounded up to a multiple of 4.
 *
 * @param[in] tl  transport layer to size
 *
 * @returns padded size in bytes
 */
static size_t mca_btl_uct_tl_modex_size (mca_btl_uct_tl_t *tl)
{
    /* 4 bytes + transport name (with its terminating NUL) + device address */
    size_t total = 4 + strlen (tl->uct_tl_name) + 1 + tl->uct_iface_attr.device_addr_len;

    if (tl->uct_iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
        total += tl->uct_iface_attr.iface_addr_len;
    }

    /* pad out to a multiple of 4 bytes */
    return (total + 3) & ~3;
}
|
|
|
|
|
|
|
|
/**
 * @brief Compute the number of modex bytes used to describe a module
 *
 * Counts 4 bytes, the NUL-terminated memory domain name and the size of every distinct
 * transport layer (rdma, am, conn) attached to the module. A transport layer that is
 * shared between roles is counted once.
 *
 * @param[in] module  module to size
 *
 * @returns size in bytes
 */
static size_t mca_btl_uct_module_modex_size (mca_btl_uct_module_t *module)
{
    mca_btl_uct_tl_t *rdma = module->rdma_tl;
    mca_btl_uct_tl_t *am = module->am_tl;
    mca_btl_uct_tl_t *conn = module->conn_tl;
    size_t total = 4 + strlen (module->md_name) + 1;

    if (NULL != rdma) {
        total += mca_btl_uct_tl_modex_size (rdma);
    }

    /* skip the am transport if it was already counted as the rdma transport */
    if (NULL != am && am != rdma) {
        total += mca_btl_uct_tl_modex_size (am);
    }

    /* likewise for the connection transport */
    if (NULL != conn && conn != rdma && conn != am) {
        total += mca_btl_uct_tl_modex_size (conn);
    }

    return total;
}
|
|
|
|
|
|
|
|
/**
 * @brief Pack the modex description of a transport layer
 *
 * Layout written at modex_data: a 32-bit size (the padded size returned by
 * mca_btl_uct_tl_modex_size()), the NUL-terminated transport name, the interface address
 * (only when the interface reports UCT_IFACE_FLAG_CONNECT_TO_IFACE) and the device address.
 *
 * @param[in]  tl          transport layer to describe
 * @param[out] modex_data  destination buffer. must have room for at least
 *                         mca_btl_uct_tl_modex_size (tl) bytes. no alignment is required.
 *
 * @returns number of bytes consumed in the destination buffer (including padding)
 */
static size_t mca_btl_uct_tl_modex_pack (mca_btl_uct_tl_t *tl, uint8_t *modex_data)
{
    mca_btl_uct_device_context_t *dev_context = tl->uct_dev_contexts[0];
    const size_t modex_size = mca_btl_uct_tl_modex_size (tl);
    const size_t name_size = strlen (tl->uct_tl_name) + 1;
    const uint32_t packed_size = (uint32_t) modex_size;

    /* zero the region first so the trailing padding bytes have a defined value */
    memset (modex_data, 0, modex_size);

    /* the destination follows a variable-length name in the caller's buffer and may not be
     * 4-byte aligned. copy the size instead of storing through a uint32_t pointer. */
    memcpy (modex_data, &packed_size, sizeof (packed_size));
    modex_data += sizeof (packed_size);

    memcpy (modex_data, tl->uct_tl_name, name_size);
    modex_data += name_size;

    /* NTH: only the first context is available. i assume the device addresses of the
     * contexts will be the same but they will have different iface addresses. i also
     * am assuming that it doesn't really matter if all remote contexts connect to
     * the same endpoint since we are only doing RDMA. if any of these assumptions are
     * wrong then we can't delay creating the other contexts and must include their
     * information in the modex. */
    if (tl->uct_iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
        uct_iface_get_address (dev_context->uct_iface, (uct_iface_addr_t *) modex_data);
        modex_data += tl->uct_iface_attr.iface_addr_len;
    }

    uct_iface_get_device_address (dev_context->uct_iface, (uct_device_addr_t *) modex_data);

    return modex_size;
}
|
|
|
|
|
|
|
|
static int mca_btl_uct_modex_send (void)
|
|
|
|
{
|
|
|
|
size_t modex_size = sizeof (mca_btl_uct_modex_t);
|
|
|
|
mca_btl_uct_modex_t *modex;
|
|
|
|
uint8_t *modex_data;
|
|
|
|
int rc;
|
|
|
|
|
2018-07-23 12:55:03 -06:00
|
|
|
for (int i = 0 ; i < mca_btl_uct_component.module_count ; ++i) {
|
2018-03-15 12:50:10 -06:00
|
|
|
modex_size += mca_btl_uct_module_modex_size (mca_btl_uct_component.modules[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
modex = alloca (modex_size);
|
|
|
|
modex_data = modex->data;
|
|
|
|
|
|
|
|
modex->module_count = mca_btl_uct_component.module_count;
|
|
|
|
|
2018-07-23 12:55:03 -06:00
|
|
|
for (int i = 0 ; i < mca_btl_uct_component.module_count ; ++i) {
|
2018-03-15 12:50:10 -06:00
|
|
|
mca_btl_uct_module_t *module = mca_btl_uct_component.modules[i];
|
|
|
|
size_t name_len = strlen (module->md_name);
|
|
|
|
|
|
|
|
/* pack the size */
|
|
|
|
*((uint32_t *) modex_data) = (uint32_t) mca_btl_uct_module_modex_size (module);
|
|
|
|
|
|
|
|
modex_data += 4;
|
|
|
|
|
|
|
|
strcpy ((char *) modex_data, module->md_name);
|
|
|
|
modex_data += name_len + 1;
|
|
|
|
|
|
|
|
if (module->rdma_tl) {
|
|
|
|
modex_data += mca_btl_uct_tl_modex_pack (module->rdma_tl, modex_data);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (module->am_tl && module->am_tl != module->rdma_tl) {
|
|
|
|
modex_data += mca_btl_uct_tl_modex_pack (module->am_tl, modex_data);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (module->conn_tl && module->conn_tl != module->rdma_tl && module->conn_tl != module->am_tl) {
|
|
|
|
modex_data += mca_btl_uct_tl_modex_pack (module->conn_tl, modex_data);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &mca_btl_uct_component.super.btl_version, modex, modex_size);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * @brief Allocate and initialize a uct BTL module
 *
 * The new module starts as a copy of mca_btl_uct_module_template. Its embedded objects
 * are constructed, the memory domain and a private copy of its name are attached and a
 * UCS async context is created.
 *
 * @param[in] md_name            name of the memory domain (copied)
 * @param[in] md                 memory domain object stored in the module. no reference
 *                               is taken here.
 * @param[in] registration_size  value stored as the module's btl_registration_handle_size
 *
 * @returns the new module or NULL if an allocation or the async context creation failed
 */
static mca_btl_uct_module_t *mca_btl_uct_alloc_module (const char *md_name, mca_btl_uct_md_t *md,
                                                       size_t registration_size)
{
    mca_btl_uct_module_t *module;
    ucs_status_t ucs_status;
    char *name_copy;

    /* copy the name up front so a failed strdup can not leave a module with a NULL
     * md_name (the modex code calls strlen on it) */
    name_copy = strdup (md_name);
    if (NULL == name_copy) {
        return NULL;
    }

    module = malloc (sizeof (*module));
    if (NULL == module) {
        free (name_copy);
        return NULL;
    }

    /* copy the module template */
    *module = mca_btl_uct_module_template;

    OBJ_CONSTRUCT(&module->id_to_endpoint, opal_hash_table_t);
    OBJ_CONSTRUCT(&module->endpoint_lock, opal_mutex_t);
    OBJ_CONSTRUCT(&module->short_frags, opal_free_list_t);
    OBJ_CONSTRUCT(&module->eager_frags, opal_free_list_t);
    OBJ_CONSTRUCT(&module->max_frags, opal_free_list_t);
    OBJ_CONSTRUCT(&module->rdma_completions, opal_free_list_t);
    OBJ_CONSTRUCT(&module->pending_frags, opal_list_t);
    OBJ_CONSTRUCT(&module->lock, opal_mutex_t);

    module->md = md;
    module->md_name = name_copy;
    module->super.btl_registration_handle_size = registration_size;

    ucs_status = ucs_async_context_create (UCS_ASYNC_MODE_THREAD, &module->ucs_async);
    if (UCS_OK != ucs_status) {
        BTL_VERBOSE(("Could not create a UCT async context"));
        /* finalize is used to tear down the partially set up module */
        mca_btl_uct_finalize (&module->super);
        return NULL;
    }

    return module;
}
|
|
|
|
|
|
|
|
ucs_status_t mca_btl_uct_am_handler (void *arg, void *data, size_t length, unsigned flags)
|
|
|
|
{
|
|
|
|
mca_btl_uct_device_context_t *tl_context = (mca_btl_uct_device_context_t *) arg;
|
|
|
|
mca_btl_uct_module_t *uct_btl = tl_context->uct_btl;
|
|
|
|
mca_btl_uct_am_header_t *header = (mca_btl_uct_am_header_t *) data;
|
|
|
|
mca_btl_active_message_callback_t *reg;
|
|
|
|
mca_btl_base_segment_t seg = {.seg_addr = {.pval = (void *) ((intptr_t) data + sizeof (*header))},
|
|
|
|
.seg_len = length - sizeof (*header)};
|
|
|
|
mca_btl_uct_base_frag_t frag = {.base = {.des_segments = &seg, .des_segment_count = 1}};
|
|
|
|
|
|
|
|
reg = mca_btl_base_active_message_trigger + header->data.tag;
|
|
|
|
mca_btl_uct_context_unlock (tl_context);
|
|
|
|
reg->cbfunc (&uct_btl->super, header->data.tag, &frag.base, reg->cbdata);
|
|
|
|
mca_btl_uct_context_lock (tl_context);
|
|
|
|
|
|
|
|
return UCS_OK;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * @brief Create a BTL module for a UCT memory domain if it was requested
 *
 * The memory domain is used when one of the entries in allowed_ifaces is "all" or starts
 * with the memory domain's name. For a selected domain this opens the domain, queries its
 * transports, allocates a module, creates a "grdma" registration cache for it and finally
 * appends the module to the component's module array.
 *
 * @param[in] md_desc         description of the memory domain to process
 * @param[in] allowed_ifaces  NULL-terminated array of allowed memory domain names
 *
 * @returns OPAL_SUCCESS if the domain was skipped or a module was created
 * @returns OPAL_ERR_NOT_AVAILABLE if the module limit was reached or the domain has no usable transports
 * @returns OPAL_ERR_OUT_OF_RESOURCE if an allocation failed
 * @returns OPAL_ERROR if a UCT call or the registration cache creation failed
 */
static int mca_btl_uct_component_process_uct_md (uct_md_resource_desc_t *md_desc, char **allowed_ifaces)
{
    mca_rcache_base_resources_t rcache_resources;
    uct_tl_resource_desc_t *tl_desc = NULL;
    mca_btl_uct_module_t *module;
    uct_md_config_t *uct_config = NULL;
    ucs_status_t ucs_status;
    uct_md_attr_t md_attr;
    mca_btl_uct_md_t *md;
    bool found = false;
    unsigned num_tls = 0;
    char *tmp = NULL;

    if (MCA_BTL_UCT_MAX_MODULES == mca_btl_uct_component.module_count) {
        BTL_VERBOSE(("created the maximum number of allowable modules"));
        return OPAL_ERR_NOT_AVAILABLE;
    }

    BTL_VERBOSE(("processing memory domain %s", md_desc->md_name));

    for (int j = 0 ; allowed_ifaces[j] ; ++j) {
        /* note: only the first strlen (md_name) characters of the allowed entry are compared */
        if (0 == strncmp (allowed_ifaces[j], md_desc->md_name, strlen (md_desc->md_name)) ||
            0 == strcmp (allowed_ifaces[j], "all")) {
            found = true;
            break;
        }
    }

    if (!found) {
        /* nothing to do */
        return OPAL_SUCCESS;
    }

    md = OBJ_NEW(mca_btl_uct_md_t);
    if (NULL == md) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    ucs_status = uct_md_config_read (md_desc->md_name, NULL, NULL, &uct_config);
    if (UCS_OK != ucs_status) {
        BTL_VERBOSE(("could not read the configuration of memory domain %s", md_desc->md_name));
        /* NOTE(review): assumes releasing an md whose uct_md was never opened is safe -- confirm
         * against the mca_btl_uct_md_t constructor/destructor */
        OBJ_RELEASE(md);
        return OPAL_ERROR;
    }

    ucs_status = uct_md_open (md_desc->md_name, uct_config, &md->uct_md);
    uct_config_release (uct_config);
    if (UCS_OK != ucs_status) {
        BTL_VERBOSE(("could not open memory domain %s", md_desc->md_name));
        /* NOTE(review): same assumption as above about releasing an unopened md */
        OBJ_RELEASE(md);
        return OPAL_ERROR;
    }

    ucs_status = uct_md_query (md->uct_md, &md_attr);
    if (UCS_OK == ucs_status) {
        ucs_status = uct_md_query_tl_resources (md->uct_md, &tl_desc, &num_tls);
    }

    if (UCS_OK != ucs_status) {
        BTL_VERBOSE(("could not query memory domain %s", md_desc->md_name));
        OBJ_RELEASE(md);
        return OPAL_ERROR;
    }

    module = mca_btl_uct_alloc_module (md_desc->md_name, md, md_attr.rkey_packed_size);
    if (NULL == module) {
        uct_release_tl_resource_list (tl_desc);
        /* no module exists so drop the initial md reference here instead of leaking it */
        OBJ_RELEASE(md);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    (void) mca_btl_uct_query_tls (module, md, tl_desc, num_tls);

    uct_release_tl_resource_list (tl_desc);

    /* release the initial reference to the md object. if any modules were created the UCT md will remain
     * open until those modules are finalized. */
    OBJ_RELEASE(md);

    if (NULL == module->am_tl && NULL == module->rdma_tl) {
        BTL_VERBOSE(("uct memory domain %s does not have any appropriate tls", md_desc->md_name));
        mca_btl_uct_finalize (&module->super);
        return OPAL_ERR_NOT_AVAILABLE;
    }

    /* NTH: a registration cache shouldn't be necessary when using UCT but there are measurable
     * performance benefits to using rcache/grdma instead of assuming UCT will do the right
     * thing. */
    if (0 > asprintf (&tmp, "uct.%s", module->md_name)) {
        /* the contents of tmp are undefined when asprintf fails */
        mca_btl_uct_finalize (&module->super);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    rcache_resources.cache_name = tmp;
    rcache_resources.reg_data = (void *) module;
    rcache_resources.sizeof_reg = sizeof (mca_btl_uct_reg_t) + module->super.btl_registration_handle_size;
    rcache_resources.register_mem = mca_btl_uct_reg_mem;
    rcache_resources.deregister_mem = mca_btl_uct_dereg_mem;

    module->rcache = mca_rcache_base_module_create ("grdma", module, &rcache_resources);
    free (tmp);
    if (NULL == module->rcache) {
        /* something went horribly wrong */
        BTL_VERBOSE(("could not allocate a registration cache for this btl module"));
        mca_btl_uct_finalize (&module->super);
        return OPAL_ERROR;
    }

    /* only publish the module once it is fully set up. publishing it before the rcache
     * was created left a finalized module in the array when the rcache creation failed. */
    mca_btl_uct_component.modules[mca_btl_uct_component.module_count++] = module;

    return OPAL_SUCCESS;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* UCT component initialization:
|
|
|
|
* (1) read interface list from kernel and compare against component parameters
|
|
|
|
* then create a BTL instance for selected interfaces
|
|
|
|
* (2) setup UCT listen socket for incoming connection attempts
|
|
|
|
* (3) register BTL parameters with the MCA
|
|
|
|
*/
|
|
|
|
|
|
|
|
static mca_btl_base_module_t **mca_btl_uct_component_init (int *num_btl_modules, bool enable_progress_threads,
|
|
|
|
bool enable_mpi_threads)
|
|
|
|
{
|
|
|
|
/* for this BTL to be useful the interface needs to support RDMA and certain atomic operations */
|
|
|
|
struct mca_btl_base_module_t **base_modules;
|
|
|
|
uct_md_resource_desc_t *resources;
|
|
|
|
unsigned resource_count;
|
|
|
|
char **allowed_ifaces;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
BTL_VERBOSE(("initializing uct btl"));
|
|
|
|
|
|
|
|
if (NULL == mca_btl_uct_component.memory_domains || 0 == strlen (mca_btl_uct_component.memory_domains) ||
|
|
|
|
0 == strcmp (mca_btl_uct_component.memory_domains, "none")) {
|
|
|
|
BTL_VERBOSE(("no uct memory domains specified"));
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
allowed_ifaces = opal_argv_split (mca_btl_uct_component.memory_domains, ',');
|
|
|
|
if (NULL == allowed_ifaces) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
uct_query_md_resources (&resources, &resource_count);
|
|
|
|
|
|
|
|
mca_btl_uct_component.module_count = 0;
|
|
|
|
|
|
|
|
/* generate all suitable btl modules */
|
|
|
|
for (unsigned i = 0 ; i < resource_count ; ++i) {
|
|
|
|
rc = mca_btl_uct_component_process_uct_md (resources + i, allowed_ifaces);
|
|
|
|
if (OPAL_SUCCESS != rc) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
opal_argv_free (allowed_ifaces);
|
|
|
|
uct_release_md_resource_list (resources);
|
|
|
|
|
|
|
|
mca_btl_uct_modex_send ();
|
|
|
|
|
|
|
|
/* pass module array back to caller */
|
|
|
|
base_modules = calloc (mca_btl_uct_component.module_count, sizeof (*base_modules));
|
|
|
|
if (NULL == base_modules) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
memcpy (base_modules, mca_btl_uct_component.modules, mca_btl_uct_component.module_count *
|
|
|
|
sizeof (mca_btl_uct_component.modules[0]));
|
|
|
|
|
|
|
|
*num_btl_modules = mca_btl_uct_component.module_count;
|
|
|
|
|
|
|
|
BTL_VERBOSE(("uct btl initialization complete. found %d suitable memory domains",
|
|
|
|
mca_btl_uct_component.module_count));
|
|
|
|
|
|
|
|
return base_modules;
|
|
|
|
}
|
|
|
|
|
2018-07-23 12:55:03 -06:00
|
|
|
/**
 * @brief Progress every device context of a transport layer
 *
 * @param[in] tl              transport layer to progress (may be NULL)
 * @param[in] starting_index  currently not used; all contexts are visited starting at index 0
 *
 * @returns sum of the values returned by mca_btl_uct_context_progress() (0 if tl is NULL)
 */
static int mca_btl_uct_tl_progress (mca_btl_uct_tl_t *tl, int starting_index)
{
    unsigned int completed = 0;

    if (NULL != tl) {
        for (int ctx = 0 ; ctx < tl->max_device_contexts ; ++ctx) {
            mca_btl_uct_device_context_t *context = tl->uct_dev_contexts[ctx];

            /* slots without a context are skipped */
            if (NULL == context) {
                continue;
            }

            completed += mca_btl_uct_context_progress (context);
        }
    }

    return completed;
}
|
|
|
|
|
|
|
|
/**
 * @brief Retry the pending fragments of a module
 *
 * Every fragment on the pending list that is marked ready is removed from the list and
 * sent again. A fragment whose send returns a value below OPAL_SUCCESS is put back at
 * the front of the list. The list is walked with the module lock held; the initial
 * emptiness check is done without the lock.
 *
 * @param[in] uct_btl  module whose pending list is processed
 *
 * @returns 0 (OPAL_SUCCESS)
 */
static int mca_btl_uct_component_progress_pending (mca_btl_uct_module_t *uct_btl)
{
    mca_btl_uct_base_frag_t *pending, *following;

    /* quick exit when there is nothing queued */
    if (0 == opal_list_get_size (&uct_btl->pending_frags)) {
        return 0;
    }

    OPAL_THREAD_LOCK(&uct_btl->lock);

    OPAL_LIST_FOREACH_SAFE(pending, following, &uct_btl->pending_frags, mca_btl_uct_base_frag_t) {
        if (pending->ready) {
            opal_list_remove_item (&uct_btl->pending_frags, (opal_list_item_t *) pending);

            if (mca_btl_uct_send (&uct_btl->super, pending->endpoint, &pending->base,
                                  pending->header.data.tag) < OPAL_SUCCESS) {
                /* the send failed. requeue the fragment */
                opal_list_prepend (&uct_btl->pending_frags, (opal_list_item_t *) pending);
            }
        }
    }

    OPAL_THREAD_UNLOCK(&uct_btl->lock);

    return OPAL_SUCCESS;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @brief UCT BTL progress function
|
|
|
|
*
|
|
|
|
* This function explictly progresses all workers.
|
|
|
|
*/
|
|
|
|
static int mca_btl_uct_component_progress (void)
|
|
|
|
{
|
|
|
|
int starting_index = mca_btl_uct_get_context_index ();
|
|
|
|
unsigned ret = 0;
|
|
|
|
|
2018-07-23 12:55:03 -06:00
|
|
|
for (int i = 0 ; i < mca_btl_uct_component.module_count ; ++i) {
|
2018-03-15 12:50:10 -06:00
|
|
|
mca_btl_uct_module_t *module = mca_btl_uct_component.modules[i];
|
|
|
|
|
|
|
|
/* unlike ucp, uct actually tells us something useful! its almost like it was "inspired"
|
|
|
|
* by the btl progress functions.... */
|
|
|
|
ret += mca_btl_uct_tl_progress (module->rdma_tl, starting_index);
|
|
|
|
|
|
|
|
if (module->am_tl != module->rdma_tl) {
|
|
|
|
ret += mca_btl_uct_tl_progress (module->am_tl, starting_index);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (module->conn_tl) {
|
|
|
|
if (module->conn_tl != module->am_tl && module->conn_tl != module->rdma_tl) {
|
|
|
|
ret += mca_btl_uct_tl_progress (module->conn_tl, 0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (0 != opal_list_get_size (&module->pending_frags)) {
|
|
|
|
mca_btl_uct_component_progress_pending (module);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return (int) ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/** UCT btl component */
|
|
|
|
mca_btl_uct_component_t mca_btl_uct_component = {
|
|
|
|
.super = {
|
|
|
|
.btl_version = {
|
|
|
|
MCA_BTL_DEFAULT_VERSION("uct"),
|
|
|
|
.mca_open_component = mca_btl_uct_component_open,
|
|
|
|
.mca_close_component = mca_btl_uct_component_close,
|
|
|
|
.mca_register_component_params = mca_btl_uct_component_register,
|
|
|
|
},
|
|
|
|
.btl_data = {
|
|
|
|
/* The component is not checkpoint ready */
|
|
|
|
.param_field = MCA_BASE_METADATA_PARAM_NONE
|
|
|
|
},
|
|
|
|
|
|
|
|
.btl_init = mca_btl_uct_component_init,
|
|
|
|
.btl_progress = mca_btl_uct_component_progress,
|
|
|
|
}
|
|
|
|
};
|