diff --git a/opal/mca/btl/uct/btl_uct.h b/opal/mca/btl/uct/btl_uct.h index d31ad1402f..989fe64132 100644 --- a/opal/mca/btl/uct/btl_uct.h +++ b/opal/mca/btl/uct/btl_uct.h @@ -286,7 +286,7 @@ int mca_btl_uct_query_tls (mca_btl_uct_module_t *module, mca_btl_uct_md_t *md, u */ static inline bool mca_btl_uct_tl_supports_rdma (mca_btl_uct_tl_t *tl) { - return (tl->uct_iface_attr.cap.flags & (UCT_IFACE_FLAG_PUT_ZCOPY | UCT_IFACE_FLAG_GET_ZCOPY)) == + return (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & (UCT_IFACE_FLAG_PUT_ZCOPY | UCT_IFACE_FLAG_GET_ZCOPY)) == (UCT_IFACE_FLAG_PUT_ZCOPY | UCT_IFACE_FLAG_GET_ZCOPY); } @@ -295,7 +295,7 @@ static inline bool mca_btl_uct_tl_supports_rdma (mca_btl_uct_tl_t *tl) */ static inline bool mca_btl_uct_tl_support_am (mca_btl_uct_tl_t *tl) { - return (tl->uct_iface_attr.cap.flags & (UCT_IFACE_FLAG_AM_SHORT | UCT_IFACE_FLAG_AM_BCOPY | UCT_IFACE_FLAG_AM_ZCOPY)); + return (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & (UCT_IFACE_FLAG_AM_SHORT | UCT_IFACE_FLAG_AM_BCOPY | UCT_IFACE_FLAG_AM_ZCOPY)); } /** @@ -305,7 +305,7 @@ static inline bool mca_btl_uct_tl_support_am (mca_btl_uct_tl_t *tl) */ static inline bool mca_btl_uct_tl_supports_conn (mca_btl_uct_tl_t *tl) { - return (tl->uct_iface_attr.cap.flags & (UCT_IFACE_FLAG_AM_SHORT | UCT_IFACE_FLAG_CONNECT_TO_IFACE)) == + return (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & (UCT_IFACE_FLAG_AM_SHORT | UCT_IFACE_FLAG_CONNECT_TO_IFACE)) == (UCT_IFACE_FLAG_AM_SHORT | UCT_IFACE_FLAG_CONNECT_TO_IFACE); } @@ -316,7 +316,7 @@ static inline bool mca_btl_uct_tl_supports_conn (mca_btl_uct_tl_t *tl) */ static inline bool mca_btl_uct_tl_requires_connection_tl (mca_btl_uct_tl_t *tl) { - return !(tl->uct_iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE); + return !(MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE); } END_C_DECLS diff --git a/opal/mca/btl/uct/btl_uct_am.c b/opal/mca/btl/uct/btl_uct_am.c index 1c36769137..5d3f0ef042 100644 --- a/opal/mca/btl/uct/btl_uct_am.c +++ 
b/opal/mca/btl/uct/btl_uct_am.c @@ -25,7 +25,7 @@ mca_btl_base_descriptor_t *mca_btl_uct_alloc (mca_btl_base_module_t *btl, mca_bt mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl; mca_btl_uct_base_frag_t *frag = NULL; - if ((size + 8) <= (size_t) uct_btl->am_tl->uct_iface_attr.cap.am.max_short) { + if ((size + 8) <= (size_t) MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, 0).cap.am.max_short) { frag = mca_btl_uct_frag_alloc_short (uct_btl, endpoint); } else if (size <= uct_btl->super.btl_eager_limit) { frag = mca_btl_uct_frag_alloc_eager (uct_btl, endpoint); @@ -97,7 +97,7 @@ struct mca_btl_base_descriptor_t *mca_btl_uct_prepare_src (mca_btl_base_module_t frag->base.order = order; frag->base.des_flags = flags; - if (total_size > (size_t) uct_btl->am_tl->uct_iface_attr.cap.am.max_short) { + if (total_size > (size_t) MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, 0).cap.am.max_short) { frag->segments[1].seg_len = *size; frag->segments[1].seg_addr.pval = data_ptr; frag->base.des_segment_count = 2; @@ -253,10 +253,11 @@ static size_t mca_btl_uct_sendi_pack (void *data, void *arg) return args->header_size + args->payload_size + 8; } -static inline size_t mca_btl_uct_max_sendi (mca_btl_uct_module_t *uct_btl) +static inline size_t mca_btl_uct_max_sendi (mca_btl_uct_module_t *uct_btl, int context_id) { - return (uct_btl->am_tl->uct_iface_attr.cap.am.max_short > uct_btl->am_tl->uct_iface_attr.cap.am.max_bcopy) ? - uct_btl->am_tl->uct_iface_attr.cap.am.max_short : uct_btl->am_tl->uct_iface_attr.cap.am.max_bcopy; + const mca_btl_uct_tl_t *tl = uct_btl->am_tl; + return (MCA_BTL_UCT_TL_ATTR(tl, context_id).cap.am.max_short > MCA_BTL_UCT_TL_ATTR(tl, context_id).cap.am.max_bcopy) ? 
+ MCA_BTL_UCT_TL_ATTR(tl, context_id).cap.am.max_short : MCA_BTL_UCT_TL_ATTR(tl, context_id).cap.am.max_bcopy; } int mca_btl_uct_sendi (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, opal_convertor_t *convertor, @@ -274,7 +275,7 @@ int mca_btl_uct_sendi (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endp int rc; rc = mca_btl_uct_endpoint_check_am (uct_btl, endpoint, context, &ep_handle); - if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || msg_size > mca_btl_uct_max_sendi (uct_btl))) { + if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || msg_size > mca_btl_uct_max_sendi (uct_btl, context->context_id))) { if (descriptor) { *descriptor = mca_btl_uct_alloc (btl, endpoint, order, total_size, flags); } @@ -287,7 +288,7 @@ int mca_btl_uct_sendi (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endp mca_btl_uct_context_lock (context); if (0 == payload_size) { ucs_status = uct_ep_am_short (ep_handle, MCA_BTL_UCT_FRAG, am_header.value, header, header_size); - } else if (msg_size < (size_t) uct_btl->am_tl->uct_iface_attr.cap.am.max_short) { + } else if (msg_size < (size_t) MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, context->context_id).cap.am.max_short) { int8_t *data = alloca (total_size); _mca_btl_uct_send_pack (data, header, header_size, convertor, payload_size); ucs_status = uct_ep_am_short (ep_handle, MCA_BTL_UCT_FRAG, am_header.value, data, total_size); diff --git a/opal/mca/btl/uct/btl_uct_component.c b/opal/mca/btl/uct/btl_uct_component.c index 2105dfb4da..70f1726787 100644 --- a/opal/mca/btl/uct/btl_uct_component.c +++ b/opal/mca/btl/uct/btl_uct_component.c @@ -152,12 +152,12 @@ static size_t mca_btl_uct_tl_modex_size (mca_btl_uct_tl_t *tl) { const size_t size = strlen (tl->uct_tl_name) + 1; - if (tl->uct_iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { + if (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { /* pad out to a multiple of 4 bytes */ - return (4 + 3 + size + tl->uct_iface_attr.device_addr_len + 
tl->uct_iface_attr.iface_addr_len) & ~3; + return (4 + 3 + size + MCA_BTL_UCT_TL_ATTR(tl, 0).device_addr_len + MCA_BTL_UCT_TL_ATTR(tl, 0).iface_addr_len) & ~3; } - return (4 + 3 + size + tl->uct_iface_attr.device_addr_len) & ~3; + return (4 + 3 + size + MCA_BTL_UCT_TL_ATTR(tl, 0).device_addr_len) & ~3; } static size_t mca_btl_uct_module_modex_size (mca_btl_uct_module_t *module) @@ -196,13 +196,13 @@ static size_t mca_btl_uct_tl_modex_pack (mca_btl_uct_tl_t *tl, uint8_t *modex_da * the same endpoint since we are only doing RDMA. if any of these assumptions are * wrong then we can't delay creating the other contexts and must include their * information in the modex. */ - if (tl->uct_iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { + if (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) { uct_iface_get_address (dev_context->uct_iface, (uct_iface_addr_t *) modex_data); - modex_data += tl->uct_iface_attr.iface_addr_len; + modex_data += MCA_BTL_UCT_TL_ATTR(tl, 0).iface_addr_len; } uct_iface_get_device_address (dev_context->uct_iface, (uct_device_addr_t *) modex_data); - modex_data += tl->uct_iface_attr.device_addr_len; + modex_data += MCA_BTL_UCT_TL_ATTR(tl, 0).device_addr_len; return modex_size; } diff --git a/opal/mca/btl/uct/btl_uct_endpoint.c b/opal/mca/btl/uct/btl_uct_endpoint.c index 286326d8fe..e0d39dee55 100644 --- a/opal/mca/btl/uct/btl_uct_endpoint.c +++ b/opal/mca/btl/uct/btl_uct_endpoint.c @@ -109,15 +109,14 @@ static int mca_btl_uct_endpoint_connect_iface (mca_btl_uct_module_t *uct_btl, mc /* easy case. 
just connect to the interface */ iface_addr = (uct_iface_addr_t *) tl_data; - device_addr = (uct_device_addr_t *) ((uintptr_t) iface_addr + tl->uct_iface_attr.iface_addr_len); + device_addr = (uct_device_addr_t *) ((uintptr_t) iface_addr + MCA_BTL_UCT_TL_ATTR(tl, tl_context->context_id).iface_addr_len); BTL_VERBOSE(("connecting endpoint to interface")); mca_btl_uct_context_lock (tl_context); ucs_status = uct_ep_create_connected (tl_context->uct_iface, device_addr, iface_addr, &tl_endpoint->uct_ep); - mca_btl_uct_context_unlock (tl_context); - tl_endpoint->flags = MCA_BTL_UCT_ENDPOINT_FLAG_CONN_READY; + mca_btl_uct_context_unlock (tl_context); return (UCS_OK == ucs_status) ? OPAL_SUCCESS : OPAL_ERROR; } @@ -189,7 +188,7 @@ static int mca_btl_uct_endpoint_connect_endpoint (mca_btl_uct_module_t *uct_btl, mca_btl_uct_tl_endpoint_t *tl_endpoint, uint8_t *tl_data, uint8_t *conn_tl_data, void *ep_addr) { - size_t request_length = sizeof (mca_btl_uct_conn_req_t) + tl->uct_iface_attr.ep_addr_len; + size_t request_length = sizeof (mca_btl_uct_conn_req_t) + MCA_BTL_UCT_TL_ATTR(tl, tl_context->context_id).ep_addr_len; mca_btl_uct_connection_ep_t *conn_ep = endpoint->conn_ep; mca_btl_uct_tl_t *conn_tl = uct_btl->conn_tl; mca_btl_uct_device_context_t *conn_tl_context = conn_tl->uct_dev_contexts[0]; @@ -208,7 +207,7 @@ static int mca_btl_uct_endpoint_connect_endpoint (mca_btl_uct_module_t *uct_btl, opal_process_name_print (endpoint->ep_proc->proc_name))); iface_addr = (uct_iface_addr_t *) conn_tl_data; - device_addr = (uct_device_addr_t *) ((uintptr_t) conn_tl_data + conn_tl->uct_iface_attr.iface_addr_len); + device_addr = (uct_device_addr_t *) ((uintptr_t) conn_tl_data + MCA_BTL_UCT_TL_ATTR(conn_tl, 0).iface_addr_len); endpoint->conn_ep = conn_ep = OBJ_NEW(mca_btl_uct_connection_ep_t); if (OPAL_UNLIKELY(NULL == conn_ep)) { diff --git a/opal/mca/btl/uct/btl_uct_module.c b/opal/mca/btl/uct/btl_uct_module.c index 8357be215a..ebd7ab6d68 100644 --- a/opal/mca/btl/uct/btl_uct_module.c 
+++ b/opal/mca/btl/uct/btl_uct_module.c @@ -88,7 +88,7 @@ static int mca_btl_uct_add_procs (mca_btl_base_module_t *btl, if (am_tl) { rc = opal_free_list_init (&uct_module->short_frags, sizeof (mca_btl_uct_base_frag_t), opal_cache_line_size, OBJ_CLASS(mca_btl_uct_base_frag_t), - am_tl->uct_iface_attr.cap.am.max_short, opal_cache_line_size, + MCA_BTL_UCT_TL_ATTR(am_tl, 0).cap.am.max_short, opal_cache_line_size, 0, 1024, 64, NULL, 0, NULL, NULL, NULL); rc = opal_free_list_init (&uct_module->eager_frags, sizeof (mca_btl_uct_base_frag_t), diff --git a/opal/mca/btl/uct/btl_uct_rdma.c b/opal/mca/btl/uct/btl_uct_rdma.c index 51cc5676ce..708dea148d 100644 --- a/opal/mca/btl/uct/btl_uct_rdma.c +++ b/opal/mca/btl/uct/btl_uct_rdma.c @@ -98,13 +98,12 @@ int mca_btl_uct_get (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi mca_btl_uct_context_lock (context); - if (size <= uct_btl->rdma_tl->uct_iface_attr.cap.get.max_bcopy) { + if (size <= MCA_BTL_UCT_TL_ATTR(uct_btl->rdma_tl, context->context_id).cap.get.max_bcopy) { ucs_status = uct_ep_get_bcopy (ep_handle, mca_btl_uct_get_unpack, local_address, size, remote_address, rkey.rkey, &comp->uct_comp); } else { uct_iov_t iov = {.buffer = local_address, .length = size, .stride = 0, .count = 1, .memh = MCA_BTL_UCT_REG_REMOTE_TO_LOCAL(local_handle)->uct_memh}; - ucs_status = uct_ep_get_zcopy (ep_handle, &iov, 1, remote_address, rkey.rkey, &comp->uct_comp); } @@ -183,7 +182,7 @@ int mca_btl_uct_put (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoi /* determine what UCT prototol should be used */ if (size <= uct_btl->super.btl_put_local_registration_threshold) { - use_short = size <= uct_btl->rdma_tl->uct_iface_attr.cap.put.max_short; + use_short = size <= MCA_BTL_UCT_TL_ATTR(uct_btl->rdma_tl, context->context_id).cap.put.max_short; use_bcopy = !use_short; } diff --git a/opal/mca/btl/uct/btl_uct_tl.c b/opal/mca/btl/uct/btl_uct_tl.c index a9f4069b8c..bca62c4813 100644 --- a/opal/mca/btl/uct/btl_uct_tl.c +++ 
b/opal/mca/btl/uct/btl_uct_tl.c @@ -61,11 +61,11 @@ static uint64_t mca_btl_uct_cap_to_btl_atomic_flag[][2] = { static void mca_btl_uct_module_set_atomic_flags (mca_btl_uct_module_t *module, mca_btl_uct_tl_t *tl) { - uint64_t cap_flags = tl->uct_iface_attr.cap.flags; + uint64_t cap_flags = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags; /* NTH: only use the fetching atomics for now */ - uint64_t atomic_flags32 = tl->uct_iface_attr.cap.atomic32.fop_flags; - uint64_t atomic_flags64 = tl->uct_iface_attr.cap.atomic64.fop_flags; + uint64_t atomic_flags32 = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.atomic32.fop_flags; + uint64_t atomic_flags64 = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.atomic64.fop_flags; /* NTH: don't really have a way to seperate 32-bit and 64-bit right now */ uint64_t all_flags = atomic_flags32 & atomic_flags64; @@ -110,7 +110,7 @@ static uint64_t mca_btl_uct_cap_to_btl_atomic_flag[][2] = { */ static void mca_btl_uct_module_set_atomic_flags (mca_btl_uct_module_t *module, mca_btl_uct_tl_t *tl) { - uint64_t cap_flags = tl->uct_iface_attr.cap.flags; + uint64_t cap_flags = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags; module->super.btl_atomic_flags = 0; @@ -299,9 +299,16 @@ mca_btl_uct_device_context_t *mca_btl_uct_context_create (mca_btl_uct_module_t * return NULL; } - BTL_VERBOSE(("enabling progress for tl %p context id %d", (void *) tl, context_id)); + /* only need to query one of the interfaces to get the attributes */ + ucs_status = uct_iface_query (context->uct_iface, &context->uct_iface_attr); + if (UCS_OK != ucs_status) { + BTL_VERBOSE(("Error querying UCT interface")); + mca_btl_uct_context_destroy (context); + return NULL; + } if (enable_progress) { + BTL_VERBOSE(("enabling progress for tl %p context id %d", (void *) tl, context_id)); mca_btl_uct_context_enable_progress (context); } @@ -372,15 +379,8 @@ static mca_btl_uct_tl_t *mca_btl_uct_create_tl (mca_btl_uct_module_t *module, mc return NULL; } - /* only need to query one of the interfaces to get the attributes */ - ucs_status = 
uct_iface_query (tl->uct_dev_contexts[0]->uct_iface, &tl->uct_iface_attr); - if (UCS_OK != ucs_status) { - BTL_VERBOSE(("Error querying UCT interface")); - OBJ_RELEASE(tl); - return NULL; - } - - BTL_VERBOSE(("Interface CAPS for tl %s::%s: 0x%lx", module->md_name, tl_desc->tl_name, (unsigned long) tl->uct_iface_attr.cap.flags)); + BTL_VERBOSE(("Interface CAPS for tl %s::%s: 0x%lx", module->md_name, tl_desc->tl_name, + (unsigned long) MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags)); return tl; } @@ -391,20 +391,20 @@ static void mca_btl_uct_set_tl_rdma (mca_btl_uct_module_t *module, mca_btl_uct_t mca_btl_uct_module_set_atomic_flags (module, tl); - module->super.btl_get_limit = tl->uct_iface_attr.cap.get.max_zcopy; - if (tl->uct_iface_attr.cap.get.max_bcopy) { + module->super.btl_get_limit = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.get.max_zcopy; + if (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.get.max_bcopy) { module->super.btl_get_alignment = 0; - module->super.btl_get_local_registration_threshold = tl->uct_iface_attr.cap.get.max_bcopy; + module->super.btl_get_local_registration_threshold = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.get.max_bcopy; } else { /* this is overkill in terms of alignment but we have no way to enforce a minimum get size */ - module->super.btl_get_alignment = opal_next_poweroftwo_inclusive (tl->uct_iface_attr.cap.get.min_zcopy); + module->super.btl_get_alignment = opal_next_poweroftwo_inclusive (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.get.min_zcopy); } - module->super.btl_put_limit = tl->uct_iface_attr.cap.put.max_zcopy; + module->super.btl_put_limit = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.put.max_zcopy; module->super.btl_put_alignment = 0; /* no registration needed when using short/bcopy put */ - module->super.btl_put_local_registration_threshold = tl->uct_iface_attr.cap.put.max_bcopy; + module->super.btl_put_local_registration_threshold = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.put.max_bcopy; module->rdma_tl = tl; OBJ_RETAIN(tl); @@ -480,14 +480,14 @@ static int mca_btl_uct_evaluate_tl 
(mca_btl_uct_module_t *module, mca_btl_uct_tl } if (tl == module->rdma_tl || tl == module->am_tl) { - BTL_VERBOSE(("tl has flags 0x%" PRIx64, tl->uct_iface_attr.cap.flags)); - module->super.btl_flags |= mca_btl_uct_module_flags (tl->uct_iface_attr.cap.flags); + BTL_VERBOSE(("tl has flags 0x%" PRIx64, MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags)); + module->super.btl_flags |= mca_btl_uct_module_flags (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags); /* the bandwidth and latency numbers relate to both rdma and active messages. need to * come up with a better estimate. */ /* UCT bandwidth is in bytes/sec, BTL is in MB/sec */ - module->super.btl_bandwidth = (uint32_t) (tl->uct_iface_attr.bandwidth / 1048576.0); + module->super.btl_bandwidth = (uint32_t) (MCA_BTL_UCT_TL_ATTR(tl, 0).bandwidth / 1048576.0); /* TODO -- figure out how to translate UCT latency to us */ module->super.btl_latency = 1; } diff --git a/opal/mca/btl/uct/btl_uct_types.h b/opal/mca/btl/uct/btl_uct_types.h index 4f86885113..1df08eb199 100644 --- a/opal/mca/btl/uct/btl_uct_types.h +++ b/opal/mca/btl/uct/btl_uct_types.h @@ -141,6 +141,9 @@ struct mca_btl_uct_device_context_t { /** UCT interface handle */ uct_iface_h uct_iface; + /** interface attributes */ + uct_iface_attr_t uct_iface_attr; + /** RDMA completions */ opal_free_list_t rdma_completions; @@ -307,9 +310,6 @@ struct mca_btl_uct_tl_t { /** device name for this tl (used for creating device contexts) */ char *uct_dev_name; - /** interface attributes */ - uct_iface_attr_t uct_iface_attr; - /** maxiumum number of device contexts that can be created */ int max_device_contexts; @@ -324,4 +324,7 @@ struct mca_btl_uct_tl_t { typedef struct mca_btl_uct_tl_t mca_btl_uct_tl_t; OBJ_CLASS_DECLARATION(mca_btl_uct_tl_t); +/** interface attributes of device context context_id of tl; dereferences (tl)->uct_dev_contexts[context_id], so that context must already exist */ +#define MCA_BTL_UCT_TL_ATTR(tl, context_id) ((tl)->uct_dev_contexts[(context_id)]->uct_iface_attr) + #endif /* !defined(BTL_UCT_TYPES_H) */