diff --git a/opal/mca/btl/uct/btl_uct_component.c b/opal/mca/btl/uct/btl_uct_component.c index 74f85480c3..c8bc9e9377 100644 --- a/opal/mca/btl/uct/btl_uct_component.c +++ b/opal/mca/btl/uct/btl_uct_component.c @@ -14,6 +14,9 @@ * reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -50,7 +53,7 @@ static int mca_btl_uct_component_register(void) MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_uct_component.memory_domains); - mca_btl_uct_component.allowed_transports = "dc_mlx5,rc_mlx5,ud,any"; + mca_btl_uct_component.allowed_transports = "dc_mlx5,rc_mlx5,ud,ugni_rdma,ugni_smsg,any"; (void) mca_base_component_var_register(&mca_btl_uct_component.super.btl_version, "transports", "Comma-delimited list of transports to use sorted by increasing " "priority. The list of transports available can be queried using ucx_info. Special" diff --git a/opal/mca/btl/uct/btl_uct_endpoint.c b/opal/mca/btl/uct/btl_uct_endpoint.c index 804820bbba..40349673e2 100644 --- a/opal/mca/btl/uct/btl_uct_endpoint.c +++ b/opal/mca/btl/uct/btl_uct_endpoint.c @@ -2,6 +2,8 @@ /* * Copyright (c) 2018 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -137,11 +139,26 @@ static void mca_btl_uct_connection_ep_destruct (mca_btl_uct_connection_ep_t *ep) OBJ_CLASS_INSTANCE(mca_btl_uct_connection_ep_t, opal_object_t, mca_btl_uct_connection_ep_construct, mca_btl_uct_connection_ep_destruct); +struct mca_btl_uct_conn_completion_t { + uct_completion_t super; + volatile bool complete; +}; +typedef struct mca_btl_uct_conn_completion_t mca_btl_uct_conn_completion_t; + +static void mca_btl_uct_endpoint_flush_complete (uct_completion_t *self, ucs_status_t status) +{ + mca_btl_uct_conn_completion_t *completion = (mca_btl_uct_conn_completion_t *) self; + BTL_VERBOSE(("connection flush complete")); + completion->complete = true; +} + static int mca_btl_uct_endpoint_send_conn_req (mca_btl_uct_module_t *uct_btl, mca_btl_base_endpoint_t *endpoint, mca_btl_uct_device_context_t *conn_tl_context, mca_btl_uct_conn_req_t *request, size_t request_length) { mca_btl_uct_connection_ep_t *conn_ep = endpoint->conn_ep; + mca_btl_uct_conn_completion_t completion = {.super = {.count = 1, .func = mca_btl_uct_endpoint_flush_complete}, + .complete = false}; ucs_status_t ucs_status; BTL_VERBOSE(("sending connection request to peer. context id: %d, type: %d, length: %" PRIsize_t, @@ -170,10 +187,18 @@ static int mca_btl_uct_endpoint_send_conn_req (mca_btl_uct_module_t *uct_btl, mc } while (1); /* for now we just wait for the connection request to complete before continuing */ - do { - ucs_status = uct_ep_flush (conn_ep->uct_ep, 0, NULL); - mca_btl_uct_context_progress (conn_tl_context); - } while (UCS_INPROGRESS == ucs_status); + ucs_status = uct_ep_flush (conn_ep->uct_ep, 0, &completion.super); + if (UCS_OK != ucs_status && UCS_INPROGRESS != ucs_status) { + /* NTH: I don't know if this path is needed. For some networks we must use a completion. 
*/ + do { + ucs_status = uct_ep_flush (conn_ep->uct_ep, 0, NULL); + mca_btl_uct_context_progress (conn_tl_context); + } while (UCS_INPROGRESS == ucs_status); + } else { + do { + mca_btl_uct_context_progress (conn_tl_context); + } while (!completion.complete); + } opal_mutex_lock (&endpoint->ep_lock); @@ -284,8 +309,8 @@ int mca_btl_uct_endpoint_connect (mca_btl_uct_module_t *uct_btl, mca_btl_uct_end void *ep_addr, int tl_index) { mca_btl_uct_tl_endpoint_t *tl_endpoint = endpoint->uct_eps[context_id] + tl_index; - mca_btl_uct_device_context_t *tl_context = mca_btl_uct_module_get_rdma_context_specific (uct_btl, context_id); mca_btl_uct_tl_t *tl = (tl_index == uct_btl->rdma_tl->tl_index) ? uct_btl->rdma_tl : uct_btl->am_tl; + mca_btl_uct_device_context_t *tl_context = mca_btl_uct_module_get_tl_context_specific (uct_btl, tl, context_id); uint8_t *rdma_tl_data = NULL, *conn_tl_data = NULL, *am_tl_data = NULL, *tl_data; mca_btl_uct_connection_ep_t *conn_ep = NULL; mca_btl_uct_modex_t *modex; diff --git a/opal/mca/btl/uct/btl_uct_tl.c b/opal/mca/btl/uct/btl_uct_tl.c index 0c8f27d923..9c70124d66 100644 --- a/opal/mca/btl/uct/btl_uct_tl.c +++ b/opal/mca/btl/uct/btl_uct_tl.c @@ -4,6 +4,8 @@ * reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -26,7 +28,7 @@ * @brief Convert UCT capabilities to BTL flags */ static uint64_t mca_btl_uct_cap_to_btl_flag[][2] = { - {UCT_IFACE_FLAG_AM_ZCOPY, MCA_BTL_FLAGS_SEND}, + {UCT_IFACE_FLAG_AM_SHORT, MCA_BTL_FLAGS_SEND}, {UCT_IFACE_FLAG_PUT_ZCOPY, MCA_BTL_FLAGS_PUT}, {UCT_IFACE_FLAG_GET_ZCOPY, MCA_BTL_FLAGS_GET}, {0,0}, diff --git a/opal/mca/btl/uct/configure.m4 b/opal/mca/btl/uct/configure.m4 index eae8c6abfc..cdee689f8e 100644 --- a/opal/mca/btl/uct/configure.m4 +++ b/opal/mca/btl/uct/configure.m4 @@ -14,6 +14,8 @@ # Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011-2018 Los Alamos National Security, LLC. # All rights reserved. +# Copyright (c) 2018 Research Organization for Information Science +# and Technology (RIST). All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -39,7 +41,7 @@ AC_DEFUN([MCA_opal_btl_uct_CONFIG],[ CPPFLAGS_save="$CPPFLAGS" CPPFLAGS="$CPPFLAGS $btl_uct_CPPFLAGS" - AC_CHECK_DECLS([UCT_PROGRESS_THREAD_SAFE UCT_CB_FLAG_SYNC], [], [], [[#include <uct/api/uct.h>]]) + AC_CHECK_DECLS([UCT_PROGRESS_THREAD_SAFE, UCT_CB_FLAG_SYNC], [], [], [[#include <uct/api/uct.h>]]) CPPFLAGS="$CPPFLAGS_save" OPAL_VAR_SCOPE_POP