Merge pull request #6160 from hjelmn/v4.0.x_btl_uct_fix_usage_when_on_ugni_even_though_we_really_dont_want_to_support_that_usage
v4.0.x: fix btl/uct usage on Cray hardware
Этот коммит содержится в:
Коммит
7ad025e92b
@@ -14,6 +14,9 @@
|
|||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2018 Research Organization for Information Science
|
* Copyright (c) 2018 Research Organization for Information Science
|
||||||
* and Technology (RIST). All rights reserved.
|
* and Technology (RIST). All rights reserved.
|
||||||
|
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
|
||||||
|
* Copyright (c) 2018 Triad National Security, LLC. All rights
|
||||||
|
* reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@@ -50,7 +53,7 @@ static int mca_btl_uct_component_register(void)
|
|||||||
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
|
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
|
||||||
&mca_btl_uct_component.memory_domains);
|
&mca_btl_uct_component.memory_domains);
|
||||||
|
|
||||||
mca_btl_uct_component.allowed_transports = "dc_mlx5,rc_mlx5,ud,any";
|
mca_btl_uct_component.allowed_transports = "dc_mlx5,rc_mlx5,ud,ugni_rdma,ugni_smsg,any";
|
||||||
(void) mca_base_component_var_register(&mca_btl_uct_component.super.btl_version,
|
(void) mca_base_component_var_register(&mca_btl_uct_component.super.btl_version,
|
||||||
"transports", "Comma-delimited list of transports to use sorted by increasing "
|
"transports", "Comma-delimited list of transports to use sorted by increasing "
|
||||||
"priority. The list of transports available can be queried using ucx_info. Special"
|
"priority. The list of transports available can be queried using ucx_info. Special"
|
||||||
|
@@ -2,6 +2,8 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2018 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2018 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
|
* Copyright (c) 2018 Triad National Security, LLC. All rights
|
||||||
|
* reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@@ -137,11 +139,26 @@ static void mca_btl_uct_connection_ep_destruct (mca_btl_uct_connection_ep_t *ep)
|
|||||||
OBJ_CLASS_INSTANCE(mca_btl_uct_connection_ep_t, opal_object_t, mca_btl_uct_connection_ep_construct,
|
OBJ_CLASS_INSTANCE(mca_btl_uct_connection_ep_t, opal_object_t, mca_btl_uct_connection_ep_construct,
|
||||||
mca_btl_uct_connection_ep_destruct);
|
mca_btl_uct_connection_ep_destruct);
|
||||||
|
|
||||||
|
struct mca_btl_uct_conn_completion_t {
|
||||||
|
uct_completion_t super;
|
||||||
|
volatile bool complete;
|
||||||
|
};
|
||||||
|
typedef struct mca_btl_uct_conn_completion_t mca_btl_uct_conn_completion_t;
|
||||||
|
|
||||||
|
static void mca_btl_uct_endpoint_flush_complete (uct_completion_t *self, ucs_status_t status)
|
||||||
|
{
|
||||||
|
mca_btl_uct_conn_completion_t *completion = (mca_btl_uct_conn_completion_t *) self;
|
||||||
|
BTL_VERBOSE(("connection flush complete"));
|
||||||
|
completion->complete = true;
|
||||||
|
}
|
||||||
|
|
||||||
static int mca_btl_uct_endpoint_send_conn_req (mca_btl_uct_module_t *uct_btl, mca_btl_base_endpoint_t *endpoint,
|
static int mca_btl_uct_endpoint_send_conn_req (mca_btl_uct_module_t *uct_btl, mca_btl_base_endpoint_t *endpoint,
|
||||||
mca_btl_uct_device_context_t *conn_tl_context,
|
mca_btl_uct_device_context_t *conn_tl_context,
|
||||||
mca_btl_uct_conn_req_t *request, size_t request_length)
|
mca_btl_uct_conn_req_t *request, size_t request_length)
|
||||||
{
|
{
|
||||||
mca_btl_uct_connection_ep_t *conn_ep = endpoint->conn_ep;
|
mca_btl_uct_connection_ep_t *conn_ep = endpoint->conn_ep;
|
||||||
|
mca_btl_uct_conn_completion_t completion = {.super = {.count = 1, .func = mca_btl_uct_endpoint_flush_complete},
|
||||||
|
.complete = false};
|
||||||
ucs_status_t ucs_status;
|
ucs_status_t ucs_status;
|
||||||
|
|
||||||
BTL_VERBOSE(("sending connection request to peer. context id: %d, type: %d, length: %" PRIsize_t,
|
BTL_VERBOSE(("sending connection request to peer. context id: %d, type: %d, length: %" PRIsize_t,
|
||||||
@@ -170,10 +187,18 @@ static int mca_btl_uct_endpoint_send_conn_req (mca_btl_uct_module_t *uct_btl, mc
|
|||||||
} while (1);
|
} while (1);
|
||||||
|
|
||||||
/* for now we just wait for the connection request to complete before continuing */
|
/* for now we just wait for the connection request to complete before continuing */
|
||||||
do {
|
ucs_status = uct_ep_flush (conn_ep->uct_ep, 0, &completion.super);
|
||||||
ucs_status = uct_ep_flush (conn_ep->uct_ep, 0, NULL);
|
if (UCS_OK != ucs_status && UCS_INPROGRESS != ucs_status) {
|
||||||
mca_btl_uct_context_progress (conn_tl_context);
|
/* NTH: I don't know if this path is needed. For some networks we must use a completion. */
|
||||||
} while (UCS_INPROGRESS == ucs_status);
|
do {
|
||||||
|
ucs_status = uct_ep_flush (conn_ep->uct_ep, 0, NULL);
|
||||||
|
mca_btl_uct_context_progress (conn_tl_context);
|
||||||
|
} while (UCS_INPROGRESS == ucs_status);
|
||||||
|
} else {
|
||||||
|
do {
|
||||||
|
mca_btl_uct_context_progress (conn_tl_context);
|
||||||
|
} while (!completion.complete);
|
||||||
|
}
|
||||||
|
|
||||||
opal_mutex_lock (&endpoint->ep_lock);
|
opal_mutex_lock (&endpoint->ep_lock);
|
||||||
|
|
||||||
@@ -284,8 +309,8 @@ int mca_btl_uct_endpoint_connect (mca_btl_uct_module_t *uct_btl, mca_btl_uct_end
|
|||||||
void *ep_addr, int tl_index)
|
void *ep_addr, int tl_index)
|
||||||
{
|
{
|
||||||
mca_btl_uct_tl_endpoint_t *tl_endpoint = endpoint->uct_eps[context_id] + tl_index;
|
mca_btl_uct_tl_endpoint_t *tl_endpoint = endpoint->uct_eps[context_id] + tl_index;
|
||||||
mca_btl_uct_device_context_t *tl_context = mca_btl_uct_module_get_rdma_context_specific (uct_btl, context_id);
|
|
||||||
mca_btl_uct_tl_t *tl = (tl_index == uct_btl->rdma_tl->tl_index) ? uct_btl->rdma_tl : uct_btl->am_tl;
|
mca_btl_uct_tl_t *tl = (tl_index == uct_btl->rdma_tl->tl_index) ? uct_btl->rdma_tl : uct_btl->am_tl;
|
||||||
|
mca_btl_uct_device_context_t *tl_context = mca_btl_uct_module_get_tl_context_specific (uct_btl, tl, context_id);
|
||||||
uint8_t *rdma_tl_data = NULL, *conn_tl_data = NULL, *am_tl_data = NULL, *tl_data;
|
uint8_t *rdma_tl_data = NULL, *conn_tl_data = NULL, *am_tl_data = NULL, *tl_data;
|
||||||
mca_btl_uct_connection_ep_t *conn_ep = NULL;
|
mca_btl_uct_connection_ep_t *conn_ep = NULL;
|
||||||
mca_btl_uct_modex_t *modex;
|
mca_btl_uct_modex_t *modex;
|
||||||
|
@@ -4,6 +4,8 @@
|
|||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2018 Research Organization for Information Science
|
* Copyright (c) 2018 Research Organization for Information Science
|
||||||
* and Technology (RIST). All rights reserved.
|
* and Technology (RIST). All rights reserved.
|
||||||
|
* Copyright (c) 2018 Triad National Security, LLC. All rights
|
||||||
|
* reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@@ -26,7 +28,7 @@
|
|||||||
* @brief Convert UCT capabilities to BTL flags
|
* @brief Convert UCT capabilities to BTL flags
|
||||||
*/
|
*/
|
||||||
static uint64_t mca_btl_uct_cap_to_btl_flag[][2] = {
|
static uint64_t mca_btl_uct_cap_to_btl_flag[][2] = {
|
||||||
{UCT_IFACE_FLAG_AM_ZCOPY, MCA_BTL_FLAGS_SEND},
|
{UCT_IFACE_FLAG_AM_SHORT, MCA_BTL_FLAGS_SEND},
|
||||||
{UCT_IFACE_FLAG_PUT_ZCOPY, MCA_BTL_FLAGS_PUT},
|
{UCT_IFACE_FLAG_PUT_ZCOPY, MCA_BTL_FLAGS_PUT},
|
||||||
{UCT_IFACE_FLAG_GET_ZCOPY, MCA_BTL_FLAGS_GET},
|
{UCT_IFACE_FLAG_GET_ZCOPY, MCA_BTL_FLAGS_GET},
|
||||||
{0,0},
|
{0,0},
|
||||||
|
@@ -14,6 +14,8 @@
|
|||||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||||
# Copyright (c) 2011-2018 Los Alamos National Security, LLC.
|
# Copyright (c) 2011-2018 Los Alamos National Security, LLC.
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
|
# Copyright (c) 2018 Research Organization for Information Science
|
||||||
|
# and Technology (RIST). All rights reserved.
|
||||||
# $COPYRIGHT$
|
# $COPYRIGHT$
|
||||||
#
|
#
|
||||||
# Additional copyrights may follow
|
# Additional copyrights may follow
|
||||||
@@ -39,7 +41,7 @@ AC_DEFUN([MCA_opal_btl_uct_CONFIG],[
|
|||||||
CPPFLAGS_save="$CPPFLAGS"
|
CPPFLAGS_save="$CPPFLAGS"
|
||||||
CPPFLAGS="$CPPFLAGS $btl_uct_CPPFLAGS"
|
CPPFLAGS="$CPPFLAGS $btl_uct_CPPFLAGS"
|
||||||
|
|
||||||
AC_CHECK_DECLS([UCT_PROGRESS_THREAD_SAFE UCT_CB_FLAG_SYNC], [], [], [[#include <uct/api/uct.h>]])
|
AC_CHECK_DECLS([UCT_PROGRESS_THREAD_SAFE, UCT_CB_FLAG_SYNC], [], [], [[#include <uct/api/uct.h>]])
|
||||||
|
|
||||||
CPPFLAGS="$CPPFLAGS_save"
|
CPPFLAGS="$CPPFLAGS_save"
|
||||||
OPAL_VAR_SCOPE_POP
|
OPAL_VAR_SCOPE_POP
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user