1
1

btl/uct: fix some issues when using UCX over ugni

Though it is not a recommended configuration, it is possible to use Open MPI
over UCX over uGNI. This configuration had some issues related to
connection management and transport layer (tl) selection. This commit fixes
those issues.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
Nathan Hjelm 2018-12-05 16:30:54 -07:00
родитель fccb3e7514
Коммит e07a64c52d
3 изменённых файлов: 36 добавлений и 7 удалений

Просмотреть файл

@ -15,6 +15,8 @@
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
* Copyright (c) 2018 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -53,7 +55,7 @@ static int mca_btl_uct_component_register(void)
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_uct_component.memory_domains);
mca_btl_uct_component.allowed_transports = "dc_mlx5,rc_mlx5,ud,any";
mca_btl_uct_component.allowed_transports = "dc_mlx5,rc_mlx5,ud,ugni_rdma,ugni_smsg,any";
(void) mca_base_component_var_register(&mca_btl_uct_component.super.btl_version,
"transports", "Comma-delimited list of transports to use sorted by increasing "
"priority. The list of transports available can be queried using ucx_info. Special"

Просмотреть файл

@ -2,6 +2,8 @@
/*
* Copyright (c) 2018 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2018 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -137,11 +139,26 @@ static void mca_btl_uct_connection_ep_destruct (mca_btl_uct_connection_ep_t *ep)
OBJ_CLASS_INSTANCE(mca_btl_uct_connection_ep_t, opal_object_t, mca_btl_uct_connection_ep_construct,
mca_btl_uct_connection_ep_destruct);
/**
 * Completion wrapper used while flushing a connection endpoint.
 *
 * A pointer to the embedded super member is what gets handed to
 * uct_ep_flush(); the flush callback casts that pointer back to this type.
 */
typedef struct mca_btl_uct_conn_completion_t {
    /** UCT completion; must stay the first member so the callback's cast is valid */
    uct_completion_t super;
    /** set to true by the flush callback; polled by the code waiting on the flush */
    volatile bool complete;
} mca_btl_uct_conn_completion_t;
/**
 * Completion callback registered for the connection-request flush.
 *
 * @param self    completion embedded as the first member of a
 *                mca_btl_uct_conn_completion_t
 * @param status  status passed by the caller of the callback (not examined here)
 */
static void mca_btl_uct_endpoint_flush_complete (uct_completion_t *self, ucs_status_t status)
{
    BTL_VERBOSE(("connection flush complete"));

    /* self points at the super member, which sits at the start of the
     * wrapper, so the cast recovers the enclosing completion object */
    ((mca_btl_uct_conn_completion_t *) self)->complete = true;
}
static int mca_btl_uct_endpoint_send_conn_req (mca_btl_uct_module_t *uct_btl, mca_btl_base_endpoint_t *endpoint,
mca_btl_uct_device_context_t *conn_tl_context,
mca_btl_uct_conn_req_t *request, size_t request_length)
{
mca_btl_uct_connection_ep_t *conn_ep = endpoint->conn_ep;
mca_btl_uct_conn_completion_t completion = {.super = {.count = 1, .func = mca_btl_uct_endpoint_flush_complete},
.complete = false};
ucs_status_t ucs_status;
BTL_VERBOSE(("sending connection request to peer. context id: %d, type: %d, length: %" PRIsize_t,
@ -170,10 +187,18 @@ static int mca_btl_uct_endpoint_send_conn_req (mca_btl_uct_module_t *uct_btl, mc
} while (1);
/* for now we just wait for the connection request to complete before continuing */
do {
ucs_status = uct_ep_flush (conn_ep->uct_ep, 0, NULL);
mca_btl_uct_context_progress (conn_tl_context);
} while (UCS_INPROGRESS == ucs_status);
ucs_status = uct_ep_flush (conn_ep->uct_ep, 0, &completion.super);
if (UCS_OK != ucs_status && UCS_INPROGRESS != ucs_status) {
/* NTH: I don't know if this path is needed. For some networks we must use a completion. */
do {
ucs_status = uct_ep_flush (conn_ep->uct_ep, 0, NULL);
mca_btl_uct_context_progress (conn_tl_context);
} while (UCS_INPROGRESS == ucs_status);
} else {
do {
mca_btl_uct_context_progress (conn_tl_context);
} while (!completion.complete);
}
opal_mutex_lock (&endpoint->ep_lock);
@ -284,8 +309,8 @@ int mca_btl_uct_endpoint_connect (mca_btl_uct_module_t *uct_btl, mca_btl_uct_end
void *ep_addr, int tl_index)
{
mca_btl_uct_tl_endpoint_t *tl_endpoint = endpoint->uct_eps[context_id] + tl_index;
mca_btl_uct_device_context_t *tl_context = mca_btl_uct_module_get_rdma_context_specific (uct_btl, context_id);
mca_btl_uct_tl_t *tl = (tl_index == uct_btl->rdma_tl->tl_index) ? uct_btl->rdma_tl : uct_btl->am_tl;
mca_btl_uct_device_context_t *tl_context = mca_btl_uct_module_get_tl_context_specific (uct_btl, tl, context_id);
uint8_t *rdma_tl_data = NULL, *conn_tl_data = NULL, *am_tl_data = NULL, *tl_data;
mca_btl_uct_connection_ep_t *conn_ep = NULL;
mca_btl_uct_modex_t *modex;

Просмотреть файл

@ -4,6 +4,8 @@
* reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -26,7 +28,7 @@
* @brief Convert UCT capabilities to BTL flags
*/
static uint64_t mca_btl_uct_cap_to_btl_flag[][2] = {
{UCT_IFACE_FLAG_AM_ZCOPY, MCA_BTL_FLAGS_SEND},
{UCT_IFACE_FLAG_AM_SHORT, MCA_BTL_FLAGS_SEND},
{UCT_IFACE_FLAG_PUT_ZCOPY, MCA_BTL_FLAGS_PUT},
{UCT_IFACE_FLAG_GET_ZCOPY, MCA_BTL_FLAGS_GET},
{0,0},