1
1
openmpi/opal/mca/btl/uct/btl_uct_amo.c
Nathan Hjelm 39be6ec15c btl/uct: bug fixes and general improvements
This commit updates the uct btl to change the transports parameter
into a priority list. The dc_mlx5, rc_mlx5, and ud transports are added to the
priority list. This will give better out-of-the-box performance for
multi-threaded codes because the *_mlx5 transports can avoid the mlx5
lock inside libmlx5_rdmav2.

This commit also fixes a number of leaks and a possible deadlock when
using RDMA.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
2018-10-09 15:15:45 -06:00

191 строка
7.5 KiB
C

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "btl_uct_device_context.h"
#if OPAL_HAVE_UCT_EP_ATOMIC64_POST
/* Translation table from BTL atomic operations to UCT atomic operations.
 *
 * Each entry stores the UCT op value plus 1 so that a zero entry means
 * "unsupported": UCT_ATOMIC_OP_ADD == 0, so without the offset we would have
 * to fill in this table completely. Readers must subtract 1 after checking
 * for zero. The table is never modified, hence const. */
static const int mca_btl_uct_btl_to_uct_atomic[MCA_BTL_ATOMIC_LAST] = {
    [MCA_BTL_ATOMIC_ADD] = UCT_ATOMIC_OP_ADD + 1,
    [MCA_BTL_ATOMIC_AND] = UCT_ATOMIC_OP_AND + 1,
    [MCA_BTL_ATOMIC_OR] = UCT_ATOMIC_OP_OR + 1,
    [MCA_BTL_ATOMIC_XOR] = UCT_ATOMIC_OP_XOR + 1,
    [MCA_BTL_ATOMIC_SWAP] = UCT_ATOMIC_OP_SWAP + 1,
};
#endif
/**
 * Atomic fetch-and-op on a remote address, issued through UCT.
 *
 * @param btl            BTL module (a mca_btl_uct_module_t)
 * @param endpoint       endpoint of the target peer
 * @param local_address  where the fetched value is written (accessed as
 *                       uint32_t when MCA_BTL_ATOMIC_FLAG_32BIT is set,
 *                       uint64_t otherwise)
 * @param remote_address remote address to operate on
 * @param local_handle   registration handle for local_address
 * @param remote_handle  registration handle for remote_address
 * @param op             BTL atomic operation
 * @param operand        operand of the atomic operation
 * @param flags          MCA_BTL_ATOMIC_FLAG_32BIT selects the 32-bit variant
 * @param order          not used by this implementation
 * @param cbfunc         completion callback; may be NULL, in which case no
 *                       completion object is allocated
 * @param cbcontext      passed through to the completion object
 * @param cbdata         passed through to the completion object
 *
 * @return OPAL_SUCCESS if UCT reported the operation as in progress,
 *         1 if UCT reported immediate completion (the completion object is
 *         released here in that case),
 *         OPAL_ERR_BAD_PARAM if op is out of range or not supported,
 *         OPAL_ERR_OUT_OF_RESOURCE if no completion could be allocated or UCT
 *         returned any other status,
 *         or the error returned by mca_btl_uct_get_rkey().
 */
int mca_btl_uct_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                      void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
                      mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op,
                      uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
                      void *cbcontext, void *cbdata)
{
    mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
    mca_btl_uct_device_context_t *context = mca_btl_uct_module_get_rdma_context (uct_btl);
    mca_btl_uct_uct_completion_t *comp = NULL;
    ucs_status_t ucs_status;
    uct_rkey_bundle_t rkey;
    uct_ep_h ep_handle;
    int rc;

#if OPAL_HAVE_UCT_EP_ATOMIC64_POST
    int uct_op;

    /* never index the translation table with an out-of-range operation */
    if (OPAL_UNLIKELY((unsigned int) op >= (unsigned int) MCA_BTL_ATOMIC_LAST)) {
        return OPAL_ERR_BAD_PARAM;
    }

    /* table entries are offset by one; zero marks an unsupported operation */
    uct_op = mca_btl_uct_btl_to_uct_atomic[op];
    if (OPAL_UNLIKELY(0 == uct_op--)) {
        return OPAL_ERR_BAD_PARAM;
    }
#else
    /* the older UCT interface only provides fetching add and swap */
    if (OPAL_UNLIKELY(MCA_BTL_ATOMIC_ADD != op && MCA_BTL_ATOMIC_SWAP != op)) {
        return OPAL_ERR_BAD_PARAM;
    }
#endif

    if (cbfunc) {
        comp = mca_btl_uct_uct_completion_alloc (uct_btl, endpoint, local_address, local_handle, context,
                                                 cbfunc, cbcontext, cbdata);
        if (OPAL_UNLIKELY(NULL == comp)) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }
    }

    rc = mca_btl_uct_get_rkey (uct_btl, context, endpoint, remote_handle, &rkey, &ep_handle);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        mca_btl_uct_uct_completion_release (comp);
        return rc;
    }

    mca_btl_uct_context_lock (context);

    /* NTH: comp is NULL when no callback was requested. Pass NULL to UCT
     * explicitly in that case instead of forming &comp->uct_comp from a null
     * pointer, which is only NULL if uct_comp happens to be the first member. */
#if OPAL_HAVE_UCT_EP_ATOMIC64_POST
    if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
        ucs_status = uct_ep_atomic32_fetch (ep_handle, uct_op, operand, (uint32_t *) local_address, remote_address,
                                            rkey.rkey, comp ? &comp->uct_comp : NULL);
    } else {
        ucs_status = uct_ep_atomic64_fetch (ep_handle, uct_op, operand, (uint64_t *) local_address, remote_address,
                                            rkey.rkey, comp ? &comp->uct_comp : NULL);
    }
#else
    if (MCA_BTL_ATOMIC_ADD == op) {
        if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
            ucs_status = uct_ep_atomic_fadd32 (ep_handle, (uint32_t) operand, remote_address,
                                               rkey.rkey, (uint32_t *) local_address,
                                               comp ? &comp->uct_comp : NULL);
        } else {
            ucs_status = uct_ep_atomic_fadd64 (ep_handle, operand, remote_address, rkey.rkey,
                                               (uint64_t *) local_address,
                                               comp ? &comp->uct_comp : NULL);
        }
    } else {
        if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
            ucs_status = uct_ep_atomic_swap32 (ep_handle, (uint32_t) operand, remote_address,
                                               rkey.rkey, (uint32_t *) local_address,
                                               comp ? &comp->uct_comp : NULL);
        } else {
            ucs_status = uct_ep_atomic_swap64 (ep_handle, operand, remote_address, rkey.rkey,
                                               (uint64_t *) local_address,
                                               comp ? &comp->uct_comp : NULL);
        }
    }
#endif

    /* go ahead and progress the worker while we have the lock */
    (void) uct_worker_progress (context->uct_worker);

    mca_btl_uct_context_unlock (context);

    mca_btl_uct_device_handle_completions (context);

    if (UCS_INPROGRESS == ucs_status) {
        /* the completion object (if any) now belongs to the pending operation */
        rc = OPAL_SUCCESS;
    } else if (UCS_OK == ucs_status) {
        /* completed immediately: report it with a positive return value */
        rc = 1;
        mca_btl_uct_uct_completion_release (comp);
    } else {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        mca_btl_uct_uct_completion_release (comp);
    }

    uct_rkey_release (&rkey);

    return rc;
}
/**
 * Non-fetching atomic operation on a remote address.
 *
 * Implemented by forwarding to mca_btl_uct_afop() with a throw-away buffer
 * for the fetched value and no local registration handle. The return value
 * is whatever mca_btl_uct_afop() returns.
 */
int mca_btl_uct_aop (struct mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
                     uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle,
                     mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order,
                     mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
    /* Scratch destination for the fetched value. It has static storage so the
     * address stays valid after this function returns; its contents are
     * never read. */
    static uint64_t discarded_value;
    int status;

    /* Only the fetching operations are used for now. A non-fetching variant
     * might be faster on some platforms, but this was quicker to implement
     * and it guarantees remote completion. */
    status = mca_btl_uct_afop (btl, endpoint, &discarded_value, remote_address, NULL, remote_handle,
                               op, operand, flags, order, cbfunc, cbcontext, cbdata);

    return status;
}
/**
 * Atomic compare-and-swap on a remote address, issued through UCT.
 *
 * @param btl            BTL module (a mca_btl_uct_module_t)
 * @param endpoint       endpoint of the target peer
 * @param local_address  where the fetched value is written (accessed as
 *                       uint32_t when MCA_BTL_ATOMIC_FLAG_32BIT is set,
 *                       uint64_t otherwise)
 * @param remote_address remote address to operate on
 * @param local_handle   registration handle for local_address
 * @param remote_handle  registration handle for remote_address
 * @param compare        value to compare against (truncated to 32 bits when
 *                       MCA_BTL_ATOMIC_FLAG_32BIT is set)
 * @param value          value to swap in (truncated to 32 bits when
 *                       MCA_BTL_ATOMIC_FLAG_32BIT is set)
 * @param flags          MCA_BTL_ATOMIC_FLAG_32BIT selects the 32-bit variant
 * @param order          not used by this implementation
 * @param cbfunc         completion callback; may be NULL, in which case no
 *                       completion object is allocated
 * @param cbcontext      passed through to the completion object
 * @param cbdata         passed through to the completion object
 *
 * @return OPAL_SUCCESS if UCT reported the operation as in progress,
 *         1 if UCT reported immediate completion (the completion object is
 *         released here in that case),
 *         OPAL_ERR_OUT_OF_RESOURCE if no completion could be allocated or UCT
 *         returned any other status,
 *         or the error returned by mca_btl_uct_get_rkey().
 */
int mca_btl_uct_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                        void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
                        mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value, int flags,
                        int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
    mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
    mca_btl_uct_device_context_t *context = mca_btl_uct_module_get_rdma_context (uct_btl);
    mca_btl_uct_uct_completion_t *comp = NULL;
    ucs_status_t ucs_status;
    uct_rkey_bundle_t rkey;
    uct_ep_h ep_handle;
    int rc;

    if (cbfunc) {
        comp = mca_btl_uct_uct_completion_alloc (uct_btl, endpoint, local_address, local_handle, context,
                                                 cbfunc, cbcontext, cbdata);
        if (OPAL_UNLIKELY(NULL == comp)) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }
    }

    rc = mca_btl_uct_get_rkey (uct_btl, context, endpoint, remote_handle, &rkey, &ep_handle);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        mca_btl_uct_uct_completion_release (comp);
        return rc;
    }

    mca_btl_uct_context_lock (context);

    /* comp is NULL when no callback was requested. Pass NULL to UCT
     * explicitly in that case instead of forming &comp->uct_comp from a null
     * pointer, which is only NULL if uct_comp happens to be the first member. */
    if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
        ucs_status = uct_ep_atomic_cswap32 (ep_handle, (uint32_t) compare, (uint32_t) value, remote_address,
                                            rkey.rkey, (uint32_t *) local_address,
                                            comp ? &comp->uct_comp : NULL);
    } else {
        ucs_status = uct_ep_atomic_cswap64 (ep_handle, compare, value, remote_address, rkey.rkey,
                                            (uint64_t *) local_address,
                                            comp ? &comp->uct_comp : NULL);
    }

    /* go ahead and progress the worker while we have the lock */
    (void) uct_worker_progress (context->uct_worker);

    mca_btl_uct_context_unlock (context);

    mca_btl_uct_device_handle_completions (context);

    if (UCS_INPROGRESS == ucs_status) {
        /* the completion object (if any) now belongs to the pending operation */
        rc = OPAL_SUCCESS;
    } else if (UCS_OK == ucs_status) {
        /* completed immediately: report it with a positive return value */
        rc = 1;
        mca_btl_uct_uct_completion_release (comp);
    } else {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        mca_btl_uct_uct_completion_release (comp);
    }

    uct_rkey_release (&rkey);

    return rc;
}