btl/openib: add support for mlx5 atomic operations
This commit adds support for fetch-and-add and compare-and-swap when using the mlx5 driver. The support is only enabled if the expanded verbs interface is detected. This is required because mlx5 HCAs return the atomic result in network byte order. This support may need to be tweaked if Mellanox commits their changes into upstream verbs. Closes open-mpi/ompi#1077 Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
родитель
a549db8ce2
Коммит
02a6c6856d
@ -387,6 +387,23 @@ AC_DEFUN([OPAL_CHECK_OPENFABRICS_CM],[
|
||||
fi
|
||||
])dnl
|
||||
|
||||
dnl OPAL_CHECK_EXP_VERBS(action-if-found, action-if-not-found)
dnl
dnl Probe for the verbs_exp.h interface by compiling a snippet that includes
dnl infiniband/verbs_exp.h and names struct ibv_exp_send_wr. The result is
dnl recorded as 1 or 0 in the HAVE_EXP_VERBS define. The macro also checks for
dnl the declarations IBV_EXP_ATOMIC_HCA_REPLY_BE,
dnl IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY and ibv_exp_create_qp, and for the
dnl header itself. The first argument is expanded as shell code when the
dnl final test succeeds, the second argument otherwise.
AC_DEFUN([OPAL_CHECK_EXP_VERBS],[
|
||||
OPAL_VAR_SCOPE_PUSH([have_struct_ibv_exp_send_wr])
|
||||
|
||||
AC_MSG_CHECKING([whether expanded verbs are available])
|
||||
AC_TRY_COMPILE([#include <infiniband/verbs_exp.h>], [struct ibv_exp_send_wr;],
|
||||
[have_struct_ibv_exp_send_wr=1
|
||||
AC_MSG_RESULT([yes])],
|
||||
[have_struct_ibv_exp_send_wr=0
|
||||
AC_MSG_RESULT([no])])
|
||||
|
||||
AC_DEFINE_UNQUOTED([HAVE_EXP_VERBS], [$have_struct_ibv_exp_send_wr], [Expanded verbs])
|
||||
AC_CHECK_DECLS([IBV_EXP_ATOMIC_HCA_REPLY_BE, IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY, ibv_exp_create_qp], [], [], [#include <infiniband/verbs_exp.h>])
|
||||
AC_CHECK_HEADERS([infiniband/verbs_exp.h])
|
||||
|
||||
dnl NOTE(review): the variable below is wrapped in single quotes, so the shell
dnl does not expand it and the test compares the literal text against 1. As
dnl written the comparison can never be true, so only the second action can
dnl run. Switching to double quotes would start expanding the first argument
dnl as shell code; audit what call sites pass as the first argument before
dnl making that change.
AS_IF([test '$have_struct_ibv_exp_send_wr' = 1], [$1], [$2])
|
||||
OPAL_VAR_SCOPE_POP
|
||||
])dnl
|
||||
|
||||
AC_DEFUN([OPAL_CHECK_MLNX_OPENFABRICS],[
|
||||
$1_have_mverbs=0
|
||||
$1_have_mqe=0
|
||||
|
@ -490,6 +490,8 @@ struct mca_btl_openib_module_t {
|
||||
mca_btl_openib_module_qp_t * qps;
|
||||
|
||||
int local_procs; /** number of local procs */
|
||||
|
||||
bool atomic_ops_be; /** atomic result is big endian */
|
||||
};
|
||||
typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;
|
||||
|
||||
|
@ -27,6 +27,7 @@ static int mca_btl_openib_atomic_internal (struct mca_btl_base_module_t *btl, st
|
||||
{
|
||||
mca_btl_openib_get_frag_t* frag = NULL;
|
||||
int qp = order;
|
||||
int32_t rkey;
|
||||
int rc;
|
||||
|
||||
frag = to_get_frag(alloc_recv_user_frag());
|
||||
@ -61,15 +62,16 @@ static int mca_btl_openib_atomic_internal (struct mca_btl_base_module_t *btl, st
|
||||
frag->sr_desc.wr.atomic.compare_add = operand;
|
||||
frag->sr_desc.wr.atomic.swap = operand2;
|
||||
|
||||
rkey = remote_handle->rkey;
|
||||
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
if((endpoint->endpoint_proc->proc_opal->proc_arch & OPAL_ARCH_ISBIGENDIAN)
|
||||
!= (opal_proc_local_get()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
|
||||
frag->sr_desc.wr.atomic.rkey = opal_swap_bytes4 (remote_handle->rkey);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
frag->sr_desc.wr.atomic.rkey = remote_handle->rkey;
|
||||
rkey = opal_swap_bytes4 (rkey);
|
||||
}
|
||||
#endif
|
||||
|
||||
frag->sr_desc.wr.atomic.rkey = rkey;
|
||||
|
||||
#if HAVE_XRC
|
||||
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) {
|
||||
|
@ -822,13 +822,26 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
|
||||
openib_btl->super.btl_get_local_registration_threshold = 0;
|
||||
|
||||
#if HAVE_DECL_IBV_ATOMIC_HCA
|
||||
if (openib_btl->device->ib_dev_attr.atomic_cap == IBV_ATOMIC_NONE) {
|
||||
openib_btl->atomic_ops_be = false;
|
||||
|
||||
switch (openib_btl->device->ib_dev_attr.atomic_cap) {
|
||||
case IBV_ATOMIC_GLOB:
|
||||
openib_btl->super.btl_flags |= MCA_BTL_ATOMIC_SUPPORTS_GLOB;
|
||||
break;
|
||||
#if HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE
|
||||
case IBV_EXP_ATOMIC_HCA_REPLY_BE:
|
||||
openib_btl->atomic_ops_be = true;
|
||||
break;
|
||||
#endif
|
||||
case IBV_ATOMIC_HCA:
|
||||
break;
|
||||
case IBV_ATOMIC_NONE:
|
||||
default:
|
||||
/* no atomics or an unsupported atomic type */
|
||||
openib_btl->super.btl_flags &= ~MCA_BTL_FLAGS_ATOMIC_FOPS;
|
||||
openib_btl->super.btl_atomic_flags = 0;
|
||||
openib_btl->super.btl_atomic_fop = NULL;
|
||||
openib_btl->super.btl_atomic_cswap = NULL;
|
||||
} else if (IBV_ATOMIC_GLOB == openib_btl->device->ib_dev_attr.atomic_cap) {
|
||||
openib_btl->super.btl_flags |= MCA_BTL_ATOMIC_SUPPORTS_GLOB;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -3446,6 +3459,11 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
|
||||
|
||||
mca_btl_openib_get_frag_t *get_frag = to_get_frag(des);
|
||||
|
||||
/* check if atomic result needs to be byte swapped (mlx5) */
|
||||
if (openib_btl->atomic_ops_be && IBV_WC_RDMA_READ != wc->opcode) {
|
||||
*((int64_t *) frag->sg_entry.addr) = ntoh64 (*((int64_t *) frag->sg_entry.addr));
|
||||
}
|
||||
|
||||
get_frag->cb.func (&openib_btl->super, endpoint, (void *)(intptr_t) frag->sg_entry.addr,
|
||||
get_frag->cb.local_handle, get_frag->cb.context, get_frag->cb.data,
|
||||
OPAL_SUCCESS);
|
||||
|
@ -46,6 +46,7 @@ AC_DEFUN([MCA_opal_btl_openib_CONFIG],[
|
||||
[btl_openib_happy="yes"
|
||||
OPAL_CHECK_OPENFABRICS_CM([btl_openib])],
|
||||
[btl_openib_happy="no"])
|
||||
OPAL_CHECK_EXP_VERBS([btl_openib], [], [])
|
||||
|
||||
AS_IF([test "$btl_openib_happy" = "yes"],
|
||||
[# With the new openib flags, look for ibv_fork_init
|
||||
|
@ -56,6 +56,9 @@
|
||||
#include <sys/types.h>
|
||||
#include <fcntl.h>
|
||||
#include <infiniband/verbs.h>
|
||||
#ifdef HAVE_INFINIBAND_VERBS_EXP_H
|
||||
#include <infiniband/verbs_exp.h>
|
||||
#endif
|
||||
#include <signal.h>
|
||||
|
||||
#include <pthread.h>
|
||||
@ -1307,7 +1310,11 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_
|
||||
uint32_t max_send_wr)
|
||||
{
|
||||
udcm_endpoint_t *udep = UDCM_ENDPOINT_DATA(lcl_ep);
|
||||
#if HAVE_DECL_IBV_EXP_CREATE_QP
|
||||
struct ibv_exp_qp_init_attr init_attr;
|
||||
#else
|
||||
struct ibv_qp_init_attr init_attr;
|
||||
#endif
|
||||
size_t req_inline;
|
||||
int rc;
|
||||
|
||||
@ -1328,6 +1335,32 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_
|
||||
}
|
||||
init_attr.cap.max_send_wr = max_send_wr;
|
||||
|
||||
#if HAVE_DECL_IBV_EXP_CREATE_QP
|
||||
/* use expanded verbs qp create to enable use of mlx5 atomics */
|
||||
init_attr.comp_mask = IBV_EXP_QP_INIT_ATTR_PD;
|
||||
init_attr.pd = m->btl->device->ib_pd;
|
||||
|
||||
init_attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG;
|
||||
init_attr.max_atomic_arg = 8;
|
||||
|
||||
#if HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE
|
||||
if (IBV_EXP_ATOMIC_HCA_REPLY_BE == m->btl->device->ib_dev_attr.atomic_cap) {
|
||||
init_attr.exp_create_flags = IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY;
|
||||
init_attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS;
|
||||
}
|
||||
#endif
|
||||
|
||||
while (NULL == (lcl_ep->qps[qp].qp->lcl_qp = ibv_exp_create_qp (m->btl->device->ib_dev_context,
|
||||
&init_attr))) {
|
||||
/* NTH: this process may be out of registered memory. try evicting an item from
|
||||
the lru of this btl's mpool */
|
||||
if (false == mca_mpool_grdma_evict (m->btl->super.btl_mpool)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
while (NULL == (lcl_ep->qps[qp].qp->lcl_qp = ibv_create_qp(m->btl->device->ib_pd,
|
||||
&init_attr))) {
|
||||
/* NTH: this process may be out of registered memory. try evicting an item from
|
||||
@ -1337,6 +1370,8 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
if (NULL == lcl_ep->qps[qp].qp->lcl_qp) {
|
||||
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
|
||||
"ibv_create_qp failed", true, opal_process_info.nodename,
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user