1
1

btl/openib: add support for mlx5 atomic operations

This commit adds support for fetch-and-add and compare-and-swap when
using the mlx5 driver. The support is only enabled if the expanded
verbs interface is detected. That interface is required because mlx5
HCAs return the atomic result in network byte order, so the result
must be detected as such and byte-swapped on completion. This support
may need to be tweaked if Mellanox commits their changes into upstream
verbs.

Closes open-mpi/ompi#1077

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
Nathan Hjelm 2015-11-23 16:07:12 -07:00
родитель a549db8ce2
Коммит 02a6c6856d
6 изменённых файлов: 83 добавлений и 8 удалений

Просмотреть файл

@ -387,6 +387,23 @@ AC_DEFUN([OPAL_CHECK_OPENFABRICS_CM],[
fi
])dnl
dnl OPAL_CHECK_EXP_VERBS(action-if-found, action-if-not-found)
dnl
dnl Probe for the verbs_exp.h "expanded verbs" interface.
dnl
dnl The probe compiles a snippet that includes infiniband/verbs_exp.h and
dnl names struct ibv_exp_send_wr. The outcome is recorded as 1 or 0 in the
dnl shell variable have_struct_ibv_exp_send_wr and exported to C through
dnl the HAVE_EXP_VERBS define.
dnl
dnl It additionally runs the stock Autoconf checks for:
dnl   - the declarations IBV_EXP_ATOMIC_HCA_REPLY_BE,
dnl     IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY and ibv_exp_create_qp
dnl     (AC_CHECK_DECLS, which yields the matching HAVE_DECL_* defines)
dnl   - the header infiniband/verbs_exp.h
dnl     (AC_CHECK_HEADERS, which yields HAVE_INFINIBAND_VERBS_EXP_H)
dnl
dnl Finally one of the two caller-supplied actions is expanded depending
dnl on the probe result.
AC_DEFUN([OPAL_CHECK_EXP_VERBS],[
dnl Presumably limits the temporary variable to this macro -- the
dnl PUSH/POP helpers are defined elsewhere, confirm their semantics there.
OPAL_VAR_SCOPE_PUSH([have_struct_ibv_exp_send_wr])
AC_MSG_CHECKING([whether expanded verbs are available])
AC_TRY_COMPILE([#include <infiniband/verbs_exp.h>], [struct ibv_exp_send_wr;],
[have_struct_ibv_exp_send_wr=1
AC_MSG_RESULT([yes])],
[have_struct_ibv_exp_send_wr=0
AC_MSG_RESULT([no])])
AC_DEFINE_UNQUOTED([HAVE_EXP_VERBS], [$have_struct_ibv_exp_send_wr], [Expanded verbs])
AC_CHECK_DECLS([IBV_EXP_ATOMIC_HCA_REPLY_BE, IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY, ibv_exp_create_qp], [], [], [#include <infiniband/verbs_exp.h>])
AC_CHECK_HEADERS([infiniband/verbs_exp.h])
dnl NOTE(review): the variable below is wrapped in single quotes, which
dnl stop the shell from expanding it. The test therefore compares a
dnl literal string against 1 and appears to always pick the second
dnl action. Double quotes look like the intent -- confirm before changing,
dnl because callers that pass something other than shell code as the
dnl first argument would then have that text executed.
AS_IF([test '$have_struct_ibv_exp_send_wr' = 1], [$1], [$2])
OPAL_VAR_SCOPE_POP
])dnl
AC_DEFUN([OPAL_CHECK_MLNX_OPENFABRICS],[
$1_have_mverbs=0
$1_have_mqe=0

Просмотреть файл

@ -490,6 +490,8 @@ struct mca_btl_openib_module_t {
mca_btl_openib_module_qp_t * qps;
int local_procs; /** number of local procs */
bool atomic_ops_be; /** atomic result is big endian */
};
typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;

Просмотреть файл

@ -27,6 +27,7 @@ static int mca_btl_openib_atomic_internal (struct mca_btl_base_module_t *btl, st
{
mca_btl_openib_get_frag_t* frag = NULL;
int qp = order;
int32_t rkey;
int rc;
frag = to_get_frag(alloc_recv_user_frag());
@ -61,15 +62,16 @@ static int mca_btl_openib_atomic_internal (struct mca_btl_base_module_t *btl, st
frag->sr_desc.wr.atomic.compare_add = operand;
frag->sr_desc.wr.atomic.swap = operand2;
rkey = remote_handle->rkey;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if((endpoint->endpoint_proc->proc_opal->proc_arch & OPAL_ARCH_ISBIGENDIAN)
!= (opal_proc_local_get()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
frag->sr_desc.wr.atomic.rkey = opal_swap_bytes4 (remote_handle->rkey);
} else
#endif
{
frag->sr_desc.wr.atomic.rkey = remote_handle->rkey;
rkey = opal_swap_bytes4 (rkey);
}
#endif
frag->sr_desc.wr.atomic.rkey = rkey;
#if HAVE_XRC
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) {

Просмотреть файл

@ -822,13 +822,26 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
openib_btl->super.btl_get_local_registration_threshold = 0;
#if HAVE_DECL_IBV_ATOMIC_HCA
if (openib_btl->device->ib_dev_attr.atomic_cap == IBV_ATOMIC_NONE) {
openib_btl->atomic_ops_be = false;
switch (openib_btl->device->ib_dev_attr.atomic_cap) {
case IBV_ATOMIC_GLOB:
openib_btl->super.btl_flags |= MCA_BTL_ATOMIC_SUPPORTS_GLOB;
break;
#if HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE
case IBV_EXP_ATOMIC_HCA_REPLY_BE:
openib_btl->atomic_ops_be = true;
break;
#endif
case IBV_ATOMIC_HCA:
break;
case IBV_ATOMIC_NONE:
default:
/* no atomics or an unsupported atomic type */
openib_btl->super.btl_flags &= ~MCA_BTL_FLAGS_ATOMIC_FOPS;
openib_btl->super.btl_atomic_flags = 0;
openib_btl->super.btl_atomic_fop = NULL;
openib_btl->super.btl_atomic_cswap = NULL;
} else if (IBV_ATOMIC_GLOB == openib_btl->device->ib_dev_attr.atomic_cap) {
openib_btl->super.btl_flags |= MCA_BTL_ATOMIC_SUPPORTS_GLOB;
}
#endif
@ -3446,6 +3459,11 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
mca_btl_openib_get_frag_t *get_frag = to_get_frag(des);
/* check if atomic result needs to be byte swapped (mlx5) */
if (openib_btl->atomic_ops_be && IBV_WC_RDMA_READ != wc->opcode) {
*((int64_t *) frag->sg_entry.addr) = ntoh64 (*((int64_t *) frag->sg_entry.addr));
}
get_frag->cb.func (&openib_btl->super, endpoint, (void *)(intptr_t) frag->sg_entry.addr,
get_frag->cb.local_handle, get_frag->cb.context, get_frag->cb.data,
OPAL_SUCCESS);

Просмотреть файл

@ -46,6 +46,7 @@ AC_DEFUN([MCA_opal_btl_openib_CONFIG],[
[btl_openib_happy="yes"
OPAL_CHECK_OPENFABRICS_CM([btl_openib])],
[btl_openib_happy="no"])
OPAL_CHECK_EXP_VERBS([btl_openib], [], [])
AS_IF([test "$btl_openib_happy" = "yes"],
[# With the new openib flags, look for ibv_fork_init

Просмотреть файл

@ -56,6 +56,9 @@
#include <sys/types.h>
#include <fcntl.h>
#include <infiniband/verbs.h>
#ifdef HAVE_INFINIBAND_VERBS_EXP_H
#include <infiniband/verbs_exp.h>
#endif
#include <signal.h>
#include <pthread.h>
@ -1307,7 +1310,11 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_
uint32_t max_send_wr)
{
udcm_endpoint_t *udep = UDCM_ENDPOINT_DATA(lcl_ep);
#if HAVE_DECL_IBV_EXP_CREATE_QP
struct ibv_exp_qp_init_attr init_attr;
#else
struct ibv_qp_init_attr init_attr;
#endif
size_t req_inline;
int rc;
@ -1328,6 +1335,32 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_
}
init_attr.cap.max_send_wr = max_send_wr;
#if HAVE_DECL_IBV_EXP_CREATE_QP
/* use expanded verbs qp create to enable use of mlx5 atomics */
init_attr.comp_mask = IBV_EXP_QP_INIT_ATTR_PD;
init_attr.pd = m->btl->device->ib_pd;
init_attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG;
init_attr.max_atomic_arg = 8;
#if HAVE_DECL_IBV_EXP_ATOMIC_HCA_REPLY_BE
if (IBV_EXP_ATOMIC_HCA_REPLY_BE == m->btl->device->ib_dev_attr.atomic_cap) {
init_attr.exp_create_flags = IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY;
init_attr.comp_mask |= IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS;
}
#endif
while (NULL == (lcl_ep->qps[qp].qp->lcl_qp = ibv_exp_create_qp (m->btl->device->ib_dev_context,
&init_attr))) {
/* NTH: this process may be out of registered memory. try evicting an item from
the lru of this btl's mpool */
if (false == mca_mpool_grdma_evict (m->btl->super.btl_mpool)) {
break;
}
}
#else
while (NULL == (lcl_ep->qps[qp].qp->lcl_qp = ibv_create_qp(m->btl->device->ib_pd,
&init_attr))) {
/* NTH: this process may be out of registered memory. try evicting an item from
@ -1337,6 +1370,8 @@ static int udcm_rc_qp_create_one(udcm_module_t *m, mca_btl_base_endpoint_t* lcl_
}
}
#endif
if (NULL == lcl_ep->qps[qp].qp->lcl_qp) {
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
"ibv_create_qp failed", true, opal_process_info.nodename,