1
1

Merge pull request #313 from ggouaillardet/topic/OFED_3_12

btl/openib: add XRC support with OFED 3.12+
Этот коммит содержится в:
Jeff Squyres 2015-01-06 11:33:19 -05:00
родитель 3d5a1bfb7b b3617e736e
Коммит e77838973d
12 изменённых файлов: 305 добавлений и 10 удалений

Просмотреть файл

@ -16,6 +16,9 @@
# Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved.
# Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
# Copyright (c) 2014 Bull SAS. All rights reserved.
# Copyright (c) 2014-2015 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -148,6 +151,7 @@ AC_DEFUN([OPAL_CHECK_OPENFABRICS],[
# Set these up so that we can do an AC_DEFINE below
# (unconditionally)
$1_have_xrc=0
$1_have_xrcd=0
$1_have_opensm_devel=0
# If we have the openib stuff available, find out what we've got
@ -161,9 +165,14 @@ AC_DEFUN([OPAL_CHECK_OPENFABRICS],[
[#include <infiniband/verbs.h>])
# ibv_create_xrc_rcv_qp was added in OFED 1.3
# ibv_cmd_open_xrcd (aka XRC Domains) was added in OFED 3.12
if test "$enable_connectx_xrc" = "yes"; then
AC_CHECK_FUNCS([ibv_create_xrc_rcv_qp], [$1_have_xrc=1])
AC_CHECK_FUNCS([ibv_create_xrc_rcv_qp ibv_cmd_open_xrcd], [$1_have_xrc=1])
fi
if test "$enable_connectx_xrc" = "yes"; then
AC_CHECK_FUNCS([ibv_cmd_open_xrcd], [$1_have_xrcd=1])
fi
if test "no" != "$enable_openib_dynamic_sl"; then
# We need ib_types.h file, which is installed with opensm-devel
@ -228,6 +237,15 @@ AC_DEFUN([OPAL_CHECK_OPENFABRICS],[
AC_MSG_RESULT([no])
fi
AC_MSG_CHECKING([if ConnectIB XRC support is enabled])
AC_DEFINE_UNQUOTED([OPAL_HAVE_XRCD], [$$1_have_xrcd],
[Enable features required for XRC domains support])
if test "1" = "$$1_have_xrcd"; then
AC_MSG_RESULT([yes])
else
AC_MSG_RESULT([no])
fi
AC_MSG_CHECKING([if dynamic SL is enabled])
AC_DEFINE_UNQUOTED([OPAL_ENABLE_DYNAMIC_SL], [$$1_have_opensm_devel],
[Enable features required for dynamic SL support])

Просмотреть файл

@ -19,8 +19,9 @@
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Bull SAS. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -324,10 +325,26 @@ static int create_srq(mca_btl_openib_module_t *openib_btl)
openib_btl->qps[qp].u.srq_qp.rd_posted = 0;
#if HAVE_XRC
if(BTL_OPENIB_QP_TYPE_XRC(qp)) {
#if OPAL_HAVE_XRCD
struct ibv_srq_init_attr_ex attr_ex;
memset(&attr_ex, 0, sizeof(struct ibv_srq_init_attr_ex));
attr_ex.attr.max_wr = attr.attr.max_wr;
attr_ex.attr.max_sge = attr.attr.max_sge;
attr_ex.comp_mask = IBV_SRQ_INIT_ATTR_TYPE | IBV_SRQ_INIT_ATTR_XRCD |
IBV_SRQ_INIT_ATTR_CQ | IBV_SRQ_INIT_ATTR_PD;
attr_ex.srq_type = IBV_SRQT_XRC;
attr_ex.xrcd = openib_btl->device->xrcd;
attr_ex.cq = openib_btl->device->ib_cq[qp_cq_prio(qp)];
attr_ex.pd = openib_btl->device->ib_pd;
openib_btl->qps[qp].u.srq_qp.srq =
ibv_create_srq_ex(openib_btl->device->ib_dev_context, &attr_ex);
#else
openib_btl->qps[qp].u.srq_qp.srq =
ibv_create_xrc_srq(openib_btl->device->ib_pd,
openib_btl->device->xrc_domain,
openib_btl->device->ib_cq[qp_cq_prio(qp)], &attr);
#endif
} else
#endif
{
@ -1946,14 +1963,20 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
to_com_frag(frag)->endpoint = ep;
#if HAVE_XRC
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp))
#if OPAL_HAVE_XRCD
frag->sr_desc.qp_type.xrc.remote_srqn=ep->rem_info.rem_srqs[qp].rem_srq_num;
#else
frag->sr_desc.xrc_remote_srq_num=ep->rem_info.rem_srqs[qp].rem_srq_num;
#endif
#endif
descriptor->order = qp;
/* Setting opcode on a frag constructor isn't enough since prepare_src
* may return send_frag instead of put_frag */
frag->sr_desc.opcode = IBV_WR_RDMA_WRITE;
frag->sr_desc.send_flags = ib_send_flags(src_seg->base.seg_len, &(ep->qps[qp]), 1);
frag->sr_desc.send_flags = ib_send_flags(descriptor->des_local->seg_len, &(ep->qps[qp]), 1);
qp_inflight_wqe_to_frag(ep, qp, to_com_frag(frag));
qp_reset_signal_count(ep, qp);
qp_inflight_wqe_to_frag(ep, qp, to_com_frag(frag));
qp_reset_signal_count(ep, qp);
@ -2033,7 +2056,11 @@ int mca_btl_openib_get(mca_btl_base_module_t* btl,
#if HAVE_XRC
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp))
#if OPAL_HAVE_XRCD
frag->sr_desc.qp_type.xrc.remote_srqn=ep->rem_info.rem_srqs[qp].rem_srq_num;
#else
frag->sr_desc.xrc_remote_srq_num=ep->rem_info.rem_srqs[qp].rem_srq_num;
#endif
#endif
descriptor->order = qp;

Просмотреть файл

@ -17,6 +17,9 @@
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013-2014 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014 Bull SAS. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -398,7 +401,11 @@ typedef struct mca_btl_openib_device_t {
volatile bool got_port_event;
#endif
#if HAVE_XRC
#if OPAL_HAVE_XRCD
struct ibv_xrcd *xrcd;
#else
struct ibv_xrc_domain *xrc_domain;
#endif
int xrc_fd;
#endif
int32_t non_eager_rdma_endpoints;

Просмотреть файл

@ -7,6 +7,9 @@
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Bull SAS. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -128,7 +131,11 @@ static mca_btl_openib_endpoint_t * xrc_qp2endpoint(uint32_t qp_num, mca_btl_open
int ep_i;
for(ep_i = 0; ep_i < opal_pointer_array_get_size(device->endpoints); ep_i++) {
ep = opal_pointer_array_get_item(device->endpoints, ep_i);
#if OPAL_HAVE_XRCD
if (qp_num == ep->xrc_recv_qp->qp_num)
#else
if (qp_num == ep->xrc_recv_qp_num)
#endif
return ep;
}
return NULL;
@ -352,11 +359,14 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
event_type = event.event_type;
#if HAVE_XRC
/* is it XRC event ?*/
#if OPAL_HAVE_XRCD
#else
if (IBV_XRC_QP_EVENT_FLAG & event.event_type) {
xrc_event = true;
/* Clear the bit and handle as usual */
event_type ^= IBV_XRC_QP_EVENT_FLAG;
}
#endif
#endif
switch(event_type) {
case IBV_EVENT_PATH_MIG:
@ -367,9 +377,12 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
mca_btl_openib_load_apm(event.element.qp,
qp2endpoint(event.element.qp, device));
#if HAVE_XRC
#if OPAL_HAVE_XRCD
#else
else
mca_btl_openib_load_apm_xrc_rcv(event.element.xrc_qp_num,
xrc_qp2endpoint(event.element.xrc_qp_num, device));
#endif
#endif
}
break;
@ -648,7 +661,7 @@ void mca_btl_openib_load_apm(struct ibv_qp *qp, mca_btl_openib_endpoint_t *ep)
qp->qp_num, strerror(errno), errno));
}
#if HAVE_XRC
#if HAVE_XRC && ! OPAL_HAVE_XRCD
void mca_btl_openib_load_apm_xrc_rcv(uint32_t qp_num, mca_btl_openib_endpoint_t *ep)
{
struct ibv_qp_init_attr qp_init_attr;
@ -678,6 +691,7 @@ void mca_btl_openib_load_apm_xrc_rcv(uint32_t qp_num, mca_btl_openib_endpoint_t
}
ibv_modify_xrc_rcv_qp(btl->device->xrc_domain, qp_num, &attr, mask);
/* Maybe the qp already was modified by other process - ignoring error */
}
#endif

Просмотреть файл

@ -1,5 +1,8 @@
/*
* Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved.
* Copyright (c) 2014 Bull SAS. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -16,7 +19,7 @@
int start_async_event_thread(void);
void mca_btl_openib_load_apm(struct ibv_qp *qp, mca_btl_openib_endpoint_t *ep);
int btl_openib_async_command_done(int exp);
#if HAVE_XRC
#if HAVE_XRC && ! OPAL_HAVE_XRCD
void mca_btl_openib_load_apm_xrc_rcv(uint32_t qp_num, mca_btl_openib_endpoint_t *ep);
#endif

Просмотреть файл

@ -19,8 +19,9 @@
* Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* Copyright (c) 2014 Research Organization for Information Science
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Bull SAS. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -979,6 +980,11 @@ static void device_destruct(mca_btl_openib_device_t *device)
}
#if HAVE_XRC
if (!mca_btl_openib_xrc_check_api()) {
return;
}
if (MCA_BTL_XRC_ENABLED) {
if (OPAL_SUCCESS != mca_btl_openib_close_xrc_domain(device)) {
BTL_VERBOSE(("XRC Internal error. Failed to close xrc domain"));

Просмотреть файл

@ -19,6 +19,9 @@
* Copyright (c) 2010-2011 Oracle and/or its affiliates. All rights reserved
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014 Bull SAS. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
*
* $COPYRIGHT$
*
@ -349,7 +352,11 @@ static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
}
endpoint->ib_addr = NULL;
#if OPAL_HAVE_XRCD
endpoint->xrc_recv_qp = NULL;
#else
endpoint->xrc_recv_qp_num = 0;
#endif
endpoint->endpoint_btl = 0;
endpoint->endpoint_proc = 0;
endpoint->endpoint_local_cpc = NULL;
@ -460,12 +467,24 @@ static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
/* unregister xrc recv qp */
#if HAVE_XRC
#if OPAL_HAVE_XRCD
if (NULL != endpoint->xrc_recv_qp) {
    /* Read the QP number before destroying the QP: the ibv_qp object must
     * not be dereferenced once ibv_destroy_qp() has succeeded, and the
     * number is still wanted for the log message on both paths. */
    uint32_t xrc_qp_num = endpoint->xrc_recv_qp->qp_num;

    if (ibv_destroy_qp(endpoint->xrc_recv_qp)) {
        BTL_ERROR(("Failed to unregister XRC recv QP:%d\n", xrc_qp_num));
    } else {
        BTL_VERBOSE(("Unregistered XRC Recv QP:%d\n", xrc_qp_num));
    }
}
#else
if (0 != endpoint->xrc_recv_qp_num) {
if(ibv_unreg_xrc_rcv_qp(endpoint->endpoint_btl->device->xrc_domain,
endpoint->xrc_recv_qp_num)) {
BTL_ERROR(("Failed to unregister XRC recv QP:%d\n", endpoint->xrc_recv_qp_num));
} else {
BTL_VERBOSE(("Unregistered XRC Recv QP:%d\n", endpoint->xrc_recv_qp_num));
}
}
#endif
#endif
OBJ_DESTRUCT(&endpoint->endpoint_lock);

Просмотреть файл

@ -15,6 +15,9 @@
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2007-2009 Mellanox Technologies. All rights reserved.
* Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014 Bull SAS. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -39,6 +42,8 @@
#define QP_TX_BATCH_COUNT 64
#define QP_TX_BATCH_COUNT 64
BEGIN_C_DECLS
struct mca_btl_openib_frag_t;
@ -206,7 +211,11 @@ struct mca_btl_base_endpoint_t {
opal_list_t pending_lazy_frags;
mca_btl_openib_endpoint_qp_t *qps;
#if OPAL_HAVE_XRCD
struct ibv_qp *xrc_recv_qp;
#else
uint32_t xrc_recv_qp_num; /* in xrc we will use it as recv qp */
#endif
uint32_t xrc_recv_psn;
/** list of pending rget ops */
@ -323,6 +332,7 @@ static inline void qp_reset_signal_count(mca_btl_openib_endpoint_t *ep, const in
}
int mca_btl_openib_endpoint_send(mca_btl_base_endpoint_t*,
mca_btl_openib_send_frag_t*);
int mca_btl_openib_endpoint_post_send(mca_btl_openib_endpoint_t*,
@ -596,8 +606,13 @@ static inline int post_send(mca_btl_openib_endpoint_t *ep,
}
#if HAVE_XRC
#if OPAL_HAVE_XRCD
if(BTL_OPENIB_QP_TYPE_XRC(qp))
sr_desc->qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num;
#else
if(BTL_OPENIB_QP_TYPE_XRC(qp))
sr_desc->xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num;
#endif
#endif
assert(sg->addr == (uint64_t)(uintptr_t)frag->hdr);

Просмотреть файл

@ -421,6 +421,12 @@ static int parse_line(parsed_section_values_t *sv)
sv->values.ignore_device_set = true;
}
else if (0 == strcasecmp(key_buffer, "ignore_device")) {
/* Single value */
sv->values.ignore_device = (bool) opal_btl_openib_ini_intify(value);
sv->values.ignore_device_set = true;
}
else {
/* Have no idea what this parameter is. Not an error -- just
ignore it */
@ -584,6 +590,11 @@ static int save_section(parsed_section_values_t *s)
h->values.ignore_device_set = true;
}
if (s->values.ignore_device_set) {
h->values.ignore_device = s->values.ignore_device;
h->values.ignore_device_set = true;
}
found = true;
break;
}

Просмотреть файл

@ -4,6 +4,9 @@
* Copyright (c) 2014 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Bull SAS. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -21,6 +24,7 @@
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <dlfcn.h>
#include "opal/mca/btl/base/base.h"
#include "btl_openib_xrc.h"
@ -37,12 +41,38 @@ OBJ_CLASS_INSTANCE(ib_address_t,
ib_address_constructor,
ib_address_destructor);
/*
 * Run-time check for which libibverbs XRC API we really have underneath.
 *
 * The symbol table of the running program is probed for the entry point
 * that belongs to the XRC API flavour this file was NOT built for:
 *   - built with OPAL_HAVE_XRCD (ibv_open_xrcd & friends): the legacy
 *     ibv_create_xrc_rcv_qp must not be present;
 *   - built without it (ibv_create_xrc_rcv_qp & friends): ibv_open_xrcd
 *     must not be present.
 * Finding the other flavour's symbol means the library loaded at run time
 * does not match the one used at build time.
 *
 * Returns true when no mismatch is detected; false when the program handle
 * cannot be obtained or when the wrong API flavour is found.
 */
bool mca_btl_openib_xrc_check_api(void)
{
    bool api_ok = true;
    void *lib = dlopen(NULL, RTLD_NOW); /* current program */

    if (!lib) {
        BTL_ERROR(("XRC error: could not find XRC API version"));
        return false;
    }

#if OPAL_HAVE_XRCD
    if (NULL != dlsym(lib, "ibv_create_xrc_rcv_qp")) {
        BTL_ERROR(("XRC error: bad XRC API (require XRC from OFED 3.12+)"));
        api_ok = false;
    }
#else
    if (NULL != dlsym(lib, "ibv_open_xrcd")) {
        BTL_ERROR(("XRC error: bad XRC API (require XRC from OFED pre 3.12)."));
        api_ok = false;
    }
#endif

    /* Release the handle obtained above; it was only needed for the lookup. */
    dlclose(lib);
    return api_ok;
}
/* This func. opens XRC domain */
int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device)
{
int len;
char *xrc_file_name;
const char *dev_name;
#if OPAL_HAVE_XRCD
struct ibv_xrcd_init_attr xrcd_attr;
#endif
dev_name = ibv_get_device_name(device->ib_dev);
len = asprintf(&xrc_file_name,
@ -61,9 +91,17 @@ int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device)
free(xrc_file_name);
return OPAL_ERROR;
}
#if OPAL_HAVE_XRCD
memset(&xrcd_attr, 0, sizeof xrcd_attr);
xrcd_attr.comp_mask = IBV_XRCD_INIT_ATTR_FD | IBV_XRCD_INIT_ATTR_OFLAGS;
xrcd_attr.fd = device->xrc_fd;
xrcd_attr.oflags = O_CREAT;
device->xrcd = ibv_open_xrcd(device->ib_dev_context, &xrcd_attr);
if (NULL == device->xrcd) {
#else
device->xrc_domain = ibv_open_xrc_domain(device->ib_dev_context, device->xrc_fd, O_CREAT);
if (NULL == device->xrc_domain) {
#endif
BTL_ERROR(("Failed to open XRC domain\n"));
close(device->xrc_fd);
free(xrc_file_name);
@ -76,11 +114,19 @@ int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device)
/* This func. closes XRC domain */
int mca_btl_openib_close_xrc_domain(struct mca_btl_openib_device_t *device)
{
#if OPAL_HAVE_XRCD
if (NULL == device->xrcd) {
#else
if (NULL == device->xrc_domain) {
#endif
/* No XRC domain, just exit */
return OPAL_SUCCESS;
}
#if OPAL_HAVE_XRCD
if (ibv_close_xrcd(device->xrcd)) {
#else
if (ibv_close_xrc_domain(device->xrc_domain)) {
#endif
/* Report the actual errno: the format string announces an errno value,
 * not the XRC file descriptor. */
BTL_ERROR(("Failed to close XRC domain, errno %d says %s\n",
errno, strerror(errno)));
return OPAL_ERROR;

Просмотреть файл

@ -2,6 +2,9 @@
* Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Bull SAS. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -48,4 +51,6 @@ int mca_btl_openib_close_xrc_domain(struct mca_btl_openib_device_t *device);
int mca_btl_openib_ib_address_add_new (uint16_t lid, uint64_t s_id,
opal_jobid_t ep_jobid, mca_btl_openib_endpoint_t *ep);
bool mca_btl_openib_xrc_check_api(void);
#endif

Просмотреть файл

@ -8,6 +8,9 @@
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Bull SAS. All rights reserved.
* Copyright (c) 2014-2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
*
* $COPYRIGHT$
*
@ -329,7 +332,11 @@ static int udcm_xrc_start_connect (opal_btl_openib_connect_base_module_t *cpc,
static int udcm_xrc_restart_connect (mca_btl_base_endpoint_t *lcl_ep);
static int udcm_xrc_send_qp_connect (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg_hdr_t *msg_hdr);
static int udcm_xrc_send_qp_create (mca_btl_base_endpoint_t *lcl_ep);
#if OPAL_HAVE_XRCD
static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep, uint32_t qp_num);
#else
static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep);
#endif
static int udcm_xrc_recv_qp_create (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg_hdr_t *msg_hdr);
static int udcm_xrc_send_request (mca_btl_base_endpoint_t *lcl_ep, mca_btl_base_endpoint_t *rem_ep,
uint8_t msg_type);
@ -1963,7 +1970,10 @@ static int udcm_process_messages (struct ibv_cq *event_cq, udcm_module_t *m)
if (UDCM_MESSAGE_XCONNECT2 == message->hdr.type) {
/* save the qp number for unregister */
#if ! OPAL_HAVE_XRCD
lcl_ep->xrc_recv_qp_num = message->hdr.data.xreq.rem_qp_num;
#endif
}
}
#endif
@ -2403,7 +2413,11 @@ static int udcm_xrc_send_qp_create (mca_btl_base_endpoint_t *lcl_ep)
uint32_t send_wr;
struct ibv_qp **qp;
uint32_t *psn;
#if OPAL_HAVE_XRCD
struct ibv_qp_init_attr_ex qp_init_attr;
#else
struct ibv_qp_init_attr qp_init_attr;
#endif
struct ibv_qp_attr attr;
int ret;
size_t req_inline;
@ -2420,7 +2434,11 @@ static int udcm_xrc_send_qp_create (mca_btl_base_endpoint_t *lcl_ep)
send_wr = lcl_ep->ib_addr->qp->sd_wqe +
(mca_btl_openib_component.use_eager_rdma ?
mca_btl_openib_component.max_eager_rdma : 0);
#if OPAL_HAVE_XRCD
memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr_ex));
#else
memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr));
#endif
memset(&attr, 0, sizeof(struct ibv_qp_attr));
qp_init_attr.send_cq = qp_init_attr.recv_cq = openib_btl->device->ib_cq[prio];
@ -2433,9 +2451,16 @@ static int udcm_xrc_send_qp_create (mca_btl_base_endpoint_t *lcl_ep)
qp_init_attr.cap.max_send_sge = 1;
/* this one is ignored by driver */
qp_init_attr.cap.max_recv_sge = 1; /* we do not use SG list */
#if OPAL_HAVE_XRCD
qp_init_attr.qp_type = IBV_QPT_XRC_SEND;
qp_init_attr.comp_mask = IBV_QP_INIT_ATTR_PD;
qp_init_attr.pd = openib_btl->device->ib_pd;
*qp = ibv_create_qp_ex(openib_btl->device->ib_dev_context, &qp_init_attr);
#else
qp_init_attr.qp_type = IBV_QPT_XRC;
qp_init_attr.xrc_domain = openib_btl->device->xrc_domain;
*qp = ibv_create_qp(openib_btl->device->ib_pd, &qp_init_attr);
#endif
if (NULL == *qp) {
opal_show_help("help-mpi-btl-openib-cpc-base.txt",
"ibv_create_qp failed", true,
@ -2482,11 +2507,33 @@ static int udcm_xrc_send_qp_create (mca_btl_base_endpoint_t *lcl_ep)
/* mark: xrc receive qp */
/* Recv qp connect */
#if OPAL_HAVE_XRCD
static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep, uint32_t qp_num)
#else
static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep)
#endif
{
mca_btl_openib_module_t *openib_btl = lcl_ep->endpoint_btl;
int ret;
#if OPAL_HAVE_XRCD
    /* Open (attach to) the existing XRC receive QP identified by qp_num in
     * this device's XRC domain. */
    struct ibv_qp_open_attr attr;
    memset(&attr, 0, sizeof(struct ibv_qp_open_attr));
    attr.comp_mask = IBV_QP_OPEN_ATTR_NUM | IBV_QP_OPEN_ATTR_XRCD | IBV_QP_OPEN_ATTR_TYPE;
    attr.qp_num = qp_num;
    attr.qp_type = IBV_QPT_XRC_RECV;
    attr.xrcd = openib_btl->device->xrcd;
    BTL_VERBOSE(("Connecting Recv QP\n"));
    lcl_ep->xrc_recv_qp = ibv_open_qp(openib_btl->device->ib_dev_context, &attr);
    if (NULL == lcl_ep->xrc_recv_qp) { /* failed to register the QP, so it is already dead and we should create a new one */
        /* Return NOT READY !!!*/
        /* xrc_recv_qp is NULL on this path, so log the requested qp_num
         * argument instead of dereferencing the pointer. */
        BTL_ERROR(("Failed to register qp_num: %d , get error: %s (%d)\n. Replying with RNR",
                   qp_num, strerror(errno), errno));
        return OPAL_ERROR;
    } else {
        BTL_VERBOSE(("Connected to XRC Recv qp [%d]", lcl_ep->xrc_recv_qp->qp_num));
        return OPAL_SUCCESS;
    }
#else
BTL_VERBOSE(("Connecting receive qp: %d", lcl_ep->xrc_recv_qp_num));
ret = ibv_reg_xrc_rcv_qp(openib_btl->device->xrc_domain, lcl_ep->xrc_recv_qp_num);
if (ret) { /* failed to regester the qp, so it is already die and we should create new one */
@ -2496,6 +2543,7 @@ static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep)
lcl_ep->xrc_recv_qp_num, strerror(ret), ret));
return OPAL_ERROR;
}
#endif
return OPAL_SUCCESS;
}
@ -2504,27 +2552,58 @@ static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep)
static int udcm_xrc_recv_qp_create (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg_hdr_t *msg_hdr)
{
mca_btl_openib_module_t* openib_btl = lcl_ep->endpoint_btl;
#if OPAL_HAVE_XRCD
struct ibv_qp_init_attr_ex qp_init_attr;
#else
struct ibv_qp_init_attr qp_init_attr;
#endif
struct ibv_qp_attr attr;
int ret;
BTL_VERBOSE(("creating xrc receive qp"));
#if OPAL_HAVE_XRCD
memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr_ex));
qp_init_attr.qp_type = IBV_QPT_XRC_RECV;
qp_init_attr.comp_mask = IBV_QP_INIT_ATTR_XRCD;
qp_init_attr.xrcd = openib_btl->device->xrcd;
lcl_ep->xrc_recv_qp = ibv_create_qp_ex(openib_btl->device->ib_dev_context,
&qp_init_attr);
if (NULL == lcl_ep->xrc_recv_qp) {
BTL_ERROR(("Error creating XRC recv QP, errno says: %s [%d]",
strerror(errno), errno));
return OPAL_ERROR;
}
#else
memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr));
/* Only xrc_domain is required, all other are ignored */
qp_init_attr.xrc_domain = openib_btl->device->xrc_domain;
ret = ibv_create_xrc_rcv_qp(&qp_init_attr, &lcl_ep->xrc_recv_qp_num);
ret = ibv_create_xrc_rcv_qp(&qp_init_attr, &lcl_ep->xrc_recv_qp->qp_num);
if (ret) {
BTL_ERROR(("Error creating XRC recv QP[%x], errno says: %s [%d]",
lcl_ep->xrc_recv_qp_num, strerror(ret), ret));
lcl_ep->xrc_recv_qp->qp_num, strerror(ret), ret));
return OPAL_ERROR;
}
#endif
memset(&attr, 0, sizeof(struct ibv_qp_attr));
attr.qp_state = IBV_QPS_INIT;
attr.pkey_index = openib_btl->pkey_index;
attr.port_num = openib_btl->port_num;
attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;
#if OPAL_HAVE_XRCD
ret = ibv_modify_qp(lcl_ep->xrc_recv_qp,
&attr,
IBV_QP_STATE|
IBV_QP_PKEY_INDEX|
IBV_QP_PORT|
IBV_QP_ACCESS_FLAGS);
if (ret) {
BTL_ERROR(("Error modifying XRC recv QP to IBV_QPS_INIT, errno says: %s [%d]",
strerror(ret), ret));
return OPAL_ERROR;
}
#else
ret = ibv_modify_xrc_rcv_qp(openib_btl->device->xrc_domain,
lcl_ep->xrc_recv_qp_num, &attr,
IBV_QP_STATE | IBV_QP_PKEY_INDEX |
@ -2535,6 +2614,7 @@ static int udcm_xrc_recv_qp_create (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg_
while(1);
return OPAL_ERROR;
}
#endif
memset(&attr, 0, sizeof(struct ibv_qp_attr));
attr.qp_state = IBV_QPS_RTR;
@ -2555,7 +2635,11 @@ static int udcm_xrc_recv_qp_create (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg_
/* if user enabled dynamic SL, get it from PathRecord */
if (0 != mca_btl_openib_component.ib_path_record_service_level) {
int rc = btl_openib_connect_get_pathrecord_sl(
#if OPAL_HAVE_XRCD
openib_btl->device->xrcd->context,
#else
openib_btl->device->xrc_domain->context,
#endif
attr.ah_attr.port_num,
openib_btl->lid,
attr.ah_attr.dlid);
@ -2566,6 +2650,22 @@ static int udcm_xrc_recv_qp_create (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg_
}
#endif
#if OPAL_HAVE_XRCD
ret = ibv_modify_qp(lcl_ep->xrc_recv_qp,
&attr,
IBV_QP_STATE|
IBV_QP_AV|
IBV_QP_PATH_MTU|
IBV_QP_DEST_QPN|
IBV_QP_RQ_PSN|
IBV_QP_MAX_DEST_RD_ATOMIC|
IBV_QP_MIN_RNR_TIMER);
if (ret) {
BTL_ERROR(("Error modifying XRC recv QP to IBV_QPS_RTR, errno says: %s [%d]",
strerror(ret), ret));
return OPAL_ERROR;
}
#else
ret = ibv_modify_xrc_rcv_qp(openib_btl->device->xrc_domain,
lcl_ep->xrc_recv_qp_num,
&attr,
@ -2581,9 +2681,14 @@ static int udcm_xrc_recv_qp_create (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg_
lcl_ep->xrc_recv_qp_num, strerror(ret), ret));
return OPAL_ERROR;
}
#endif
#if OPAL_HAVE_THREADS
if (APM_ENABLED) {
#if OPAL_HAVE_XRCD
mca_btl_openib_load_apm(lcl_ep->xrc_recv_qp, lcl_ep);
#else
mca_btl_openib_load_apm_xrc_rcv(lcl_ep->xrc_recv_qp_num, lcl_ep);
#endif
}
#endif
@ -2648,14 +2753,29 @@ static int udcm_xrc_send_xresponse (mca_btl_base_endpoint_t *lcl_ep, mca_btl_bas
msg->data->hdr.data.xres.rem_ep_index = htonl(lcl_ep->index);
if (UDCM_MESSAGE_XRESPONSE == msg_type) {
#if OPAL_HAVE_XRCD
BTL_VERBOSE(("Sending qp: %d, psn: %d", lcl_ep->xrc_recv_qp->qp_num, lcl_ep->xrc_recv_psn));
msg->data->hdr.data.xres.rem_qp_num = htonl(lcl_ep->xrc_recv_qp->qp_num);
msg->data->hdr.data.xres.rem_psn = htonl(lcl_ep->xrc_recv_psn);
#else
BTL_VERBOSE(("Sending qp: %d, psn: %d", lcl_ep->xrc_recv_qp_num, lcl_ep->xrc_recv_psn));
msg->data->hdr.data.xres.rem_qp_num = htonl(lcl_ep->xrc_recv_qp_num);
msg->data->hdr.data.xres.rem_psn = htonl(lcl_ep->xrc_recv_psn);
#endif
}
for (int i = 0; i < mca_btl_openib_component.num_xrc_qps; ++i) {
#if OPAL_HAVE_XRCD
uint32_t srq_num;
if (ibv_get_srq_num(lcl_ep->endpoint_btl->qps[i].u.srq_qp.srq, &srq_num)) {
BTL_ERROR(("BTL openib XOOB internal error: can't get srq num"));
}
BTL_VERBOSE(("Sending srq[%d] num = %d", i, srq_num));
msg->data->qps[i].qp_num = htonl(srq_num);
#else
BTL_VERBOSE(("Sending srq[%d] num = %d", i, lcl_ep->endpoint_btl->qps[i].u.srq_qp.srq->xrc_srq_num));
msg->data->qps[i].qp_num = htonl(lcl_ep->endpoint_btl->qps[i].u.srq_qp.srq->xrc_srq_num);
#endif
}
rc = udcm_post_send (lcl_ep, msg->data, m->msg_length, 0);
@ -2695,7 +2815,11 @@ static int udcm_xrc_handle_xconnect (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg
if (UDCM_MESSAGE_XCONNECT2 == msg_hdr->type) {
response_type = UDCM_MESSAGE_XRESPONSE2;
#if OPAL_HAVE_XRCD
rc = udcm_xrc_recv_qp_connect (lcl_ep, msg_hdr->data.xreq.rem_qp_num);
#else
rc = udcm_xrc_recv_qp_connect (lcl_ep);
#endif
if (OPAL_SUCCESS != rc) {
/* return not ready. remote side will retry */
rej_reason = UDCM_REJ_NOT_READY;