1
1

btl/openib: add atomic operation support

Этот коммит содержится в:
Nathan Hjelm 2014-11-13 17:26:35 -07:00 коммит произвёл Nathan Hjelm
родитель 22625b005b
Коммит bf7daac388
11 изменённых файлов: 397 добавлений и 30 удалений

Просмотреть файл

@ -152,7 +152,7 @@ AC_DEFUN([OPAL_CHECK_OPENFABRICS],[
# If we have the openib stuff available, find out what we've got
AS_IF([test "$ompi_check_openib_happy" = "yes"],
[AC_CHECK_DECLS([IBV_EVENT_CLIENT_REREGISTER, IBV_ACCESS_SO, IBV_TRANSPORT_USNIC, IBV_TRANSPORT_USNIC_UDP, IBV_NODE_USNIC], [], [],
[AC_CHECK_DECLS([IBV_EVENT_CLIENT_REREGISTER, IBV_ACCESS_SO, IBV_TRANSPORT_USNIC, IBV_TRANSPORT_USNIC_UDP, IBV_NODE_USNIC, IBV_ATOMIC_HCA], [], [],
[#include <infiniband/verbs.h>])
AC_CHECK_FUNCS([ibv_get_device_list ibv_resize_cq])

Просмотреть файл

@ -282,6 +282,8 @@ enum {
MCA_BTL_ATOMIC_SUPPORTS_XOR = 0x00000800,
/** The btl supports atomic compare-and-swap */
MCA_BTL_ATOMIC_SUPPORTS_CSWAP = 0x10000000,
/** The btl guarantees global atomicity (can mix btl atomics with cpu atomics) */
MCA_BTL_ATOMIC_SUPPORTS_GLOB = 0x20000000,
};
enum mca_btl_base_atomic_op_t {

Просмотреть файл

@ -61,6 +61,7 @@ sources = \
btl_openib_ip.c \
btl_openib_put.c \
btl_openib_get.c \
btl_openib_atomic.c \
connect/base.h \
connect/btl_openib_connect_base.c \
connect/btl_openib_connect_empty.c \

Просмотреть файл

@ -115,6 +115,10 @@ mca_btl_openib_module_t mca_btl_openib_module = {
.btl_ft_event = mca_btl_openib_ft_event,
.btl_register_mem = mca_btl_openib_register_mem,
.btl_deregister_mem = mca_btl_openib_deregister_mem,
#if HAVE_DECL_IBV_ATOMIC_HCA
.btl_atomic_fop = mca_btl_openib_atomic_fop,
.btl_atomic_cswap = mca_btl_openib_atomic_cswap,
#endif
}
};
@ -954,6 +958,12 @@ int mca_btl_openib_add_procs(
return rc;
}
rc = mca_btl_openib_size_queues(openib_btl, nprocs);
if (OPAL_SUCCESS != rc) {
BTL_ERROR(("error creating cqs"));
return rc;
}
for (i = 0, local_procs = 0 ; i < (int) nprocs; i++) {
struct opal_proc_t* proc = procs[i];
mca_btl_openib_proc_t* ib_proc;
@ -965,11 +975,6 @@ int mca_btl_openib_add_procs(
local_procs ++;
}
/* OOB, XOOB, and RDMACM do not support SELF comunication, so
* mark the prco as unreachable by openib btl */
if (0 == opal_compare_proc(OPAL_PROC_MY_NAME, proc->proc_name)) {
continue;
}
#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE)
/* Most current iWARP adapters (June 2008) cannot handle
talking to other processes on the same host (!) -- so mark
@ -1139,7 +1144,7 @@ int mca_btl_openib_add_procs(
return OPAL_ERROR;
}
return mca_btl_openib_size_queues(openib_btl, nprocs);
return OPAL_SUCCESS;
}
/*

Просмотреть файл

@ -700,6 +700,98 @@ int mca_btl_openib_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
/**
* Initiate an asynchronous fetching atomic operation.
* Completion Semantics: if this function returns a 1 then the operation
* is complete. a return of OPAL_SUCCESS indicates
* the atomic operation has been queued with the
* network.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param local_address (OUT) Local address to store the result in
* @param remote_address (IN) Remote address perfom operation on to (registered remotely)
* @param local_handle (IN) Local registration handle for region containing
* (local_address, local_address + 8)
* @param remote_handle (IN) Remote registration handle for region containing
* (remote_address, remote_address + 8)
* @param op (IN) Operation to perform
* @param operand (IN) Operand for the operation
* @param flags (IN) Flags for this put operation
* @param order (IN) Ordering
* @param cbfunc (IN) Function to call on completion (if queued)
* @param cbcontext (IN) Context for the callback
* @param cbdata (IN) Data for callback
*
* @retval OPAL_SUCCESS The operation was successfully queued
* @retval 1 The operation is complete
* @retval OPAL_ERROR The operation was NOT successfully queued
* @retval OPAL_ERR_OUT_OF_RESOURCE Insufficient resources to queue the atomic
* operation. Try again later
* @retval OPAL_ERR_NOT_AVAILABLE Atomic operation can not be performed due to
* alignment restrictions or the operation {op} is not supported
* by the hardware.
*
* After the operation is complete the remote address specified by {remote_address} and
* {remote_handle} will be updated with (*remote_address) = (*remote_address) op operand.
* {local_address} will be updated with the previous value stored in {remote_address}.
* The btl will guarantee consistency of atomic operations performed via the btl. Note,
* however, that not all btls will provide consistency between btl atomic operations and
* cpu atomics.
*/
int mca_btl_openib_atomic_fop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
void *local_address, uint64_t remote_address,
struct mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op,
int64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata);
/**
* Initiate an asynchronous compare and swap operation.
* Completion Semantics: if this function returns a 1 then the operation
* is complete. a return of OPAL_SUCCESS indicates
* the atomic operation has been queued with the
* network.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param local_address (OUT) Local address to store the result in
* @param remote_address (IN) Remote address perfom operation on to (registered remotely)
* @param local_handle (IN) Local registration handle for region containing
* (local_address, local_address + 8)
* @param remote_handle (IN) Remote registration handle for region containing
* (remote_address, remote_address + 8)
* @param compare (IN) Operand for the operation
* @param value (IN) Value to store on success
* @param flags (IN) Flags for this put operation
* @param order (IN) Ordering
* @param cbfunc (IN) Function to call on completion (if queued)
* @param cbcontext (IN) Context for the callback
* @param cbdata (IN) Data for callback
*
* @retval OPAL_SUCCESS The operation was successfully queued
* @retval 1 The operation is complete
* @retval OPAL_ERROR The operation was NOT successfully queued
* @retval OPAL_ERR_OUT_OF_RESOURCE Insufficient resources to queue the atomic
* operation. Try again later
* @retval OPAL_ERR_NOT_AVAILABLE Atomic operation can not be performed due to
* alignment restrictions or the operation {op} is not supported
* by the hardware.
*
* After the operation is complete the remote address specified by {remote_address} and
* {remote_handle} will be updated with {value} if *remote_address == compare.
* {local_address} will be updated with the previous value stored in {remote_address}.
* The btl will guarantee consistency of atomic operations performed via the btl. Note,
* however, that not all btls will provide consistency between btl atomic operations and
* cpu atomics.
*/
int mca_btl_openib_atomic_cswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
void *local_address, uint64_t remote_address,
struct mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_registration_handle_t *remote_handle, int64_t compare,
int64_t value, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata);
/**
* Allocate a descriptor.
*

134
opal/mca/btl/openib/btl_openib_atomic.c Обычный файл
Просмотреть файл

@ -0,0 +1,134 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "btl_openib.h"
#include "btl_openib_endpoint.h"
#if HAVE_DECL_IBV_ATOMIC_HCA
static int mca_btl_openib_atomic_internal (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle, enum ibv_wr_opcode opcode,
int64_t operand, int operand2, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata)
{
mca_btl_openib_get_frag_t* frag = NULL;
int qp = order;
int rc;
frag = to_get_frag(alloc_recv_user_frag());
if (OPAL_UNLIKELY(NULL == frag)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
if (MCA_BTL_NO_ORDER == qp) {
qp = mca_btl_openib_component.rdma_qp;
}
/* set base descriptor flags */
to_base_frag(frag)->base.order = qp;
/* free this descriptor when the operation is complete */
to_base_frag(frag)->base.des_flags = MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
/* set up scatter-gather entry */
to_com_frag(frag)->sg_entry.length = 8;
to_com_frag(frag)->sg_entry.lkey = local_handle->lkey;
to_com_frag(frag)->sg_entry.addr = (uint64_t)(uintptr_t) local_address;
to_com_frag(frag)->endpoint = endpoint;
/* set up rdma callback */
frag->cb.func = cbfunc;
frag->cb.context = cbcontext;
frag->cb.data = cbdata;
frag->cb.local_handle = local_handle;
/* set up descriptor */
frag->sr_desc.wr.atomic.remote_addr = remote_address;
frag->sr_desc.opcode = opcode;
frag->sr_desc.wr.atomic.compare_add = operand;
frag->sr_desc.wr.atomic.swap = operand2;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if((endpoint->endpoint_proc->proc_opal->proc_arch & OPAL_ARCH_ISBIGENDIAN)
!= (opal_proc_local_get()->proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
frag->sr_desc.wr.atomic.rkey = opal_swap_bytes4 (remote_handle->rkey);
} else
#endif
{
frag->sr_desc.wr.atomic.rkey = remote_handle->rkey;
}
#if HAVE_XRC
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp)) {
frag->sr_desc.xrc_remote_srq_num=endpoint->rem_info.rem_srqs[qp].rem_srq_num;
}
#endif
if (endpoint->endpoint_state != MCA_BTL_IB_CONNECTED) {
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
rc = check_endpoint_state(endpoint, &to_base_frag(frag)->base, &endpoint->pending_get_frags);
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
if (OPAL_ERR_RESOURCE_BUSY == rc) {
return OPAL_SUCCESS;
}
if (OPAL_SUCCESS != rc) {
MCA_BTL_IB_FRAG_RETURN (frag);
return rc;
}
}
rc = mca_btl_openib_get_internal (btl, endpoint, frag);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
if (OPAL_LIKELY(OPAL_ERR_OUT_OF_RESOURCE == rc)) {
rc = OPAL_SUCCESS;
OPAL_THREAD_SCOPED_LOCK(&endpoint->endpoint_lock,
opal_list_append(&endpoint->pending_get_frags, (opal_list_item_t*)frag));
} else {
MCA_BTL_IB_FRAG_RETURN (frag);
}
}
return rc;
}
int mca_btl_openib_atomic_fop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
void *local_address, uint64_t remote_address,
struct mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op,
int64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata)
{
if (OPAL_UNLIKELY(MCA_BTL_ATOMIC_ADD != op)) {
return OPAL_ERR_NOT_SUPPORTED;
}
return mca_btl_openib_atomic_internal (btl, endpoint, local_address, remote_address, local_handle,
remote_handle, IBV_WR_ATOMIC_FETCH_AND_ADD, operand, 0,
flags, order, cbfunc, cbcontext, cbdata);
}
int mca_btl_openib_atomic_cswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
void *local_address, uint64_t remote_address,
struct mca_btl_base_registration_handle_t *local_handle,
struct mca_btl_base_registration_handle_t *remote_handle, int64_t compare,
int64_t value, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata)
{
return mca_btl_openib_atomic_internal (btl, endpoint, local_address, remote_address, local_handle,
remote_handle, IBV_WR_ATOMIC_CMP_AND_SWP, compare, value,
flags, order, cbfunc, cbcontext, cbdata);
}
#endif

Просмотреть файл

@ -583,6 +583,10 @@ static int openib_reg_mr(void *reg_data, void *base, size_t size,
enum ibv_access_flags access_flag = (enum ibv_access_flags) (IBV_ACCESS_LOCAL_WRITE |
IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
#if HAVE_DECL_IBV_ATOMIC_HCA
access_flag |= IBV_ACCESS_REMOTE_ATOMIC;
#endif
if (device->mem_reg_max &&
device->mem_reg_max < (device->mem_reg_active + size)) {
return OPAL_ERR_OUT_OF_RESOURCE;
@ -817,6 +821,17 @@ static int init_one_port(opal_list_t *btl_list, mca_btl_openib_device_t *device,
openib_btl->super.btl_put_limit = openib_btl->ib_port_attr.max_msg_sz;
}
#if HAVE_DECL_IBV_ATOMIC_HCA
if (openib_btl->device->ib_dev_attr.atomic_cap == IBV_ATOMIC_NONE) {
openib_btl->super.btl_flags &= ~MCA_BTL_FLAGS_ATOMIC_FOPS;
openib_btl->super.btl_atomic_flags = 0;
openib_btl->super.btl_atomic_fop = NULL;
openib_btl->super.btl_atomic_cswap = NULL;
} else if (IBV_ATOMIC_GLOB == openib_btl->device->ib_dev_attr.atomic_cap) {
openib_btl->super.btl_flags |= MCA_BTL_ATOMIC_SUPPORTS_GLOB;
}
#endif
openib_btl->super.btl_put_alignment = 0;
openib_btl->super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
@ -3286,18 +3301,19 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
/* Handle work completions */
switch(wc->opcode) {
case IBV_WC_RDMA_READ:
case IBV_WC_RDMA_WRITE:
case IBV_WC_COMP_SWAP:
case IBV_WC_FETCH_ADD:
OPAL_OUTPUT((-1, "Got WC: RDMA_READ or RDMA_WRITE"));
if (IBV_WC_RDMA_READ == wc->opcode) {
OPAL_THREAD_ADD32(&endpoint->get_tokens, 1);
OPAL_THREAD_ADD32(&endpoint->get_tokens, 1);
mca_btl_openib_get_frag_t *get_frag = to_get_frag(des);
mca_btl_openib_get_frag_t *get_frag = to_get_frag(des);
get_frag->cb.func (&openib_btl->super, endpoint, (void *)(intptr_t) frag->sg_entry.addr,
get_frag->cb.local_handle, get_frag->cb.context, get_frag->cb.data,
OPAL_SUCCESS);
} else if (MCA_BTL_OPENIB_FRAG_SEND_USER == openib_frag_type(des)) {
get_frag->cb.func (&openib_btl->super, endpoint, (void *)(intptr_t) frag->sg_entry.addr,
get_frag->cb.local_handle, get_frag->cb.context, get_frag->cb.data,
OPAL_SUCCESS);
case IBV_WC_RDMA_WRITE:
if (MCA_BTL_OPENIB_FRAG_SEND_USER == openib_frag_type(des)) {
mca_btl_openib_put_frag_t *put_frag = to_put_frag(des);
put_frag->cb.func (&openib_btl->super, endpoint, (void *)(intptr_t) frag->sg_entry.addr,
@ -3334,7 +3350,7 @@ static void handle_wc(mca_btl_openib_device_t* device, const uint32_t cq,
/* Process a completed send/put/get */
btl_ownership = (des->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
if (des->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
des->des_cbfunc(&openib_btl->super, endpoint, des,OPAL_SUCCESS);
des->des_cbfunc(&openib_btl->super, endpoint, des, OPAL_SUCCESS);
}
if( btl_ownership ) {
mca_btl_openib_free(&openib_btl->super, des);

Просмотреть файл

@ -77,6 +77,8 @@ int mca_btl_openib_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint
/* set up descriptor */
frag->sr_desc.wr.rdma.remote_addr = remote_address;
/* the opcode may have been changed by an atomic operation */
frag->sr_desc.opcode = IBV_WR_RDMA_READ;
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
if((ep->endpoint_proc->proc_opal->proc_arch & OPAL_ARCH_ISBIGENDIAN)

Просмотреть файл

@ -1,3 +1,4 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
@ -11,7 +12,7 @@
* All rights reserved.
* Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved.
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
* Copyright (c) 2006-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
@ -567,10 +568,16 @@ int btl_openib_register_mca_params(void)
mca_btl_openib_module.super.btl_rdma_pipeline_frag_size = 1024 * 1024;
mca_btl_openib_module.super.btl_min_rdma_pipeline_size = 256 * 1024;
mca_btl_openib_module.super.btl_flags = MCA_BTL_FLAGS_RDMA |
MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA;
MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA;
#if BTL_OPENIB_FAILOVER_ENABLED
mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_FAILOVER_SUPPORT;
#endif
#if HAVE_DECL_IBV_ATOMIC_HCA
mca_btl_openib_module.super.btl_flags |= MCA_BTL_FLAGS_ATOMIC_FOPS;
mca_btl_openib_module.super.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD | MCA_BTL_ATOMIC_SUPPORTS_CSWAP;
#endif
/* Default to bandwidth auto-detection */
mca_btl_openib_module.super.btl_bandwidth = 0;
mca_btl_openib_module.super.btl_latency = 4;

Просмотреть файл

@ -126,7 +126,6 @@ AC_DEFUN([MCA_opal_btl_openib_CONFIG],[
[enable openib BTL failover])
AM_CONDITIONAL([MCA_btl_openib_enable_failover], [test "x$btl_openib_failover_enabled" = "x1"])
# Check for __malloc_hook availability
AC_ARG_ENABLE(btl-openib-malloc-alignment,
AC_HELP_STRING([--enable-btl-openib-malloc-alignment], [Enable support for allocated memory alignment. Default: enabled if supported, disabled otherwise.]))

Просмотреть файл

@ -321,16 +321,17 @@ static int udcm_send_request (mca_btl_base_endpoint_t *lcl_ep,
static void udcm_send_timeout (evutil_socket_t fd, short event, void *arg);
static int udcm_finish_connection (mca_btl_openib_endpoint_t *lcl_ep);
static int udcm_rc_qps_to_rts(mca_btl_openib_endpoint_t *lcl_ep);
/* XRC support */
#if HAVE_XRC
static int udcm_xrc_start_connect (opal_btl_openib_connect_base_module_t *cpc,
mca_btl_base_endpoint_t *lcl_ep);
static int udcm_xrc_restart_connect (mca_btl_base_endpoint_t *lcl_ep);
static int udcm_xrc_send_qp_connect (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg_hdr_t *msg_hdr);
static int udcm_xrc_send_qp_connect (mca_btl_openib_endpoint_t *lcl_ep, uint32_t rem_qp_num, uint32_t rem_psn);
static int udcm_xrc_send_qp_create (mca_btl_base_endpoint_t *lcl_ep);
static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep);
static int udcm_xrc_recv_qp_create (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg_hdr_t *msg_hdr);
static int udcm_xrc_recv_qp_create (mca_btl_openib_endpoint_t *lcl_ep, uint32_t rem_qp_num, uint32_t rem_psn);
static int udcm_xrc_send_request (mca_btl_base_endpoint_t *lcl_ep, mca_btl_base_endpoint_t *rem_ep,
uint8_t msg_type);
static int udcm_xrc_send_xresponse (mca_btl_base_endpoint_t *lcl_ep, mca_btl_base_endpoint_t *rem_ep,
@ -507,6 +508,93 @@ static int udcm_component_finalize(void)
/* mark: udcm module */
#if HAVE_XRC
static int udcm_endpoint_init_self_xrc (struct mca_btl_base_endpoint_t *lcl_ep)
{
udcm_endpoint_t *udep = UDCM_ENDPOINT_DATA(lcl_ep);
int rc;
opal_mutex_lock (&udep->udep_lock);
do {
rc = udcm_xrc_recv_qp_connect (lcl_ep);
if (OMPI_SUCCESS != rc) {
BTL_VERBOSE(("error connecting loopback XRC receive queue pair"));
break;
}
lcl_ep->xrc_recv_qp_num = lcl_ep->qps[0].qp->lcl_qp->qp_num;
rc = mca_btl_openib_endpoint_post_recvs (lcl_ep);
if (OMPI_SUCCESS != rc) {
BTL_VERBOSE(("error posting receives for loopback queue pair"));
break;
}
rc = udcm_xrc_recv_qp_create (lcl_ep, lcl_ep->qps[0].qp->lcl_qp->qp_num,
lcl_ep->qps[0].qp->lcl_psn);
if (OMPI_SUCCESS != rc) {
BTL_VERBOSE(("error creating loopback XRC receive queue pair"));
break;
}
rc = udcm_xrc_send_qp_connect (lcl_ep, lcl_ep->qps[0].qp->lcl_qp->qp_num,
lcl_ep->qps[0].qp->lcl_psn);
if (OMPI_SUCCESS != rc) {
BTL_VERBOSE(("error creating loopback XRC send queue pair"));
break;
}
lcl_ep->endpoint_state = MCA_BTL_IB_CONNECTED;
rc = udcm_finish_connection (lcl_ep);
} while (0);
opal_mutex_unlock (&udep->udep_lock);
return rc;
}
#endif
static int udcm_endpoint_init_self (struct mca_btl_base_endpoint_t *lcl_ep)
{
udcm_endpoint_t *udep = UDCM_ENDPOINT_DATA(lcl_ep);
int rc;
opal_mutex_lock (&udep->udep_lock);
do {
if (OPAL_SUCCESS != (rc = udcm_endpoint_init_data (lcl_ep))) {
BTL_VERBOSE(("error initializing loopback endpoint cpc data"));
break;
}
if (OPAL_SUCCESS != (rc = udcm_rc_qp_create_all (lcl_ep))) {
BTL_VERBOSE(("error initializing loopback endpoint qps"));
break;
}
/* save queue pair info */
lcl_ep->rem_info.rem_index = lcl_ep->index;
for (int i = 0 ; i < mca_btl_openib_component.num_qps ; ++i) {
lcl_ep->rem_info.rem_qps[i].rem_psn = lcl_ep->qps[i].qp->lcl_psn;
lcl_ep->rem_info.rem_qps[i].rem_qp_num = lcl_ep->qps[i].qp->lcl_qp->qp_num;
}
if (OPAL_SUCCESS != (rc = udcm_rc_qps_to_rts (lcl_ep))) {
BTL_VERBOSE(("error moving loopback endpoint qps to RTS"));
break;
}
lcl_ep->endpoint_state = MCA_BTL_IB_CONNECTED;
rc = udcm_finish_connection (lcl_ep);
return OPAL_SUCCESS;
} while (0);
opal_mutex_unlock (&udep->udep_lock);
return rc;
}
static int udcm_endpoint_init (struct mca_btl_base_endpoint_t *lcl_ep)
{
udcm_endpoint_t *udep = lcl_ep->endpoint_local_cpc_data =
@ -518,6 +606,16 @@ static int udcm_endpoint_init (struct mca_btl_base_endpoint_t *lcl_ep)
OBJ_CONSTRUCT(&udep->udep_lock, opal_mutex_t);
if (lcl_ep->endpoint_proc->proc_opal == opal_proc_local_get ()) {
/* go ahead and try to create a loopback queue pair */
#if HAVE_XRC
if (mca_btl_openib_component.num_xrc_qps > 0) {
return udcm_endpoint_init_self_xrc (lcl_ep);
} else
#endif
return udcm_endpoint_init_self (lcl_ep);
}
return OPAL_SUCCESS;
}
@ -1069,6 +1167,9 @@ static inline int udcm_rc_qp_to_init (struct ibv_qp *qp,
attr.pkey_index = btl->pkey_index;
attr.port_num = btl->port_num;
attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;
#if HAVE_DECL_IBV_ATOMIC_HCA
attr.qp_access_flags |= IBV_ACCESS_REMOTE_ATOMIC;
#endif
attr_mask = IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT |
IBV_QP_ACCESS_FLAGS;
@ -2307,7 +2408,7 @@ static int udcm_xrc_restart_connect (mca_btl_base_endpoint_t *lcl_ep)
/* mark: xrc send qp */
/* Send qp connect */
static int udcm_xrc_send_qp_connect (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg_hdr_t *msg_hdr)
static int udcm_xrc_send_qp_connect (mca_btl_openib_endpoint_t *lcl_ep, uint32_t rem_qp_num, uint32_t rem_psn)
{
mca_btl_openib_module_t *openib_btl = lcl_ep->endpoint_btl;
struct ibv_qp_attr attr;
@ -2316,7 +2417,7 @@ static int udcm_xrc_send_qp_connect (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg
int ret;
BTL_VERBOSE(("Connecting send qp: %p, remote qp: %d", (void *)lcl_ep->qps[0].qp->lcl_qp,
msg_hdr->data.xres.rem_qp_num));
rem_qp_num));
assert(NULL != lcl_ep->qps);
qp = lcl_ep->qps[0].qp->lcl_qp;
psn = lcl_ep->qps[0].qp->lcl_psn;
@ -2326,8 +2427,8 @@ static int udcm_xrc_send_qp_connect (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg
attr.qp_state = IBV_QPS_RTR;
attr.path_mtu = (openib_btl->device->mtu < lcl_ep->rem_info.rem_mtu) ?
openib_btl->device->mtu : lcl_ep->rem_info.rem_mtu;
attr.dest_qp_num = msg_hdr->data.xres.rem_qp_num;
attr.rq_psn = msg_hdr->data.xres.rem_psn;
attr.dest_qp_num = rem_qp_num;
attr.rq_psn = rem_psn;
attr.max_dest_rd_atomic = mca_btl_openib_component.ib_max_rdma_dst_ops;
attr.min_rnr_timer = mca_btl_openib_component.ib_min_rnr_timer;
attr.ah_attr.is_global = 0;
@ -2460,6 +2561,9 @@ static int udcm_xrc_send_qp_create (mca_btl_base_endpoint_t *lcl_ep)
attr.pkey_index = openib_btl->pkey_index;
attr.port_num = openib_btl->port_num;
attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;
#if HAVE_DECL_IBV_ATOMIC_HCA
attr.qp_access_flags |= IBV_ACCESS_REMOTE_ATOMIC;
#endif
ret = ibv_modify_qp(*qp, &attr,
IBV_QP_STATE |
IBV_QP_PKEY_INDEX |
@ -2501,7 +2605,7 @@ static int udcm_xrc_recv_qp_connect (mca_btl_openib_endpoint_t *lcl_ep)
}
/* Recv qp create */
static int udcm_xrc_recv_qp_create (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg_hdr_t *msg_hdr)
static int udcm_xrc_recv_qp_create (mca_btl_openib_endpoint_t *lcl_ep, uint32_t rem_qp_num, uint32_t rem_psn)
{
mca_btl_openib_module_t* openib_btl = lcl_ep->endpoint_btl;
struct ibv_qp_init_attr qp_init_attr;
@ -2525,6 +2629,11 @@ static int udcm_xrc_recv_qp_create (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg_
attr.pkey_index = openib_btl->pkey_index;
attr.port_num = openib_btl->port_num;
attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;
#if HAVE_DECL_IBV_ATOMIC_HCA
attr.qp_access_flags |= IBV_ACCESS_REMOTE_ATOMIC;
#endif
ret = ibv_modify_xrc_rcv_qp(openib_btl->device->xrc_domain,
lcl_ep->xrc_recv_qp_num, &attr,
IBV_QP_STATE | IBV_QP_PKEY_INDEX |
@ -2540,8 +2649,8 @@ static int udcm_xrc_recv_qp_create (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg_
attr.qp_state = IBV_QPS_RTR;
attr.path_mtu = (openib_btl->device->mtu < lcl_ep->rem_info.rem_mtu) ?
openib_btl->device->mtu : lcl_ep->rem_info.rem_mtu;
attr.dest_qp_num = msg_hdr->data.xreq.rem_qp_num;
attr.rq_psn = msg_hdr->data.xreq.rem_psn;
attr.dest_qp_num = rem_qp_num;
attr.rq_psn = rem_psn;
attr.max_dest_rd_atomic = mca_btl_openib_component.ib_max_rdma_dst_ops;
attr.min_rnr_timer = mca_btl_openib_component.ib_min_rnr_timer;
attr.ah_attr.is_global = 0;
@ -2715,7 +2824,7 @@ static int udcm_xrc_handle_xconnect (mca_btl_openib_endpoint_t *lcl_ep, udcm_msg
response_type = UDCM_MESSAGE_XRESPONSE;
rc = udcm_xrc_recv_qp_create (lcl_ep, msg_hdr);
rc = udcm_xrc_recv_qp_create (lcl_ep, msg_hdr->data.xreq.rem_qp_num, msg_hdr->data.xreq.rem_psn);
if (OPAL_SUCCESS != rc) {
break;
}
@ -2761,7 +2870,7 @@ static int udcm_xrc_handle_xresponse (mca_btl_openib_endpoint_t *lcl_ep, udcm_ms
udep->recv_resp = true;
rc = udcm_xrc_send_qp_connect (lcl_ep, msg_hdr);
rc = udcm_xrc_send_qp_connect (lcl_ep, msg_hdr->data.xres.rem_qp_num, msg_hdr->data.xres.rem_psn);
if (OPAL_SUCCESS != rc) {
mca_btl_openib_endpoint_invoke_error (lcl_ep);
}