OPENIB BTL/CONNECT: Add support for AF_IB addressing in rdmacm.

This commit was SVN r30875.
This commit is contained in:
Vasily Filipov 2014-02-27 11:29:47 +00:00
parent e466fee747
commit f2014b96e7
3 changed files with 309 additions and 34 deletions
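
The change describes rdmacm endpoints by port GID and IB service id instead of IPv4 address and TCP port, resolving them through rdma_getaddrinfo() with AF_IB hints. Below is a minimal standalone sketch of that addressing scheme, assuming librdmacm with rdma_getaddrinfo()/AF_IB support; the GID bytes are illustrative and none of this code is part of the commit.

/* Sketch: turn a port GID into an AF_IB rdma address, much like the
 * new get_rdma_addr() helper in this commit does.  Compile with something
 * like -lrdmacm -libverbs (exact flags depend on the installation). */
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <rdma/rdma_cma.h>      /* rdma_getaddrinfo(), RAI_* flags */
#include <infiniband/ib.h>      /* AF_IB, struct sockaddr_ib */

int main(void)
{
    union ibv_gid gid;
    char gid_str[INET6_ADDRSTRLEN];
    struct rdma_addrinfo hints, *res;

    /* A GID is 16 bytes, so it can be printed with the IPv6 formatter,
       exactly as the rdmacm CPC does with inet_ntop(AF_INET6, ...). */
    memset(&gid, 0, sizeof gid);
    gid.raw[0] = 0xfe; gid.raw[1] = 0x80;   /* link-local subnet prefix */
    gid.raw[15] = 0x01;                     /* illustrative interface id */
    if (NULL == inet_ntop(AF_INET6, gid.raw, gid_str, sizeof gid_str)) {
        return 1;
    }

    /* Server-side resolution (RAI_PASSIVE), mirroring the hints that
       get_rdma_addr() fills in. */
    memset(&hints, 0, sizeof hints);
    hints.ai_family = AF_IB;
    hints.ai_port_space = RDMA_PS_TCP;
    hints.ai_flags = RAI_NUMERICHOST | RAI_FAMILY | RAI_PASSIVE;
    if (0 != rdma_getaddrinfo(gid_str, NULL, &hints, &res)) {
        fprintf(stderr, "rdma_getaddrinfo(%s) failed\n", gid_str);
        return 1;
    }

    printf("resolved GID %s, service id %llu\n", gid_str,
           (unsigned long long) ((struct sockaddr_ib *) res->ai_src_addr)->sib_sid);
    rdma_freeaddrinfo(res);
    return 0;
}

The resulting ai_src_addr is what the server side binds to; the bound service id is then published through the modex message in place of the TCP port.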

View file

@ -283,6 +283,10 @@ AC_DEFUN([OMPI_CHECK_OPENFABRICS_CM_ARGS],[
AC_ARG_ENABLE([openib-rdmacm],
[AC_HELP_STRING([--enable-openib-rdmacm],
[Enable Open Fabrics RDMACM support in openib BTL (default: enabled)])])
AC_ARG_ENABLE([openib-rdmacm-ibaddr],
[AC_HELP_STRING([--enable-openib-rdmacm-ibaddr],
[Enable Open Fabrics RDMACM with IB addressing support in openib BTL (default: disabled)])],
[enable_openib_rdmacm=yes])
])dnl
AC_DEFUN([OMPI_CHECK_OPENFABRICS_CM],[

View file

@ -71,6 +71,42 @@ AC_DEFUN([MCA_ompi_btl_openib_CONFIG],[
if test "x$btl_openib_have_rdmacm" = "x1" -a \
"$have_threads" != "none"; then
cpcs="$cpcs rdmacm"
if test "$enable_openib_rdmacm_ibaddr" = "yes"; then
LDFLAGS_save="$LDFLAGS"
LIBS_save="$LIBS"
LDFLAGS="$LDFLAGS $btl_openib_LDFLAGS"
LIBS="$LIBS $btl_openib_LIBS"
AC_LANG(C)
AC_MSG_CHECKING([rsockets keepalive])
AC_RUN_IFELSE(
[AC_LANG_PROGRAM(
[
#include <stdio.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/types.h>
#include <rdma/rsocket.h>
#include <infiniband/ib.h>
],
[
int rsock;
rsock = rsocket(AF_IB, SOCK_STREAM, 0);
if (rsock < 0) {
return -1;
}
rclose(rsock);
]
)],
[ AC_MSG_RESULT([yes])
AC_DEFINE(BTL_OPENIB_RDMACM_IB_ADDR, 1, rdmacm with rsockets support)
],
[ AC_MSG_RESULT([no])
AC_MSG_WARN([rsockets does not support keepalives. librdmacm 1.0.18 or later is needed.])
]
)
LDFLAGS="$LDFLAGS_save"
LIBS="$LIBS_save"
fi
fi
if test "x$btl_openib_have_udcm" = "x1" -a \
"$have_threads" != "none"; then

View file

@ -58,6 +58,10 @@
#include "btl_openib_ip.h"
#include "btl_openib_ini.h"
#if BTL_OPENIB_RDMACM_IB_ADDR
#include <infiniband/ib.h>
#endif
#define mymin(a, b) ((a) < (b) ? (a) : (b))
static void rdmacm_component_register(void);
@ -85,8 +89,13 @@ typedef struct {
/* Dummy QP only used when we expect the connection to be
rejected */
struct ibv_cq *dummy_cq;
#if BTL_OPENIB_RDMACM_IB_ADDR
union ibv_gid gid;
uint64_t service_id;
#else
uint32_t ipaddr;
uint16_t tcp_port;
#endif
/* server==false means that this proc initiated the connection;
server==true means that this proc accepted the incoming
connection. Note that this may be different than the "one way"
@ -114,8 +123,13 @@ OBJ_CLASS_INSTANCE(rdmacm_contents_t, opal_list_item_t,
typedef struct {
int device_max_qp_rd_atom;
int device_max_qp_init_rd_atom;
#if BTL_OPENIB_RDMACM_IB_ADDR
uint8_t gid[16];
uint64_t service_id;
#else
uint32_t ipaddr;
uint16_t tcp_port;
#endif
uint8_t end;
} modex_message_t;
@ -147,10 +161,15 @@ OBJ_CLASS_INSTANCE(id_context_t, opal_list_item_t,
typedef struct {
uint32_t rem_index;
#if BTL_OPENIB_RDMACM_IB_ADDR
uint64_t rem_port;
#else
uint16_t rem_port;
#endif
uint8_t qpnum;
} private_data_t;
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
/* Used to send a specific show_help message from the service_thread
to the main thread (because we can't call show_help from the
service_thread) */
@ -159,6 +178,7 @@ typedef struct {
uint32_t peer_ip_addr;
uint32_t peer_tcp_port;
} cant_find_endpoint_context_t;
#endif
static opal_list_t server_listener_list;
static opal_list_t client_list;
@ -166,7 +186,11 @@ static opal_mutex_t client_list_lock;
static struct rdma_event_channel *event_channel = NULL;
static int rdmacm_priority = 30;
static unsigned int rdmacm_port = 0;
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
static uint32_t rdmacm_addr = 0;
#endif
static int rdmacm_resolve_timeout = 30000;
static int rdmacm_resolve_max_retry_count = 20;
static bool rdmacm_reject_causes_connect_error = false;
@ -209,8 +233,12 @@ static void rdmacm_contents_constructor(rdmacm_contents_t *contents)
contents->endpoint = NULL;
contents->openib_btl = NULL;
contents->dummy_cq = NULL;
#if BTL_OPENIB_RDMACM_IB_ADDR
contents->service_id = 0;
#else
contents->ipaddr = 0;
contents->tcp_port = 0;
#endif
contents->server = false;
contents->on_client_list = false;
OBJ_CONSTRUCT(&(contents->ids), opal_list_t);
@ -327,13 +355,23 @@ static char *stringify(uint32_t addr)
*/
static mca_btl_openib_endpoint_t *rdmacm_find_endpoint(rdmacm_contents_t *contents,
struct rdma_cm_id *id,
#if BTL_OPENIB_RDMACM_IB_ADDR
uint64_t rem_port)
#else
uint16_t rem_port)
#endif
{
int i;
mca_btl_openib_endpoint_t *ep = NULL;
opal_pointer_array_t *endpoints = contents->openib_btl->device->endpoints;
struct sockaddr *peeraddr = rdma_get_peer_addr(id);
uint32_t peeripaddr = ((struct sockaddr_in *)peeraddr)->sin_addr.s_addr;
#if BTL_OPENIB_RDMACM_IB_ADDR
union ibv_gid *ep_gid, peer_gid;
memcpy(peer_gid.raw, ((struct sockaddr_ib *) peeraddr)->sib_addr.sib_raw, sizeof peer_gid);
#else
uint32_t peeripaddr = ((struct sockaddr_in *) peeraddr)->sin_addr.s_addr;
#if OPAL_ENABLE_DEBUG
char *a;
#endif
@ -343,6 +381,8 @@ static mca_btl_openib_endpoint_t *rdmacm_find_endpoint(rdmacm_contents_t *conten
#if OPAL_ENABLE_DEBUG
free(a);
#endif
#endif
for (i = 0; i < opal_pointer_array_get_size(endpoints); i++) {
mca_btl_openib_endpoint_t *endpoint;
modex_message_t *message;
@ -353,12 +393,22 @@ static mca_btl_openib_endpoint_t *rdmacm_find_endpoint(rdmacm_contents_t *conten
}
message = (modex_message_t *) endpoint->endpoint_remote_cpc_data->cbm_modex_message;
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
OPAL_OUTPUT((-1, "message ipaddr = %s port %d",
a = stringify(message->ipaddr), message->tcp_port));
#if OPAL_ENABLE_DEBUG
free(a);
#endif
#endif
#if BTL_OPENIB_RDMACM_IB_ADDR
ep_gid = (union ibv_gid *) message->gid;
if (ep_gid->global.interface_id == peer_gid.global.interface_id &&
ep_gid->global.subnet_prefix == peer_gid.global.subnet_prefix &&
message->service_id == rem_port) {
#else
if (message->ipaddr == peeripaddr && message->tcp_port == rem_port) {
#endif
ep = endpoint;
break;
}
@ -495,34 +545,93 @@ out:
* IP addresses are the same (i.e., the MPI procs are on the same
* node), then the process with the lower TCP port wins.
*/
static bool i_initiate(uint32_t local_ipaddr, uint16_t local_port,
uint32_t remote_ipaddr, uint16_t remote_port)
static bool i_initiate(uint64_t local_port, uint64_t remote_port,
#if BTL_OPENIB_RDMACM_IB_ADDR
union ibv_gid *local_gid, union ibv_gid *remote_gid)
{
#else
uint32_t local_ipaddr, uint32_t remote_ipaddr)
{
#if OPAL_ENABLE_DEBUG
char *a = stringify(local_ipaddr);
char *b = stringify(remote_ipaddr);
#endif
#endif
#if BTL_OPENIB_RDMACM_IB_ADDR
if (local_gid->global.subnet_prefix < remote_gid->global.subnet_prefix ||
(local_gid->global.subnet_prefix == remote_gid->global.subnet_prefix &&
local_gid->global.interface_id < remote_gid->global.interface_id) ||
(local_gid->global.subnet_prefix == remote_gid->global.subnet_prefix &&
local_gid->global.interface_id == remote_gid->global.interface_id &&
#else
if (local_ipaddr > remote_ipaddr ||
(local_ipaddr == remote_ipaddr && local_port < remote_port)) {
(local_ipaddr == remote_ipaddr &&
#endif
local_port < remote_port)) {
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
OPAL_OUTPUT((-1, "i_initiate (I WIN): local ipaddr %s, remote ipaddr %s",
a, b));
#if OPAL_ENABLE_DEBUG
free(a);
free(b);
#endif
return true;
} else {
OPAL_OUTPUT((-1, "i_initiate (I lose): local ipaddr %s, remote ipaddr %s",
a, b));
#if OPAL_ENABLE_DEBUG
free(a);
free(b);
#endif
return false;
return true;
}
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
OPAL_OUTPUT((-1, "i_initiate (I lose): local ipaddr %s, remote ipaddr %s",
a, b));
#if OPAL_ENABLE_DEBUG
free(a);
free(b);
#endif
#endif
return false;
}
#if BTL_OPENIB_RDMACM_IB_ADDR
static int get_rdma_addr(char *src, char *dst,
struct rdma_addrinfo **rdma_addr,
int server)
{
int rc;
struct rdma_addrinfo hints, *sres, *dres;
memset(&hints, 0, sizeof hints);
hints.ai_family = AF_IB;
hints.ai_port_space = RDMA_PS_TCP;
hints.ai_flags = RAI_NUMERICHOST | RAI_FAMILY | RAI_PASSIVE;
rc = rdma_getaddrinfo(src, NULL, &hints, &sres);
if (0 != rc) {
return OMPI_ERROR;
}
if (server) {
*rdma_addr = sres;
return OMPI_SUCCESS;
}
hints.ai_src_len = sres->ai_src_len;
hints.ai_src_addr = sres->ai_src_addr;
hints.ai_flags &= ~RAI_PASSIVE;
rc = rdma_getaddrinfo(dst, NULL, &hints, &dres);
if (0 != rc) {
rdma_freeaddrinfo(sres);
return OMPI_ERROR;
}
rdma_freeaddrinfo(sres);
*rdma_addr = dres;
return OMPI_SUCCESS;
}
#endif
/*
* Invoked by main thread
*/
@ -530,11 +639,17 @@ static int rdmacm_client_connect_one(rdmacm_contents_t *contents,
modex_message_t *message,
int num)
{
struct sockaddr_in src_in, dest_in;
id_context_t *context;
int rc;
id_context_t *context;
#if BTL_OPENIB_RDMACM_IB_ADDR
char src_addr[32], dst_addr[32];
struct rdma_addrinfo *rdma_addr;
#else
struct sockaddr_in src_in, dest_in;
#if OPAL_ENABLE_DEBUG
char *a, *b;
#endif
#endif
/* We'll need to access some data in the event handler. We can
@ -558,7 +673,7 @@ static int rdmacm_client_connect_one(rdmacm_contents_t *contents,
BTL_ERROR(("Failed to create a rdma id with %d", rc));
goto out1;
}
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
/* Source address (we must specify this to ensure that the traffic
goes out on the device+port that we expect it to go out on). */
memset(&src_in, 0, sizeof(src_in));
@ -585,7 +700,7 @@ static int rdmacm_client_connect_one(rdmacm_contents_t *contents,
free(a);
free(b);
#endif
#endif
/* This is odd and worth explaining: when we place the context on
the ids list, we need to add an extra RETAIN to the context.
The reason is because of a race condition. Let's explain
@ -623,15 +738,46 @@ static int rdmacm_client_connect_one(rdmacm_contents_t *contents,
*/
OBJ_RETAIN(context);
opal_list_append(&(contents->ids), &(context->super));
#if BTL_OPENIB_RDMACM_IB_ADDR
if (NULL == inet_ntop(AF_INET6, contents->gid.raw,
src_addr, sizeof src_addr)) {
BTL_ERROR(("local addr string creating fail"));
goto out1;
}
if (NULL == inet_ntop(AF_INET6, message->gid,
dst_addr, sizeof dst_addr)) {
BTL_ERROR(("remote addr string creating fail"));
goto out1;
}
rc = get_rdma_addr(src_addr, dst_addr, &rdma_addr, 0);
if (OMPI_SUCCESS != rc) {
BTL_ERROR(("server: create rdma addr error"));
goto out1;
}
((struct sockaddr_ib *) (rdma_addr->ai_dst_addr))->sib_sid = message->service_id;
#endif
rc = rdma_resolve_addr(context->id,
#if BTL_OPENIB_RDMACM_IB_ADDR
rdma_addr->ai_src_addr,
rdma_addr->ai_dst_addr,
#else
(struct sockaddr *) &src_in,
(struct sockaddr *) &dest_in,
#endif
rdmacm_resolve_timeout);
if (0 != rc) {
BTL_ERROR(("Failed to resolve the remote address with %d", rc));
#if BTL_OPENIB_RDMACM_IB_ADDR
rdma_freeaddrinfo(rdma_addr);
#endif
goto out1;
}
#if BTL_OPENIB_RDMACM_IB_ADDR
rdma_freeaddrinfo(rdma_addr);
#endif
return OMPI_SUCCESS;
@ -655,10 +801,11 @@ static int rdmacm_module_start_connect(ompi_btl_openib_connect_base_module_t *cp
modex_message_t *message, *local_message;
int rc, qp;
opal_list_item_t *item;
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
#if OPAL_ENABLE_DEBUG
char *a, *b;
#endif
#endif
/* Don't use the CPC to get the message, because this function is
invoked from the event_handler (to initiate connections in the
right direction), where we don't have the CPC, so it'll be
@ -667,7 +814,7 @@ static int rdmacm_module_start_connect(ompi_btl_openib_connect_base_module_t *cp
(modex_message_t *) endpoint->endpoint_local_cpc->data.cbm_modex_message;
message = (modex_message_t *)
endpoint->endpoint_remote_cpc_data->cbm_modex_message;
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
OPAL_OUTPUT((-1, "Connecting from IP %s:%d to remote IP %s:%d ep state = %d",
a = stringify(local_message->ipaddr), local_message->tcp_port,
b = stringify(message->ipaddr), message->tcp_port, endpoint->endpoint_state));
@ -677,7 +824,7 @@ static int rdmacm_module_start_connect(ompi_btl_openib_connect_base_module_t *cp
#endif
BTL_VERBOSE(("Connecting to remote ip addr = %x, port = %d ep state = %d",
message->ipaddr, message->tcp_port, endpoint->endpoint_state));
#endif
if (MCA_BTL_IB_CONNECTED == endpoint->endpoint_state ||
MCA_BTL_IB_CONNECTING == endpoint->endpoint_state ||
MCA_BTL_IB_CONNECT_ACK == endpoint->endpoint_state) {
@ -705,14 +852,25 @@ static int rdmacm_module_start_connect(ompi_btl_openib_connect_base_module_t *cp
* is being connected from, in the case where there are multiple
* listeners on the local system.
*/
#if BTL_OPENIB_RDMACM_IB_ADDR
memcpy(contents->gid.raw, local_message->gid, sizeof(contents->gid));
contents->service_id = local_message->service_id;
#else
contents->ipaddr = local_message->ipaddr;
contents->tcp_port = local_message->tcp_port;
#endif
/* Are we the initiator? Or do we expect this connect request to
be rejected? */
endpoint->endpoint_initiator =
i_initiate(contents->ipaddr, contents->tcp_port,
message->ipaddr, message->tcp_port);
i_initiate(
#if BTL_OPENIB_RDMACM_IB_ADDR
contents->service_id, message->service_id,
&contents->gid, (union ibv_gid *) message->gid);
#else
contents->tcp_port, message->tcp_port,
contents->ipaddr, message->ipaddr);
#endif
OPAL_OUTPUT((-1, "MAIN Start connect; ep=%p (%p), I %s the initiator to %s",
(void*) endpoint,
(void*) endpoint->endpoint_local_cpc,
@ -760,6 +918,7 @@ out:
return rc;
}
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
static void *show_help_cant_find_endpoint(void *context)
{
char *msg;
@ -786,6 +945,7 @@ static void *show_help_cant_find_endpoint(void *context)
mca_btl_openib_endpoint_invoke_error(NULL);
return NULL;
}
#endif
/*
* Invoked by service thread
@ -809,7 +969,11 @@ static int handle_connect_request(struct rdma_cm_event *event)
private_data_t msg;
int rc = -1, qpnum;
uint32_t rem_index;
#if BTL_OPENIB_RDMACM_IB_ADDR
uint64_t rem_port;
#else
uint16_t rem_port;
#endif
qpnum = ((private_data_t *)event->param.conn.private_data)->qpnum;
rem_port = ((private_data_t *)event->param.conn.private_data)->rem_port;
@ -819,6 +983,7 @@ static int handle_connect_request(struct rdma_cm_event *event)
to; use the listener's context->contents to figure it out */
endpoint = rdmacm_find_endpoint(contents, event->id, rem_port);
if (NULL == endpoint) {
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
struct sockaddr *peeraddr = rdma_get_peer_addr(event->id);
cant_find_endpoint_context_t *c = (cant_find_endpoint_context_t *) calloc(1, sizeof(*c));
if (NULL != c) {
@ -831,18 +996,25 @@ static int handle_connect_request(struct rdma_cm_event *event)
c->peer_tcp_port = rdma_get_dst_port(event->id);
}
ompi_btl_openib_fd_run_in_main(show_help_cant_find_endpoint, c);
#else
BTL_ERROR(("Cannot find endpoint."));
#endif
goto out;
}
message = (modex_message_t *) endpoint->endpoint_remote_cpc_data->cbm_modex_message;
endpoint->endpoint_initiator =
i_initiate(contents->ipaddr, contents->tcp_port,
message->ipaddr, rem_port);
i_initiate(
#if BTL_OPENIB_RDMACM_IB_ADDR
contents->service_id, rem_port,
&contents->gid, (union ibv_gid *) message->gid);
#else
contents->tcp_port, rem_port,
contents->ipaddr, message->ipaddr);
BTL_VERBOSE(("ep state = %d, local ipaddr = %x, remote ipaddr = %x, local port = %d, remote port = %d",
endpoint->endpoint_state, contents->ipaddr, message->ipaddr,
contents->tcp_port, rem_port));
endpoint->endpoint_state, contents->ipaddr, message->ipaddr,
contents->tcp_port, rem_port));
#endif
OPAL_OUTPUT((-1, "SERVICE in handle_connect_request; ep=%p (%p), I still %s the initiator to %s",
(void*) endpoint,
(void*) endpoint->endpoint_local_cpc,
@ -1161,11 +1333,13 @@ static int rdmacm_connect_endpoint(id_context_t *context,
}
message = (modex_message_t *) endpoint->endpoint_remote_cpc_data->cbm_modex_message;
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
BTL_VERBOSE(("%s connected!!! local %x remote %x state = %d",
contents->server?"server":"client",
contents->ipaddr,
message->ipaddr,
endpoint->endpoint_state));
#endif
/* Ensure that all the writes back to the endpoint and associated
data structures have completed */
@ -1348,13 +1522,17 @@ static int finish_connect(id_context_t *context)
private_data_t msg;
int rc;
struct sockaddr *peeraddr;
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
uint32_t remoteipaddr;
#endif
uint16_t remoteport;
modex_message_t *message;
remoteport = rdma_get_dst_port(context->id);
peeraddr = rdma_get_peer_addr(context->id);
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
remoteipaddr = ((struct sockaddr_in *)peeraddr)->sin_addr.s_addr;
#endif
message = (modex_message_t *)
context->endpoint->endpoint_remote_cpc_data->cbm_modex_message;
@ -1431,6 +1609,9 @@ static int finish_connect(id_context_t *context)
msg.qpnum = context->qpnum;
msg.rem_index = contents->endpoint->index;
#if BTL_OPENIB_RDMACM_IB_ADDR
msg.rem_port = contents->service_id;
#else
msg.rem_port = contents->tcp_port;
if (contents->endpoint->endpoint_initiator) {
#if OPAL_ENABLE_DEBUG
@ -1447,6 +1628,7 @@ static int finish_connect(id_context_t *context)
free(a);
#endif
}
#endif
/* Now all of the local setup has been done. The remote system
should now get a RDMA_CM_EVENT_CONNECT_REQUEST event to further
@ -1512,8 +1694,10 @@ static int event_handler(struct rdma_cm_event *event)
{
id_context_t *context = (id_context_t*) event->id->context;
rdmacm_contents_t *contents;
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
struct sockaddr *peeraddr, *localaddr;
uint32_t peeripaddr, localipaddr;
#endif
int rc = -1;
ompi_btl_openib_ini_values_t ini;
bool found;
@ -1523,6 +1707,8 @@ static int event_handler(struct rdma_cm_event *event)
}
contents = context->contents;
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
localaddr = rdma_get_local_addr(event->id);
peeraddr = rdma_get_peer_addr(event->id);
localipaddr = ((struct sockaddr_in *)localaddr)->sin_addr.s_addr;
@ -1533,6 +1719,7 @@ static int event_handler(struct rdma_cm_event *event)
rdma_event_str(event->event),
event->status,
peeripaddr));
#endif
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
@ -1542,7 +1729,9 @@ static int event_handler(struct rdma_cm_event *event)
case RDMA_CM_EVENT_ROUTE_RESOLVED:
OPAL_OUTPUT((-1, "SERVICE Got ROUTE_RESOLVED: ID %p", (void*) context->id));
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
contents->ipaddr = localipaddr;
#endif
rc = finish_connect(context);
break;
@ -1717,6 +1906,7 @@ static int rdmacm_init(mca_btl_openib_endpoint_t *endpoint)
return OMPI_SUCCESS;
}
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
static int ipaddrcheck(id_context_t *context,
mca_btl_openib_module_t *openib_btl)
{
@ -1781,14 +1971,17 @@ static int ipaddrcheck(id_context_t *context,
return already_exists ? OMPI_ERROR : OMPI_SUCCESS;
}
#endif
static int create_message(rdmacm_contents_t *server,
mca_btl_openib_module_t *openib_btl,
ompi_btl_openib_connect_base_module_data_t *data)
{
modex_message_t *message;
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
#if OPAL_ENABLE_DEBUG
char *a;
#endif
#endif
message = (modex_message_t *) malloc(sizeof(modex_message_t));
@ -1801,6 +1994,11 @@ static int create_message(rdmacm_contents_t *server,
openib_btl->device->ib_dev_attr.max_qp_rd_atom;
message->device_max_qp_init_rd_atom =
openib_btl->device->ib_dev_attr.max_qp_init_rd_atom;
#if BTL_OPENIB_RDMACM_IB_ADDR
memcpy(message->gid, server->gid.raw, sizeof(server->gid));
message->service_id = server->service_id;
#else
message->ipaddr = server->ipaddr;
message->tcp_port = server->tcp_port;
@ -1808,6 +2006,7 @@ static int create_message(rdmacm_contents_t *server,
a = stringify(message->ipaddr), message->tcp_port));
#if OPAL_ENABLE_DEBUG
free(a);
#endif
#endif
data->cbm_modex_message = message;
data->cbm_modex_message_len = message_len;
@ -1823,11 +2022,18 @@ static int create_message(rdmacm_contents_t *server,
*/
static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, ompi_btl_openib_connect_base_module_t **cpc)
{
rdmacm_contents_t *server = NULL;
struct sockaddr_in sin;
id_context_t *context;
int rc;
id_context_t *context;
rdmacm_contents_t *server = NULL;
#if BTL_OPENIB_RDMACM_IB_ADDR
char rdmacm_addr_str[32];
struct rdma_addrinfo *rdma_addr;
#else
struct sockaddr_in sin;
#endif
/* RDMACM is not supported if we have any XRC QPs */
if (mca_btl_openib_component.num_xrc_qps > 0) {
BTL_VERBOSE(("rdmacm CPC not supported with XRC receive queues, please try xoob CPC; skipped on %s:%d",
@ -1883,23 +2089,49 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, ompi_btl_
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto out4;
}
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
memset(&sin, 0, sizeof(sin));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = rdmacm_addr;
sin.sin_port = (uint16_t) rdmacm_port;
#else
rc = ibv_query_gid(openib_btl->device->ib_pd->context, openib_btl->port_num, 0, &server->gid);
if (0 != rc) {
BTL_ERROR(("local gid query failed"));
goto out4;
}
if (NULL == inet_ntop(AF_INET6, server->gid.raw,
rdmacm_addr_str, sizeof rdmacm_addr_str)) {
BTL_ERROR(("local gaddr string creating fail"));
goto out4;
}
rc = get_rdma_addr(rdmacm_addr_str, NULL, &rdma_addr, 1);
if (OMPI_SUCCESS != rc) {
BTL_ERROR(("server: create rdma addr error"));
goto out4;
}
#endif
/* Bind the rdmacm server to the local IP address and an ephemeral
* port or one specified by a command-line argument.
*/
rc = rdma_bind_addr(context->id, (struct sockaddr *)&sin);
rc = rdma_bind_addr(context->id,
#if BTL_OPENIB_RDMACM_IB_ADDR
rdma_addr->ai_src_addr);
#else
(struct sockaddr *)&sin);
#endif
if (0 != rc) {
opal_output_verbose(5, ompi_btl_base_framework.framework_output,
"openib BTL: rdmacm CPC unable to bind to address");
rc = OMPI_ERR_UNREACH;
goto out5;
}
#if BTL_OPENIB_RDMACM_IB_ADDR
server->service_id = ((struct sockaddr_ib *) (&context->id->route.addr.src_addr))->sib_sid;
rdma_freeaddrinfo(rdma_addr);
#else
/* Verify that the device has a valid IP address on it, or we
cannot use the cpc */
rc = ipaddrcheck(context, openib_btl);
@ -1909,7 +2141,7 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, ompi_btl_
rc = OMPI_ERR_NOT_SUPPORTED;
goto out5;
}
#endif
/* Listen on the specified address/port with the rdmacm, limit the
amount of incoming connections to 1024 */
/* FIXME - 1024 should be (num of connectors *
@ -1944,6 +2176,9 @@ out5:
* But don't do it here since it's part of out4:OBJ_RELEASE(context),
* and we don't want to do it twice.
*/
#if BTL_OPENIB_RDMACM_IB_ADDR
rdma_freeaddrinfo(rdma_addr);
#endif
out4:
opal_list_remove_first(&(server->ids));
OBJ_RELEASE(context);
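
For clarity, here is a compact sketch of the AF_IB tie-break that the reworked i_initiate() above implements, assuming <infiniband/verbs.h> for union ibv_gid; the GID and service-id values are illustrative and the helper name is hypothetical.

/* Order peers by (subnet prefix, interface id); when both GIDs match (same
 * port), the side with the lower service id initiates the connection. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <infiniband/verbs.h>   /* union ibv_gid */

static bool ib_i_initiate(const union ibv_gid *local_gid, uint64_t local_sid,
                          const union ibv_gid *remote_gid, uint64_t remote_sid)
{
    if (local_gid->global.subnet_prefix != remote_gid->global.subnet_prefix) {
        return local_gid->global.subnet_prefix < remote_gid->global.subnet_prefix;
    }
    if (local_gid->global.interface_id != remote_gid->global.interface_id) {
        return local_gid->global.interface_id < remote_gid->global.interface_id;
    }
    return local_sid < remote_sid;      /* same port: lower service id wins */
}

int main(void)
{
    union ibv_gid a = { .global = { .subnet_prefix = 1, .interface_id = 7 } };
    union ibv_gid b = { .global = { .subnet_prefix = 1, .interface_id = 9 } };

    /* Exactly one peer of each pair decides it is the initiator, which is
       what keeps a single connection per endpoint pair. */
    printf("a initiates to b: %d\n", ib_i_initiate(&a, 100, &b, 200));
    printf("b initiates to a: %d\n", ib_i_initiate(&b, 200, &a, 100));
    return 0;
}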