OPENIB BTL/CONNECT: Add support for AF_IB addressing in rdmacm.
This commit was SVN r30875.
Этот коммит содержится в:
родитель
e466fee747
Коммит
f2014b96e7
@ -283,6 +283,10 @@ AC_DEFUN([OMPI_CHECK_OPENFABRICS_CM_ARGS],[
|
||||
AC_ARG_ENABLE([openib-rdmacm],
|
||||
[AC_HELP_STRING([--enable-openib-rdmacm],
|
||||
[Enable Open Fabrics RDMACM support in openib BTL (default: enabled)])])
|
||||
AC_ARG_ENABLE([openib-rdmacm-ibaddr],
|
||||
[AC_HELP_STRING([--enable-openib-rdmacm-ibaddr],
|
||||
[Enable Open Fabrics RDMACM with IB addressing support in openib BTL (default: disabled)])],
|
||||
[enable_openib_rdmacm=yes])
|
||||
])dnl
|
||||
|
||||
AC_DEFUN([OMPI_CHECK_OPENFABRICS_CM],[
|
||||
|
@ -71,6 +71,42 @@ AC_DEFUN([MCA_ompi_btl_openib_CONFIG],[
|
||||
if test "x$btl_openib_have_rdmacm" = "x1" -a \
|
||||
"$have_threads" != "none"; then
|
||||
cpcs="$cpcs rdmacm"
|
||||
if test "$enable_openib_rdmacm_ibaddr" = "yes"; then
    # Probe, by running a small program, whether rsockets can create an
    # AF_IB socket.  The openib BTL link flags are added for the probe
    # only and restored afterwards.
    LDFLAGS_save="$LDFLAGS"
    LIBS_save="$LIBS"
    LDFLAGS="$LDFLAGS $btl_openib_LDFLAGS"
    LIBS="$LIBS $btl_openib_LIBS"
    AC_LANG_PUSH([C])
    AC_MSG_CHECKING([for AF_IB support in rsockets])
    AC_RUN_IFELSE(
        [AC_LANG_PROGRAM(
            [[
#include <stdio.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/types.h>
#include <rdma/rsocket.h>
#include <infiniband/ib.h>
            ]],
            [[
int rsock;
rsock = rsocket(AF_IB, SOCK_STREAM, 0);
if (rsock < 0) {
    return -1;
}
rclose(rsock);
            ]])],
        [AC_MSG_RESULT([yes])
         AC_DEFINE([BTL_OPENIB_RDMACM_IB_ADDR], [1],
                   [Whether the rdmacm CPC uses AF_IB (GID and service ID) addressing])],
        [AC_MSG_RESULT([no])
         AC_MSG_WARN([rsockets cannot create an AF_IB socket; rdmacm IB addressing support is disabled.])],
        [# The probe must be executed, so it cannot give an answer when
         # cross compiling; stay with the default IP addressing.
         AC_MSG_RESULT([cross compiling; assuming no])
         AC_MSG_WARN([cannot test AF_IB support when cross compiling; rdmacm IB addressing support is disabled.])])
    AC_LANG_POP([C])
    LDFLAGS="$LDFLAGS_save"
    LIBS="$LIBS_save"
fi
|
||||
fi
|
||||
if test "x$btl_openib_have_udcm" = "x1" -a \
|
||||
"$have_threads" != "none"; then
|
||||
|
@ -58,6 +58,10 @@
|
||||
#include "btl_openib_ip.h"
|
||||
#include "btl_openib_ini.h"
|
||||
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
#include <infiniband/ib.h>
|
||||
#endif
|
||||
|
||||
#define mymin(a, b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
static void rdmacm_component_register(void);
|
||||
@ -85,8 +89,13 @@ typedef struct {
|
||||
/* Dummy QP only used when we expect the connection to be
|
||||
rejected */
|
||||
struct ibv_cq *dummy_cq;
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
union ibv_gid gid;
|
||||
uint64_t service_id;
|
||||
#else
|
||||
uint32_t ipaddr;
|
||||
uint16_t tcp_port;
|
||||
#endif
|
||||
/* server==false means that this proc initiated the connection;
|
||||
server==true means that this proc accepted the incoming
|
||||
connection. Note that this may be different than the "one way"
|
||||
@ -114,8 +123,13 @@ OBJ_CLASS_INSTANCE(rdmacm_contents_t, opal_list_item_t,
|
||||
/* Connection information for one local listener.  create_message() fills
   it in from the listener ("server") and stores it as cbm_modex_message;
   the peer's copy is read back from endpoint_remote_cpc_data. */
typedef struct {
|
||||
int device_max_qp_rd_atom; /* copied from ib_dev_attr.max_qp_rd_atom */
|
||||
int device_max_qp_init_rd_atom; /* copied from ib_dev_attr.max_qp_init_rd_atom */
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
uint8_t gid[16]; /* raw bytes of the listener's GID (AF_IB addressing) */
|
||||
uint64_t service_id; /* listener's service ID; compared with private_data_t.rem_port */
|
||||
#else
|
||||
uint32_t ipaddr; /* listener's IPv4 address, compared with sin_addr.s_addr of the peer */
|
||||
uint16_t tcp_port; /* listener's TCP port; compared with private_data_t.rem_port */
|
||||
#endif
|
||||
uint8_t end; /* NOTE(review): presumably marks the end of the transmitted payload -- confirm where message_len is computed */
|
||||
} modex_message_t;
|
||||
|
||||
@ -147,10 +161,15 @@ OBJ_CLASS_INSTANCE(id_context_t, opal_list_item_t,
|
||||
|
||||
/* Private data attached to a connection request: finish_connect() fills
   one in, and handle_connect_request() reads qpnum and rem_port back out
   of event->param.conn.private_data. */
typedef struct {
|
||||
uint32_t rem_index; /* sender's endpoint index (contents->endpoint->index) */
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
uint64_t rem_port; /* sender's listener service ID (contents->service_id) */
|
||||
#else
|
||||
uint16_t rem_port; /* sender's listener TCP port (contents->tcp_port) */
|
||||
#endif
|
||||
uint8_t qpnum; /* QP number this request is for (context->qpnum) */
|
||||
} private_data_t;
|
||||
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
/* Used to send a specific show_help message from the service_thread
|
||||
to the main thread (because we can't call show_help from the
|
||||
service_thread) */
|
||||
@ -159,6 +178,7 @@ typedef struct {
|
||||
uint32_t peer_ip_addr;
|
||||
uint32_t peer_tcp_port;
|
||||
} cant_find_endpoint_context_t;
|
||||
#endif
|
||||
|
||||
static opal_list_t server_listener_list;
|
||||
static opal_list_t client_list;
|
||||
@ -166,7 +186,11 @@ static opal_mutex_t client_list_lock;
|
||||
static struct rdma_event_channel *event_channel = NULL;
|
||||
static int rdmacm_priority = 30;
|
||||
static unsigned int rdmacm_port = 0;
|
||||
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
static uint32_t rdmacm_addr = 0;
|
||||
#endif
|
||||
|
||||
static int rdmacm_resolve_timeout = 30000;
|
||||
static int rdmacm_resolve_max_retry_count = 20;
|
||||
static bool rdmacm_reject_causes_connect_error = false;
|
||||
@ -209,8 +233,12 @@ static void rdmacm_contents_constructor(rdmacm_contents_t *contents)
|
||||
contents->endpoint = NULL;
|
||||
contents->openib_btl = NULL;
|
||||
contents->dummy_cq = NULL;
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
contents->service_id = 0;
|
||||
#else
|
||||
contents->ipaddr = 0;
|
||||
contents->tcp_port = 0;
|
||||
#endif
|
||||
contents->server = false;
|
||||
contents->on_client_list = false;
|
||||
OBJ_CONSTRUCT(&(contents->ids), opal_list_t);
|
||||
@ -327,13 +355,23 @@ static char *stringify(uint32_t addr)
|
||||
*/
|
||||
static mca_btl_openib_endpoint_t *rdmacm_find_endpoint(rdmacm_contents_t *contents,
|
||||
struct rdma_cm_id *id,
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
uint64_t rem_port)
|
||||
#else
|
||||
uint16_t rem_port)
|
||||
#endif
|
||||
{
|
||||
int i;
|
||||
mca_btl_openib_endpoint_t *ep = NULL;
|
||||
opal_pointer_array_t *endpoints = contents->openib_btl->device->endpoints;
|
||||
|
||||
struct sockaddr *peeraddr = rdma_get_peer_addr(id);
|
||||
uint32_t peeripaddr = ((struct sockaddr_in *)peeraddr)->sin_addr.s_addr;
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
union ibv_gid *ep_gid, peer_gid;
|
||||
memcpy(peer_gid.raw, ((struct sockaddr_ib *) peeraddr)->sib_addr.sib_raw, sizeof peer_gid);
|
||||
#else
|
||||
uint32_t peeripaddr = ((struct sockaddr_in *) peeraddr)->sin_addr.s_addr;
|
||||
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
char *a;
|
||||
#endif
|
||||
@ -343,6 +381,8 @@ static mca_btl_openib_endpoint_t *rdmacm_find_endpoint(rdmacm_contents_t *conten
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
free(a);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
for (i = 0; i < opal_pointer_array_get_size(endpoints); i++) {
|
||||
mca_btl_openib_endpoint_t *endpoint;
|
||||
modex_message_t *message;
|
||||
@ -353,12 +393,22 @@ static mca_btl_openib_endpoint_t *rdmacm_find_endpoint(rdmacm_contents_t *conten
|
||||
}
|
||||
|
||||
message = (modex_message_t *) endpoint->endpoint_remote_cpc_data->cbm_modex_message;
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
OPAL_OUTPUT((-1, "message ipaddr = %s port %d",
|
||||
a = stringify(message->ipaddr), message->tcp_port));
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
free(a);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
ep_gid = (union ibv_gid *) message->gid;
|
||||
if (ep_gid->global.interface_id == peer_gid.global.interface_id &&
|
||||
ep_gid->global.subnet_prefix == peer_gid.global.subnet_prefix &&
|
||||
message->service_id == rem_port) {
|
||||
#else
|
||||
if (message->ipaddr == peeripaddr && message->tcp_port == rem_port) {
|
||||
#endif
|
||||
ep = endpoint;
|
||||
break;
|
||||
}
|
||||
@ -495,34 +545,93 @@ out:
|
||||
* IP addresses are the same (i.e., the MPI procs are on the same
|
||||
* node), then the process with the lower TCP port wins.
|
||||
*/
|
||||
static bool i_initiate(uint32_t local_ipaddr, uint16_t local_port,
|
||||
uint32_t remote_ipaddr, uint16_t remote_port)
|
||||
static bool i_initiate(uint64_t local_port, uint64_t remote_port,
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
union ibv_gid *local_gid, union ibv_gid *remote_gid)
|
||||
{
|
||||
#else
|
||||
uint32_t local_ipaddr, uint32_t remote_ipaddr)
|
||||
{
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
char *a = stringify(local_ipaddr);
|
||||
char *b = stringify(remote_ipaddr);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
if (local_gid->global.subnet_prefix < remote_gid->global.subnet_prefix ||
|
||||
(local_gid->global.subnet_prefix == remote_gid->global.subnet_prefix &&
|
||||
local_gid->global.interface_id < remote_gid->global.interface_id) ||
|
||||
(local_gid->global.subnet_prefix == remote_gid->global.subnet_prefix &&
|
||||
local_gid->global.interface_id == remote_gid->global.interface_id &&
|
||||
#else
|
||||
if (local_ipaddr > remote_ipaddr ||
|
||||
(local_ipaddr == remote_ipaddr && local_port < remote_port)) {
|
||||
(local_ipaddr == remote_ipaddr &&
|
||||
#endif
|
||||
local_port < remote_port)) {
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
OPAL_OUTPUT((-1, "i_initiate (I WIN): local ipaddr %s, remote ipaddr %s",
|
||||
a, b));
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
free(a);
|
||||
free(b);
|
||||
#endif
|
||||
return true;
|
||||
} else {
|
||||
OPAL_OUTPUT((-1, "i_initiate (I lose): local ipaddr %s, remote ipaddr %s",
|
||||
a, b));
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
free(a);
|
||||
free(b);
|
||||
#endif
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
OPAL_OUTPUT((-1, "i_initiate (I lose): local ipaddr %s, remote ipaddr %s",
|
||||
a, b));
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
free(a);
|
||||
free(b);
|
||||
#endif
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
/*
 * Build an AF_IB rdma_addrinfo for binding (server) or connecting (client).
 *
 * src       - numeric local address string, resolved passively.
 * dst       - numeric remote address string; only used when server == 0.
 * rdma_addr - on success receives the rdma_getaddrinfo() result; the
 *             callers in this file release it with rdma_freeaddrinfo().
 *             It is left untouched on failure.
 * server    - non-zero: return the source-only lookup of src;
 *             zero: return a lookup of dst that carries the source
 *             address obtained from the lookup of src.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR if either lookup fails.
 */
static int get_rdma_addr(char *src, char *dst,
                         struct rdma_addrinfo **rdma_addr,
                         int server)
{
    struct rdma_addrinfo query;
    struct rdma_addrinfo *local_info;
    struct rdma_addrinfo *peer_info;
    int status;

    memset(&query, 0, sizeof(query));
    query.ai_flags = RAI_PASSIVE | RAI_FAMILY | RAI_NUMERICHOST;
    query.ai_family = AF_IB;
    query.ai_port_space = RDMA_PS_TCP;

    /* First lookup: the local (source) side only. */
    if (0 != rdma_getaddrinfo(src, NULL, &query, &local_info)) {
        return OMPI_ERROR;
    }

    /* A listener only needs the source-side result. */
    if (server) {
        *rdma_addr = local_info;
        return OMPI_SUCCESS;
    }

    /* Second lookup: the destination, reusing the source address that the
       first lookup produced and dropping the passive flag. */
    query.ai_flags &= ~RAI_PASSIVE;
    query.ai_src_addr = local_info->ai_src_addr;
    query.ai_src_len = local_info->ai_src_len;

    status = rdma_getaddrinfo(dst, NULL, &query, &peer_info);

    /* The first result is no longer needed whether or not the second
       lookup succeeded. */
    rdma_freeaddrinfo(local_info);
    if (0 != status) {
        return OMPI_ERROR;
    }

    *rdma_addr = peer_info;
    return OMPI_SUCCESS;
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Invoked by main thread
|
||||
*/
|
||||
@ -530,11 +639,17 @@ static int rdmacm_client_connect_one(rdmacm_contents_t *contents,
|
||||
modex_message_t *message,
|
||||
int num)
|
||||
{
|
||||
struct sockaddr_in src_in, dest_in;
|
||||
id_context_t *context;
|
||||
int rc;
|
||||
id_context_t *context;
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
char src_addr[32], dst_addr[32];
|
||||
struct rdma_addrinfo *rdma_addr;
|
||||
#else
|
||||
struct sockaddr_in src_in, dest_in;
|
||||
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
char *a, *b;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* We'll need to access some data in the event handler. We can
|
||||
@ -558,7 +673,7 @@ static int rdmacm_client_connect_one(rdmacm_contents_t *contents,
|
||||
BTL_ERROR(("Failed to create a rdma id with %d", rc));
|
||||
goto out1;
|
||||
}
|
||||
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
/* Source address (we must specify this to ensure that the traffic
|
||||
goes out on the device+port that we expect it go out). */
|
||||
memset(&src_in, 0, sizeof(src_in));
|
||||
@ -585,7 +700,7 @@ static int rdmacm_client_connect_one(rdmacm_contents_t *contents,
|
||||
free(a);
|
||||
free(b);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
/* This is odd and worth explaining: when we place the context on
|
||||
the ids list, we need to add an extra RETAIN to the context.
|
||||
The reason is because of a race condition. Let's explain
|
||||
@ -623,15 +738,46 @@ static int rdmacm_client_connect_one(rdmacm_contents_t *contents,
|
||||
*/
|
||||
OBJ_RETAIN(context);
|
||||
opal_list_append(&(contents->ids), &(context->super));
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
if (NULL == inet_ntop(AF_INET6, contents->gid.raw,
|
||||
src_addr, sizeof src_addr)) {
|
||||
BTL_ERROR(("local addr string creating fail"));
|
||||
goto out1;
|
||||
}
|
||||
|
||||
if (NULL == inet_ntop(AF_INET6, message->gid,
|
||||
dst_addr, sizeof dst_addr)) {
|
||||
BTL_ERROR(("remote addr string creating fail"));
|
||||
goto out1;
|
||||
}
|
||||
|
||||
rc = get_rdma_addr(src_addr, dst_addr, &rdma_addr, 0);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
BTL_ERROR(("server: create rdma addr error"));
|
||||
goto out1;
|
||||
}
|
||||
|
||||
((struct sockaddr_ib *) (rdma_addr->ai_dst_addr))->sib_sid = message->service_id;
|
||||
#endif
|
||||
rc = rdma_resolve_addr(context->id,
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
rdma_addr->ai_src_addr,
|
||||
rdma_addr->ai_dst_addr,
|
||||
#else
|
||||
(struct sockaddr *) &src_in,
|
||||
(struct sockaddr *) &dest_in,
|
||||
#endif
|
||||
rdmacm_resolve_timeout);
|
||||
if (0 != rc) {
|
||||
BTL_ERROR(("Failed to resolve the remote address with %d", rc));
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
rdma_freeaddrinfo(rdma_addr);
|
||||
#endif
|
||||
goto out1;
|
||||
}
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
rdma_freeaddrinfo(rdma_addr);
|
||||
#endif
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
@ -655,10 +801,11 @@ static int rdmacm_module_start_connect(ompi_btl_openib_connect_base_module_t *cp
|
||||
modex_message_t *message, *local_message;
|
||||
int rc, qp;
|
||||
opal_list_item_t *item;
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
char *a, *b;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
/* Don't use the CPC to get the message, because this function is
|
||||
invoked from the event_handler (to initiate connections in the
|
||||
Right direction), where we don't have the CPC, so it'll be
|
||||
@ -667,7 +814,7 @@ static int rdmacm_module_start_connect(ompi_btl_openib_connect_base_module_t *cp
|
||||
(modex_message_t *) endpoint->endpoint_local_cpc->data.cbm_modex_message;
|
||||
message = (modex_message_t *)
|
||||
endpoint->endpoint_remote_cpc_data->cbm_modex_message;
|
||||
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
OPAL_OUTPUT((-1, "Connecting from IP %s:%d to remote IP %s:%d ep state = %d",
|
||||
a = stringify(local_message->ipaddr), local_message->tcp_port,
|
||||
b = stringify(message->ipaddr), message->tcp_port, endpoint->endpoint_state));
|
||||
@ -677,7 +824,7 @@ static int rdmacm_module_start_connect(ompi_btl_openib_connect_base_module_t *cp
|
||||
#endif
|
||||
BTL_VERBOSE(("Connecting to remote ip addr = %x, port = %d ep state = %d",
|
||||
message->ipaddr, message->tcp_port, endpoint->endpoint_state));
|
||||
|
||||
#endif
|
||||
if (MCA_BTL_IB_CONNECTED == endpoint->endpoint_state ||
|
||||
MCA_BTL_IB_CONNECTING == endpoint->endpoint_state ||
|
||||
MCA_BTL_IB_CONNECT_ACK == endpoint->endpoint_state) {
|
||||
@ -705,14 +852,25 @@ static int rdmacm_module_start_connect(ompi_btl_openib_connect_base_module_t *cp
|
||||
* is being connected from, in the case where there are multiple
|
||||
* listeners on the local system.
|
||||
*/
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
memcpy(contents->gid.raw, local_message->gid, sizeof(contents->gid));
|
||||
contents->service_id = local_message->service_id;
|
||||
#else
|
||||
contents->ipaddr = local_message->ipaddr;
|
||||
contents->tcp_port = local_message->tcp_port;
|
||||
#endif
|
||||
|
||||
/* Are we the initiator? Or do we expect this connect request to
|
||||
be rejected? */
|
||||
endpoint->endpoint_initiator =
|
||||
i_initiate(contents->ipaddr, contents->tcp_port,
|
||||
message->ipaddr, message->tcp_port);
|
||||
i_initiate(
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
contents->service_id, message->service_id,
|
||||
&contents->gid, (union ibv_gid *) message->gid);
|
||||
#else
|
||||
contents->tcp_port, message->tcp_port,
|
||||
contents->ipaddr, message->ipaddr);
|
||||
#endif
|
||||
OPAL_OUTPUT((-1, "MAIN Start connect; ep=%p (%p), I %s the initiator to %s",
|
||||
(void*) endpoint,
|
||||
(void*) endpoint->endpoint_local_cpc,
|
||||
@ -760,6 +918,7 @@ out:
|
||||
return rc;
|
||||
}
|
||||
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
static void *show_help_cant_find_endpoint(void *context)
|
||||
{
|
||||
char *msg;
|
||||
@ -786,6 +945,7 @@ static void *show_help_cant_find_endpoint(void *context)
|
||||
mca_btl_openib_endpoint_invoke_error(NULL);
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Invoked by service thread
|
||||
@ -809,7 +969,11 @@ static int handle_connect_request(struct rdma_cm_event *event)
|
||||
private_data_t msg;
|
||||
int rc = -1, qpnum;
|
||||
uint32_t rem_index;
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
uint64_t rem_port;
|
||||
#else
|
||||
uint16_t rem_port;
|
||||
#endif
|
||||
|
||||
qpnum = ((private_data_t *)event->param.conn.private_data)->qpnum;
|
||||
rem_port = ((private_data_t *)event->param.conn.private_data)->rem_port;
|
||||
@ -819,6 +983,7 @@ static int handle_connect_request(struct rdma_cm_event *event)
|
||||
to; use the listener's context->contents to figure it out */
|
||||
endpoint = rdmacm_find_endpoint(contents, event->id, rem_port);
|
||||
if (NULL == endpoint) {
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
struct sockaddr *peeraddr = rdma_get_peer_addr(event->id);
|
||||
cant_find_endpoint_context_t *c = (cant_find_endpoint_context_t *) calloc(1, sizeof(*c));
|
||||
if (NULL != c) {
|
||||
@ -831,18 +996,25 @@ static int handle_connect_request(struct rdma_cm_event *event)
|
||||
c->peer_tcp_port = rdma_get_dst_port(event->id);
|
||||
}
|
||||
ompi_btl_openib_fd_run_in_main(show_help_cant_find_endpoint, c);
|
||||
#else
|
||||
BTL_ERROR(("Cannot find endpoint."));
|
||||
#endif
|
||||
goto out;
|
||||
}
|
||||
|
||||
message = (modex_message_t *) endpoint->endpoint_remote_cpc_data->cbm_modex_message;
|
||||
endpoint->endpoint_initiator =
|
||||
i_initiate(contents->ipaddr, contents->tcp_port,
|
||||
message->ipaddr, rem_port);
|
||||
|
||||
i_initiate(
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
contents->service_id, rem_port,
|
||||
&contents->gid, (union ibv_gid *) message->gid);
|
||||
#else
|
||||
contents->tcp_port, rem_port,
|
||||
contents->ipaddr, message->ipaddr);
|
||||
BTL_VERBOSE(("ep state = %d, local ipaddr = %x, remote ipaddr = %x, local port = %d, remote port = %d",
|
||||
endpoint->endpoint_state, contents->ipaddr, message->ipaddr,
|
||||
contents->tcp_port, rem_port));
|
||||
|
||||
endpoint->endpoint_state, contents->ipaddr, message->ipaddr,
|
||||
contents->tcp_port, rem_port));
|
||||
#endif
|
||||
OPAL_OUTPUT((-1, "SERVICE in handle_connect_request; ep=%p (%p), I still %s the initiator to %s",
|
||||
(void*) endpoint,
|
||||
(void*) endpoint->endpoint_local_cpc,
|
||||
@ -1161,11 +1333,13 @@ static int rdmacm_connect_endpoint(id_context_t *context,
|
||||
}
|
||||
|
||||
message = (modex_message_t *) endpoint->endpoint_remote_cpc_data->cbm_modex_message;
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
BTL_VERBOSE(("%s connected!!! local %x remote %x state = %d",
|
||||
contents->server?"server":"client",
|
||||
contents->ipaddr,
|
||||
message->ipaddr,
|
||||
endpoint->endpoint_state));
|
||||
#endif
|
||||
|
||||
/* Ensure that all the writes back to the endpoint and associated
|
||||
data structures have completed */
|
||||
@ -1348,13 +1522,17 @@ static int finish_connect(id_context_t *context)
|
||||
private_data_t msg;
|
||||
int rc;
|
||||
struct sockaddr *peeraddr;
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
uint32_t remoteipaddr;
|
||||
#endif
|
||||
uint16_t remoteport;
|
||||
modex_message_t *message;
|
||||
|
||||
remoteport = rdma_get_dst_port(context->id);
|
||||
peeraddr = rdma_get_peer_addr(context->id);
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
remoteipaddr = ((struct sockaddr_in *)peeraddr)->sin_addr.s_addr;
|
||||
#endif
|
||||
|
||||
message = (modex_message_t *)
|
||||
context->endpoint->endpoint_remote_cpc_data->cbm_modex_message;
|
||||
@ -1431,6 +1609,9 @@ static int finish_connect(id_context_t *context)
|
||||
|
||||
msg.qpnum = context->qpnum;
|
||||
msg.rem_index = contents->endpoint->index;
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
msg.rem_port = contents->service_id;
|
||||
#else
|
||||
msg.rem_port = contents->tcp_port;
|
||||
if (contents->endpoint->endpoint_initiator) {
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
@ -1447,6 +1628,7 @@ static int finish_connect(id_context_t *context)
|
||||
free(a);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Now all of the local setup has been done. The remote system
|
||||
should now get a RDMA_CM_EVENT_CONNECT_REQUEST event to further
|
||||
@ -1512,8 +1694,10 @@ static int event_handler(struct rdma_cm_event *event)
|
||||
{
|
||||
id_context_t *context = (id_context_t*) event->id->context;
|
||||
rdmacm_contents_t *contents;
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
struct sockaddr *peeraddr, *localaddr;
|
||||
uint32_t peeripaddr, localipaddr;
|
||||
#endif
|
||||
int rc = -1;
|
||||
ompi_btl_openib_ini_values_t ini;
|
||||
bool found;
|
||||
@ -1523,6 +1707,8 @@ static int event_handler(struct rdma_cm_event *event)
|
||||
}
|
||||
|
||||
contents = context->contents;
|
||||
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
localaddr = rdma_get_local_addr(event->id);
|
||||
peeraddr = rdma_get_peer_addr(event->id);
|
||||
localipaddr = ((struct sockaddr_in *)localaddr)->sin_addr.s_addr;
|
||||
@ -1533,6 +1719,7 @@ static int event_handler(struct rdma_cm_event *event)
|
||||
rdma_event_str(event->event),
|
||||
event->status,
|
||||
peeripaddr));
|
||||
#endif
|
||||
|
||||
switch (event->event) {
|
||||
case RDMA_CM_EVENT_ADDR_RESOLVED:
|
||||
@ -1542,7 +1729,9 @@ static int event_handler(struct rdma_cm_event *event)
|
||||
|
||||
case RDMA_CM_EVENT_ROUTE_RESOLVED:
|
||||
OPAL_OUTPUT((-1, "SERVICE Got ROUTE_RESOLVED: ID %p", (void*) context->id));
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
contents->ipaddr = localipaddr;
|
||||
#endif
|
||||
rc = finish_connect(context);
|
||||
break;
|
||||
|
||||
@ -1717,6 +1906,7 @@ static int rdmacm_init(mca_btl_openib_endpoint_t *endpoint)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
static int ipaddrcheck(id_context_t *context,
|
||||
mca_btl_openib_module_t *openib_btl)
|
||||
{
|
||||
@ -1781,14 +1971,17 @@ static int ipaddrcheck(id_context_t *context,
|
||||
|
||||
return already_exists ? OMPI_ERROR : OMPI_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int create_message(rdmacm_contents_t *server,
|
||||
mca_btl_openib_module_t *openib_btl,
|
||||
ompi_btl_openib_connect_base_module_data_t *data)
|
||||
{
|
||||
modex_message_t *message;
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
char *a;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
message = (modex_message_t *) malloc(sizeof(modex_message_t));
|
||||
@ -1801,6 +1994,11 @@ static int create_message(rdmacm_contents_t *server,
|
||||
openib_btl->device->ib_dev_attr.max_qp_rd_atom;
|
||||
message->device_max_qp_init_rd_atom =
|
||||
openib_btl->device->ib_dev_attr.max_qp_init_rd_atom;
|
||||
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
memcpy(message->gid, server->gid.raw, sizeof(server->gid));
|
||||
message->service_id = server->service_id;
|
||||
#else
|
||||
message->ipaddr = server->ipaddr;
|
||||
message->tcp_port = server->tcp_port;
|
||||
|
||||
@ -1808,6 +2006,7 @@ static int create_message(rdmacm_contents_t *server,
|
||||
a = stringify(message->ipaddr), message->tcp_port));
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
free(a);
|
||||
#endif
|
||||
#endif
|
||||
data->cbm_modex_message = message;
|
||||
data->cbm_modex_message_len = message_len;
|
||||
@ -1823,11 +2022,18 @@ static int create_message(rdmacm_contents_t *server,
|
||||
*/
|
||||
static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, ompi_btl_openib_connect_base_module_t **cpc)
|
||||
{
|
||||
rdmacm_contents_t *server = NULL;
|
||||
struct sockaddr_in sin;
|
||||
id_context_t *context;
|
||||
int rc;
|
||||
|
||||
id_context_t *context;
|
||||
rdmacm_contents_t *server = NULL;
|
||||
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
char rdmacm_addr_str[32];
|
||||
struct rdma_addrinfo *rdma_addr;
|
||||
#else
|
||||
struct sockaddr_in sin;
|
||||
#endif
|
||||
|
||||
/* RDMACM is not supported if we have any XRC QPs */
|
||||
if (mca_btl_openib_component.num_xrc_qps > 0) {
|
||||
BTL_VERBOSE(("rdmacm CPC not supported with XRC receive queues, please try xoob CPC; skipped on %s:%d",
|
||||
@ -1883,23 +2089,49 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, ompi_btl_
|
||||
rc = OMPI_ERR_OUT_OF_RESOURCE;
|
||||
goto out4;
|
||||
}
|
||||
|
||||
#ifndef BTL_OPENIB_RDMACM_IB_ADDR
|
||||
memset(&sin, 0, sizeof(sin));
|
||||
sin.sin_family = AF_INET;
|
||||
sin.sin_addr.s_addr = rdmacm_addr;
|
||||
sin.sin_port = (uint16_t) rdmacm_port;
|
||||
#else
|
||||
rc = ibv_query_gid(openib_btl->device->ib_pd->context, openib_btl->port_num, 0, &server->gid);
|
||||
if (0 != rc) {
|
||||
BTL_ERROR(("local gid query failed"));
|
||||
goto out4;
|
||||
}
|
||||
|
||||
if (NULL == inet_ntop(AF_INET6, server->gid.raw,
|
||||
rdmacm_addr_str, sizeof rdmacm_addr_str)) {
|
||||
BTL_ERROR(("local gaddr string creating fail"));
|
||||
goto out4;
|
||||
}
|
||||
|
||||
rc = get_rdma_addr(rdmacm_addr_str, NULL, &rdma_addr, 1);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
BTL_ERROR(("server: create rdma addr error"));
|
||||
goto out4;
|
||||
}
|
||||
#endif
|
||||
/* Bind the rdmacm server to the local IP address and an ephemeral
|
||||
* port or one specified by a command arg.
|
||||
*/
|
||||
rc = rdma_bind_addr(context->id, (struct sockaddr *)&sin);
|
||||
rc = rdma_bind_addr(context->id,
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
rdma_addr->ai_src_addr);
|
||||
#else
|
||||
(struct sockaddr *)&sin);
|
||||
#endif
|
||||
if (0 != rc) {
|
||||
opal_output_verbose(5, ompi_btl_base_framework.framework_output,
|
||||
"openib BTL: rdmacm CPC unable to bind to address");
|
||||
rc = OMPI_ERR_UNREACH;
|
||||
goto out5;
|
||||
}
|
||||
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
server->service_id = ((struct sockaddr_ib *) (&context->id->route.addr.src_addr))->sib_sid;
|
||||
rdma_freeaddrinfo(rdma_addr);
|
||||
#else
|
||||
/* Verify that the device has a valid IP address on it, or we
|
||||
cannot use the cpc */
|
||||
rc = ipaddrcheck(context, openib_btl);
|
||||
@ -1909,7 +2141,7 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl, ompi_btl_
|
||||
rc = OMPI_ERR_NOT_SUPPORTED;
|
||||
goto out5;
|
||||
}
|
||||
|
||||
#endif
|
||||
/* Listen on the specified address/port with the rdmacm, limit the
|
||||
amount of incoming connections to 1024 */
|
||||
/* FIXME - 1024 should be (num of connectors *
|
||||
@ -1944,6 +2176,9 @@ out5:
|
||||
* But don't do it here since it's part of out4:OBJ_RELEASE(context),
|
||||
* and we don't want to do it twice.
|
||||
*/
|
||||
#if BTL_OPENIB_RDMACM_IB_ADDR
|
||||
rdma_freeaddrinfo(rdma_addr);
|
||||
#endif
|
||||
out4:
|
||||
opal_list_remove_first(&(server->ids));
|
||||
OBJ_RELEASE(context);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user