Revert r18409; that commit broke the build because it forgot to add
the btl_openib_iwarp.c and btl_openib_iwarp.h files. This commit was SVN r18410. The following SVN revision numbers were found above: r18409 --> open-mpi/ompi@056bbb68c8
Этот коммит содержится в:
родитель
056bbb68c8
Коммит
60f39a30f6
@ -52,8 +52,6 @@ sources = \
|
||||
btl_openib_xrc.h \
|
||||
btl_openib_fd.h \
|
||||
btl_openib_fd.c \
|
||||
btl_openib_iwarp.h \
|
||||
btl_openib_iwarp.c \
|
||||
connect/base.h \
|
||||
connect/btl_openib_connect_base.c \
|
||||
connect/btl_openib_connect_oob.c \
|
||||
|
@ -71,7 +71,6 @@
|
||||
#include "btl_openib_async.h"
|
||||
#endif
|
||||
#include "connect/base.h"
|
||||
#include "btl_openib_iwarp.h"
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
@ -450,6 +449,12 @@ static int start_async_event_thread(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Subnet lookup for iWARP devices.  When RDMA CM support is compiled in,
 * the function is provided by another translation unit; otherwise a local
 * stub is used that reports subnet id 0 for every device.
 */
#if OMPI_HAVE_RDMACM
extern uint64_t get_iwarp_subnet_id(struct ibv_device *ib_dev);
#else
static inline uint64_t get_iwarp_subnet_id(struct ibv_device *ib_dev)
{
    /* The device is irrelevant without RDMA CM support. */
    (void) ib_dev;
    return 0;
}
#endif
|
||||
|
||||
static int init_one_port(opal_list_t *btl_list, mca_btl_openib_hca_t *hca,
|
||||
uint8_t port_num, uint16_t pkey_index,
|
||||
struct ibv_port_attr *ib_port_attr)
|
||||
|
@ -25,6 +25,8 @@
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <dirent.h>
|
||||
#include <ifaddrs.h>
|
||||
#include <stdio.h>
|
||||
#include <malloc.h>
|
||||
|
||||
#include "opal/util/argv.h"
|
||||
@ -36,7 +38,6 @@
|
||||
#include "btl_openib_proc.h"
|
||||
#include "btl_openib_endpoint.h"
|
||||
#include "connect/connect.h"
|
||||
#include "btl_openib_iwarp.h"
|
||||
|
||||
/* JMS to be removed: see #1264 */
|
||||
#undef event
|
||||
@ -47,9 +48,26 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl,
|
||||
|
||||
static int rdmacm_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
|
||||
mca_btl_base_endpoint_t *endpoint);
|
||||
static uint32_t rdma_get_ipv4addr(struct ibv_context *verbs, uint8_t port);
|
||||
static int rdmacm_component_destroy(void);
|
||||
static int rdmacm_component_init(void);
|
||||
|
||||
/*
 * The declarations below maintain a linked list of the local RDMA IPv4
 * addresses, together with the RDMA device name and device port number
 * associated with each address.
 */
|
||||
struct rdma_addr_list {
|
||||
uint32_t addr;
|
||||
uint32_t subnet;
|
||||
char addr_str[16];
|
||||
char dev_name[IBV_SYSFS_NAME_MAX];
|
||||
uint8_t dev_port;
|
||||
struct rdma_addr_list *next;
|
||||
};
|
||||
static struct rdma_addr_list *myaddrs;
|
||||
static int build_rdma_addr_list(void);
|
||||
static void free_rdma_addr_list(void);
|
||||
|
||||
ompi_btl_openib_connect_base_component_t ompi_btl_openib_connect_rdmacm = {
|
||||
"rdmacm",
|
||||
rdmacm_component_register,
|
||||
@ -84,23 +102,21 @@ struct id_contexts {
|
||||
uint8_t qpnum;
|
||||
};
|
||||
|
||||
struct list_item {
|
||||
struct list_item *next;
|
||||
struct rdmacm_contents *item;
|
||||
};
|
||||
|
||||
struct conn_message {
|
||||
uint32_t rem_index;
|
||||
uint16_t rem_port;
|
||||
uint8_t qpnum;
|
||||
};
|
||||
|
||||
struct list_item {
|
||||
opal_list_item_t super;
|
||||
struct rdmacm_contents *item;
|
||||
};
|
||||
typedef struct list_item list_item_t;
|
||||
|
||||
static OBJ_CLASS_INSTANCE(list_item_t, opal_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
static opal_list_t server_list;
|
||||
static opal_list_t client_list;
|
||||
static struct list_item *server_list_head = NULL;
|
||||
static struct list_item *server_list_tail = NULL;
|
||||
static struct list_item *client_list_head = NULL;
|
||||
static struct list_item *client_list_tail = NULL;
|
||||
static struct rdma_event_channel *event_channel = NULL;
|
||||
static int rdmacm_priority = 30;
|
||||
static uint16_t rdmacm_port = 0;
|
||||
@ -108,6 +124,45 @@ static uint32_t rdmacm_addr = 0;
|
||||
|
||||
#define RDMA_RESOLVE_ADDR_TIMEOUT 2000
|
||||
|
||||
static int list_add(struct rdmacm_contents *client, struct list_item **head, struct list_item **tail)
|
||||
{
|
||||
struct list_item *temp;
|
||||
|
||||
temp = malloc(sizeof(struct list_item));
|
||||
if (NULL == temp) {
|
||||
BTL_ERROR(("malloc error"));
|
||||
return 1;
|
||||
}
|
||||
|
||||
temp->item = client;
|
||||
temp->next = NULL;
|
||||
if (NULL != *tail)
|
||||
(*tail)->next = temp;
|
||||
*tail = temp;
|
||||
|
||||
if (NULL == *head) {
|
||||
*head = temp;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct rdmacm_contents *list_del(struct list_item **head)
|
||||
{
|
||||
struct rdmacm_contents *temp;
|
||||
struct list_item *temp_item;
|
||||
|
||||
if (NULL == *head)
|
||||
return NULL;
|
||||
|
||||
temp_item = *head;
|
||||
temp = (*head)->item;
|
||||
*head = (*head)->next;
|
||||
free(temp_item);
|
||||
|
||||
return temp;
|
||||
}
|
||||
|
||||
/* Open - this function sets up any rdma_cm specific command-line params */
|
||||
static void rdmacm_component_register(void)
|
||||
{
|
||||
@ -517,7 +572,7 @@ static void rdmacm_server_cleanup(struct rdmacm_contents *local)
|
||||
|
||||
static int rdmacm_connection_shutdown(struct mca_btl_base_endpoint_t *endpoint)
|
||||
{
|
||||
opal_list_item_t *item;
|
||||
struct list_item *temp;
|
||||
|
||||
BTL_VERBOSE(("Start disconnecting..."));
|
||||
|
||||
@ -526,9 +581,7 @@ static int rdmacm_connection_shutdown(struct mca_btl_base_endpoint_t *endpoint)
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (item = opal_list_get_first(&client_list); item != opal_list_get_end(&client_list); item = opal_list_get_next(item)) {
|
||||
struct list_item *temp = (struct list_item *)item;
|
||||
|
||||
for (temp = client_list_head; NULL != temp; temp = temp->next) {
|
||||
if (endpoint == temp->item->endpoint) {
|
||||
int i;
|
||||
for (i = 0; i < mca_btl_openib_component.num_qps; i++)
|
||||
@ -565,18 +618,16 @@ static int rdmacm_connect_endpoint(struct rdmacm_contents *local, struct rdma_cm
|
||||
if (local->server)
|
||||
endpoint = ((struct id_contexts *)event->id->context)->endpoint;
|
||||
else {
|
||||
struct list_item *temp;
|
||||
int rc;
|
||||
|
||||
endpoint = local->endpoint;
|
||||
local->endpoint->rem_info.rem_index = ((struct conn_message *)event->param.conn.private_data)->rem_index;
|
||||
|
||||
temp = OBJ_NEW(list_item_t);
|
||||
if (NULL == temp) {
|
||||
BTL_ERROR(("malloc error"));
|
||||
rc = list_add(local, &client_list_head, &client_list_tail);
|
||||
if (0 != rc) {
|
||||
BTL_ERROR(("list_add error"));
|
||||
return -1;
|
||||
}
|
||||
temp->item = local;
|
||||
opal_list_append(&client_list, &(temp->super));
|
||||
}
|
||||
if (NULL == endpoint) {
|
||||
BTL_ERROR(("Can't find endpoint"));
|
||||
@ -929,12 +980,11 @@ static int ipaddrcheck(struct rdmacm_contents *server, mca_btl_openib_module_t *
|
||||
}
|
||||
|
||||
for (i = 0; i < attr.phys_port_cnt; i++) {
|
||||
struct list_item *pitem;
|
||||
bool found = false;
|
||||
uint32_t temp = rdma_get_ipv4addr(openib_btl->hca->ib_dev_context, i+1);
|
||||
opal_list_item_t *item;
|
||||
|
||||
for (item = opal_list_get_first(&server_list); item != opal_list_get_end(&server_list); item = opal_list_get_next(item)) {
|
||||
struct list_item *pitem = (struct list_item *)item;
|
||||
for (pitem = server_list_head; NULL != pitem; pitem = pitem->next) {
|
||||
BTL_VERBOSE(("paddr = %x, temp addr = %x", pitem->item->ipaddr, temp));
|
||||
if (pitem->item->ipaddr == temp || 0 == temp) {
|
||||
BTL_VERBOSE(("addr %x already exists", temp));
|
||||
@ -988,7 +1038,6 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl,
|
||||
{
|
||||
struct rdmacm_contents *server = NULL;
|
||||
struct sockaddr_in sin;
|
||||
struct list_item *temp;
|
||||
int rc;
|
||||
|
||||
/* RDMACM is not supported if we have any XRC QPs */
|
||||
@ -1094,14 +1143,12 @@ static int rdmacm_component_query(mca_btl_openib_module_t *openib_btl,
|
||||
goto out;
|
||||
}
|
||||
|
||||
temp = OBJ_NEW(list_item_t);
|
||||
if (NULL== temp) {
|
||||
rc = list_add(server, &server_list_head, &server_list_tail);
|
||||
if (0 != rc) {
|
||||
opal_output_verbose(5, mca_btl_base_output,
|
||||
"openib BTL: rdmacm CPC unable to add to list");
|
||||
goto out;
|
||||
}
|
||||
temp->item = server;
|
||||
opal_list_append(&server_list, &(temp->super));
|
||||
|
||||
opal_output_verbose(5, mca_btl_base_output,
|
||||
"openib BTL: rdmacm CPC available for use on %s",
|
||||
@ -1127,29 +1174,15 @@ out:
|
||||
|
||||
static int rdmacm_component_destroy(void)
|
||||
{
|
||||
opal_list_item_t *item;
|
||||
struct rdmacm_contents *local;
|
||||
int rc;
|
||||
|
||||
if (0 != opal_list_get_size(&client_list)) {
|
||||
for (item = opal_list_get_first(&client_list);
|
||||
item != opal_list_get_end(&client_list);
|
||||
item = opal_list_get_next(item)) {
|
||||
struct rdmacm_contents *temp = ((struct list_item *)item)->item;
|
||||
|
||||
rdmacm_destroy(temp);
|
||||
opal_list_remove_item(&client_list, item);
|
||||
}
|
||||
while (NULL != (local = list_del(&client_list_head))) {
|
||||
rdmacm_destroy(local);
|
||||
}
|
||||
|
||||
if (0 != opal_list_get_size(&server_list)) {
|
||||
for (item = opal_list_get_first(&server_list);
|
||||
item != opal_list_get_end(&server_list);
|
||||
item = opal_list_get_next(item)) {
|
||||
struct rdmacm_contents *temp = ((struct list_item *)item)->item;
|
||||
|
||||
rdmacm_destroy(temp);
|
||||
opal_list_remove_item(&server_list, item);
|
||||
}
|
||||
while (NULL != (local = list_del(&server_list_head))) {
|
||||
rdmacm_server_cleanup(local);
|
||||
}
|
||||
|
||||
if (NULL != event_channel) {
|
||||
@ -1168,9 +1201,6 @@ static int rdmacm_component_init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
OBJ_CONSTRUCT(&server_list, opal_list_t);
|
||||
OBJ_CONSTRUCT(&client_list, opal_list_t);
|
||||
|
||||
rc = build_rdma_addr_list();
|
||||
if (-1 == rc) {
|
||||
opal_output_verbose(5, mca_btl_base_output,
|
||||
@ -1191,3 +1221,162 @@ static int rdmacm_component_init(void)
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
uint64_t get_iwarp_subnet_id(struct ibv_device *ib_dev)
|
||||
{
|
||||
struct rdma_addr_list *addr;
|
||||
|
||||
for (addr = myaddrs; addr; addr = addr->next) {
|
||||
if (!strcmp(addr->dev_name, ib_dev->name)) {
|
||||
return addr->subnet;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static uint32_t rdma_get_ipv4addr(struct ibv_context *verbs, uint8_t port)
|
||||
{
|
||||
struct rdma_addr_list *addr;
|
||||
|
||||
for (addr = myaddrs; addr; addr = addr->next) {
|
||||
if (!strcmp(addr->dev_name, verbs->device->name) &&
|
||||
port == addr->dev_port) {
|
||||
return addr->addr;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dev_specified(char *name, uint32_t ipaddr, int port)
|
||||
{
|
||||
char **list;
|
||||
|
||||
if (NULL != mca_btl_openib_component.if_include) {
|
||||
int i;
|
||||
|
||||
list = opal_argv_split(mca_btl_openib_component.if_include, ',');
|
||||
for (i = 0; NULL != list[i]; i++) {
|
||||
char **temp = opal_argv_split(list[i], ':');
|
||||
if (0 == strcmp(name, temp[0]) &&
|
||||
(NULL == temp[1] || port == atoi(temp[1]))) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (NULL != mca_btl_openib_component.if_exclude) {
|
||||
int i;
|
||||
|
||||
list = opal_argv_split(mca_btl_openib_component.if_exclude, ',');
|
||||
for (i = 0; NULL != list[i]; i++) {
|
||||
char **temp = opal_argv_split(list[i], ':');
|
||||
if (0 == strcmp(name, temp[0]) &&
|
||||
(NULL == temp[1] || port == atoi(temp[1]))) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int add_rdma_addr(struct ifaddrs *ifa)
|
||||
{
|
||||
struct sockaddr_in *sinp;
|
||||
struct rdma_cm_id *cm_id;
|
||||
struct rdma_event_channel *ch;
|
||||
int rc = OMPI_SUCCESS;
|
||||
struct rdma_addr_list *myaddr;
|
||||
|
||||
ch = rdma_create_event_channel();
|
||||
if (NULL == ch) {
|
||||
BTL_ERROR(("failed creating event channel"));
|
||||
rc = OMPI_ERROR;
|
||||
goto out1;
|
||||
}
|
||||
|
||||
rc = rdma_create_id(ch, &cm_id, NULL, RDMA_PS_TCP);
|
||||
if (rc) {
|
||||
BTL_ERROR(("rdma_create_id returned %d", rc));
|
||||
rc = OMPI_ERROR;
|
||||
goto out2;
|
||||
}
|
||||
|
||||
rc = rdma_bind_addr(cm_id, ifa->ifa_addr);
|
||||
if (rc) {
|
||||
rc = OMPI_SUCCESS;
|
||||
goto out3;
|
||||
}
|
||||
|
||||
if (!cm_id->verbs ||
|
||||
0 == ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr ||
|
||||
dev_specified(cm_id->verbs->device->name, ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr, cm_id->port_num)) {
|
||||
goto out3;
|
||||
}
|
||||
|
||||
myaddr = malloc(sizeof *myaddr);
|
||||
if (NULL == myaddr) {
|
||||
BTL_ERROR(("malloc failed!"));
|
||||
rc = OMPI_ERROR;
|
||||
goto out3;
|
||||
}
|
||||
|
||||
sinp = (struct sockaddr_in *)ifa->ifa_addr;
|
||||
myaddr->addr = sinp->sin_addr.s_addr;
|
||||
myaddr->subnet = myaddr->addr & ((struct sockaddr_in *)ifa->ifa_netmask)->sin_addr.s_addr;
|
||||
inet_ntop(sinp->sin_family, &sinp->sin_addr,
|
||||
myaddr->addr_str, sizeof myaddr->addr_str);
|
||||
memcpy(myaddr->dev_name, cm_id->verbs->device->name, IBV_SYSFS_NAME_MAX);
|
||||
myaddr->dev_port = cm_id->port_num;
|
||||
BTL_VERBOSE(("adding addr %s dev %s port %d to rdma_addr_list",
|
||||
myaddr->addr_str, myaddr->dev_name, myaddr->dev_port));
|
||||
|
||||
myaddr->next = myaddrs;
|
||||
myaddrs = myaddr;
|
||||
|
||||
out3:
|
||||
rdma_destroy_id(cm_id);
|
||||
out2:
|
||||
rdma_destroy_event_channel(ch);
|
||||
out1:
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int build_rdma_addr_list(void)
|
||||
{
|
||||
int rc;
|
||||
struct ifaddrs *ifa_list, *ifa;
|
||||
|
||||
rc = getifaddrs(&ifa_list);
|
||||
if (-1 == rc) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
ifa = ifa_list;
|
||||
while (ifa) {
|
||||
if (ifa->ifa_addr->sa_family == AF_INET) {
|
||||
rc = add_rdma_addr(ifa);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
ifa = ifa->ifa_next;
|
||||
}
|
||||
freeifaddrs(ifa_list);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void free_rdma_addr_list(void)
|
||||
{
|
||||
struct rdma_addr_list *addr, *tmp;
|
||||
|
||||
addr = myaddrs;
|
||||
while (addr) {
|
||||
tmp = addr->next;
|
||||
free(addr);
|
||||
addr = tmp;
|
||||
}
|
||||
}
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user