usnic: enable UDP support
This commit decouples OMPI deployment from the version(s) of the lower layers of the stack by probing for UDP support. Verbs applications assume a 40-byte header (there is no current mechanism for querying payload offset). So, to support a 42-byte UDP header without causing existing applications like ibv_ud_pingpong or older versions of OMPI to crash, we must inform libusnic_verbs that we are aware of the nonstandard payload offset. We do this by overriding the `transport_type` field of the device to be 42 before calling `ibv_open_device`. If the library resets it to something else, then we know the lower layers are UDP-capable. Otherwise we use the older custom-L2 format. This necessitated some minor ugliness in common_verbs, but it's as tidy as Jeff and I know how to make it right now. This commit only adds support for UDP headers and connectivity over the same L2 network; it does not touch routing or interface pairing. Reviewed-by: Jeff Squyres <jsquyres@cisco.com> cmr=v1.7.5:ticket=trac:4253 This commit was SVN r30838. The following Trac tickets were found above: Ticket 4253 --> https://svn.open-mpi.org/trac/ompi/ticket/4253
Этот коммит содержится в:
родитель
e10ad5763f
Коммит
4875f48eaa
@ -10,7 +10,7 @@
|
|||||||
# University of Stuttgart. All rights reserved.
|
# University of Stuttgart. All rights reserved.
|
||||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
# All rights reserved.
|
# All rights reserved.
|
||||||
# Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved.
|
# Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved.
|
||||||
# Copyright (c) 2006-2011 Los Alamos National Security, LLC. All rights
|
# Copyright (c) 2006-2011 Los Alamos National Security, LLC. All rights
|
||||||
# reserved.
|
# reserved.
|
||||||
# Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved.
|
# Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved.
|
||||||
@ -152,7 +152,7 @@ AC_DEFUN([OMPI_CHECK_OPENFABRICS],[
|
|||||||
|
|
||||||
# If we have the openib stuff available, find out what we've got
|
# If we have the openib stuff available, find out what we've got
|
||||||
AS_IF([test "$ompi_check_openib_happy" = "yes"],
|
AS_IF([test "$ompi_check_openib_happy" = "yes"],
|
||||||
[AC_CHECK_DECLS([IBV_EVENT_CLIENT_REREGISTER, IBV_ACCESS_SO], [], [],
|
[AC_CHECK_DECLS([IBV_EVENT_CLIENT_REREGISTER, IBV_ACCESS_SO, IBV_TRANSPORT_USNIC, IBV_TRANSPORT_USNIC_UDP, IBV_NODE_USNIC], [], [],
|
||||||
[#include <infiniband/verbs.h>])
|
[#include <infiniband/verbs.h>])
|
||||||
AC_CHECK_FUNCS([ibv_get_device_list ibv_resize_cq])
|
AC_CHECK_FUNCS([ibv_get_device_list ibv_resize_cq])
|
||||||
|
|
||||||
|
@ -145,6 +145,9 @@ typedef struct ompi_btl_usnic_component_t {
|
|||||||
/** convertor packing threshold */
|
/** convertor packing threshold */
|
||||||
int pack_lazy_threshold;
|
int pack_lazy_threshold;
|
||||||
|
|
||||||
|
/** does the stack below us speak UDP or custom-L2? */
|
||||||
|
bool use_udp;
|
||||||
|
|
||||||
/* vvvvvvvvvv non-fastpath fields go below vvvvvvvvvv */
|
/* vvvvvvvvvv non-fastpath fields go below vvvvvvvvvv */
|
||||||
|
|
||||||
/** list of usnic proc structures */
|
/** list of usnic proc structures */
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
|
@ -98,7 +98,6 @@ static mca_btl_base_module_t **
|
|||||||
usnic_component_init(int* num_btl_modules, bool want_progress_threads,
|
usnic_component_init(int* num_btl_modules, bool want_progress_threads,
|
||||||
bool want_mpi_threads);
|
bool want_mpi_threads);
|
||||||
static int usnic_component_progress(void);
|
static int usnic_component_progress(void);
|
||||||
static bool port_is_usnic(ompi_common_verbs_port_item_t *port);
|
|
||||||
static int init_module_from_port(ompi_btl_usnic_module_t *module,
|
static int init_module_from_port(ompi_btl_usnic_module_t *module,
|
||||||
ompi_common_verbs_port_item_t *port);
|
ompi_common_verbs_port_item_t *port);
|
||||||
|
|
||||||
@ -526,11 +525,34 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
|||||||
* filtering below, so don't let the verbs code see our
|
* filtering below, so don't let the verbs code see our
|
||||||
* if_include/if_exclude strings */
|
* if_include/if_exclude strings */
|
||||||
port_list = ompi_common_verbs_find_ports(NULL, NULL,
|
port_list = ompi_common_verbs_find_ports(NULL, NULL,
|
||||||
OMPI_COMMON_VERBS_FLAGS_UD,
|
OMPI_COMMON_VERBS_FLAGS_TRANSPORT_USNIC_UDP,
|
||||||
USNIC_OUT);
|
USNIC_OUT);
|
||||||
if (NULL == port_list || 0 == opal_list_get_size(port_list)) {
|
if (NULL == port_list) {
|
||||||
mca_btl_base_error_no_nics("usNIC", "device");
|
OMPI_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||||
goto free_include_list;
|
goto free_include_list;
|
||||||
|
} else if (opal_list_get_size(port_list) > 0) {
|
||||||
|
mca_btl_usnic_component.use_udp = true;
|
||||||
|
opal_output_verbose(5, USNIC_OUT, "btl:usnic: using UDP transport");
|
||||||
|
} else {
|
||||||
|
OBJ_RELEASE(port_list);
|
||||||
|
|
||||||
|
/* If we got no USNIC_UDP transport devices, try again with
|
||||||
|
USNIC */
|
||||||
|
port_list = ompi_common_verbs_find_ports(NULL, NULL,
|
||||||
|
OMPI_COMMON_VERBS_FLAGS_TRANSPORT_USNIC,
|
||||||
|
USNIC_OUT);
|
||||||
|
|
||||||
|
if (NULL == port_list) {
|
||||||
|
OMPI_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
|
||||||
|
goto free_include_list;
|
||||||
|
} else if (opal_list_get_size(port_list) > 0) {
|
||||||
|
mca_btl_usnic_component.use_udp = false;
|
||||||
|
opal_output_verbose(5, USNIC_OUT,
|
||||||
|
"btl:usnic: using L2-only transport");
|
||||||
|
} else {
|
||||||
|
mca_btl_base_error_no_nics("usNIC", "device");
|
||||||
|
goto free_include_list;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Setup an array of pointers to point to each module (which we'll
|
/* Setup an array of pointers to point to each module (which we'll
|
||||||
@ -597,16 +619,6 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
|||||||
"btl:usnic: found: device %s, port %d",
|
"btl:usnic: found: device %s, port %d",
|
||||||
port->device->device_name, port->port_num);
|
port->device->device_name, port->port_num);
|
||||||
|
|
||||||
/* This component only works with Cisco VIC/usNIC devices; it
|
|
||||||
is not a general verbs UD component. Reject any ports
|
|
||||||
found on devices that are not Cisco VICs. */
|
|
||||||
if (!port_is_usnic(port)) {
|
|
||||||
opal_output_verbose(5, USNIC_OUT,
|
|
||||||
"btl:usnic: this is not a usnic-capable device");
|
|
||||||
--mca_btl_usnic_component.num_modules;
|
|
||||||
continue; /* next port */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Fill in a bunch of the module struct */
|
/* Fill in a bunch of the module struct */
|
||||||
module = &(mca_btl_usnic_component.usnic_all_modules[i]);
|
module = &(mca_btl_usnic_component.usnic_all_modules[i]);
|
||||||
if (OMPI_SUCCESS != init_module_from_port(module, port)) {
|
if (OMPI_SUCCESS != init_module_from_port(module, port)) {
|
||||||
@ -695,24 +707,24 @@ static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
|
|||||||
/* Find the max payload this port can handle */
|
/* Find the max payload this port can handle */
|
||||||
module->max_frag_payload =
|
module->max_frag_payload =
|
||||||
module->if_mtu - /* start with the MTU */
|
module->if_mtu - /* start with the MTU */
|
||||||
sizeof(ompi_btl_usnic_protocol_header_t) -
|
OMPI_BTL_USNIC_PROTO_HDR_SZ -
|
||||||
sizeof(ompi_btl_usnic_btl_header_t); /* subtract size of
|
sizeof(ompi_btl_usnic_btl_header_t); /* subtract size of
|
||||||
the BTL header */
|
the BTL header */
|
||||||
/* same, but use chunk header */
|
/* same, but use chunk header */
|
||||||
module->max_chunk_payload =
|
module->max_chunk_payload =
|
||||||
module->if_mtu -
|
module->if_mtu -
|
||||||
sizeof(ompi_btl_usnic_protocol_header_t) -
|
OMPI_BTL_USNIC_PROTO_HDR_SZ -
|
||||||
sizeof(ompi_btl_usnic_btl_chunk_header_t);
|
sizeof(ompi_btl_usnic_btl_chunk_header_t);
|
||||||
|
|
||||||
/* Priority queue MTU and max size */
|
/* Priority queue MTU and max size */
|
||||||
if (0 == module->tiny_mtu) {
|
if (0 == module->tiny_mtu) {
|
||||||
module->tiny_mtu = 768;
|
module->tiny_mtu = 768;
|
||||||
module->max_tiny_payload = module->tiny_mtu -
|
module->max_tiny_payload = module->tiny_mtu -
|
||||||
sizeof(ompi_btl_usnic_protocol_header_t) -
|
OMPI_BTL_USNIC_PROTO_HDR_SZ -
|
||||||
sizeof(ompi_btl_usnic_btl_header_t);
|
sizeof(ompi_btl_usnic_btl_header_t);
|
||||||
} else {
|
} else {
|
||||||
module->tiny_mtu = module->max_tiny_payload +
|
module->tiny_mtu = module->max_tiny_payload +
|
||||||
sizeof(ompi_btl_usnic_protocol_header_t) +
|
OMPI_BTL_USNIC_PROTO_HDR_SZ +
|
||||||
sizeof(ompi_btl_usnic_btl_header_t);
|
sizeof(ompi_btl_usnic_btl_header_t);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -982,7 +994,7 @@ static int usnic_handle_completion(
|
|||||||
going. The sender will eventually re-send it. */
|
going. The sender will eventually re-send it. */
|
||||||
if (IBV_WC_RECV == cwc->opcode) {
|
if (IBV_WC_RECV == cwc->opcode) {
|
||||||
if (cwc->byte_len <
|
if (cwc->byte_len <
|
||||||
(sizeof(ompi_btl_usnic_protocol_header_t)+
|
(OMPI_BTL_USNIC_PROTO_HDR_SZ +
|
||||||
sizeof(ompi_btl_usnic_btl_header_t))) {
|
sizeof(ompi_btl_usnic_btl_header_t))) {
|
||||||
BTL_ERROR(("%s: RX error polling CQ[%d] with status %d for wr_id %" PRIx64 " vend_err %d, byte_len %d",
|
BTL_ERROR(("%s: RX error polling CQ[%d] with status %d for wr_id %" PRIx64 " vend_err %d, byte_len %d",
|
||||||
ibv_get_device_name(module->device),
|
ibv_get_device_name(module->device),
|
||||||
@ -1115,33 +1127,6 @@ static int usnic_component_progress_2(void)
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool port_is_usnic(ompi_common_verbs_port_item_t *port)
|
|
||||||
{
|
|
||||||
bool is_usnic = false;
|
|
||||||
uint32_t *vpi;
|
|
||||||
|
|
||||||
#if BTL_USNIC_HAVE_IBV_USNIC
|
|
||||||
/* If we have the IB_*_USNIC constants, then take any
|
|
||||||
device which advertises them */
|
|
||||||
if (IBV_TRANSPORT_USNIC == port->device->device->transport_type &&
|
|
||||||
IBV_NODE_USNIC == port->device->device->node_type) {
|
|
||||||
is_usnic = true;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
/* Or take any specific device that we know is a Cisco
|
|
||||||
VIC. Cisco's vendor ID is 0x1137. */
|
|
||||||
if (!is_usnic && 0x1137 == port->device->device_attr.vendor_id) {
|
|
||||||
for (vpi = mca_btl_usnic_component.vendor_part_ids; *vpi > 0; ++vpi) {
|
|
||||||
if (port->device->device_attr.vendor_part_id == *vpi) {
|
|
||||||
is_usnic = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return is_usnic;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* returns OMPI_SUCCESS if module initialization was successful, OMPI_ERROR
|
/* returns OMPI_SUCCESS if module initialization was successful, OMPI_ERROR
|
||||||
* otherwise */
|
* otherwise */
|
||||||
static int init_module_from_port(ompi_btl_usnic_module_t *module,
|
static int init_module_from_port(ompi_btl_usnic_module_t *module,
|
||||||
|
@ -126,8 +126,9 @@ recv_seg_constructor(
|
|||||||
|
|
||||||
/* on receive, BTL header starts after protocol header */
|
/* on receive, BTL header starts after protocol header */
|
||||||
seg->rs_protocol_header = bseg->us_list.ptr;
|
seg->rs_protocol_header = bseg->us_list.ptr;
|
||||||
bseg->us_btl_header =
|
bseg->us_btl_header = (ompi_btl_usnic_btl_header_t *)(
|
||||||
(ompi_btl_usnic_btl_header_t *) (seg->rs_protocol_header + 1);
|
((char *)seg->rs_protocol_header) +
|
||||||
|
OMPI_BTL_USNIC_PROTO_HDR_SZ);
|
||||||
|
|
||||||
/* initialize verbs work request */
|
/* initialize verbs work request */
|
||||||
seg->rs_recv_desc.wr_id = (unsigned long) seg;
|
seg->rs_recv_desc.wr_id = (unsigned long) seg;
|
||||||
|
@ -93,14 +93,26 @@ typedef struct ompi_btl_usnic_reg_t {
|
|||||||
struct ibv_mr* mr;
|
struct ibv_mr* mr;
|
||||||
} ompi_btl_usnic_reg_t;
|
} ompi_btl_usnic_reg_t;
|
||||||
|
|
||||||
|
|
||||||
|
/* UDP headers are always 42 bytes long */
|
||||||
|
#define OMPI_BTL_USNIC_UDP_HDR_SZ (42)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Header that is the beginning of every usnic packet buffer.
|
* Header that is the beginning of every usnic packet buffer.
|
||||||
*/
|
*/
|
||||||
typedef struct {
|
typedef union {
|
||||||
/* Verbs UD global resource header (GRH), which appears on the
|
/* Verbs UD global resource header (GRH), which appears on the receiving
|
||||||
receiving side only. */
|
* side only. Valid iff mca_btl_usnic_component.use_udp is false. */
|
||||||
struct ibv_grh grh;
|
struct ibv_grh grh;
|
||||||
} ompi_btl_usnic_protocol_header_t;
|
|
||||||
|
/* Valid iff mca_btl_usnic_component.use_udp is true. */
|
||||||
|
char udp_bytes[OMPI_BTL_USNIC_UDP_HDR_SZ];
|
||||||
|
} __attribute__((__packed__)) ompi_btl_usnic_protocol_header_t;
|
||||||
|
|
||||||
|
#define OMPI_BTL_USNIC_PROTO_HDR_SZ \
|
||||||
|
(mca_btl_usnic_component.use_udp ? \
|
||||||
|
OMPI_BTL_USNIC_UDP_HDR_SZ : \
|
||||||
|
sizeof(struct ibv_grh))
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* usnic header type
|
* usnic header type
|
||||||
|
@ -1488,7 +1488,7 @@ static void module_async_event_callback(int fd, short flags, void *arg)
|
|||||||
|
|
||||||
case IBV_EVENT_QP_FATAL:
|
case IBV_EVENT_QP_FATAL:
|
||||||
case IBV_EVENT_PORT_ERR:
|
case IBV_EVENT_PORT_ERR:
|
||||||
#if BTL_USNIC_HAVE_IBV_EVENT_GID_CHANGE
|
#if HAVE_DECL_IBV_EVENT_GID_CHANGE
|
||||||
case IBV_EVENT_GID_CHANGE:
|
case IBV_EVENT_GID_CHANGE:
|
||||||
#endif
|
#endif
|
||||||
default:
|
default:
|
||||||
|
@ -49,33 +49,12 @@ AC_DEFUN([MCA_ompi_btl_usnic_CONFIG],[
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
# Do we have the IBV_TRANSPORT_USNIC / IBV_NODE_USNIC defines?
|
|
||||||
# (note: if we have one, we have both)
|
|
||||||
btl_usnic_have_ibv_usnic=0
|
|
||||||
btl_usnic_have_ibv_event_gid_change=0
|
|
||||||
AS_IF([test "$btl_usnic_happy" = "yes"],
|
AS_IF([test "$btl_usnic_happy" = "yes"],
|
||||||
[AC_CHECK_DECL([IBV_NODE_USNIC],
|
[AC_CHECK_DECLS([IBV_EVENT_GID_CHANGE, ibv_event_type_str], [], [],
|
||||||
[btl_usnic_have_ibv_usnic=1],
|
|
||||||
[],
|
|
||||||
[ #include <infiniband/verbs.h>
|
|
||||||
])
|
|
||||||
|
|
||||||
AC_CHECK_DECL([IBV_EVENT_GID_CHANGE],
|
|
||||||
[btl_usnic_have_ibv_event_gid_change=1],
|
|
||||||
[],
|
|
||||||
[ #include <infiniband/verbs.h>
|
|
||||||
])
|
|
||||||
AC_CHECK_DECLS([ibv_event_type_str], [], [],
|
|
||||||
[#include <infiniband/verbs.h>
|
[#include <infiniband/verbs.h>
|
||||||
])
|
])
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
AC_DEFINE_UNQUOTED([BTL_USNIC_HAVE_IBV_USNIC],
|
|
||||||
[$btl_usnic_have_ibv_usnic],
|
|
||||||
[Whether we have IBV_NODE_USNIC / IBV_TRANSPORT_USNIC or not])
|
|
||||||
AC_DEFINE_UNQUOTED([BTL_USNIC_HAVE_IBV_EVENT_GID_CHANGE],
|
|
||||||
[$btl_usnic_have_ibv_event_gid_change],
|
|
||||||
[Whether we have IBV_EVENT_GID_CHANGE or not])
|
|
||||||
|
|
||||||
AS_IF([test "$btl_usnic_happy" = "yes"],
|
AS_IF([test "$btl_usnic_happy" = "yes"],
|
||||||
[btl_usnic_WRAPPER_EXTRA_LDFLAGS="$btl_usnic_LDFLAGS"
|
[btl_usnic_WRAPPER_EXTRA_LDFLAGS="$btl_usnic_LDFLAGS"
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
|
* Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
|
* Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved.
|
||||||
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2012-2014 Cisco Systems, Inc. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -79,14 +79,22 @@ enum {
|
|||||||
OMPI_COMMON_VERBS_FLAGS_UD = 0x4,
|
OMPI_COMMON_VERBS_FLAGS_UD = 0x4,
|
||||||
OMPI_COMMON_VERBS_FLAGS_TRANSPORT_IB = 0x8,
|
OMPI_COMMON_VERBS_FLAGS_TRANSPORT_IB = 0x8,
|
||||||
OMPI_COMMON_VERBS_FLAGS_TRANSPORT_IWARP = 0x10,
|
OMPI_COMMON_VERBS_FLAGS_TRANSPORT_IWARP = 0x10,
|
||||||
|
OMPI_COMMON_VERBS_FLAGS_TRANSPORT_USNIC = 0x20,
|
||||||
|
OMPI_COMMON_VERBS_FLAGS_TRANSPORT_USNIC_UDP = 0x40,
|
||||||
/* Note that these 2 link layer flags will only be useful if
|
/* Note that these 2 link layer flags will only be useful if
|
||||||
defined(HAVE_IBV_LINK_LAYER_ETHERNET). Otherwise, they will be
|
defined(HAVE_IBV_LINK_LAYER_ETHERNET). Otherwise, they will be
|
||||||
ignored. */
|
ignored. */
|
||||||
OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_IB = 0x20,
|
OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_IB = 0x80,
|
||||||
OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET = 0x40,
|
OMPI_COMMON_VERBS_FLAGS_LINK_LAYER_ETHERNET = 0x100,
|
||||||
OMPI_COMMON_VERBS_FLAGS_MAX
|
OMPI_COMMON_VERBS_FLAGS_MAX
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum {
|
||||||
|
/* a constant used when probing the usNIC transport type (custom L2 vs.
|
||||||
|
* UDP/IP) */
|
||||||
|
OMPI_COMMON_VERBS_USNIC_PROBE_MAGIC = 42
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find a list of ibv_device ports that match specific criteria.
|
* Find a list of ibv_device ports that match specific criteria.
|
||||||
*
|
*
|
||||||
|
@ -163,10 +163,16 @@ static bool want_this_port(char **include_list, char **exclude_list,
|
|||||||
static const char *transport_name_to_str(enum ibv_transport_type transport_type)
|
static const char *transport_name_to_str(enum ibv_transport_type transport_type)
|
||||||
{
|
{
|
||||||
switch(transport_type) {
|
switch(transport_type) {
|
||||||
case IBV_TRANSPORT_IB: return "IB";
|
case IBV_TRANSPORT_IB: return "IB";
|
||||||
case IBV_TRANSPORT_IWARP: return "IWARP";
|
case IBV_TRANSPORT_IWARP: return "IWARP";
|
||||||
|
#if HAVE_DECL_IBV_TRANSPORT_USNIC
|
||||||
|
case IBV_TRANSPORT_USNIC: return "usNIC";
|
||||||
|
#endif
|
||||||
|
#if HAVE_DECL_IBV_TRANSPORT_USNIC_UDP
|
||||||
|
case IBV_TRANSPORT_USNIC_UDP: return "usNIC UDP";
|
||||||
|
#endif
|
||||||
case IBV_TRANSPORT_UNKNOWN:
|
case IBV_TRANSPORT_UNKNOWN:
|
||||||
default: return "unknown";
|
default: return "unknown";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -182,6 +188,44 @@ static const char *link_layer_to_str(int link_type)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Helper routine to detect Cisco usNIC devices (these are non-IB, non-RoCE,
|
||||||
|
* non-iWARP devices). See the usnic BTL for more information.
|
||||||
|
*
|
||||||
|
* Once usNIC is no longer new and the IBV_TRANSPORT_USNIC constant is widely
|
||||||
|
* available in the wild, all calls to it can be replaced with a simple check
|
||||||
|
* against said constant.
|
||||||
|
*/
|
||||||
|
static bool device_is_usnic(struct ibv_device *device)
|
||||||
|
{
|
||||||
|
/* A usNIC-capable VIC will present as one of:
|
||||||
|
* 1. _IWARP -- any libibverbs and old kernel
|
||||||
|
* 2. _UNKNOWN -- old libibverbs and new kernel
|
||||||
|
* 3. _USNIC -- new libibverbs and new kernel
|
||||||
|
*
|
||||||
|
* Where an "old kernel" is one that does not have this commit:
|
||||||
|
* http://bit.ly/kernel-180771a3
|
||||||
|
*/
|
||||||
|
#if HAVE_DECL_IBV_TRANSPORT_USNIC
|
||||||
|
if (IBV_TRANSPORT_USNIC == device->transport_type) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#if HAVE_DECL_IBV_TRANSPORT_USNIC_UDP
|
||||||
|
if (IBV_TRANSPORT_USNIC_UDP == device->transport_type) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
if ((IBV_TRANSPORT_IWARP == device->transport_type ||
|
||||||
|
IBV_TRANSPORT_UNKNOWN == device->transport_type) &&
|
||||||
|
0 == strncmp(device->name, "usnic_", strlen("usnic_"))) {
|
||||||
|
/* if we are willing to open the device, query its attributes, then
|
||||||
|
* close it again, we could also check for Cisco's vendor ID (0x1137) */
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/***********************************************************************/
|
/***********************************************************************/
|
||||||
|
|
||||||
static void check_sanity(char ***if_sanity_list, const char *dev_name, int port)
|
static void check_sanity(char ***if_sanity_list, const char *dev_name, int port)
|
||||||
@ -240,7 +284,8 @@ opal_list_t *ompi_common_verbs_find_ports(const char *if_include,
|
|||||||
uint32_t i, j;
|
uint32_t i, j;
|
||||||
opal_list_t *port_list = NULL;
|
opal_list_t *port_list = NULL;
|
||||||
opal_list_item_t *item;
|
opal_list_item_t *item;
|
||||||
bool want;
|
bool want, dev_is_usnic;
|
||||||
|
enum ibv_transport_type saved_transport_type;
|
||||||
|
|
||||||
/* Allocate a list to fill */
|
/* Allocate a list to fill */
|
||||||
port_list = OBJ_NEW(opal_list_t);
|
port_list = OBJ_NEW(opal_list_t);
|
||||||
@ -286,9 +331,46 @@ opal_list_t *ompi_common_verbs_find_ports(const char *if_include,
|
|||||||
device = devices[i];
|
device = devices[i];
|
||||||
check_sanity(&if_sanity_list, ibv_get_device_name(device), -1);
|
check_sanity(&if_sanity_list, ibv_get_device_name(device), -1);
|
||||||
|
|
||||||
device_context = ibv_open_device(device);
|
|
||||||
opal_output_verbose(5, stream, "examining verbs interface: %s",
|
opal_output_verbose(5, stream, "examining verbs interface: %s",
|
||||||
ibv_get_device_name(device));
|
ibv_get_device_name(device));
|
||||||
|
|
||||||
|
dev_is_usnic = false;
|
||||||
|
if ((flags & OMPI_COMMON_VERBS_FLAGS_TRANSPORT_USNIC) ||
|
||||||
|
(flags & OMPI_COMMON_VERBS_FLAGS_TRANSPORT_USNIC_UDP)) {
|
||||||
|
dev_is_usnic = device_is_usnic(device);
|
||||||
|
|
||||||
|
if (dev_is_usnic) {
|
||||||
|
opal_output_verbose(5, stream, "verbs interface %s is usnic, claims transport_type %s",
|
||||||
|
ibv_get_device_name(device),
|
||||||
|
transport_name_to_str(device->transport_type));
|
||||||
|
|
||||||
|
/* There are two flavors of libusnic_verbs (and its supporting
|
||||||
|
* kernel infrastructure):
|
||||||
|
* 1. One that speaks an L2-only format that uses the RoCE
|
||||||
|
* ethertype with a different version value.
|
||||||
|
* 2. A more modern one that speaks UDP/IP.
|
||||||
|
*
|
||||||
|
* Because 42-byte UDP headers are larger than the standard
|
||||||
|
* 40-byte IB headers, applications (like OMPI) must inform the
|
||||||
|
* library that they are prepared to see payloads starting at a
|
||||||
|
* non-standard offset. We do that by overriding the
|
||||||
|
* transport_type to a magic value before calling
|
||||||
|
* ibv_open_device. Flavor #1 ignores this field, while flavor
|
||||||
|
* #2 will set it back to IBV_TRANSPORT_USNIC (or some other
|
||||||
|
* non-magic value, depending on whether the first is available
|
||||||
|
* in libibverbs itself).
|
||||||
|
*
|
||||||
|
* Later we compare against this MAGIC value to see which
|
||||||
|
* flavor this device actually is. First save the current
|
||||||
|
* value so we can restore it after evaluating this device for
|
||||||
|
* its usNIC properties.
|
||||||
|
*/
|
||||||
|
saved_transport_type = device->transport_type;
|
||||||
|
device->transport_type = OMPI_COMMON_VERBS_USNIC_PROBE_MAGIC;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
device_context = ibv_open_device(device);
|
||||||
if (NULL == device_context) {
|
if (NULL == device_context) {
|
||||||
opal_show_help("help-ompi-common-verbs.txt",
|
opal_show_help("help-ompi-common-verbs.txt",
|
||||||
"ibv_open_device fail", true,
|
"ibv_open_device fail", true,
|
||||||
@ -317,6 +399,30 @@ opal_list_t *ompi_common_verbs_find_ports(const char *if_include,
|
|||||||
/* Check the device-specific flags to see if we want this
|
/* Check the device-specific flags to see if we want this
|
||||||
device */
|
device */
|
||||||
want = false;
|
want = false;
|
||||||
|
|
||||||
|
if (dev_is_usnic) {
|
||||||
|
if (flags & OMPI_COMMON_VERBS_FLAGS_TRANSPORT_USNIC &&
|
||||||
|
OMPI_COMMON_VERBS_USNIC_PROBE_MAGIC ==
|
||||||
|
(int) device->transport_type) {
|
||||||
|
want = true;
|
||||||
|
opal_output_verbose(5, stream,
|
||||||
|
"verbs interface %s has the right transport (usNIC/L2)",
|
||||||
|
ibv_get_device_name(device));
|
||||||
|
}
|
||||||
|
if (flags & OMPI_COMMON_VERBS_FLAGS_TRANSPORT_USNIC_UDP &&
|
||||||
|
OMPI_COMMON_VERBS_USNIC_PROBE_MAGIC !=
|
||||||
|
(int) device->transport_type) {
|
||||||
|
want = true;
|
||||||
|
opal_output_verbose(5, stream,
|
||||||
|
"verbs interface %s has the right transport (usNIC/UDP)",
|
||||||
|
ibv_get_device_name(device));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* done checking for usNIC, restore the transport value for future
|
||||||
|
* calls to this routine */
|
||||||
|
device->transport_type = saved_transport_type;
|
||||||
|
}
|
||||||
|
|
||||||
if (flags & OMPI_COMMON_VERBS_FLAGS_TRANSPORT_IB &&
|
if (flags & OMPI_COMMON_VERBS_FLAGS_TRANSPORT_IB &&
|
||||||
IBV_TRANSPORT_IB == device->transport_type) {
|
IBV_TRANSPORT_IB == device->transport_type) {
|
||||||
opal_output_verbose(5, stream, "verbs interface %s has right type (IB)",
|
opal_output_verbose(5, stream, "verbs interface %s has right type (IB)",
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user