1
1

libfabric: update to upstream d4ab6e56e23124e565ada939054a159737e52102

Fix a critical usnic bug, and other misc updates.
Этот коммит содержится в:
Jeff Squyres 2015-04-29 16:02:08 -07:00
родитель a50ad505e7
Коммит faf3324b0e
36 изменённых файлов: 629 добавлений и 179 удалений

Просмотреть файл

@ -86,6 +86,7 @@ libfabric_usnic_headers = \
libfabric/prov/usnic/src/usdf_cq.h \
libfabric/prov/usnic/src/usdf_dgram.h \
libfabric/prov/usnic/src/usdf_endpoint.h \
libfabric/prov/usnic/src/usdf_fake_ibv.c \
libfabric/prov/usnic/src/usdf_msg.h \
libfabric/prov/usnic/src/usdf_progress.h \
libfabric/prov/usnic/src/usdf_rdm.h \

Просмотреть файл

@ -201,6 +201,7 @@ _usnic_files = \
prov/usnic/src/usdf_ep_rdm.c \
prov/usnic/src/usdf_eq.c \
prov/usnic/src/usdf_fabric.c \
prov/usnic/src/usdf_fake_ibv.c \
prov/usnic/src/usdf_mem.c \
prov/usnic/src/usdf_msg.c \
prov/usnic/src/usdf_msg.h \

Просмотреть файл

@ -1,5 +1,5 @@
Version Libfabric v1.0.0rc6
Released on 2015-04-24
Released on 2015-04-29
Introduction
============

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_cm 3 "2015\-04\-23" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.TH fi_cm 3 "2015\-04\-29" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.SH NAME
.PP
fi_cm - Connection management operations
@ -167,9 +167,19 @@ peer endpoint address, respectively.
On input, the addrlen parameter should indicate the size of the addr
buffer.
If the actual address is larger than what can fit into the buffer, it
will be truncated.
will be truncated and -FI_ETOOSMALL will be returned.
On output, addrlen is set to the size of the buffer needed to store the
address, which may be larger than the input value.
.PP
fi_getname is not guaranteed to return a valid source address until
after the specified endpoint has been enabled or has had an address
assigned.
An endpoint may be enabled explicitly through fi_enable, or implicitly,
such as through fi_connect or fi_listen.
An address may be assigned using fi_setname.
fi_getpeer is not guaranteed to return a valid peer address until an
endpoint has been completely connected -- an FI_CONNECTED event has been
generated.
.SH FLAGS
.PP
Flag values are reserved and must be 0.

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_domain 3 "2015\-04\-23" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.TH fi_domain 3 "2015\-04\-28" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.SH NAME
.PP
fi_domain - Open a fabric access domain
@ -177,7 +177,7 @@ data flows.
FI_THREAD_FID, but with the added restriction that serialization is
required when accessing the same endpoint, even if multiple transmit and
receive contexts are used.
Conceptualy, FI_THREAD_ENDPOINT maps well to providers that implement
Conceptually, FI_THREAD_ENDPOINT maps well to providers that implement
fabric services in hardware but use a single command queue to access
different data flows.
.PP

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_endpoint 3 "2015\-04\-24" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.TH fi_endpoint 3 "2015\-04\-28" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.SH NAME
.PP
fi_endpoint - Fabric endpoint operations
@ -1086,8 +1086,8 @@ counters.
Completed receive operations are posted to the CQs bound to the
endpoint.
An endpoint may only be associated with a single receive context, and
all connectless endpoints associated with a shared receive context must
also share the same address vector.
all connectionless endpoints associated with a shared receive context
must also share the same address vector.
.PP
Endpoints associated with a shared transmit context may use dedicated
receive contexts, and vice-versa.

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_eq 3 "2015\-04\-13" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.TH fi_eq 3 "2015\-04\-28" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.SH NAME
.PP
fi_eq - Event queue operations
@ -244,7 +244,7 @@ corresponding object type (e.g., see \f[C]fi_av\f[](3) for a description
of how asynchronous address vector insertions are completed).
.PP
\f[I]Connection Notification\f[] : Connection notifications are
connection management notifications used to setup or teardown
connection management notifications used to setup or tear down
connections between endpoints.
There are three connection notification events: FI_CONNREQ,
FI_CONNECTED, and FI_SHUTDOWN.
@ -322,7 +322,7 @@ Endpoint errors may be result of numerous actions, but are often
associated with a failed operation.
Operations may fail because of buffer overruns, invalid permissions,
incorrect memory access keys, network routing failures, network
reachability issues, etc.
reach-ability issues, etc.
.PP
Asynchronous errors are reported using struct fi_eq_err_entry, as
defined below.

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_errno 3 "2015\-01\-08" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.TH fi_errno 3 "2015\-04\-28" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.SH NAME
.PP
fi_errno - fabric errors
@ -10,7 +10,7 @@ fi_strerror - Convert fabric error into a printable string
\f[C]
#include\ <rdma/fi_errno.h>
const\ char\ *fi_strerror(int\ errnum);
const\ char\ *fi_strerror(int\ errno);
\f[]
.fi
.SH ERRORS

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_tagged 3 "2015\-04\-17" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.TH fi_tagged 3 "2015\-04\-28" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.SH NAME
.PP
fi_tagged - Tagged data transfer operations
@ -257,7 +257,7 @@ of FI_ENOMSG.
.PP
If a peek request locates a matching message, the operation will
complete successfully.
The returned completion data will indicate the metadata associated with
The returned completion data will indicate the meta-data associated with
the message, such as the message length, completion flags, available CQ
data, tag, and source address.
The data available is subject to the completion entry format (e.g.

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_trigger 3 "2015\-01\-01" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.TH fi_trigger 3 "2015\-04\-28" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.SH NAME
.PP
fi_trigger - Triggered operations
@ -19,13 +19,13 @@ A triggered operation may be requested by specifying the FI_TRIGGER flag
as part of the operation.
Alternatively, an endpoint alias may be created and configured with the
FI_TRIGGER flag.
Such an endpoint is referred to as a triggerable endpoint.
All data transfer operations on a triggerable endpoint are deferred.
Such an endpoint is referred to as a trigger-able endpoint.
All data transfer operations on a trigger-able endpoint are deferred.
.PP
Any data transfer operation is potentially triggerable, subject to
Any data transfer operation is potentially trigger-able, subject to
provider constraints.
Triggerable endpoints are initialized such that only those interfaces
supported by the provider which are triggerable are available.
Trigger-able endpoints are initialized such that only those interfaces
supported by the provider which are trigger-able are available.
.PP
Triggered operations require that applications use struct
fi_triggered_context as their per operation context parameter.

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_provider 7 "2015\-03\-31" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.TH fi_provider 7 "2015\-04\-28" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.SH NAME
.PP
Fabric Interface Providers
@ -14,7 +14,7 @@ Devices and the drivers that plug into the libfabric framework are
referred to as fabric providers, or simply providers.
.PP
This distribution of libfabric contains the following providers
(although more may be available via run-time plugins):
(although more may be available via run-time plug-ins):
.PP
\f[I]PSM\f[] : High-speed InfiniBand networking from Intel.
See \f[C]fi_psm\f[](7) for more information.
@ -49,53 +49,88 @@ hardware constraints.
To assist in the development of applications, libfabric specifies the
following requirements that must be met by any fabric provider, if
requested by an application.
(Note that the instantiation of a specific fabric object is subject to
.PP
Note that the instantiation of a specific fabric object is subject to
application configuration parameters and need not meet these
requirements).
requirements.
.IP \[bu] 2
A fabric provider must support at least one endpoint type.
.IP \[bu] 2
All endpoints must support the message queue data transfer interface.
All endpoints must support the message queue data transfer interface
(fi_ops_msg).
.IP \[bu] 2
An endpoint that advertises support for a specific endpoint capability
must support the corresponding data transfer interface.
.IP \[bu] 2
Endpoints must support operations to send and receive data for any data
transfer operations that they support.
FI_ATOMIC - fi_ops_atomic
.IP \[bu] 2
Connectionless endpoints must support all relevant data transfer
routines.
(send / recv / write / read / etc.)
FI_RMA - fi_ops_rma
.IP \[bu] 2
Connectionless endpoints must support the CM interface getname.
FI_TAGGED - fi_ops_tagged
.IP \[bu] 2
Connectionless endpoints that support multicast operations must support
the CM interfaces join and leave.
Endpoints must support all transmit and receive operations for any data
transfer interface that they support.
.IP \[bu] 2
Connection-oriented interfaces must support the CM interfaces getname,
getpeer, connect, listen, accept, reject, and shutdown.
Exception: If an operation is only usable for an operation that the
provider does not support, and support for that operation is conveyed
using some other mechanism, the operation may return
.RS 2
.IP \[bu] 2
All endpoints must support all relevant \[aq]msg\[aq] data transfer
routines.
(sendmsg / recvmsg / writemsg / readmsg / etc.)
FI_ENOSYS.
For example, if the provider does not support injected data, it can set
the attribute inject_size = 0, and fail all fi_inject operations.
.RE
.IP \[bu] 2
Access domains must support opening address vector maps and tables.
The framework supplies wrappers around the \[aq]msg\[aq] operations that
can be used.
For example, the framework implements the sendv() msg operation by
calling sendmsg().
Providers may reference the general operation, and supply only the
sendmsg() implementation.
.IP \[bu] 2
Address vectors associated with domains that may be identified using IP
addresses must support the FI_SOCKADDR_IN input format.
Providers must set all operations to an implementation.
Function pointers may not be left NULL or uninitialized.
The framework supplies empty functions that return -FI_ENOSYS which can
be used for this purpose.
.IP \[bu] 2
Access domains must support opening completion queues and counters.
Endpoints must support the CM interface as follows:
.IP \[bu] 2
FI_EP_MSG endpoints must support all CM operations.
.IP \[bu] 2
FI_EP_DGRAM endpoints must support CM getname and setname.
.IP \[bu] 2
FI_EP_RDM endpoints must support CM getname and setname.
.IP \[bu] 2
Providers that support connectionless endpoints must support all AV
operations (fi_ops_av).
.IP \[bu] 2
Providers that support memory registration, must support all MR
operations (fi_ops_mr).
.IP \[bu] 2
Providers should support both completion queues and counters.
.IP \[bu] 2
If FI_RMA_EVENT is not supported, counter support is limited to local
events only.
.IP \[bu] 2
Completion queues must support the FI_CQ_FORMAT_CONTEXT and
FI_CQ_FORMAT_MSG formats.
FI_CQ_FORMAT_MSG.
.IP \[bu] 2
Event queues associated with tagged message transfers must support the
FI_CQ_FORMAT_TAGGED format.
Providers that support FI_REMOTE_CQ_DATA shall support
FI_CQ_FORMAT_DATA.
.IP \[bu] 2
Providers that support FI_TAGGED shall support FI_CQ_FORMAT_TAGGED.
.IP \[bu] 2
A provider is expected to be forward compatible, and must be able to be
compiled against expanded \f[C]fi_xxx_ops\f[] structures that define new
functions added after the provider was written.
Any unknown functions must be set to NULL.
.IP \[bu] 2
Providers shall document in their man page which features they support,
and any missing requirements.
.PP
Future versions of libfabric will automatically enable a more complete
set of features for providers that focus their implementation on a
narrow subset of libfabric capabilities.
.SH LOGGING INTERFACE
.PP
Logging is performed using the FI_ERR, FI_LOG, and FI_DEBUG macros.

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_psm 7 "2015\-04\-23" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.TH fi_psm 7 "2015\-04\-28" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.SH NAME
.PP
The PSM Fabric Provider
@ -94,7 +94,7 @@ The name server won\[aq]t work properly if there are more than one
processes from the same job (i.e.
with the same UUID) running on the same node and acting as servers.
For such scenario it is recommended to have each process getting local
transport address with \f[I]fi_cm_getname\f[] and exchanginge the
transport address with \f[I]fi_cm_getname\f[] and exchanging the
addresses with out-of-band mechanism.
.PP
The name server is on by default.
@ -106,7 +106,7 @@ created when the name server is on.
of the PSM Active Message functions.
The Active Message functions have a limit on the size of data that can be
transferred in a single message.
Large transfers can be divided into small chunks and be pipelined.
Large transfers can be divided into small chunks and be pipe-lined.
However, the bandwidth is sub-optimal when done this way.
.PP
The \f[I]psm\f[] provider uses PSM tag-matching message queue functions

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_usnic 7 "2015\-03\-31" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.TH fi_usnic 7 "2015\-04\-28" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.SH NAME
.PP
The usNIC Fabric Provider
@ -66,7 +66,7 @@ Passive endpoints only support listen CM operations.
\f[I]FI_EP_DGRAM\f[] endpoints support \f[C]fi_sendmsg()\f[] and
\f[C]fi_recvmsg()\f[], but all flags are ignored.
.IP \[bu] 2
\f[I]FI_EP_RDM\f[] and \f[I]FI_EP_MSG\f[] endponts do not support
\f[I]FI_EP_RDM\f[] and \f[I]FI_EP_MSG\f[] endpoints do not support
\f[C]fi_sendmsg()\f[] and \f[C]fi_recvmsg()\f[].
.IP \[bu] 2
Address vectors only support \f[C]FI_AV_MAP\f[].

Просмотреть файл

@ -1,11 +1,11 @@
.TH fi_verbs 7 "2015\-04\-23" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.TH fi_verbs 7 "2015\-04\-28" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0rc6"
.SH NAME
.PP
The Verbs Fabric Provider
.SH OVERVIEW
.PP
The verbs provider enables applications using OFI to be run over any
verbs hardware (Infiniband, iWARP, etc).
verbs hardware (Infiniband, iWarp, etc).
It uses the Linux Verbs API for network transport and provides a
translation of OFI calls to appropriate verbs API calls.
It uses librdmacm for communication management and libibverbs for other
@ -25,7 +25,7 @@ Applications must take responsibility of posting receives for any
incoming CQ data.
.PP
\f[I]Progress\f[] : Verbs provider supports FI_PROGRESS_AUTO:
Asynchonous operations make forward progress automatically.
Asynchronous operations make forward progress automatically.
.PP
\f[I]Operation flags\f[] : Verbs provider supports FI_INJECT,
FI_COMPLETION, FI_REMOTE_CQ_DATA.
@ -43,7 +43,7 @@ supported.
are not supported.
.PP
\f[I]Endpoint features\f[] : Scalable endpoints and shared contexts are
not suppoted.
not supported.
fi_cancel, fi_tx/rx_size_left and fi_alias operations are not supported.
.PP
\f[I]Others\f[] : Other unsupported features include resource

Просмотреть файл

@ -126,9 +126,15 @@ int psmx_am_init(struct psmx_fid_domain *domain)
if (err)
return psmx_errno(err);
assert(psmx_am_handlers_idx[0] == PSMX_AM_RMA_HANDLER);
assert(psmx_am_handlers_idx[1] == PSMX_AM_MSG_HANDLER);
assert(psmx_am_handlers_idx[2] == PSMX_AM_ATOMIC_HANDLER);
if ((psmx_am_handlers_idx[0] != PSMX_AM_RMA_HANDLER) ||
(psmx_am_handlers_idx[1] != PSMX_AM_MSG_HANDLER) ||
(psmx_am_handlers_idx[2] != PSMX_AM_ATOMIC_HANDLER)) {
FI_WARN(&psmx_prov, FI_LOG_CORE,
"failed to register one or mroe AM handlers "
"at indecies %d, %d, %d\n", PSMX_AM_RMA_HANDLER,
PSMX_AM_MSG_HANDLER, PSMX_AM_ATOMIC_HANDLER);
return -FI_EBUSY;
}
psmx_am_handlers_initialized = 1;
}

Просмотреть файл

@ -116,6 +116,12 @@
FI_ORDER_SAR | FI_ORDER_SAW | FI_ORDER_SAS)
#define SOCK_EP_COMP_ORDER (FI_ORDER_STRICT | FI_ORDER_DATA)
#define SOCK_EP_DEFAULT_OP_FLAGS (FI_TRANSMIT_COMPLETE)
/*
 * Make sure a transmit operation carries a completion level: unless
 * FI_INJECT_COMPLETE is already present in _flags, FI_TRANSMIT_COMPLETE
 * is OR-ed in.  _flags is evaluated more than once and must be a
 * modifiable lvalue.
 */
#define SOCK_EP_SET_TX_OP_FLAGS(_flags) do {			\
		if (((_flags) & FI_INJECT_COMPLETE) == 0) {	\
			(_flags) |= FI_TRANSMIT_COMPLETE;	\
		}						\
	} while (0)
#define SOCK_MODE (0)
#define SOCK_NO_COMPLETION (1ULL << 60)
@ -817,7 +823,8 @@ int sock_dgram_verify_ep_attr(struct fi_ep_attr *ep_attr, struct fi_tx_attr *tx_
struct fi_rx_attr *rx_attr);
int sock_msg_verify_ep_attr(struct fi_ep_attr *ep_attr, struct fi_tx_attr *tx_attr,
struct fi_rx_attr *rx_attr);
int sock_get_src_addr(struct sockaddr_in *dest_addr,
struct sockaddr_in *src_addr);
struct fi_info *sock_fi_info(enum fi_ep_type ep_type,
struct fi_info *hints, void *src_addr, void *dest_addr);

Просмотреть файл

@ -104,6 +104,7 @@ static ssize_t sock_ep_tx_atomic(struct fid_ep *ep,
if (!conn)
return -FI_EAGAIN;
SOCK_EP_SET_TX_OP_FLAGS(flags);
if (flags & SOCK_USE_OP_FLAGS)
flags |= tx_ctx->attr.op_flags;
@ -162,7 +163,6 @@ static ssize_t sock_ep_tx_atomic(struct fid_ep *ep,
for (i = 0; i< msg->iov_count; i++) {
tx_iov.ioc.addr = (uintptr_t) msg->msg_iov[i].addr;
tx_iov.ioc.count = msg->msg_iov[i].count;
tx_iov.ioc.key = (uintptr_t) msg->desc[i];
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
src_len += (tx_iov.ioc.count * datatype_sz);
}

Просмотреть файл

@ -36,6 +36,7 @@
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include "sock.h"
#include "sock_util.h"
@ -1321,6 +1322,9 @@ struct fi_info *sock_fi_info(enum fi_ep_type ep_type, struct fi_info *hints,
if (hints->rx_attr)
*(info->rx_attr) = *(hints->rx_attr);
if (hints->handle)
info->handle = hints->handle;
sock_set_domain_attr(hints->domain_attr, info->domain_attr);
sock_set_fabric_attr(hints->fabric_attr, info->fabric_attr);
} else {
@ -1332,6 +1336,39 @@ struct fi_info *sock_fi_info(enum fi_ep_type ep_type, struct fi_info *hints,
return info;
}
/*
 * Allocate sock_ep->src_addr and fill it with a source address.
 *
 * When info carries a destination address, the source address is taken
 * from sock_get_src_addr() for that destination.  Otherwise the local
 * host name is resolved with getaddrinfo() (AF_INET) and the first
 * result is copied into sock_ep->src_addr.
 *
 * Returns 0 on success, -FI_ENOMEM if the allocation fails, -FI_EINVAL
 * if the host name cannot be obtained or resolved, or the result of
 * sock_get_src_addr().  On failure sock_ep->src_addr stays allocated;
 * this function does not release it.
 */
static int sock_ep_assign_src_addr(struct sock_ep *sock_ep, struct fi_info *info)
{
	int ret;
	struct addrinfo ai, *rai = NULL;
	/* HOST_NAME_MAX does not count the terminating NUL byte. */
	char hostname[HOST_NAME_MAX + 1];

	sock_ep->src_addr = calloc(1, sizeof(struct sockaddr_in));
	if (!sock_ep->src_addr)
		return -FI_ENOMEM;

	if (info && info->dest_addr)
		return sock_get_src_addr(info->dest_addr, sock_ep->src_addr);

	memset(&ai, 0, sizeof(ai));
	ai.ai_family = AF_INET;
	ai.ai_socktype = SOCK_STREAM;

	if (gethostname(hostname, sizeof hostname) != 0) {
		SOCK_LOG_INFO("gethostname failed!\n");
		return -FI_EINVAL;
	}
	/* A truncated name is not guaranteed to be NUL-terminated. */
	hostname[sizeof hostname - 1] = '\0';

	ret = getaddrinfo(hostname, NULL, &ai, &rai);
	if (ret) {
		SOCK_LOG_INFO("getaddrinfo failed!\n");
		return -FI_EINVAL;
	}

	memcpy(sock_ep->src_addr, (struct sockaddr_in *)rai->ai_addr,
	       sizeof *sock_ep->src_addr);
	freeaddrinfo(rai);
	return 0;
}
int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info,
struct sock_ep **ep, void *context, size_t fclass)
{
@ -1422,6 +1459,11 @@ int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info,
sock_ep->info.handle = info->handle;
}
if (!sock_ep->src_addr && sock_ep_assign_src_addr(sock_ep, info)) {
SOCK_LOG_ERROR("failed to get src_address\n");
goto err;
}
atomic_initialize(&sock_ep->ref, 0);
atomic_initialize(&sock_ep->num_tx_ctx, 0);
atomic_initialize(&sock_ep->num_rx_ctx, 0);
@ -1490,6 +1532,10 @@ int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info,
return 0;
err:
if (sock_ep->src_addr)
free(sock_ep->src_addr);
if (sock_ep->dest_addr)
free(sock_ep->dest_addr);
free(sock_ep);
return -FI_EINVAL;
}

Просмотреть файл

@ -75,7 +75,7 @@ const struct fi_ep_attr sock_dgram_ep_attr = {
const struct fi_tx_attr sock_dgram_tx_attr = {
.caps = SOCK_EP_DGRAM_CAP,
.mode = SOCK_MODE,
.op_flags = FI_TRANSMIT_COMPLETE,
.op_flags = SOCK_EP_DEFAULT_OP_FLAGS,
.msg_order = SOCK_EP_MSG_ORDER,
.inject_size = SOCK_EP_MAX_INJECT_SZ,
.size = SOCK_EP_TX_SZ,

Просмотреть файл

@ -77,7 +77,7 @@ static const struct fi_ep_attr sock_msg_ep_attr = {
static const struct fi_tx_attr sock_msg_tx_attr = {
.caps = SOCK_EP_MSG_CAP,
.mode = SOCK_MODE,
.op_flags = FI_TRANSMIT_COMPLETE,
.op_flags = SOCK_EP_DEFAULT_OP_FLAGS,
.msg_order = SOCK_EP_MSG_ORDER,
.inject_size = SOCK_EP_MAX_INJECT_SZ,
.size = SOCK_EP_TX_SZ,
@ -254,10 +254,75 @@ static int sock_ep_cm_getname(fid_t fid, void *addr, size_t *addrlen)
SOCK_LOG_ERROR("Invalid argument\n");
return -FI_EINVAL;
}
return (*addrlen == sizeof(struct sockaddr_in)) ? 0 : -FI_ETOOSMALL;
}
/*
 * Create and bind the passive endpoint's connection-management socket.
 *
 * pep->src_addr is rendered as numeric host and service strings and
 * handed to getaddrinfo() (AF_INET, SOCK_DGRAM/UDP, AI_PASSIVE).  The
 * first candidate for which both socket() and bind() succeed becomes
 * pep->cm.sock.  If the requested port was 0, the port that was actually
 * bound is read back with getsockname() and stored in
 * pep->src_addr.sin_port.
 *
 * pep->cm.do_listen is set to 1 on entry, before any step can fail.
 *
 * Returns 0 on success, -FI_EINVAL if the address cannot be formatted or
 * resolved or the bound port cannot be queried, and -FI_EIO if no
 * candidate could be bound.  On every failure path pep->cm.sock is
 * closed (if it was opened) and left at -1.
 */
static int sock_pep_create_listener(struct sock_pep *pep)
{
	int optval, ret;
	socklen_t addr_size;
	struct sockaddr_in addr;
	struct addrinfo *s_res = NULL, *p;
	struct addrinfo hints;
	char sa_ip[INET_ADDRSTRLEN] = {0};
	char sa_port[NI_MAXSERV] = {0};

	pep->cm.do_listen = 1;
	pep->cm.sock = -1;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_INET;
	hints.ai_socktype = SOCK_DGRAM;
	hints.ai_flags = AI_PASSIVE;
	hints.ai_protocol = IPPROTO_UDP;

	/* inet_ntop writes into our own buffer and bounds the copy. */
	if (!inet_ntop(AF_INET, &pep->src_addr.sin_addr, sa_ip, sizeof sa_ip)) {
		SOCK_LOG_ERROR("Invalid argument\n");
		return -FI_EINVAL;
	}
	snprintf(sa_port, sizeof sa_port, "%d", ntohs(pep->src_addr.sin_port));

	ret = getaddrinfo(sa_ip, sa_port, &hints, &s_res);
	if (ret) {
		SOCK_LOG_ERROR("no available AF_INET address service:%s, %s\n",
			       sa_port, gai_strerror(ret));
		return -FI_EINVAL;
	}

	for (p = s_res; p; p = p->ai_next) {
		pep->cm.sock = socket(p->ai_family, p->ai_socktype,
				      p->ai_protocol);
		if (pep->cm.sock < 0)
			continue;

		optval = 1;
		if (setsockopt(pep->cm.sock, SOL_SOCKET, SO_REUSEADDR, &optval,
			       sizeof optval))
			SOCK_LOG_ERROR("setsockopt failed\n");

		/* Bind to the address of the candidate this socket was made for. */
		if (!bind(pep->cm.sock, p->ai_addr, p->ai_addrlen))
			break;

		close(pep->cm.sock);
		pep->cm.sock = -1;
	}

	freeaddrinfo(s_res);
	if (pep->cm.sock < 0)
		return -FI_EIO;

	if (pep->src_addr.sin_port == 0) {
		/* Port 0 requested: record the port that bind() actually used. */
		addr_size = sizeof(addr);
		if (getsockname(pep->cm.sock, (struct sockaddr *)&addr, &addr_size)) {
			close(pep->cm.sock);
			pep->cm.sock = -1;
			return -FI_EINVAL;
		}
		pep->src_addr.sin_port = addr.sin_port;
	}

	SOCK_LOG_INFO("Listener thread bound to %s:%d\n",
		      sa_ip, ntohs(pep->src_addr.sin_port));
	return 0;
}
static int sock_ep_cm_setnmae(fid_t fid, void *addr, size_t addrlen)
static int sock_ep_cm_setname(fid_t fid, void *addr, size_t addrlen)
{
struct sock_ep *sock_ep = NULL;
struct sock_pep *sock_pep = NULL;
@ -272,13 +337,13 @@ static int sock_ep_cm_setnmae(fid_t fid, void *addr, size_t addrlen)
if (sock_ep->listener.listener_thread)
return -FI_EINVAL;
memcpy(sock_ep->src_addr, addr, addrlen);
break;
return sock_conn_listen(sock_ep);
case FI_CLASS_PEP:
sock_pep = container_of(fid, struct sock_pep, pep.fid);
if (sock_pep->cm.listener_thread)
return -FI_EINVAL;
memcpy(&sock_pep->src_addr, addr, addrlen);
break;
return sock_pep_create_listener(sock_pep);
default:
SOCK_LOG_ERROR("Invalid argument\n");
return -FI_EINVAL;
@ -298,7 +363,7 @@ static int sock_ep_cm_getpeer(struct fid_ep *ep, void *addr, size_t *addrlen)
sock_ep = container_of(ep, struct sock_ep, ep);
*addrlen = MIN(*addrlen, sizeof(struct sockaddr_in));
memcpy(addr, sock_ep->dest_addr, *addrlen);
return 0;
return (*addrlen == sizeof(struct sockaddr_in)) ? 0 : -FI_ETOOSMALL;
}
static int sock_ep_cm_create_socket(void)
@ -678,6 +743,8 @@ static int sock_ep_cm_connect(struct fid_ep *ep, const void *addr,
req->ep_attr = *_ep->info.ep_attr;
req->domain_attr = *_ep->info.domain_attr;
req->fabric_attr = *_ep->info.fabric_attr;
req->fabric_attr.fabric = NULL;
req->domain_attr.domain = NULL;
if (param && paramlen)
memcpy(&req->user_data, param, paramlen);
@ -777,7 +844,7 @@ static int sock_ep_cm_shutdown(struct fid_ep *ep, uint64_t flags)
struct fi_ops_cm sock_ep_cm_ops = {
.size = sizeof(struct fi_ops_cm),
.setname = sock_ep_cm_setnmae,
.setname = sock_ep_cm_setname,
.getname = sock_ep_cm_getname,
.getpeer = sock_ep_cm_getpeer,
.connect = sock_ep_cm_connect,
@ -791,6 +858,7 @@ static int sock_msg_endpoint(struct fid_domain *domain, struct fi_info *info,
struct sock_ep **ep, void *context, size_t fclass)
{
int ret;
struct sock_pep *pep;
if (info) {
if (info->ep_attr) {
@ -818,6 +886,11 @@ static int sock_msg_endpoint(struct fid_domain *domain, struct fi_info *info,
if (ret)
return ret;
if (info && info->handle && info->handle->fclass == FI_CLASS_PEP) {
pep = container_of(info->handle, struct sock_pep, pep.fid);
memcpy((*ep)->src_addr, &pep->src_addr, sizeof *(*ep)->src_addr);
}
if (!info || !info->ep_attr)
(*ep)->ep_attr = sock_msg_ep_attr;
@ -1046,69 +1119,8 @@ out:
return NULL;
}
static int sock_pep_create_listener_thread(struct sock_pep *pep)
static int sock_pep_start_listener_thread(struct sock_pep *pep)
{
int optval, ret;
socklen_t addr_size;
struct sockaddr_in addr;
struct addrinfo *s_res = NULL, *p;
struct addrinfo hints;
char sa_ip[INET_ADDRSTRLEN] = {0};
char sa_port[NI_MAXSERV] = {0};
pep->cm.do_listen = 1;
memset(&hints, 0, sizeof(hints));
hints.ai_family = AF_INET;
hints.ai_socktype = SOCK_DGRAM;
hints.ai_flags = AI_PASSIVE;
hints.ai_protocol = IPPROTO_UDP;
memcpy(sa_ip, inet_ntoa(pep->src_addr.sin_addr), INET_ADDRSTRLEN);
sprintf(sa_port, "%d", ntohs(pep->src_addr.sin_port));
ret = getaddrinfo(sa_ip, sa_port, &hints, &s_res);
if (ret) {
SOCK_LOG_ERROR("no available AF_INET address service:%s, %s\n",
sa_port, gai_strerror(ret));
return -FI_EINVAL;
}
for (p=s_res; p; p=p->ai_next) {
pep->cm.sock = socket(p->ai_family, p->ai_socktype,
p->ai_protocol);
if (pep->cm.sock >= 0) {
optval = 1;
if (setsockopt(pep->cm.sock, SOL_SOCKET, SO_REUSEADDR, &optval,
sizeof optval))
SOCK_LOG_ERROR("setsockopt failed\n");
if (!bind(pep->cm.sock, s_res->ai_addr, s_res->ai_addrlen))
break;
close(pep->cm.sock);
pep->cm.sock = -1;
}
}
freeaddrinfo(s_res);
if (pep->cm.sock < 0)
return -FI_EIO;
optval = 1;
if (setsockopt(pep->cm.sock, SOL_SOCKET, SO_REUSEADDR, &optval,
sizeof optval))
SOCK_LOG_ERROR("setsockopt failed\n");
if (pep->src_addr.sin_port == 0) {
addr_size = sizeof(addr);
if (getsockname(pep->cm.sock, (struct sockaddr*)&addr, &addr_size))
return -FI_EINVAL;
pep->src_addr.sin_port = addr.sin_port;
}
SOCK_LOG_INFO("Listener thread bound to %s:%d\n",
sa_ip, ntohs(pep->src_addr.sin_port));
if (pthread_create(&pep->cm.listener_thread, NULL,
sock_pep_listener_thread, (void *)pep)) {
SOCK_LOG_ERROR("Couldn't create listener thread\n");
@ -1121,7 +1133,15 @@ static int sock_pep_listen(struct fid_pep *pep)
{
struct sock_pep *_pep;
_pep = container_of(pep, struct sock_pep, pep);
return sock_pep_create_listener_thread(_pep);
if (_pep->cm.listener_thread)
return 0;
if (!_pep->cm.do_listen && sock_pep_create_listener(_pep)) {
SOCK_LOG_ERROR("Failed to create pep thread\n");
return -FI_EINVAL;
}
return sock_pep_start_listener_thread(_pep);
}
static int sock_pep_reject(struct fid_pep *pep, fid_t handle,
@ -1170,7 +1190,7 @@ out:
static struct fi_ops_cm sock_pep_cm_ops = {
.size = sizeof(struct fi_ops_cm),
.setname = sock_ep_cm_setnmae,
.setname = sock_ep_cm_setname,
.getname = sock_ep_cm_getname,
.getpeer = fi_no_getpeer,
.connect = fi_no_connect,

Просмотреть файл

@ -76,7 +76,7 @@ const struct fi_ep_attr sock_rdm_ep_attr = {
const struct fi_tx_attr sock_rdm_tx_attr = {
.caps = SOCK_EP_RDM_CAP,
.mode = SOCK_MODE,
.op_flags = FI_TRANSMIT_COMPLETE,
.op_flags = SOCK_EP_DEFAULT_OP_FLAGS,
.msg_order = SOCK_EP_MSG_ORDER,
.inject_size = SOCK_EP_MAX_INJECT_SZ,
.size = SOCK_EP_TX_SZ,

Просмотреть файл

@ -383,7 +383,7 @@ void sock_fabric_remove_service(struct sock_fabric *fab, int service)
fastlock_release(&fab->lock);
}
static int sock_get_src_addr(struct sockaddr_in *dest_addr,
int sock_get_src_addr(struct sockaddr_in *dest_addr,
struct sockaddr_in *src_addr)
{
int sock, ret;
@ -402,6 +402,7 @@ static int sock_get_src_addr(struct sockaddr_in *dest_addr,
}
ret = getsockname(sock, (struct sockaddr *) src_addr, &len);
src_addr->sin_port = 0;
if (ret) {
SOCK_LOG_INFO("getsockname failed\n");
ret = -errno;

Просмотреть файл

@ -205,6 +205,7 @@ static ssize_t sock_ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
SOCK_LOG_INFO("New sendmsg on TX: %p using conn: %p\n",
tx_ctx, conn);
SOCK_EP_SET_TX_OP_FLAGS(flags);
if (flags & SOCK_USE_OP_FLAGS)
flags |= tx_ctx->attr.op_flags;
memset(&tx_op, 0, sizeof(struct sock_op));
@ -516,6 +517,7 @@ static ssize_t sock_ep_tsendmsg(struct fid_ep *ep,
if (!conn)
return -FI_EAGAIN;
SOCK_EP_SET_TX_OP_FLAGS(flags);
if (flags & SOCK_USE_OP_FLAGS)
flags |= tx_ctx->attr.op_flags;

Просмотреть файл

@ -349,7 +349,7 @@ static void sock_pe_report_rx_error(struct sock_pe_entry *pe_entry, int rem)
sock_cntr_err_inc(pe_entry->comp->recv_cntr);
if (pe_entry->comp->recv_cq)
sock_cq_report_error(pe_entry->comp->recv_cq, pe_entry, rem,
FI_ENOSPC, -FI_ENOSPC, NULL);
FI_ETRUNC, -FI_ETRUNC, NULL);
}
static void sock_pe_report_tx_rma_read_err(struct sock_pe_entry *pe_entry, int err)

Просмотреть файл

@ -113,6 +113,7 @@ static ssize_t sock_ep_rma_readmsg(struct fid_ep *ep,
goto err;
}
SOCK_EP_SET_TX_OP_FLAGS(flags);
if (flags & SOCK_USE_OP_FLAGS)
flags |= tx_ctx->attr.op_flags;
memset(&tx_op, 0, sizeof(struct sock_op));
@ -137,7 +138,6 @@ static ssize_t sock_ep_rma_readmsg(struct fid_ep *ep,
for (i = 0; i< msg->iov_count; i++) {
tx_iov.iov.addr = (uintptr_t) msg->msg_iov[i].iov_base;
tx_iov.iov.len = msg->msg_iov[i].iov_len;
tx_iov.iov.key = (uintptr_t) msg->desc[i];
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
dst_len += tx_iov.iov.len;
}
@ -255,6 +255,7 @@ static ssize_t sock_ep_rma_writemsg(struct fid_ep *ep,
if (!conn)
return -FI_EAGAIN;
SOCK_EP_SET_TX_OP_FLAGS(flags);
if (flags & SOCK_USE_OP_FLAGS)
flags |= tx_ctx->attr.op_flags;
memset(&tx_op, 0, sizeof(struct sock_op));
@ -304,7 +305,6 @@ static ssize_t sock_ep_rma_writemsg(struct fid_ep *ep,
for (i = 0; i< msg->iov_count; i++) {
tx_iov.iov.addr = (uintptr_t) msg->msg_iov[i].iov_base;
tx_iov.iov.len = msg->msg_iov[i].iov_len;
tx_iov.iov.key = (uintptr_t) msg->desc[i];
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
src_len += tx_iov.iov.len;
}

Просмотреть файл

@ -457,6 +457,7 @@ ssize_t usdf_msg_ud_prefix_recv(struct fid_ep *ep, void *buf, size_t len, void *
ssize_t usdf_msg_ud_prefix_recvv(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, void *context);
/* Fake IBV provider */
void usdf_setup_fake_ibv_provider(void);
#endif /* _USDF_H_ */

Просмотреть файл

@ -1093,5 +1093,8 @@ struct fi_provider usdf_ops = {
USNIC_INI
{
#ifdef HAVE_VERBS
usdf_setup_fake_ibv_provider();
#endif
return (&usdf_ops);
}

Просмотреть файл

@ -0,0 +1,128 @@
/*
* Copyright (c) 2015, Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* The code in this file prevents spurious libibverbs warnings on
* stderr about devices that it doesn't recognize.
*
* Specifically, Cisco usNIC devices are exposed through the Linux
* InfiniBand kernel interface (i.e., they show up in
* /sys/class/infiniband). However, the userspace side of these
* drivers is not exposed through libibverbs (i.e., there is no
* libibverbs provider/plugin for usNIC). Therefore, when
* ibv_get_device_list() is invoked, libibverbs cannot find a plugin
* for usnic devices. This causes libibverbs to emit a spurious
* warning message on stderr.
*
* Since libfabric can have a verbs provider, libibverbs is invoked,
* triggering the sequence described above, resulting in warning
* messages about usnic devices. To avoid these extra stderr
* warnings, we insert a fake usnic verbs libibverbs provider that
* safely squelches these warnings.
*
* More specifically: the userspace side of usNIC is exposed through
* libfabric; we don't need libibverbs warnings about not being able
* to find a usnic driver.
*/
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <stdio.h>
#include <infiniband/verbs.h>
#include <infiniband/driver.h>
/***********************************************************************/
#ifndef PCI_VENDOR_ID_CISCO
#define PCI_VENDOR_ID_CISCO 0x1137
#endif
/*
 * Context-allocation hook installed in fake_dev.ops.
 *
 * The fake device is never meant to be opened, so this hook ignores
 * both arguments and always returns NULL.
 */
static struct ibv_context *fake_alloc_context(struct ibv_device *ibdev,
                                              int cmd_fd)
{
    /* Neither argument is consulted; the casts only mark them as unused. */
    (void)ibdev;
    (void)cmd_fd;

    return NULL;
}
/*
 * Context-release hook installed in fake_dev.ops.
 *
 * Intentionally a no-op: the argument is ignored and nothing is freed.
 */
static void fake_free_context(struct ibv_context *ibctx)
{
    /* Argument is not consulted; the cast only marks it as unused. */
    (void)ibctx;
}
/*
 * Placeholder device handed back by fake_driver_init().
 *
 * Only the two context hooks and a descriptive name are populated:
 * enough to convince libibverbs that this is a valid device, plus a
 * recognizable name in case someone inspects the struct in a debugger.
 */
static struct ibv_device fake_dev = {
    .name = "fake ibv_device inserted by libfabric:usNIC",
    .ops = {
        .free_context = fake_free_context,
        .alloc_context = fake_alloc_context,
    },
};
/*
 * Driver-init callback registered with libibverbs for "usnic_verbs"
 * devices (see usdf_setup_fake_ibv_provider()).
 *
 * uverbs_sys_path: sysfs directory of the device being probed.
 * abi_version:     passed in by libibverbs; not used here.
 *
 * Returns &fake_dev when the device's PCI vendor ID is Cisco's, and
 * NULL in every other case (unreadable sysfs entry, unparsable value,
 * or a different vendor).
 */
static struct ibv_device *fake_driver_init(const char *uverbs_sys_path,
                                           int abi_version)
{
    /* Zero-filled, and we only ever ask for sizeof(value) - 1 bytes, so
       the buffer stays NUL-terminated for sscanf() no matter what the
       sysfs helper does about termination.  "0x1137\n" is 7 bytes, so
       the full vendor string still fits. */
    char value[8] = { 0 };
    int vendor;

    (void)abi_version;

    /* This function should only be invoked for
       /sys/class/infiniband/usnic_X devices, but double check just to
       be absolutely sure: read the vendor ID and ensure that it is
       Cisco. */
    if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor",
                            value, sizeof(value) - 1) < 0) {
        return NULL;
    }

    /* "%i" accepts the 0x-prefixed form.  If nothing could be parsed,
       vendor is left unset, so bail out rather than compare an
       indeterminate value. */
    if (sscanf(value, "%i", &vendor) != 1) {
        return NULL;
    }

    if (vendor == PCI_VENDOR_ID_CISCO) {
        return &fake_dev;
    }

    /* We didn't find a device that we want to support */
    return NULL;
}
/*
 * Install the fake libibverbs driver described at the top of this file.
 *
 * Registers fake_driver_init() as the driver-init callback under the
 * name "usnic_verbs".  Takes no arguments and returns nothing.
 */
void usdf_setup_fake_ibv_provider(void)
{
/* Register a fake driver for "usnic_verbs" devices */
ibv_register_driver("usnic_verbs", fake_driver_init);
}

Просмотреть файл

@ -750,10 +750,12 @@ usdf_rdm_inject(struct fid_ep *fep, const void *buf, size_t len,
wqe->rd_msg_id_be = htonl(msg_id);
memcpy(wqe->rd_inject_buf, buf, len);
wqe->rd_iov[0].iov_base = wqe->rd_inject_buf;
wqe->rd_iov[0].iov_len = len;
wqe->rd_last_iov = 0;
wqe->rd_cur_iov = 0;
wqe->rd_cur_ptr = buf;
wqe->rd_cur_ptr = wqe->rd_inject_buf;
wqe->rd_iov_resid = len;
wqe->rd_resid = len;
wqe->rd_length = len;

Просмотреть файл

@ -56,6 +56,7 @@
#include <linux/version.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/pci.h>
#ifndef PCI_VENDOR_ID_CISCO
#define PCI_VENDOR_ID_CISCO 0x1137
@ -204,8 +205,19 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys
#endif /*LINUX >= 3.3.0*/
#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0))
#if (!RHEL_RELEASE_CODE || (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6, 6)))
#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0))
enum pkt_hash_types {
PKT_HASH_TYPE_NONE, /* Undefined type */
PKT_HASH_TYPE_L2, /* Input: src_MAC, dest_MAC */
PKT_HASH_TYPE_L3, /* Input: src_IP, dst_IP */
PKT_HASH_TYPE_L4, /* Input: src_IP, dst_IP, src_port, dst_port */
};
#endif /*kernel < 3.13 */
#endif /* !rhel or rhel < 6.6 */
#define skb_get_hash_raw(skb) (skb)->rxhash
#endif
#define skb_set_hash(skb, hash, type) skb->rxhash = (type == PKT_HASH_TYPE_L4) ? hash : 0;
#endif /* kernel < 3.14 */
#if !defined(__VMKLNX__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24))
#define enic_wq_lock(wq_lock) spin_lock_irqsave(wq_lock, flags)

Просмотреть файл

@ -130,6 +130,53 @@ _usd_post_send_two(
return index;
}
/*
 * Post one send made of two descriptors (header, then payload), with
 * VLAN tag insertion requested on both descriptors.
 *
 * wq        work queue to post on
 * hdr       address of the header buffer; hdrlen is its length
 * pkt       address of the payload buffer; pktlen is its length
 * cq_entry  passed to the second (final) descriptor only; the first
 *           descriptor is encoded with 0 in that position
 * vlan_tag  tag value passed to both descriptors
 *
 * The first descriptor is written at wq->uwq_next_desc, the second at
 * ring slot wq->uwq_post_index.  Afterwards the posted index register is
 * written, wq->uwq_next_desc / wq->uwq_post_index are advanced, and two
 * send credits are consumed.  No credit check is performed here.
 *
 * Returns the value written to posted_index, i.e.
 * (wq->uwq_post_index on entry + 1) & wq->uwq_post_index_mask.
 */
static inline uint32_t
_usd_post_send_two_vlan(
struct usd_wq *wq,
const void *hdr,
size_t hdrlen,
const void *pkt,
size_t pktlen,
u_int8_t cq_entry,
u_int16_t vlan_tag)
{
struct vnic_wq *vwq;
uint32_t index;
struct wq_enet_desc *desc;
u_int8_t offload_mode = 0, eop;
/* NOTE(review): the meaning of mss = 7 with offload_mode = 0 is not
 * evident from this function -- confirm against the descriptor format. */
u_int16_t mss = 7, header_length = 0;
/* vlan_tag_insert = 1 is the only fixed flag set here; loopback and
 * fcoe_encap are passed as 0. */
u_int8_t vlan_tag_insert = 1, loopback = 0, fcoe_encap = 0;
vwq = &wq->uwq_vnic_wq;
desc = wq->uwq_next_desc;
index = wq->uwq_post_index;
/* First descriptor: the header buffer, eop = 0 (presumably "end of
 * packet" -- more descriptors follow). */
eop = 0;
wq_enet_desc_enc(desc, (uintptr_t)hdr, hdrlen,
mss, header_length, offload_mode,
eop, 0, fcoe_encap,
vlan_tag_insert, vlan_tag, loopback);
/* Second descriptor lives at ring slot `index`; slots are addressed in
 * 16-byte steps (index << 4) from uwq_desc_ring. */
desc = (struct wq_enet_desc *) ((uintptr_t)wq->uwq_desc_ring + (index<<4));
index = (index+1) & wq->uwq_post_index_mask;
/* Second descriptor: the payload, eop = 1, carrying the caller's
 * cq_entry value. */
eop = 1;
wq_enet_desc_enc(desc, (uintptr_t)pkt, pktlen,
mss, header_length, offload_mode,
eop, cq_entry, fcoe_encap,
vlan_tag_insert, vlan_tag, loopback);
/* Barrier between the descriptor writes and the posted_index write --
 * presumably so the device never sees the new index before the
 * descriptors are complete; do not reorder. */
wmb();
iowrite32(index, &vwq->ctrl->posted_index);
/* Advance the queue state: next_desc points at slot `index`, and
 * uwq_post_index is kept one slot ahead of it. */
wq->uwq_next_desc = (struct wq_enet_desc *)
((uintptr_t)wq->uwq_desc_ring + (index<<4));
wq->uwq_post_index = (index+1) & wq->uwq_post_index_mask;
wq->uwq_send_credits -= 2;
return index;
}
/*
* Consume iov count credits, assumes that iov[0] includes usnic header
*/

Просмотреть файл

@ -87,6 +87,49 @@ usd_post_send_one_udp_normal(
return 0;
}
/*
 * Post a single-buffer UDP send with an explicit VLAN tag.
 *
 * A copy of the destination's prebuilt UDP header is placed in this
 * post's slot of the work queue's copy buffer, its IP/UDP length fields
 * and UDP source port are filled in, and the header plus the caller's
 * buffer are posted as a two-descriptor send via
 * _usd_post_send_two_vlan().  The caller's context and length are then
 * recorded in the post-info entry for the returned index.
 *
 * Always returns 0.
 */
static int
usd_post_send_one_vlan_udp_normal(
    struct usd_qp *uqp,
    struct usd_dest *dest,
    const void *buf,
    size_t len,
    uint16_t vlan,
    uint32_t flags,
    void *context)
{
    struct usd_qp_impl *qpi = to_qpi(uqp);
    struct usd_wq *wq = &qpi->uq_wq;
    uint8_t *slot;
    struct usd_udp_hdr *udp_hdr;
    struct usd_wq_post_info *post_info;
    uint32_t posted;
    size_t ip_total;
    size_t udp_total;

    /* Each post index owns USD_SEND_MAX_COPY bytes of the copy buffer */
    slot = wq->uwq_copybuf + wq->uwq_post_index * USD_SEND_MAX_COPY;
    udp_hdr = (struct usd_udp_hdr *)slot;

    /* Start from the destination's header template */
    memcpy(udp_hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*udp_hdr));

    /* IP total length: payload plus everything after the Ethernet header.
       UDP length: the same, minus the IP header. */
    ip_total = len + sizeof(struct usd_udp_hdr) - sizeof(struct ether_header);
    udp_total = ip_total - sizeof(struct iphdr);

    udp_hdr->uh_ip.tot_len = htons(ip_total);
    udp_hdr->uh_udp.len = htons(udp_total);

    /* Source port comes from this QP's local address */
    udp_hdr->uh_udp.source =
        qpi->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;

    posted = _usd_post_send_two_vlan(wq, udp_hdr, sizeof(*udp_hdr),
                                     buf, len,
                                     USD_SF_ISSET(flags, SIGNAL), vlan);

    /* Remember the caller's context and length for this post */
    post_info = &wq->uwq_post_info[posted];
    post_info->wp_context = context;
    post_info->wp_len = len;

    return 0;
}
static int
usd_post_send_one_copy_udp_normal(
struct usd_qp *uqp,
@ -278,4 +321,5 @@ struct usd_qp_ops usd_qp_ops_udp_normal = {
.qo_post_send_one_copy = usd_post_send_one_copy_udp_normal,
.qo_post_send_two_copy = usd_post_send_two_copy_udp_normal,
.qo_post_send_iov = usd_post_send_iov_udp_normal,
.qo_post_send_one_vlan = usd_post_send_one_vlan_udp_normal,
};

Просмотреть файл

@ -134,6 +134,7 @@ struct usnic_vnic_barres_info {
struct usnic_ib_create_qp_resp_v0 {
USNIC_IB_CREATE_QP_RESP_V0_FIELDS
u32 reserved[9];
};
struct usnic_ib_create_qp_resp {
@ -146,6 +147,10 @@ struct usnic_ib_create_qp_resp {
u32 pad_to_8byte;
} v1;
} u;
/* v0 had a "reserved[9]" field, must not shrink the response or we can
* corrupt newer clients running on older kernels */
u32 reserved[6];
};
#define USNIC_CTX_RESP_VERSION 1

Просмотреть файл

@ -151,6 +151,9 @@ struct usd_qp_ops {
int (*qo_post_send_iov)(struct usd_qp *qp,
struct usd_dest *dest, const struct iovec* iov,
size_t iov_count, uint32_t flags, void *context);
int (*qo_post_send_one_vlan)(struct usd_qp *qp,
struct usd_dest *dest, const void *buf, size_t len,
u_int16_t vlan, uint32_t flags, void *context);
};
/*
@ -264,7 +267,7 @@ struct usd_device_entry {
* Send flags
*/
enum usd_send_flag_shift {
USD_SFS_SIGNAL
USD_SFS_SIGNAL,
};
#define USD_SF_SIGNAL (1 << USD_SFS_SIGNAL)
@ -539,6 +542,28 @@ usd_post_send_one(
qp, dest, buf, len, flags, context);
}
/*
 * Post a single-buffer send from registered memory to a specified VLAN.
 * IN:
 *     qp      - queue pair to post on
 *     dest    - destination of the send
 *     buf     - buffer to send
 *     len     - length of buf
 *     vlan    - VLAN tag for this send
 *     flags   - send flags
 *     context - caller context associated with this send
 * Requires 2 send credits
 *
 * Dispatches to the queue pair's qo_post_send_one_vlan operation and
 * returns whatever that operation returns.
 */
static inline int
usd_post_send_one_vlan(
    struct usd_qp *qp,
    struct usd_dest *dest,
    const void *buf,
    size_t len,
    u_int16_t vlan,
    uint32_t flags,
    void *context)
{
    int rc;

    rc = qp->uq_ops.qo_post_send_one_vlan(qp, dest, buf, len,
                                          vlan, flags, context);
    return rc;
}
/*
* post a single-buffer send from registered memory
* Caller must allow sufficient space *before* the packet for usd header

Просмотреть файл

@ -48,6 +48,7 @@
#include <infiniband/ib.h>
#include <infiniband/verbs.h>
#include <infiniband/driver.h>
#include <rdma/rdma_cma.h>
#include <rdma/fabric.h>
@ -174,6 +175,8 @@ const struct fi_domain_attr verbs_domain_attr = {
.mr_mode = FI_MR_BASIC,
.mr_key_size = sizeof_field(struct ibv_sge, lkey),
.cq_data_size = sizeof_field(struct ibv_send_wr, imm_data),
.tx_ctx_cnt = 1024,
.rx_ctx_cnt = 1024,
.max_ep_tx_ctx = 1,
.max_ep_rx_ctx = 1,
};
@ -193,7 +196,6 @@ const struct fi_rx_attr verbs_rx_attr = {
.mode = VERBS_RX_MODE,
.msg_order = VERBS_MSG_ORDER,
.total_buffered_recv = 0,
.size = 256,
};
const struct fi_tx_attr verbs_tx_attr = {
@ -202,7 +204,7 @@ const struct fi_tx_attr verbs_tx_attr = {
.op_flags = VERBS_TX_OP_FLAGS,
.msg_order = VERBS_MSG_ORDER,
.inject_size = 0,
.size = 256,
.rma_iov_limit = 1,
};
static struct fi_info *verbs_info = NULL;
@ -603,8 +605,9 @@ static int fi_ibv_rai_to_fi(struct rdma_addrinfo *rai, struct fi_info *fi)
return 0;
}
static inline int fi_ibv_get_inject_size(struct ibv_context *ctx,
const struct fi_info *hints, struct fi_info *info)
static inline int fi_ibv_get_qp_cap(struct ibv_context *ctx,
const struct fi_info *hints, struct ibv_device_attr *device_attr,
struct fi_info *info)
{
struct ibv_pd *pd;
struct ibv_cq *cq;
@ -622,13 +625,24 @@ static inline int fi_ibv_get_inject_size(struct ibv_context *ctx,
goto err1;
}
/* TODO: serialize access to string buffers */
fi_read_file(FI_CONF_DIR, "def_send_wr",
def_send_wr, sizeof def_send_wr);
fi_read_file(FI_CONF_DIR, "def_recv_wr",
def_recv_wr, sizeof def_recv_wr);
fi_read_file(FI_CONF_DIR, "def_send_sge",
def_send_sge, sizeof def_send_sge);
fi_read_file(FI_CONF_DIR, "def_recv_sge",
def_recv_sge, sizeof def_recv_sge);
memset(&init_attr, 0, sizeof init_attr);
init_attr.send_cq = cq;
init_attr.recv_cq = cq;
init_attr.cap.max_send_wr = 1;
init_attr.cap.max_recv_wr = 1;
init_attr.cap.max_send_sge = 1;
init_attr.cap.max_recv_sge = 1;
init_attr.cap.max_send_wr = atoi(def_send_wr);
init_attr.cap.max_recv_wr = atoi(def_recv_wr);
init_attr.cap.max_send_sge = MIN(atoi(def_send_sge), device_attr->max_sge);
init_attr.cap.max_recv_sge = MIN(atoi(def_recv_sge), device_attr->max_sge);
if (hints && hints->tx_attr && hints->tx_attr->inject_size) {
init_attr.cap.max_inline_data = hints->tx_attr->inject_size;
@ -647,6 +661,11 @@ static inline int fi_ibv_get_inject_size(struct ibv_context *ctx,
}
info->tx_attr->inject_size = init_attr.cap.max_inline_data;
info->tx_attr->iov_limit = init_attr.cap.max_send_sge;
info->tx_attr->size = init_attr.cap.max_send_wr;
info->rx_attr->iov_limit = init_attr.cap.max_recv_sge;
info->rx_attr->size = init_attr.cap.max_recv_wr;
ibv_destroy_qp(qp);
err2:
@ -664,24 +683,20 @@ static int fi_ibv_get_device_attrs(struct ibv_context *ctx,
struct ibv_port_attr port_attr;
int ret = 0;
ret = fi_ibv_get_inject_size(ctx, hints, info);
if (ret)
return ret;
ret = ibv_query_device(ctx, &device_attr);
if (ret)
return -errno;
info->domain_attr->cq_cnt = device_attr.max_cq;
info->domain_attr->ep_cnt = device_attr.max_qp;
/* TODO find correct optimum values for ctx_cnt */
info->domain_attr->tx_ctx_cnt = device_attr.max_qp;
info->domain_attr->rx_ctx_cnt = device_attr.max_qp;
info->domain_attr->tx_ctx_cnt = MIN(info->domain_attr->tx_ctx_cnt, device_attr.max_qp);
info->domain_attr->rx_ctx_cnt = MIN(info->domain_attr->rx_ctx_cnt, device_attr.max_qp);
info->domain_attr->max_ep_tx_ctx = device_attr.max_qp;
info->domain_attr->max_ep_rx_ctx = device_attr.max_qp;
info->tx_attr->iov_limit = device_attr.max_sge;
info->tx_attr->rma_iov_limit = device_attr.max_sge;
info->rx_attr->iov_limit = device_attr.max_sge;
ret = fi_ibv_get_qp_cap(ctx, hints, &device_attr, info);
if (ret)
return ret;
ret = ibv_query_port(ctx, 1, &port_attr);
if (ret)
@ -694,6 +709,33 @@ static int fi_ibv_get_device_attrs(struct ibv_context *ctx,
return 0;
}
/*
 * USNIC plugs into the verbs framework, but is not a usable device.
 * Manually check for devices and fail gracefully if none are present.
 * This prevents the lower libraries (libibverbs and librdmacm) from
 * reporting error messages to stderr.
 *
 * Returns 1 if at least one device in the libibverbs device list can
 * be opened, and 0 otherwise (including when no list is returned).
 */
static int fi_ibv_have_device(void)
{
	struct ibv_device **devs;
	struct ibv_context *verbs;
	int i;
	int have_device = 0;

	devs = ibv_get_device_list(NULL);
	if (!devs)
		return 0;

	for (i = 0; devs[i]; i++) {
		verbs = ibv_open_device(devs[i]);
		if (verbs) {
			/* One openable device is all we need to know about */
			ibv_close_device(verbs);
			have_device = 1;
			break;
		}
	}

	/* The list returned by ibv_get_device_list() must be released on
	 * every path; previously it was leaked on both returns. */
	ibv_free_device_list(devs);
	return have_device;
}
static int fi_ibv_init_info(const struct fi_info *hints)
{
struct ibv_context *ctx, **ctx_list;
@ -709,16 +751,23 @@ static int fi_ibv_init_info(const struct fi_info *hints)
if (verbs_info)
goto unlock;
if (!fi_ibv_have_device()) {
ret = -FI_ENODATA;
goto err1;
}
/* TODO Handle the case where multiple devices are returned */
ctx_list = rdma_get_devices(&num_devices);
if (!num_devices)
return -errno;
if (!num_devices) {
ret = (errno == ENODEV) ? -FI_ENODATA : -errno;
goto err1;
}
ctx = *ctx_list;
if (!(fi = fi_allocinfo())) {
ret = -FI_ENOMEM;
goto err1;
goto err2;
}
fi->caps = VERBS_CAPS;
@ -732,20 +781,20 @@ static int fi_ibv_init_info(const struct fi_info *hints)
ret = fi_ibv_get_device_attrs(ctx, hints, fi);
if (ret)
goto err2;
goto err3;
switch (ctx->device->transport_type) {
case IBV_TRANSPORT_IB:
if(ibv_query_gid(ctx, 1, 0, &gid)) {
ret = -errno;
goto err2;
goto err3;
}
name_len = strlen(VERBS_IB_PREFIX) + INET6_ADDRSTRLEN;
if (!(fi->fabric_attr->name = calloc(1, name_len + 1))) {
ret = -FI_ENOMEM;
goto err2;
goto err3;
}
snprintf(fi->fabric_attr->name, name_len, VERBS_IB_PREFIX "%lx",
@ -757,7 +806,7 @@ static int fi_ibv_init_info(const struct fi_info *hints)
fi->fabric_attr->name = strdup(VERBS_IWARP_FABRIC);
if (!fi->fabric_attr->name) {
ret = -FI_ENOMEM;
goto err2;
goto err3;
}
fi->ep_attr->protocol = FI_PROTO_IWARP;
@ -766,25 +815,26 @@ static int fi_ibv_init_info(const struct fi_info *hints)
default:
FI_INFO(&fi_ibv_prov, FI_LOG_CORE, "Unknown transport type");
ret = -FI_ENODATA;
goto err2;
goto err3;
}
if (!(fi->domain_attr->name = strdup(ctx->device->name))) {
ret = -FI_ENOMEM;
goto err2;
goto err3;
}
verbs_info = fi;
rdma_free_devices(ctx_list);
unlock:
pthread_mutex_unlock(&verbs_info_lock);
return 0;
err2:
err3:
fi_freeinfo(fi);
err1:
err2:
rdma_free_devices(ctx_list);
err1:
pthread_mutex_unlock(&verbs_info_lock);
return ret;
}
@ -3176,6 +3226,7 @@ static int fi_ibv_pep_listen(struct fid_pep *pep)
static struct fi_ops_cm fi_ibv_pep_cm_ops = {
.size = sizeof(struct fi_ops_cm),
.setname = fi_no_setname,
.getname = fi_ibv_pep_getname,
.getpeer = fi_no_getpeer,
.connect = fi_no_connect,
@ -3254,6 +3305,7 @@ err:
/*
 * Close a verbs fabric object.
 *
 * Frees the fabric object itself, releases the cached verbs_info and
 * resets the pointer to NULL so no stale pointer is left behind.
 * Always returns 0.
 */
static int fi_ibv_fabric_close(fid_t fid)
{
	free(fid);

	/* Release the cached info and clear the pointer in one place */
	fi_freeinfo(verbs_info);
	verbs_info = NULL;

	return 0;
}

Просмотреть файл

@ -61,6 +61,8 @@
* Indentation delineates lists and dictionaries (or they can be inline).
*/
#define FI_BUFSIZ 8192
#define TAB " "
#define CASEENUMSTR(SYM) \
@ -77,11 +79,11 @@ static void fi_remove_comma(char *buffer)
static void strcatf(char *dest, const char *fmt, ...)
{
size_t len = strlen(dest);
size_t len = strnlen(dest,FI_BUFSIZ);
va_list arglist;
va_start (arglist, fmt);
vsnprintf(&dest[len], BUFSIZ - 1 - len, fmt, arglist);
vsnprintf(&dest[len], FI_BUFSIZ - 1 - len, fmt, arglist);
va_end (arglist);
}
@ -555,7 +557,7 @@ char *DEFAULT_SYMVER_PRE(fi_tostr)(const void *data, enum fi_type datatype)
enumval = *(const int *) data;
if (!buf) {
buf = calloc(BUFSIZ, 1);
buf = calloc(FI_BUFSIZ, 1);
if (!buf)
return NULL;
}