1
1
Pull down a new embedded copy of libfabric from
https://github.com/ofiwg/libfabric.
Этот коммит содержится в:
Jeff Squyres 2015-01-24 05:48:48 -08:00
родитель ff7be586dd
Коммит 7d5755f62b
32 изменённых файлов: 532 добавлений и 223 удалений

Просмотреть файл

@ -371,6 +371,7 @@ dummy_man_pages = \
man/fi_recvv.3 \
man/fi_reject.3 \
man/fi_rx_addr.3 \
man/fi_rx_size_left.3 \
man/fi_send.3 \
man/fi_senddata.3 \
man/fi_sendmsg.3 \
@ -389,6 +390,7 @@ dummy_man_pages = \
man/fi_tsenddata.3 \
man/fi_tsendmsg.3 \
man/fi_tsendv.3 \
man/fi_tx_size_left.3 \
man/fi_wait.3 \
man/fi_wait_open.3 \
man/fi_write.3 \

Просмотреть файл

@ -1,7 +1,7 @@
This README is for userspace RDMA fabric library.
Version Libfabric v0.0.2
Released on 2015-01-15
Released on 2015-01-24
Building
========

Просмотреть файл

@ -262,6 +262,8 @@ static struct fi_ops_msg X = {
.inject = fi_no_msg_inject,
.senddata = fi_no_msg_senddata,
.injectdata = fi_no_msg_injectdata,
.rx_size_left = fi_no_msg_rx_size_left,
.tx_size_left = fi_no_msg_tx_size_left,
};
*/
ssize_t fi_no_msg_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
@ -282,6 +284,8 @@ ssize_t fi_no_msg_senddata(struct fid_ep *ep, const void *buf, size_t len, void
uint64_t data, fi_addr_t dest_addr, void *context);
ssize_t fi_no_msg_injectdata(struct fid_ep *ep, const void *buf, size_t len,
uint64_t data, fi_addr_t dest_addr);
ssize_t fi_no_msg_rx_size_left(struct fid_ep *ep);
ssize_t fi_no_msg_tx_size_left(struct fid_ep *ep);
/*
static struct fi_ops_wait X = {

Просмотреть файл

@ -376,11 +376,6 @@ static inline int fi_close(struct fid *fid)
return fid->ops->close(fid);
}
static inline int fi_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
{
return fid->ops->bind(fid, bfid, flags);
}
struct fi_alias {
struct fid **fid;
uint64_t flags;

Просмотреть файл

@ -99,6 +99,8 @@ struct fi_ops_msg {
uint64_t data, fi_addr_t dest_addr, void *context);
ssize_t (*injectdata)(struct fid_ep *ep, const void *buf, size_t len,
uint64_t data, fi_addr_t dest_addr);
ssize_t (*rx_size_left)(struct fid_ep *ep);
ssize_t (*tx_size_left)(struct fid_ep *ep);
};
struct fi_ops_cm;
@ -171,6 +173,11 @@ static inline int fi_ep_bind(struct fid_ep *ep, struct fid *bfid, uint64_t flags
return ep->fid.ops->bind(&ep->fid, bfid, flags);
}
static inline int fi_pep_bind(struct fid_pep *pep, struct fid *bfid, uint64_t flags)
{
return pep->fid.ops->bind(&pep->fid, bfid, flags);
}
static inline int fi_scalable_ep_bind(struct fid_sep *sep, struct fid *bfid, uint64_t flags)
{
return sep->fid.ops->bind(&sep->fid, bfid, flags);
@ -291,6 +298,18 @@ fi_injectdata(struct fid_ep *ep, const void *buf, size_t len,
return ep->msg->injectdata(ep, buf, len, data, dest_addr);
}
static inline ssize_t
fi_rx_size_left(struct fid_ep *ep)
{
return ep->msg->rx_size_left(ep);
}
static inline ssize_t
fi_tx_size_left(struct fid_ep *ep)
{
return ep->msg->tx_size_left(ep);
}
#else // FABRIC_DIRECT
#include <rdma/fi_direct_endpoint.h>
#endif

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_endpoint 3 "2015\-01\-08" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_endpoint 3 "2015\-01\-16" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_endpoint - Fabric endpoint operations
@ -426,6 +426,13 @@ datagram queue pairs.
protocol known as PSM, performance scaled messaging.
PSMX is an extended version of the PSM protocol to support the libfabric
interfaces.
.PP
\f[I]FI_PROTO_UDP\f[] : The protocol sends and receives UDP datagrams.
For example, an endpoint using \f[I]FI_PROTO_UDP\f[] will be able to
communicate with a remote peer that is using Berkeley
\f[I]SOCK_DGRAM\f[] sockets using \f[I]IPPROTO_UDP\f[].
.PP
\f[I]FI_PROTO_SOCK_TCP\f[] : The protocol is layered over TCP packets.
.SS protocol_version - Protocol Version
.PP
Identifies which version of the protocol is employed by the provider.
@ -954,8 +961,9 @@ Use of this flag may cause a single posted receive operation to generate
multiple completions as messages are placed into the buffer.
The placement of received data into the buffer may be subjected to
provider specific alignment restrictions.
The buffer will be freed from the endpoint when a message is received
that cannot fit into the remaining free buffer space.
The buffer will be returned to the application\[aq]s control, and an
\f[I]FI_MULTI_RECV\f[] completion will be generated, when a message is
received that cannot fit into the remaining free buffer space.
.PP
\f[I]FI_BUFFERED_RECV\f[] : If set, the communication interface
implementation should attempt to queue inbound data that arrives before

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_getinfo 3 "2015\-01\-07" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_getinfo 3 "2015\-01\-20" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_getinfo / fi_freeinfo - Obtain / free fabric interface information
@ -11,7 +11,7 @@ fi_getinfo / fi_freeinfo - Obtain / free fabric interface information
int\ fi_getinfo(int\ version,\ const\ char\ *node,\ const\ char\ *service,
\ \ \ \ \ \ \ \ uint64_t\ flags,\ struct\ fi_info\ *hints,\ struct\ fi_info\ **info);
int\ fi_freeinfo(struct\ fi_info\ *info);
void\ fi_freeinfo(struct\ fi_info\ *info);
struct\ fi_info\ *fi_dupinfo(const\ struct\ fi_info\ *info);
\f[]
@ -435,10 +435,10 @@ Such received messages will indicate a transfer size of 0 bytes.
.PP
\f[I]FI_PROV_MR_ATTR\f[] : The provider assigns one or more attributes
associated with a memory registration request.
The provider will set this mode if it returns the the memory
registration keys that applications must use, or if it requires that the
MR offset associated with a memory region be the same as the virtual
address of the memory.
The provider will set this mode if it returns the memory registration
keys that applications must use, or if it requires that the MR offset
associated with a memory region be the same as the virtual address of
the memory.
.PP
Applications that support provider MR attributes will need to exchange
MR parameters with remote peers for RMA and atomic operations.
@ -479,7 +479,7 @@ these operations.
.PP
A provider may support one or more of the following addressing formats.
In some cases, a selected addressing format may need to be translated or
mapped into into an address which is native to the fabric.
mapped into an address which is native to the fabric.
See \f[C]fi_av\f[](3).
.PP
\f[I]FI_FORMAT_UNSPEC\f[] : FI_FORMAT_UNSPEC indicates that a provider

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_msg 3 "2014\-12\-08" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_msg 3 "2015\-01\-23" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_msg - Message data transfer operations
@ -42,6 +42,10 @@ ssize_t\ fi_inject(struct\ fid_ep\ *ep,\ void\ *buf,\ size_t\ len,
ssize_t\ fi_senddata(struct\ fid_ep\ *ep,\ void\ *buf,\ size_t\ len,
\ \ \ \ void\ *desc,\ uint64_t\ data,\ fi_addr_t\ dest_addr,\ void\ *context);
ssize_t\ fi_rx_size_left(struct\ fid_ep\ *ep);
ssize_t\ fi_tx_size_left(struct\ fid_ep\ *ep);
\f[]
.fi
.SH ARGUMENTS
@ -97,6 +101,10 @@ asynchronously.
Users should not touch the posted data buffer(s) until the receive
operation has completed.
.PP
The "size_left" functions -- fi_rx_size_left, fi_tx_size_left -- return
a lower bound on the number of receive/send operations that may be
posted to the given endpoint without returning -FI_EAGAIN.
.PP
Completed message operations are reported to the user through one or
more event collectors associated with the endpoint.
Users provide context which are associated with each operation, and is
@ -173,6 +181,26 @@ The fi_recvmsg call supports posting buffers over both connected and
unconnected endpoints, with the ability to control the receive operation
per call through the use of flags.
The fi_recvmsg function takes a struct fi_msg as input.
.SS fi_rx_size_left
.PP
The fi_rx_size_left call returns a lower bound on the number of receive
operations that may be posted to the given endpoint without that
operation returning -FI_EAGAIN.
Depending on the specific details of the subsequently posted receive
operations (e.g., number of iov entries, which receive function is
called, etc.)
, it may be possible to post more receive operations than originally
indicated by fi_rx_size_left.
.SS fi_tx_size_left
.PP
The fi_tx_size_left call returns a lower bound on the number of send
operations that may be posted to the given endpoint without that
operation returning -FI_EAGAIN.
Depending on the specific details of the subsequently posted send
operations (e.g., number of iov entries, which send function is called,
etc.)
, it may be possible to post more send operations than originally
indicated by fi_tx_size_left.
.SH FLAGS
.PP
The fi_recvmsg and fi_sendmsg calls allow the user to specify flags

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_msg.3

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_msg.3

Просмотреть файл

@ -527,6 +527,11 @@ struct psmx_fid_ep {
size_t min_multi_recv;
};
struct psmx_fid_stx {
struct fid_stx stx;
struct psmx_fid_domain *domain;
};
struct psmx_fid_mr {
struct fid_mr mr;
struct psmx_fid_domain *domain;
@ -577,6 +582,8 @@ int psmx_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr,
struct fid_wait **waitset);
int psmx_ep_open(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context);
int psmx_stx_ctx(struct fid_domain *domain, struct fi_tx_attr *attr,
struct fid_stx **stx, void *context);
int psmx_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
struct fid_cq **cq, void *context);
int psmx_av_open(struct fid_domain *domain, struct fi_av_attr *attr,

Просмотреть файл

@ -438,6 +438,10 @@ int psmx_am_atomic_handler(psm_am_token_t token, psm_epaddr_t epaddr,
key = args[3].u64;
datatype = args[4].u32w0;
op = args[4].u32w1;
if (op == FI_ATOMIC_READ)
len = fi_datatype_size(datatype) * count;
assert(len == fi_datatype_size(datatype) * count);
mr = psmx_mr_hash_get(key);
@ -994,7 +998,7 @@ ssize_t _psmx_atomic_readwrite(struct fid_ep *ep,
return 0;
}
if (!buf)
if (!buf && op != FI_ATOMIC_READ)
return -EINVAL;
if (datatype < 0 || datatype >= FI_DATATYPE_LAST)
@ -1062,7 +1066,7 @@ ssize_t _psmx_atomic_readwrite(struct fid_ep *ep,
args[4].u32w1 = op;
err = psm_am_request_short((psm_epaddr_t) dest_addr,
PSMX_AM_ATOMIC_HANDLER, args, 5,
(void *)buf, len, am_flags, NULL, NULL);
(void *)buf, (buf?len:0), am_flags, NULL, NULL);
return 0;
}

Просмотреть файл

@ -191,12 +191,14 @@ static struct psmx_cq_event *psmx_cq_create_event_from_status(
case FI_CQ_FORMAT_MSG:
event->cqe.msg.op_context = op_context;
event->cqe.msg.flags = 0;
event->cqe.msg.len = psm_status->nbytes;
break;
case FI_CQ_FORMAT_DATA:
event->cqe.data.op_context = op_context;
event->cqe.data.buf = buf;
event->cqe.data.flags = 0;
event->cqe.data.len = psm_status->nbytes;
if (data)
event->cqe.data.data = data;
@ -205,6 +207,7 @@ static struct psmx_cq_event *psmx_cq_create_event_from_status(
case FI_CQ_FORMAT_TAGGED:
event->cqe.tagged.op_context = op_context;
event->cqe.tagged.buf = buf;
event->cqe.tagged.flags = 0;
event->cqe.tagged.len = psm_status->nbytes;
event->cqe.tagged.tag = psm_status->msg_tag;
if (data)

Просмотреть файл

@ -83,6 +83,8 @@ static struct fi_ops_domain psmx_domain_ops = {
.endpoint = psmx_ep_open,
.cntr_open = psmx_cntr_open,
.poll_open = psmx_poll_open,
.stx_ctx = psmx_stx_ctx,
.srx_ctx = fi_no_srx_context,
};
int psmx_domain_open(struct fid_fabric *fabric, struct fi_info *info,

Просмотреть файл

@ -160,6 +160,7 @@ static int psmx_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
struct psmx_fid_av *av;
struct psmx_fid_cq *cq;
struct psmx_fid_cntr *cntr;
struct psmx_fid_stx *stx;
int err;
ep = container_of(fid, struct psmx_fid_ep, ep.fid);
@ -222,6 +223,13 @@ static int psmx_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
return err;
break;
case FI_CLASS_STX_CTX:
stx = container_of(bfid,
struct psmx_fid_stx, stx.fid);
if (ep->domain != stx->domain)
return -EINVAL;
break;
default:
return -ENOSYS;
}
@ -361,3 +369,44 @@ int psmx_ep_open(struct fid_domain *domain, struct fi_info *info,
return 0;
}
/* STX support is essentially no-op since PSM supports only one send/recv
* context and thus always works in shared context mode.
*/
static int psmx_stx_close(fid_t fid)
{
struct psmx_fid_stx *stx;
stx = container_of(fid, struct psmx_fid_stx, stx.fid);
free(stx);
return 0;
}
static struct fi_ops psmx_fi_ops_stx = {
.size = sizeof(struct fi_ops),
.close = psmx_stx_close,
.bind = fi_no_bind,
.control = fi_no_control,
};
int psmx_stx_ctx(struct fid_domain *domain, struct fi_tx_attr *attr,
struct fid_stx **stx, void *context)
{
struct psmx_fid_stx *stx_priv;
psmx_debug("%s\n", __func__);
stx_priv = (struct psmx_fid_stx *) calloc(1, sizeof *stx_priv);
if (!stx_priv)
return -ENOMEM;
stx_priv->stx.fid.fclass = FI_CLASS_STX_CTX;
stx_priv->stx.fid.context = context;
stx_priv->stx.fid.ops = &psmx_fi_ops_stx;
stx_priv->domain = container_of(domain, struct psmx_fid_domain, domain.fid);
*stx = &stx_priv->stx;
return 0;
}

Просмотреть файл

@ -270,8 +270,12 @@ static int psmx_mr_reg(struct fid_domain *domain, const void *buf, size_t len,
uint64_t key;
domain_priv = container_of(domain, struct psmx_fid_domain, domain);
if (!(domain_priv->mode & FI_PROV_MR_ATTR) && psmx_mr_hash_get(requested_key))
if (flags & FI_MR_KEY) {
if (domain_priv->mode & FI_PROV_MR_ATTR)
return -FI_EBADFLAGS;
if (psmx_mr_hash_get(requested_key))
return -FI_ENOKEY;
}
mr_priv = (struct psmx_fid_mr *) calloc(1, sizeof(*mr_priv) + sizeof(struct iovec));
if (!mr_priv)
@ -281,7 +285,7 @@ static int psmx_mr_reg(struct fid_domain *domain, const void *buf, size_t len,
mr_priv->mr.fid.context = context;
mr_priv->mr.fid.ops = &psmx_fi_ops;
mr_priv->mr.mem_desc = mr_priv;
if (!(domain_priv->mode & FI_PROV_MR_ATTR)) {
if (flags & FI_MR_KEY) {
key = requested_key;
}
else {
@ -293,10 +297,12 @@ static int psmx_mr_reg(struct fid_domain *domain, const void *buf, size_t len,
mr_priv->domain = domain_priv;
mr_priv->access = access;
mr_priv->flags = flags;
mr_priv->offset = (flags & FI_MR_OFFSET) ? offset : 0;
mr_priv->iov_count = 1;
mr_priv->iov[0].iov_base = (void *)buf;
mr_priv->iov[0].iov_len = len;
mr_priv->offset = (flags & FI_MR_OFFSET) ?
((uint64_t)mr_priv->iov[0].iov_base - offset) :
0;
psmx_mr_hash_add(mr_priv);
@ -316,8 +322,12 @@ static int psmx_mr_regv(struct fid_domain *domain,
uint64_t key;
domain_priv = container_of(domain, struct psmx_fid_domain, domain);
if (!(domain_priv->mode & FI_PROV_MR_ATTR) && psmx_mr_hash_get(requested_key))
if (flags & FI_MR_KEY) {
if (domain_priv->mode & FI_PROV_MR_ATTR)
return -FI_EBADFLAGS;
if (psmx_mr_hash_get(requested_key))
return -FI_ENOKEY;
}
if (count == 0 || iov == NULL)
return -EINVAL;
@ -332,7 +342,7 @@ static int psmx_mr_regv(struct fid_domain *domain,
mr_priv->mr.fid.context = context;
mr_priv->mr.fid.ops = &psmx_fi_ops;
mr_priv->mr.mem_desc = mr_priv;
if (!(domain_priv->mode & FI_PROV_MR_ATTR)) {
if (flags & FI_MR_KEY) {
key = requested_key;
}
else {
@ -344,12 +354,14 @@ static int psmx_mr_regv(struct fid_domain *domain,
mr_priv->domain = domain_priv;
mr_priv->access = access;
mr_priv->flags = flags;
mr_priv->offset = (flags & FI_MR_OFFSET) ? offset : 0;
mr_priv->iov_count = count;
for (i=0; i<count; i++)
mr_priv->iov[i] = iov[i];
psmx_mr_normalize_iov(mr_priv->iov, &mr_priv->iov_count);
mr_priv->offset = (flags & FI_MR_OFFSET) ?
((uint64_t)mr_priv->iov[0].iov_base - offset) :
0;
psmx_mr_hash_add(mr_priv);
*mr = &mr_priv->mr;
@ -366,8 +378,12 @@ static int psmx_mr_regattr(struct fid_domain *domain, const struct fi_mr_attr *a
uint64_t key;
domain_priv = container_of(domain, struct psmx_fid_domain, domain);
if (!(domain_priv->mode & FI_PROV_MR_ATTR) && psmx_mr_hash_get(attr->requested_key))
if (flags & FI_MR_KEY) {
if (domain_priv->mode & FI_PROV_MR_ATTR)
return -FI_EBADFLAGS;
if (psmx_mr_hash_get(attr->requested_key))
return -FI_ENOKEY;
}
if (!attr)
return -EINVAL;
@ -382,9 +398,10 @@ static int psmx_mr_regattr(struct fid_domain *domain, const struct fi_mr_attr *a
return -ENOMEM;
mr_priv->mr.fid.fclass = FI_CLASS_MR;
mr_priv->mr.fid.context = attr->context;
mr_priv->mr.fid.ops = &psmx_fi_ops;
mr_priv->mr.mem_desc = mr_priv;
if (!(domain_priv->mode & FI_PROV_MR_ATTR)) {
if (flags & FI_MR_KEY) {
key = attr->requested_key;
}
else {
@ -394,17 +411,16 @@ static int psmx_mr_regattr(struct fid_domain *domain, const struct fi_mr_attr *a
}
mr_priv->mr.key = key;
mr_priv->domain = domain_priv;
mr_priv->access = FI_READ | FI_WRITE | FI_REMOTE_READ | FI_REMOTE_WRITE;
mr_priv->access = attr->access;
mr_priv->flags = flags;
mr_priv->offset = (flags & FI_MR_OFFSET) ? attr->offset : 0;
mr_priv->iov_count = attr->iov_count;
for (i=0; i<attr->iov_count; i++)
mr_priv->iov[i] = attr->mr_iov[i];
mr_priv->mr.fid.context = attr->context;
mr_priv->access = attr->access;
psmx_mr_normalize_iov(mr_priv->iov, &mr_priv->iov_count);
mr_priv->offset = (flags & FI_MR_OFFSET) ?
((uint64_t)mr_priv->iov[0].iov_base - attr->offset) :
0;
psmx_mr_hash_add(mr_priv);
*mr = &mr_priv->mr;

Просмотреть файл

@ -359,5 +359,7 @@ struct fi_ops_msg psmx_msg_ops = {
.inject = psmx_inject,
.senddata = fi_no_msg_senddata,
.injectdata = fi_no_msg_injectdata,
.rx_size_left = fi_no_msg_rx_size_left,
.tx_size_left = fi_no_msg_tx_size_left,
};

Просмотреть файл

@ -626,5 +626,7 @@ struct fi_ops_msg psmx_msg2_ops = {
.inject = psmx_inject2,
.senddata = fi_no_msg_senddata,
.injectdata = fi_no_msg_injectdata,
.rx_size_left = fi_no_msg_rx_size_left,
.tx_size_left = fi_no_msg_tx_size_left,
};

Просмотреть файл

@ -136,6 +136,7 @@ struct sock_conn_map {
int used;
int size;
struct sock_domain *domain;
fastlock_t lock;
};
struct sock_domain {
@ -151,7 +152,6 @@ struct sock_domain {
enum fi_progress progress_mode;
struct index_map mr_idm;
struct sock_pe *pe;
struct sock_conn_map u_cmap;
struct sock_conn_map r_cmap;
pthread_t listen_thread;
int listening;
@ -848,9 +848,14 @@ int sock_av_compare_addr(struct sock_av *av,
struct sock_conn *sock_conn_map_lookup_key(struct sock_conn_map *conn_map,
uint16_t key);
uint16_t sock_conn_map_connect(struct sock_domain *dom,
struct sock_conn_map *map,
struct sockaddr_in *addr);
uint16_t sock_conn_map_lookup(struct sock_conn_map *map,
struct sockaddr_in *addr);
uint16_t sock_conn_map_match_or_connect(struct sock_domain *dom,
struct sock_conn_map *map,
struct sockaddr_in *addr, int match_only);
struct sockaddr_in *addr);
int sock_conn_listen(struct sock_domain *domain);
int sock_conn_map_clear_pe_entry(struct sock_conn *conn_entry, uint16_t key);
void sock_conn_map_destroy(struct sock_conn_map *cmap);

Просмотреть файл

@ -60,9 +60,9 @@ fi_addr_t sock_av_lookup_key(struct sock_av *av, int key)
idx = av_addr - &av->table[0];
if (!av->key[idx]) {
av->key[idx] = sock_conn_map_match_or_connect(
av->domain, av->cmap,
(struct sockaddr_in*)&av_addr->addr, 1);
av->key[idx] = sock_conn_map_lookup(
av->cmap,
(struct sockaddr_in*)&av_addr->addr);
if (!av->key[idx]) {
continue;
}
@ -123,7 +123,7 @@ struct sock_conn *sock_av_lookup_addr(struct sock_av *av,
if (!av->key[idx]) {
av->key[idx] = sock_conn_map_match_or_connect(
av->domain, av->cmap,
(struct sockaddr_in*)&av_addr->addr, 0);
(struct sockaddr_in*)&av_addr->addr);
if (!av->key[idx]) {
SOCK_LOG_ERROR("failed to match or connect to addr %lu\n", addr);
errno = EINVAL;

Просмотреть файл

@ -53,9 +53,9 @@
#include "sock.h"
#include "sock_util.h"
static int _init_map(struct sock_conn_map *map, int init_size)
static int sock_conn_map_init(struct sock_conn_map *map, int init_size)
{
map->table = (struct sock_conn*)calloc(init_size,
map->table = (struct sock_conn*)calloc(init_size,
sizeof(struct sock_conn));
if (!map->table)
return -FI_ENOMEM;
@ -64,7 +64,7 @@ static int _init_map(struct sock_conn_map *map, int init_size)
return 0;
}
static int _increase_map(struct sock_conn_map *map, int new_size)
static int sock_conn_map_increase(struct sock_conn_map *map, int new_size)
{
if (map->used + new_size > map->size) {
void *_table = realloc(map->table, map->size * sizeof(struct
@ -75,7 +75,6 @@ static int _increase_map(struct sock_conn_map *map, int new_size)
map->size = MAX(map->size, new_size) * 2;
map->table = (struct sock_conn*) _table;
}
return 0;
}
@ -103,32 +102,60 @@ struct sock_conn *sock_conn_map_lookup_key(struct sock_conn_map *conn_map,
#define SOCK_ADDR_IN_PORT(sa)SOCK_ADDR_IN_PTR(sa)->sin_port
#define SOCK_ADDR_IN_ADDR(sa)SOCK_ADDR_IN_PTR(sa)->sin_addr
uint16_t sock_conn_map_match_or_connect(struct sock_domain *dom,
struct sock_conn_map *map,
struct sockaddr_in *addr, int match_only)
uint16_t sock_conn_map_lookup(struct sock_conn_map *map,
struct sockaddr_in *addr)
{
int i, conn_fd, arg, optval;
socklen_t optlen;
char sa_ip[INET_ADDRSTRLEN];
int i;
struct sockaddr_in *entry;
struct timeval tv;
fd_set fds;
struct sock_conn *conn;
/* match */
fastlock_acquire(&map->lock);
for (i=0; i < map->used; i++) {
entry = (struct sockaddr_in *)&(map->table[i].addr);
if ((SOCK_ADDR_IN_ADDR(entry).s_addr ==
SOCK_ADDR_IN_ADDR(addr).s_addr) &&
(SOCK_ADDR_IN_PORT(entry) == SOCK_ADDR_IN_PORT(addr))) {
fastlock_release(&map->lock);
return i+1;
}
}
fastlock_release(&map->lock);
return 0;
}
if (match_only)
return 0;
static int sock_conn_map_insert(struct sock_conn_map *map,
struct sockaddr_in *addr,
int conn_fd)
{
int index;
fastlock_acquire(&map->lock);
if (map->size == map->used) {
if (sock_conn_map_increase(map, map->size * 2)) {
fastlock_release(&map->lock);
return 0;
}
}
index = map->used;
memcpy(&map->table[index].addr, addr, sizeof *addr);
map->table[index].sock_fd = conn_fd;
sock_comm_buffer_init(&map->table[index]);
map->used++;
fastlock_release(&map->lock);
return index + 1;
}
uint16_t sock_conn_map_connect(struct sock_domain *dom,
struct sock_conn_map *map,
struct sockaddr_in *addr)
{
int conn_fd, optval, ret;
char sa_ip[INET_ADDRSTRLEN];
unsigned short reply;
struct timeval tv;
socklen_t optlen;
uint64_t flags;
fd_set fds;
/* no matching entry, connect */
conn_fd = socket(AF_INET, SOCK_STREAM, 0);
if (conn_fd < 0) {
SOCK_LOG_ERROR("failed to create conn_fd, errno: %d\n", errno);
@ -137,22 +164,13 @@ uint16_t sock_conn_map_match_or_connect(struct sock_domain *dom,
optval = 1;
setsockopt(conn_fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof optval);
optval = 1;
setsockopt(conn_fd, SOL_SOCKET, SO_REUSEPORT, &optval, sizeof optval);
fcntl(conn_fd, F_SETFL, O_NONBLOCK);
memcpy(sa_ip, inet_ntoa(addr->sin_addr), INET_ADDRSTRLEN);
SOCK_LOG_INFO("Connecting to: %s:%d\n",
sa_ip, ntohs(((struct sockaddr_in*)addr)->sin_port));
if (bind(conn_fd, (struct sockaddr*)&dom->src_addr,
sizeof(struct sockaddr_in)) != 0) {
SOCK_LOG_ERROR("Cannot bind to src_addr\n");
return 0;
}
SOCK_LOG_INFO("Binding conn_fd to port: %d\n",
ntohs(((struct sockaddr_in*)&dom->src_addr)->sin_port));
flags = fcntl(conn_fd, F_GETFL, 0);
fcntl(conn_fd, F_SETFL, flags | O_NONBLOCK);
if (connect(conn_fd, addr, sizeof *addr) < 0) {
if (errno == EINPROGRESS) {
@ -167,53 +185,79 @@ uint16_t sock_conn_map_match_or_connect(struct sock_domain *dom,
if (optval) {
SOCK_LOG_ERROR("failed to connect %d - %s\n", optval,
strerror(optval));
strerror(optval));
close(conn_fd);
return 0;
}
} else {
SOCK_LOG_ERROR("Timeout or error to connect %d - %s\n", optval,
strerror(optval));
strerror(optval));
close(conn_fd);
return 0;
}
} else {
SOCK_LOG_ERROR("Error connecting %d - %s\n", errno,
strerror(errno));
strerror(errno));
close(conn_fd);
return 0;
}
}
flags = fcntl(conn_fd, F_GETFL, 0);
flags &= (~O_NONBLOCK);
fcntl(conn_fd, F_SETFL, flags);
ret = send(conn_fd,
&((struct sockaddr_in*)&dom->src_addr)->sin_port,
sizeof(unsigned short), 0);
if (ret != sizeof(unsigned short)) {
SOCK_LOG_ERROR("Cannot exchange port\n");
return 0;
}
arg = fcntl(conn_fd, F_GETFL, NULL);
arg &= (~O_NONBLOCK);
fcntl(conn_fd, F_SETFL, arg);
ret = recv(conn_fd,
&reply, sizeof(unsigned short), 0);
if (ret != sizeof(unsigned short)) {
SOCK_LOG_ERROR("Cannot exchange port: %d\n", ret);
return 0;
}
memcpy(&map->table[map->used].addr, addr, sizeof *addr);
map->table[map->used].sock_fd = conn_fd;
conn = &map->table[map->used];
sock_comm_buffer_init(conn);
map->used++;
return map->used;
reply = ntohs(reply);
SOCK_LOG_INFO("Connect response: %d\n", ntohs(reply));
if (reply == 0) {
ret = sock_conn_map_insert(map, addr, conn_fd);
} else {
ret = sock_conn_map_lookup(map, addr);
close(conn_fd);
}
return ret;
}
static void * _sock_conn_listen(void *arg)
uint16_t sock_conn_map_match_or_connect(struct sock_domain *dom,
struct sock_conn_map *map,
struct sockaddr_in *addr)
{
uint16_t index;
return (index = sock_conn_map_lookup(map, addr)) ?
index : sock_conn_map_connect(dom, map, addr);
}
static void *_sock_conn_listen(void *arg)
{
struct sock_domain *domain = (struct sock_domain*) arg;
struct sock_conn_map *map = &domain->r_cmap;
struct addrinfo *s_res = NULL, *p;
struct addrinfo hints;
int optval, flags, tmp;
int listen_fd = 0, conn_fd;
int listen_fd = 0, conn_fd, ret;
struct sockaddr_in remote;
socklen_t addr_size;
struct sock_conn *conn;
struct pollfd poll_fds[2];
char service[NI_MAXSERV];
struct sockaddr_in addr;
char sa_ip[INET_ADDRSTRLEN];
unsigned short port;
uint16_t index;
memset(&hints, 0, sizeof(hints));
hints.ai_family = AF_INET;
@ -235,13 +279,9 @@ static void * _sock_conn_listen(void *arg)
fcntl(listen_fd, F_SETFL, flags | O_NONBLOCK);
optval = 1;
setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof
optval);
optval = 1;
setsockopt(listen_fd, SOL_SOCKET, SO_REUSEPORT, &optval, sizeof
optval);
setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &optval,
sizeof optval);
if (!bind(listen_fd, s_res->ai_addr, s_res->ai_addrlen))
break;
close(listen_fd);
@ -263,7 +303,7 @@ static void * _sock_conn_listen(void *arg)
SOCK_LOG_INFO("Bound to port: %d\n", domain->service);
}
if (listen(listen_fd, 128)) {
if (listen(listen_fd, 0)) {
SOCK_LOG_ERROR("failed to listen socket: %d\n", errno);
goto err;
}
@ -294,21 +334,25 @@ static void * _sock_conn_listen(void *arg)
addr_size = sizeof(struct sockaddr_in);
getpeername(conn_fd, &remote, &addr_size);
memcpy(sa_ip, inet_ntoa(remote.sin_addr), INET_ADDRSTRLEN);
SOCK_LOG_INFO("ACCEPT: %s, %d\n", sa_ip, ntohs(remote.sin_port));
/* TODO: lock for multi-threads */
if ((map->size - map->used) == 0) {
_increase_map(map, map->size*2);
}
memcpy(&map->table[map->used].addr, &remote, addr_size);
map->table[map->used].sock_fd = conn_fd;
ret = recv(conn_fd, &port, sizeof(port), 0);
if (ret != sizeof(port))
SOCK_LOG_ERROR("Cannot exchange port\n");
conn = &map->table[map->used];
sock_comm_buffer_init(conn);
remote.sin_port = port;
SOCK_LOG_INFO("Remote port: %d\n", ntohs(port));
index = sock_conn_map_lookup(map, &remote);
port = (index) ? 1 : 0;
ret = send(conn_fd, &port, sizeof(port), 0);
if (ret != sizeof(port))
SOCK_LOG_ERROR("Cannot exchange port\n");
map->used++;
if (index == 0)
sock_conn_map_insert(map, &remote, conn_fd);
else
close(conn_fd);
}
close(listen_fd);
@ -322,7 +366,7 @@ err:
int sock_conn_listen(struct sock_domain *domain)
{
_init_map(&domain->r_cmap, 128); /* TODO: init cmap size */
sock_conn_map_init(&domain->r_cmap, 128); /* TODO: init cmap size */
pthread_create(&domain->listen_thread, 0, _sock_conn_listen, domain);
return 0;
}

Просмотреть файл

@ -133,10 +133,9 @@ static int sock_dom_close(struct fid *fid)
return -FI_EBUSY;
}
if (dom->u_cmap.size)
sock_conn_map_destroy(&dom->u_cmap);
if (dom->r_cmap.size)
sock_conn_map_destroy(&dom->r_cmap);
fastlock_destroy(&dom->r_cmap.lock);
sock_pe_finalize(dom->pe);
fastlock_destroy(&dom->lock);
@ -267,7 +266,8 @@ static int sock_regattr(struct fid_domain *domain, const struct fi_mr_attr *attr
_mr->access = attr->access;
_mr->flags = flags;
_mr->offset = (flags & FI_MR_OFFSET) ?
attr->offset : (uintptr_t) attr->mr_iov[0].iov_base;
(uintptr_t) attr->mr_iov[0].iov_base + attr->offset :
(uintptr_t) attr->mr_iov[0].iov_base;
fastlock_acquire(&dom->lock);
key = (dom->info.mode & FI_PROV_MR_ATTR) ?
@ -456,8 +456,7 @@ int sock_domain(struct fid_fabric *fabric, struct fi_info *info,
}
sock_domain->r_cmap.domain = sock_domain;
sock_domain->u_cmap.domain = sock_domain;
fastlock_init(&sock_domain->r_cmap.lock);
if(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_domain->signal_fds) < 0)
goto err;

Просмотреть файл

@ -952,6 +952,8 @@ int sock_stx_ctx(struct fid_domain *domain,
return -FI_ENOMEM;
tx_ctx->domain = dom;
tx_ctx->fid.ctx.fid.fclass = FI_CLASS_STX_CTX;
tx_ctx->fid.stx.fid.ops = &sock_ctx_ops;
tx_ctx->fid.stx.ops = &sock_ep_ops;
atomic_inc(&dom->ref);
@ -1188,7 +1190,7 @@ struct sock_conn *sock_ep_lookup_conn(struct sock_ep *ep)
{
if (!ep->key) {
ep->key = sock_conn_map_match_or_connect(
ep->domain, &ep->domain->r_cmap, ep->dest_addr, 0);
ep->domain, &ep->domain->r_cmap, ep->dest_addr);
if (!ep->key) {
SOCK_LOG_ERROR("failed to match or connect to addr\n");
errno = EINVAL;

Просмотреть файл

@ -317,6 +317,8 @@ struct fi_ops_msg sock_ep_msg_ops = {
.sendmsg = sock_ep_sendmsg,
.inject = sock_ep_inject,
.senddata = sock_ep_senddata,
.rx_size_left = fi_no_msg_rx_size_left,
.tx_size_left = fi_no_msg_tx_size_left,
};
static ssize_t sock_ep_trecvmsg(struct fid_ep *ep,

Просмотреть файл

@ -552,23 +552,26 @@ static int sock_pe_process_rx_write(struct sock_pe *pe, struct sock_rx_ctx *rx_c
rem = pe_entry->msg_hdr.msg_len - len;
for (i = 0; rem > 0 && i < pe_entry->msg_hdr.dest_iov_len; i++) {
mr = sock_mr_verify_key(rx_ctx->domain,
pe_entry->pe.rx.rx_iov[i].iov.key,
(void*)pe_entry->pe.rx.rx_iov[i].iov.addr,
pe_entry->pe.rx.rx_iov[i].iov.len,
FI_REMOTE_WRITE);
if (!mr) {
SOCK_LOG_ERROR("Remote memory access error: %p, %lu, %lu\n",
(void*)pe_entry->pe.rx.rx_iov[i].iov.addr,
pe_entry->pe.rx.rx_iov[i].iov.len,
pe_entry->pe.rx.rx_iov[i].iov.key);
sock_pe_send_response(pe, pe_entry, 0, SOCK_OP_WRITE_ERROR);
break;
if ((len - pe_entry->done_len) == pe_entry->pe.rx.rx_iov[i].iov.addr) {
mr = sock_mr_verify_key(rx_ctx->domain,
pe_entry->pe.rx.rx_iov[i].iov.key,
(void*)pe_entry->pe.rx.rx_iov[i].iov.addr,
pe_entry->pe.rx.rx_iov[i].iov.len,
FI_REMOTE_WRITE);
if (!mr) {
SOCK_LOG_ERROR("Remote memory access error: %p, %lu, %lu\n",
(void*)pe_entry->pe.rx.rx_iov[i].iov.addr,
pe_entry->pe.rx.rx_iov[i].iov.len,
pe_entry->pe.rx.rx_iov[i].iov.key);
sock_pe_send_response(pe, pe_entry, 0,
SOCK_OP_WRITE_ERROR);
break;
}
if (mr->flags & FI_MR_OFFSET)
pe_entry->pe.rx.rx_iov[i].iov.addr += mr->offset;
}
if (mr->flags & FI_MR_OFFSET)
pe_entry->pe.rx.rx_iov[i].iov.addr += mr->offset;
if (sock_pe_recv_field(pe_entry,
(void*)pe_entry->pe.rx.rx_iov[i].iov.addr,
pe_entry->pe.rx.rx_iov[i].iov.len, len))

Просмотреть файл

@ -315,6 +315,44 @@ usdf_dgram_inject(struct fid_ep *fep, const void *buf, size_t len,
return 0;
}
ssize_t usdf_dgram_rx_size_left(struct fid_ep *fep)
{
struct usdf_ep *ep;
if (fep == NULL)
return -FI_EINVAL;
ep = ep_ftou(fep);
if (ep->e.dg.ep_qp == NULL)
return -FI_EOPBADSTATE; /* EP not enabled */
/* NOTE-SIZE-LEFT: divide by constant right now, rather than keeping
* track of the rx_attr->iov_limit value we gave to the user. This
* sometimes under-reports the number of RX ops that could be posted,
* but it avoids touching a cache line that we don't otherwise need.
*
* sendv/recvv could potentially post iov_limit+1 descriptors
*/
return usd_get_recv_credits(ep->e.dg.ep_qp) / (USDF_DGRAM_DFLT_SGE + 1);
}
ssize_t usdf_dgram_tx_size_left(struct fid_ep *fep)
{
struct usdf_ep *ep;
if (fep == NULL)
return -FI_EINVAL;
ep = ep_ftou(fep);
if (ep->e.dg.ep_qp == NULL)
return -FI_EOPBADSTATE; /* EP not enabled */
/* see NOTE-SIZE-LEFT */
return usd_get_send_credits(ep->e.dg.ep_qp) / (USDF_DGRAM_DFLT_SGE + 1);
}
/*
* Versions that rely on user to reserve space for header at start of buffer
*/
@ -482,4 +520,40 @@ usdf_dgram_prefix_sendmsg(struct fid_ep *fep, const struct fi_msg *msg, uint64_t
(fi_addr_t)msg->addr, msg->context);
}
ssize_t usdf_dgram_prefix_rx_size_left(struct fid_ep *fep)
{
struct usdf_ep *ep;
if (fep == NULL)
return -FI_EINVAL;
ep = ep_ftou(fep);
if (ep->e.dg.ep_qp == NULL)
return -FI_EOPBADSTATE; /* EP not enabled */
/* prefix_recvv can post up to iov_limit descriptors
*
* also see NOTE-SIZE-LEFT */
return (usd_get_recv_credits(ep->e.dg.ep_qp) / USDF_DGRAM_DFLT_SGE);
}
ssize_t usdf_dgram_prefix_tx_size_left(struct fid_ep *fep)
{
struct usdf_ep *ep;
if (fep == NULL)
return -FI_EINVAL;
ep = ep_ftou(fep);
if (ep->e.dg.ep_qp == NULL)
return -FI_EOPBADSTATE; /* EP not enabled */
/* prefix_sendvcan post up to iov_limit descriptors
*
* also see NOTE-SIZE-LEFT */
return (usd_get_send_credits(ep->e.dg.ep_qp) / USDF_DGRAM_DFLT_SGE);
}

Просмотреть файл

@ -56,6 +56,8 @@ ssize_t usdf_dgram_inject(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr);
ssize_t usdf_dgram_senddata(struct fid_ep *ep, const void *buf, size_t len,
void *desc, uint64_t data, fi_addr_t dest_addr, void *context);
ssize_t usdf_dgram_rx_size_left(struct fid_ep *ep);
ssize_t usdf_dgram_tx_size_left(struct fid_ep *ep);
ssize_t usdf_dgram_prefix_recv(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr, void *context);
@ -69,5 +71,7 @@ ssize_t usdf_dgram_prefix_sendv(struct fid_ep *fep, const struct iovec *iov,
void **desc, size_t count, fi_addr_t dest_addr, void *context);
ssize_t usdf_dgram_prefix_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags);
ssize_t usdf_dgram_prefix_rx_size_left(struct fid_ep *ep);
ssize_t usdf_dgram_prefix_tx_size_left(struct fid_ep *ep);
#endif /* _USDF_DGRAM_H_ */

Просмотреть файл

@ -76,6 +76,15 @@ usdf_ep_dgram_enable(struct fid_ep *fep)
ep = ep_ftou(fep);
if (ep->e.dg.ep_wcq == NULL) {
ret = -FI_EOPBADSTATE;
goto fail;
}
if (ep->e.dg.ep_rcq == NULL) {
ret = -FI_EOPBADSTATE;
goto fail;
}
filt.uf_type = USD_FTY_UDP_SOCK;
filt.uf_filter.uf_udp_sock.u_sock = ep->e.dg.ep_sock;
@ -90,7 +99,7 @@ usdf_ep_dgram_enable(struct fid_ep *fep)
&filt,
&ep->e.dg.ep_qp);
} else {
ret = -EAGAIN;
ret = -FI_EAGAIN;
}
if (ret != 0) {
@ -261,6 +270,10 @@ usdf_ep_dgram_close(fid_t fid)
usdf_ep_dgram_deref_cq(ep->e.dg.ep_wcq);
usdf_ep_dgram_deref_cq(ep->e.dg.ep_rcq);
if (ep->e.dg.ep_sock != -1) {
close(ep->e.dg.ep_sock);
}
free(ep);
return 0;
}
@ -286,6 +299,8 @@ static struct fi_ops_msg usdf_dgram_ops = {
.inject = usdf_dgram_inject,
.senddata = usdf_dgram_senddata,
.injectdata = fi_no_msg_injectdata,
.rx_size_left = usdf_dgram_rx_size_left,
.tx_size_left = usdf_dgram_tx_size_left,
};
static struct fi_ops_msg usdf_dgram_prefix_ops = {
@ -299,6 +314,8 @@ static struct fi_ops_msg usdf_dgram_prefix_ops = {
.inject = usdf_dgram_inject,
.senddata = usdf_dgram_senddata,
.injectdata = fi_no_msg_injectdata,
.rx_size_left = usdf_dgram_prefix_rx_size_left,
.tx_size_left = usdf_dgram_prefix_tx_size_left,
};
static struct fi_ops_cm usdf_cm_dgram_ops = {

Просмотреть файл

@ -446,6 +446,7 @@ usdf_pep_open(struct fid_fabric *fabric, struct fi_info *info,
struct usdf_pep *pep;
struct usdf_fabric *fp;
int ret;
int optval;
if (info->ep_type != FI_EP_MSG) {
return -FI_ENODEV;
@ -485,6 +486,16 @@ usdf_pep_open(struct fid_fabric *fabric, struct fi_info *info,
goto fail;
}
/* set SO_REUSEADDR to prevent annoying "Address already in use" errors
* on successive runs of programs listening on a well known port */
optval = 1;
ret = setsockopt(pep->pep_sock, SOL_SOCKET, SO_REUSEADDR, &optval,
sizeof(optval));
if (ret == -1) {
ret = -errno;
goto fail;
}
ret = bind(pep->pep_sock, (struct sockaddr *)info->src_addr,
info->src_addrlen);
if (ret == -1) {

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2013-2014 Intel Corporation, Inc. All rights reserved.
* Copyright (c) 2013-2015 Intel Corporation, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -72,7 +72,7 @@
#define VERBS_CAPS (FI_MSG | FI_RMA | FI_ATOMICS | FI_READ | FI_WRITE | \
FI_SEND | FI_RECV | FI_REMOTE_READ | FI_REMOTE_WRITE | \
FI_REMOTE_CQ_DATA | FI_REMOTE_COMPLETE)
FI_REMOTE_CQ_DATA | FI_REMOTE_SIGNAL)
#define VERBS_MODE (FI_LOCAL_MR | FI_PROV_MR_ATTR)
#define VERBS_MSG_ORDER (FI_ORDER_RAR | FI_ORDER_RAW | FI_ORDER_RAS | \
FI_ORDER_WAW | FI_ORDER_WAS | FI_ORDER_SAW | FI_ORDER_SAS )
@ -127,6 +127,7 @@ struct fi_ibv_msg_ep {
uint32_t inline_size;
};
static const char *local_node = "localhost";
static char def_send_wr[16] = "384";
static char def_recv_wr[16] = "384";
static char def_send_sge[16] = "4";
@ -371,7 +372,10 @@ static int fi_ibv_check_info(struct fi_info *info)
return -FI_ENODATA;
}
if (!(info->caps & VERBS_CAPS) && info->caps)
if (info->caps && (info->caps & ~VERBS_CAPS))
return -FI_ENODATA;
if ((info->mode & VERBS_MODE) != VERBS_MODE)
return -FI_ENODATA;
if (info->fabric_attr) {
@ -433,9 +437,6 @@ static int fi_ibv_check_dev_limits(struct fi_domain_attr *domain_attr,
return 0;
}
/*
* TODO: this is not the full set of checks which are needed
*/
static int fi_ibv_fi_to_rai(struct fi_info *fi, uint64_t flags, struct rdma_addrinfo *rai)
{
memset(rai, 0, sizeof *rai);
@ -443,30 +444,19 @@ static int fi_ibv_fi_to_rai(struct fi_info *fi, uint64_t flags, struct rdma_addr
rai->ai_flags = RAI_PASSIVE;
if (flags & FI_NUMERICHOST)
rai->ai_flags |= RAI_NUMERICHOST;
// if (fi->flags & FI_FAMILY)
// rai->ai_flags |= RAI_FAMILY;
// rai->ai_family = fi->sa_family;
if (fi->ep_type == FI_EP_MSG || fi->caps & FI_RMA || (fi->ep_attr &&
(fi->ep_attr->protocol == FI_PROTO_RDMA_CM_IB_RC ||
fi->ep_attr->protocol == FI_PROTO_IWARP))) {
rai->ai_qp_type = IBV_QPT_RC;
rai->ai_port_space = RDMA_PS_TCP;
} else if (fi->ep_type == FI_EP_DGRAM || (fi->ep_attr &&
fi->ep_attr->protocol == FI_PROTO_IB_UD)) {
rai->ai_qp_type = IBV_QPT_UD;
rai->ai_port_space = RDMA_PS_UDP;
}
rai->ai_qp_type = IBV_QPT_RC;
rai->ai_port_space = RDMA_PS_TCP;
if (fi->src_addrlen) {
if (fi && fi->src_addrlen) {
if (!(rai->ai_src_addr = malloc(fi->src_addrlen)))
return ENOMEM;
return -FI_ENOMEM;
memcpy(rai->ai_src_addr, fi->src_addr, fi->src_addrlen);
rai->ai_src_len = fi->src_addrlen;
}
if (fi->dest_addrlen) {
if (fi && fi->dest_addrlen) {
if (!(rai->ai_dst_addr = malloc(fi->dest_addrlen)))
return ENOMEM;
return -FI_ENOMEM;
memcpy(rai->ai_dst_addr, fi->dest_addr, fi->dest_addrlen);
rai->ai_dst_len = fi->dest_addrlen;
}
@ -477,27 +467,20 @@ static int fi_ibv_fi_to_rai(struct fi_info *fi, uint64_t flags, struct rdma_addr
static int fi_ibv_rai_to_fi(struct rdma_addrinfo *rai, struct fi_info *hints,
struct fi_info *fi)
{
// fi->sa_family = rai->ai_family;
if (rai->ai_qp_type == IBV_QPT_RC || rai->ai_port_space == RDMA_PS_TCP) {
fi->caps |= FI_MSG | FI_RMA;
fi->ep_type = FI_EP_MSG;
} else if (rai->ai_qp_type == IBV_QPT_UD ||
rai->ai_port_space == RDMA_PS_UDP) {
fi->ep_attr->protocol = FI_PROTO_IB_UD;
fi->caps |= FI_MSG;
fi->ep_type = FI_EP_DGRAM;
}
fi->caps = VERBS_CAPS;
fi->mode = VERBS_MODE;
fi->ep_type = FI_EP_MSG;
if (rai->ai_src_len) {
if (!(fi->src_addr = malloc(rai->ai_src_len)))
return ENOMEM;
return -FI_ENOMEM;
memcpy(fi->src_addr, rai->ai_src_addr, rai->ai_src_len);
fi->src_addrlen = rai->ai_src_len;
fi->addr_format = FI_SOCKADDR;
}
if (rai->ai_dst_len) {
if (!(fi->dest_addr = malloc(rai->ai_dst_len)))
return ENOMEM;
return -FI_ENOMEM;
memcpy(fi->dest_addr, rai->ai_dst_addr, rai->ai_dst_len);
fi->dest_addrlen = rai->ai_dst_len;
fi->addr_format = FI_SOCKADDR;
@ -603,17 +586,19 @@ fi_ibv_getepinfo(const char *node, const char *service,
ret = fi_ibv_check_info(hints);
if (ret)
return ret;
ret = fi_ibv_fi_to_rai(hints, flags, &rai_hints);
if (ret)
return ret;
ret = rdma_getaddrinfo((char *) node, (char *) service,
&rai_hints, &rai);
} else {
ret = rdma_getaddrinfo((char *) node, (char *) service,
NULL, &rai);
}
ret = fi_ibv_fi_to_rai(hints, flags, &rai_hints);
if (ret)
return ret;
if (!node && !rai_hints.ai_src_addr && !rai_hints.ai_dst_addr) {
node = local_node;
rai_hints.ai_flags |= RAI_PASSIVE;
}
ret = rdma_getaddrinfo((char *) node, (char *) service,
&rai_hints, &rai);
if (ret)
return (errno == ENODEV) ? -FI_ENODATA : -errno;
@ -923,6 +908,8 @@ static struct fi_ops_msg fi_ibv_msg_ep_msg_ops = {
.inject = fi_no_msg_inject,
.senddata = fi_ibv_msg_ep_senddata,
.injectdata = fi_no_msg_injectdata,
.rx_size_left = fi_no_msg_rx_size_left,
.tx_size_left = fi_no_msg_tx_size_left,
};
static ssize_t
@ -2499,6 +2486,9 @@ static int fi_ibv_open_device_by_name(struct fi_ibv_domain *domain, const char *
struct ibv_context **dev_list;
int i, ret = -FI_ENODEV;
if (!name)
return -FI_EINVAL;
dev_list = rdma_get_devices(NULL);
if (!dev_list)
return -errno;

Просмотреть файл

@ -349,6 +349,14 @@ ssize_t fi_no_msg_injectdata(struct fid_ep *ep, const void *buf, size_t len,
{
return -FI_ENOSYS;
}
ssize_t fi_no_msg_rx_size_left(struct fid_ep *ep)
{
return -FI_ENOSYS;
}
ssize_t fi_no_msg_tx_size_left(struct fid_ep *ep)
{
return -FI_ENOSYS;
}
/*
* struct fi_ops_wait

Просмотреть файл

@ -136,10 +136,6 @@ static void fi_ini(void)
if (init)
goto unlock;
fi_register_provider(VERBS_INIT);
fi_register_provider(PSM_INIT);
fi_register_provider(USNIC_INIT);
#ifdef HAVE_LIBDL
struct dirent **liblist;
int n;
@ -182,8 +178,14 @@ static void fi_ini(void)
free(liblist);
done:
#endif
fi_register_provider(PSM_INIT);
fi_register_provider(USNIC_INIT);
fi_register_provider(VERBS_INIT);
fi_register_provider(SOCKETS_INIT);
init = 1;
unlock:
pthread_mutex_unlock(&ini_lock);
}
@ -206,50 +208,6 @@ static struct fi_prov *fi_getprov(const char *prov_name)
return NULL;
}
__attribute__((visibility ("default")))
int fi_getinfo_(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
struct fi_prov *prov;
struct fi_info *tail, *cur;
int ret = -FI_ENOSYS;
if (!init)
fi_ini();
*info = tail = NULL;
for (prov = prov_head; prov; prov = prov->next) {
if (!prov->provider->getinfo)
continue;
if (hints->fabric_attr && hints->fabric_attr->prov_name &&
strcmp(prov->provider->name, hints->fabric_attr->prov_name))
continue;
ret = prov->provider->getinfo(version, node, service, flags,
hints, &cur);
if (ret) {
if (ret == -FI_ENODATA)
continue;
break;
}
if (!*info)
*info = cur;
else
tail->next = cur;
for (tail = cur; tail->next; tail = tail->next) {
tail->fabric_attr->prov_name = strdup(prov->provider->name);
tail->fabric_attr->prov_version = prov->provider->version;
}
tail->fabric_attr->prov_name = strdup(prov->provider->name);
tail->fabric_attr->prov_version = prov->provider->version;
}
return *info ? 0 : ret;
}
default_symver(fi_getinfo_, fi_getinfo);
__attribute__((visibility ("default")))
void fi_freeinfo_(struct fi_info *info)
{
@ -277,6 +235,55 @@ void fi_freeinfo_(struct fi_info *info)
}
default_symver(fi_freeinfo_, fi_freeinfo);
__attribute__((visibility ("default")))
int fi_getinfo_(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
struct fi_prov *prov;
struct fi_info *tail, *cur;
int ret = -FI_ENOSYS;
if (!init)
fi_ini();
*info = tail = NULL;
for (prov = prov_head; prov; prov = prov->next) {
if (!prov->provider->getinfo)
continue;
if (hints->fabric_attr && hints->fabric_attr->prov_name &&
strcmp(prov->provider->name, hints->fabric_attr->prov_name))
continue;
ret = prov->provider->getinfo(version, node, service, flags,
hints, &cur);
if (ret) {
if (ret == -FI_ENODATA) {
continue;
} else {
/* a provider has an error, clean up and bail */
fi_freeinfo_(*info);
*info = NULL;
return ret;
}
}
if (!*info)
*info = cur;
else
tail->next = cur;
for (tail = cur; tail->next; tail = tail->next) {
tail->fabric_attr->prov_name = strdup(prov->provider->name);
tail->fabric_attr->prov_version = prov->provider->version;
}
tail->fabric_attr->prov_name = strdup(prov->provider->name);
tail->fabric_attr->prov_version = prov->provider->version;
}
return *info ? 0 : ret;
}
default_symver(fi_getinfo_, fi_getinfo);
__attribute__((visibility ("default")))
struct fi_info *fi_dupinfo_(const struct fi_info *info)
{