1
1

libfabric: update to Github libfabric 0d7daf720f04

Этот коммит содержится в:
Jeff Squyres 2015-03-26 14:39:23 -07:00
родитель 7f3aab9cfa
Коммит a85edb8ad4
83 изменённых файлов: 2909 добавлений и 2533 удалений

Просмотреть файл

@ -27,7 +27,8 @@ rdmainclude_HEADERS =
# internal utility functions shared by in-tree providers:
common_srcs = \
src/common.c \
src/enosys.c
src/enosys.c \
src/log.c
# ensure dl-built providers link back to libfabric
linkback = $(top_builddir)/src/libfabric.la
@ -42,7 +43,6 @@ src_libfabric_la_SOURCES = \
include/prov.h \
src/fabric.c \
src/fi_tostr.c \
src/log.c \
$(common_srcs)
if MACOS
@ -216,7 +216,7 @@ _usnic_files = \
_usnic_cppflags = \
-D__LIBUSNIC__ \
-DHAVE_LIBNL3=$(HAVE_LIBNL3) $(usnic_libnl_CPPFLAGS) \
-DHAVE_LIBNL3=$(HAVE_LIBNL3) $(usnic_nl_CPPFLAGS) \
-I$(top_srcdir)/prov/usnic/src/usnic_direct
rdmainclude_HEADERS += \
@ -226,13 +226,16 @@ if HAVE_USNIC_DL
pkglib_LTLIBRARIES += libusnic-fi.la
libusnic_fi_la_CPPFLAGS = $(AM_CPPFLAGS) $(_usnic_cppflags)
libusnic_fi_la_SOURCES = $(_usnic_files) $(common_srcs)
libusnic_fi_la_LDFLAGS = -module -avoid-version -shared -export-dynamic
libusnic_fi_la_LIBADD = $(linkback) $(usnic_libnl_LIBS)
# Linker flags for the usnic provider when built as a loadable plugin.
# The libnl flags use the usnic_nl_ prefix, matching usnic_nl_CPPFLAGS and
# usnic_nl_LIBS; the misspelled usnic_ln_LDFLAGS expanded to nothing.
libusnic_fi_la_LDFLAGS = \
$(usnic_nl_LDFLAGS) \
-module -avoid-version -shared -export-dynamic
libusnic_fi_la_LIBADD = $(linkback) $(usnic_nl_LIBS)
libusnic_fi_la_DEPENDENCIES = $(linkback)
else !HAVE_USNIC_DL
src_libfabric_la_SOURCES += $(_usnic_files)
src_libfabric_la_CPPFLAGS += $(_usnic_cppflags)
src_libfabric_la_LIBADD += $(usnic_libnl_LIBS)
src_libfabric_la_LDFLAGS += $(usnic_nl_LDFLAGS)
src_libfabric_la_LIBADD += $(usnic_nl_LIBS)
endif !HAVE_USNIC_DL
endif HAVE_USNIC
@ -328,6 +331,11 @@ real_man_pages = \
man/man3/fi_trigger.3 \
man/man3/fi_version.3 \
man/man7/fabric.7 \
man/man7/fi_provider.7 \
man/man7/fi_psm.7 \
man/man7/fi_sockets.7 \
man/man7/fi_usnic.7 \
man/man7/fi_verbs.7 \
man/man7/fi_direct.7
dummy_man_pages = \
@ -397,7 +405,9 @@ dummy_man_pages = \
man/man3/fi_mr_regattr.3 \
man/man3/fi_mr_regv.3 \
man/man3/fi_open.3 \
man/man3/fi_open_ops.3 \
man/man3/fi_passive_ep.3 \
man/man3/fi_pep_bind.3 \
man/man3/fi_poll_add.3 \
man/man3/fi_poll_del.3 \
man/man3/fi_poll_open.3 \
@ -410,6 +420,7 @@ dummy_man_pages = \
man/man3/fi_reject.3 \
man/man3/fi_rx_addr.3 \
man/man3/fi_rx_size_left.3 \
man/man3/fi_scalable_ep_bind.3 \
man/man3/fi_send.3 \
man/man3/fi_senddata.3 \
man/man3/fi_sendmsg.3 \

Просмотреть файл

@ -1,7 +1,7 @@
This README is for userspace RDMA fabric library.
Version Libfabric v1.0.0rc3
Released on 2015-03-12
Released on 2015-03-26
Building
========

Просмотреть файл

@ -28,7 +28,7 @@
/* Define to 1 if you have the `dl' library (-ldl). */
#undef HAVE_LIBDL
/* set to 1 if should use libnl v3, set to 0 for libnl v11 */
/* Whether we have libnl or libnl3 */
#undef HAVE_LIBNL3
/* Define to 1 if you have the `pthread' library (-lpthread). */
@ -37,6 +37,12 @@
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Define to 1 if you have the <netlink/netlink.h> header file. */
#undef HAVE_NETLINK_NETLINK_H
/* Define to 1 if you have the <netlink/version.h> header file. */
#undef HAVE_NETLINK_VERSION_H
/* psm provider is built */
#undef HAVE_PSM

Просмотреть файл

@ -85,10 +85,12 @@ AC_DEFUN([FI_PROVIDER_SETUP],[
[AC_MSG_WARN([$1 provider was selected to be built as DL])
AC_MSG_WARN([but libfabric is being built as static-only])
AC_MSG_ERROR([This is an impossible situation. Cannot continue.])])
AC_MSG_NOTICE([$1 provider: build as plugin])
],
[PROVIDERS_STATIC="prov/$1/lib$1.la $PROVIDERS_STATIC"])
[PROVIDERS_STATIC="prov/$1/lib$1.la $PROVIDERS_STATIC"
AC_MSG_NOTICE([$1 provider: include in libfabric])])
],
[AC_MSG_NOTICE([$1 provider disabled])])
[AC_MSG_NOTICE([$1 provider: disabled])])
AC_DEFINE_UNQUOTED([HAVE_]m4_translit([$1], [a-z], [A-Z]), $$1_happy, [$1 provider is built])
AC_DEFINE_UNQUOTED([HAVE_]m4_translit([$1], [a-z], [A-Z])[_DL], $$1_dl, [$1 provider is built as DSO])

Просмотреть файл

@ -197,7 +197,6 @@ int fi_read_file(const char *dir, const char *file, char *buf, size_t size);
int fi_poll_fd(int fd, int timeout);
int fi_wait_cond(pthread_cond_t *cond, pthread_mutex_t *mut, int timeout);
int fi_sockaddr_len(struct sockaddr *addr);
size_t fi_datatype_size(enum fi_datatype datatype);
uint64_t fi_tag_bits(uint64_t mem_tag_format);
uint64_t fi_tag_format(uint64_t tag_bits);

Просмотреть файл

@ -64,7 +64,7 @@ int fi_no_ops_open(struct fid *fid, const char *name,
static struct fi_ops_fabric X = {
.size = sizeof(struct fi_ops_fabric),
.domain = fi_no_domain,
.endpoint = fi_no_passive_ep,
.passive_ep = fi_no_passive_ep,
.eq_open = fi_no_eq_open,
.wait_open = fi_no_wait_open,
};
@ -150,7 +150,7 @@ int fi_no_atomic_compwritevalid(struct fid_ep *ep,
/*
static struct fi_ops_cm X = {
.size = sizeof(struct fi_ops_cm),
.getname = X,
.getname = fi_no_getname,
.getpeer = fi_no_getpeer,
.connect = fi_no_connect,
.listen = fi_no_listen,
@ -169,24 +169,13 @@ int fi_no_reject(struct fid_pep *pep, fi_connreq_t connreq,
const void *param, size_t paramlen);
int fi_no_shutdown(struct fid_ep *ep, uint64_t flags);
/*
static struct fi_ops_av X = {
.size = sizeof(struct fi_ops_av),
.insert = X,
.insertsvc = X,
.insertsym = X,
.remove = X,
.lookup = X,
.straddr = X,
};
*/
/*
static struct fi_ops_domain X = {
.size = sizeof(struct fi_ops_domain),
.av_open = fi_no_av_open,
.cq_open = fi_no_cq_open,
.endpoint = fi_no_endpoint,
.scalable_ep = fi_no_scalable_ep,
.cntr_open = fi_no_cntr_open,
.poll_open = fi_no_poll_open,
.stx_ctx = fi_no_stx_context,
@ -199,6 +188,8 @@ int fi_no_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
struct fid_cq **cq, void *context);
int fi_no_endpoint(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context);
int fi_no_scalable_ep(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **sep, void *context);
int fi_no_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
struct fid_cntr **cntr, void *context);
int fi_no_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr,
@ -229,7 +220,6 @@ int fi_no_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr,
/*
static struct fi_ops_ep X = {
.size = sizeof(struct fi_ops_ep),
.enable = fi_no_enable,
.cancel = fi_no_cancel,
.getopt = fi_no_getopt,
.setopt = fi_no_setopt,
@ -239,7 +229,6 @@ static struct fi_ops_ep X = {
.tx_size_left = fi_no_tx_size_left,
};
*/
int fi_no_enable(struct fid_ep *ep);
ssize_t fi_no_cancel(fid_t fid, void *context);
int fi_no_getopt(fid_t fid, int level, int optname,
void *optval, size_t *optlen);
@ -298,6 +287,8 @@ static struct fi_ops_wait X = {
static struct fi_ops_poll X = {
.size = sizeof(struct fi_ops_poll),
.poll = X,
.poll_add = X,
.poll_del = X,
};
*/
@ -323,8 +314,9 @@ static struct fi_ops_cq X = {
.readfrom = fi_no_cq_readfrom,
.readerr = X,
.write = fi_no_cq_write,
.writeerr = fi_no_cq_writeerr,
.sread = fi_no_cq_sread,
.sreadfrom = fi_no_cq_readfrom,
.sreadfrom = fi_no_cq_sreadfrom,
.strerror = X,
};
*/
@ -426,7 +418,15 @@ ssize_t fi_no_tagged_search(struct fid_ep *ep, uint64_t *tag, uint64_t ignore,
uint64_t flags, fi_addr_t *src_addr, size_t *len, void *context);
/*
* fi_ops_av
static struct fi_ops_av X = {
.size = sizeof(struct fi_ops_av),
.insert = fi_no_av_insert,
.insertsvc = fi_no_av_insertsvc,
.insertsym = fi_no_av_insertsym,
.remove = fi_no_av_remove,
.lookup = X,
.straddr = X,
};
*/
int fi_no_av_insert(struct fid_av *av, const void *addr, size_t count,
fi_addr_t *fi_addr, uint64_t flags, void *context);

Просмотреть файл

@ -239,7 +239,7 @@ static inline int dlistfd_empty(struct dlistfd_head *head)
static inline void dlistfd_signal(struct dlistfd_head *head)
{
if (head->fdwcnt == head->fdrcnt) {
(void) write(head->fd[LIST_WRITE_FD], head, sizeof head);
if (write(head->fd[LIST_WRITE_FD], head, sizeof head) == sizeof head)
head->fdwcnt++;
}
}
@ -248,7 +248,7 @@ static inline void dlistfd_reset(struct dlistfd_head *head)
{
void *buf;
if (dlistfd_empty(head) && (head->fdrcnt < head->fdwcnt)) {
(void) read(head->fd[LIST_READ_FD], &buf, sizeof buf);
if (read(head->fd[LIST_READ_FD], &buf, sizeof buf) == sizeof buf)
head->fdrcnt++;
}
}

Просмотреть файл

@ -213,7 +213,7 @@ static inline size_t rbfdavail(struct ringbuffd *rbfd)
static inline void rbfdsignal(struct ringbuffd *rbfd)
{
if (rbfd->fdwcnt == rbfd->fdrcnt) {
write(rbfd->fd[RB_WRITE_FD], rbfd, sizeof rbfd);
if (write(rbfd->fd[RB_WRITE_FD], rbfd, sizeof rbfd) == sizeof rbfd)
rbfd->fdwcnt++;
}
}
@ -223,7 +223,7 @@ static inline void rbfdreset(struct ringbuffd *rbfd)
void *buf;
if (rbfdempty(rbfd) && (rbfd->fdrcnt < rbfd->fdwcnt)) {
read(rbfd->fd[RB_READ_FD], &buf, sizeof buf);
if (read(rbfd->fd[RB_READ_FD], &buf, sizeof buf) == sizeof buf)
rbfd->fdrcnt++;
}
}

Просмотреть файл

@ -157,7 +157,7 @@ enum {
#define FI_ADDR_UNSPEC UINT64_MAX
#define FI_ADDR_NOTAVAIL UINT64_MAX
#define FI_SHARED_CONTEXT UINT64_MAX
#define FI_SHARED_CONTEXT (-(size_t)1)
typedef uint64_t fi_addr_t;
FI_DEFINE_HANDLE(fi_connreq_t);

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_cq 3 "2015\-02\-23" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_cq 3 "2015\-03\-24" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_cq - Completion queue operations
@ -463,7 +463,8 @@ fi_cq_read / fi_cq_readfrom / fi_cq_readerr fi_cq_sread /
fi_cq_sreadfrom : On success, returns the number of completion events
retrieved from the completion queue.
On error, a negative value corresponding to fabric errno is returned.
On timeout, -FI_ETIMEDOUT is returned.
If no completions are available to return from the CQ, -FI_EAGAIN will
be returned.
.PP
fi_cq_write / fi_cq_writeerr : On success, returns the number of bytes
read from or written to the completion queue.

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_domain 3 "2015\-02\-28" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_domain 3 "2015\-03\-24" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_domain - Open a fabric access domain
@ -77,6 +77,10 @@ that the provider should perform all memory registration operations
asynchronously, with the completion reported through the event queue.
If an event queue is not bound to the domain with the FI_REG_MR flag,
then memory registration requests complete synchronously.
.PP
See \f[C]fi_av_bind\f[](3), \f[C]fi_ep_bind\f[](3),
\f[C]fi_mr_bind\f[](3), \f[C]fi_pep_bind\f[](3), and
\f[C]fi_scalable_ep_bind\f[](3) for more information.
.SS fi_close
.PP
The fi_close call is used to release all resources associated with a
@ -174,8 +178,8 @@ Conceptually, FI_THREAD_ENDPOINT maps well to providers that implement
fabric services in hardware but use a single command queue to access
different data flows.
.PP
\f[I]FI_THREAD_COMPLETION\f[] The completion threading model is intended
for providers that make use of manual progress.
\f[I]FI_THREAD_COMPLETION\f[] : The completion threading model is
intended for providers that make use of manual progress.
Applications must serialize access to all objects that are associated
through the use of having a shared completion structure.
This includes endpoint, completion queue, counter, wait set, and poll
@ -466,6 +470,6 @@ vectors.
.SH SEE ALSO
.PP
\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_av\f[](3),
\f[C]fi_eq\f[](3), \f[C]fi_mr\f[](3)
\f[C]fi_ep\f[](3), \f[C]fi_eq\f[](3), \f[C]fi_mr\f[](3)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_endpoint 3 "2015\-02\-27" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_endpoint 3 "2015\-03\-23" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_endpoint - Fabric endpoint operations
@ -458,7 +458,7 @@ an endpoint.
.nf
\f[C]
struct\ fi_ep_attr\ {
\ \ \ \ enum\ fi_ep_type\ ep_type;
\ \ \ \ enum\ fi_ep_type\ type;
\ \ \ \ uint32_t\ \ \ \ \ \ \ \ protocol;
\ \ \ \ uint32_t\ \ \ \ \ \ \ \ protocol_version;
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ max_msg_size;
@ -1080,16 +1080,27 @@ flag will be ignored by the target.
The local endpoint must be configured with the FI_REMOTE_SIGNAL
capability in order to specify this flag.
.PP
\f[I]FI_REMOTE_COMPLETE\f[] : Indicates that local completions should
not be generated until the operation has completed on the remote side.
When set, if the target endpoint experiences an error receiving the
transferred data, that error will be reported back to the initiator of
the request.
\f[I]FI_REMOTE_COMPLETE\f[] : Generally, this flag indicates that an
operation will not complete until it has been accepted into the fabric
and acknowledged by a remote service.
When used with unreliable endpoints, local completions should not be
generated until the associated operation has been successfully delivered
into the fabric.
For example, the corresponding messages have been placed on the wire.
When used with reliable endpoints, this flag indicates that the
operation will not complete until it has been acknowledged by the
target, or a proxy for the target that is responsible for ensuring its
reliable delivery.
For example, this flag often implies that a completion is not generated
until an ack has been received from the target.
.PP
Note that when set, if the target endpoint experiences an error
receiving the transferred data, that error will often be reported back
to the initiator of the request.
This includes errors which may not normally be reported to the
initiator.
For example, if the receive data is truncated at the target because the
provided receive buffer is too small, the initiator will be notified of
the truncation.
initiator, such as remote buffer overruns.
.PP
\f[I]FI_REMOTE_COMMIT\f[] : This flag is defined for future use.
.SH NOTES
.PP
Users should call fi_close to release all resources allocated to the

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_eq 3 "2015\-03\-11" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_eq 3 "2015\-03\-24" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_eq - Event queue operations
@ -28,28 +28,26 @@ int\ fi_close(struct\ fid\ *eq);
int\ fi_control(struct\ fid\ *eq,\ int\ command,\ void\ *arg);
int\ fi_eq_read(struct\ fid_eq\ *eq,\ uint32_t\ *event,
ssize_t\ fi_eq_read(struct\ fid_eq\ *eq,\ uint32_t\ *event,
\ \ \ \ void\ *buf,\ size_t\ len,\ uint64_t\ flags);
int\ fi_eq_readerr(struct\ fid_eq\ *eq,\ struct\ fi_eq_err_entry\ *buf,
\ \ \ \ size_t\ len,\ uint64_t\ flags);
ssize_t\ fi_eq_readerr(struct\ fid_eq\ *eq,\ struct\ fi_eq_err_entry\ *buf,
\ \ \ \ uint64_t\ flags);
int\ fi_eq_write(struct\ fid_eq\ *eq,\ uint32_t\ event,
ssize_t\ fi_eq_write(struct\ fid_eq\ *eq,\ uint32_t\ event,
\ \ \ \ const\ void\ *buf,\ size_t\ len,\ uint64_t\ flags);
int\ fi_eq_sread(struct\ fid_eq\ *eq,\ uint32_t\ *event,
ssize_t\ fi_eq_sread(struct\ fid_eq\ *eq,\ uint32_t\ *event,
\ \ \ \ void\ *buf,\ size_t\ len,\ int\ timeout,\ uint64_t\ flags);
const\ char\ *\ fi_eq_strerror(struct\ fid_eq\ *eq,\ int\ prov_errno,
\ \ \ \ \ \ const\ void\ *err_data,\ void\ *buf,\ size_t\ len);
\ \ \ \ \ \ const\ void\ *err_data,\ char\ *buf,\ size_t\ len);
\f[]
.fi
.SH ARGUMENTS
.PP
\f[I]fabric\f[] : Opened fabric descriptor
.PP
\f[I]domain\f[] : Open resource domain
.PP
\f[I]eq\f[] : Event queue
.PP
\f[I]attr\f[] : Event queue attributes
@ -258,7 +256,7 @@ Connection notifications are reported using
struct\ fi_eq_cm_entry\ {
\ \ \ \ fid_t\ \ \ \ \ \ \ \ \ \ \ \ fid;\ \ \ \ \ \ \ \ /*\ fid\ associated\ with\ request\ */
\ \ \ \ struct\ fi_info\ \ *info;\ \ \ \ \ \ \ /*\ endpoint\ information\ */
\ \ \ \ uint8_t\ \ \ \ \ \ \ \ \ data[0];\ \ \ \ \ /*\ app\ connection\ data\ */
\ \ \ \ uint8_t\ \ \ \ \ \ \ \ \ data[];\ \ \ \ \ /*\ app\ connection\ data\ */
};
\f[]
.fi
@ -344,7 +342,6 @@ struct\ fi_eq_err_entry\ {
\ \ \ \ fid_t\ \ \ \ \ \ \ \ \ \ \ \ fid;\ \ \ \ \ \ \ \ /*\ fid\ associated\ with\ error\ */
\ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ *context;\ \ \ \ /*\ operation\ context\ */
\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ data;\ \ \ \ \ \ \ /*\ completion-specific\ data\ */
\ \ \ \ uint32_t\ \ \ \ \ \ \ \ \ index;\ \ \ \ \ \ /*\ index\ for\ vector\ ops\ */
\ \ \ \ int\ \ \ \ \ \ \ \ \ \ \ \ \ \ err;\ \ \ \ \ \ \ \ /*\ positive\ error\ code\ */
\ \ \ \ int\ \ \ \ \ \ \ \ \ \ \ \ \ \ prov_errno;\ /*\ provider\ error\ code\ */
\ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ *err_data;\ \ \ /*\ additional\ error\ data\ */
@ -378,18 +375,15 @@ read past the end of the referenced buffer.
fi_eq_open : Returns 0 on success.
On error, a negative value corresponding to fabric errno is returned.
.PP
fi_eq_read / fi_eq_readerr
.PD 0
.P
.PD
fi_eq_sread
.PD 0
.P
.PD
fi_eq_write : On success, returns the number of bytes read from or
written to the event queue.
fi_eq_read / fi_eq_readerr / fi_eq_sread : On success, returns the
number of bytes read from the event queue.
On error, a negative value corresponding to fabric errno is returned.
If no data is available to be read from the event queue, -FI_EAGAIN is
returned.
.PP
fi_eq_write : On success, returns the number of bytes written to the
event queue.
On error, a negative value corresponding to fabric errno is returned.
On timeout, fi_eq_sread returns -FI_ETIMEDOUT.
.PP
fi_eq_strerror : Returns a character string interpretation of the
provider specific error returned with a completion.

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_fabric 3 "2015\-01\-24" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_fabric 3 "2015\-03\-16" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_fabric - Fabric domain operations
@ -131,6 +131,19 @@ A fabric identifier.
.SS prov_name
.PP
The name of the underlying fabric provider.
.PP
For debugging and administrative purposes, environment variables can be
used to control which fabric providers will be registered with
libfabric.
Specifying "FI_PROVIDER=foo,bar" will allow any providers with the names
"foo" or "bar" to be registered.
Similarly, specifying "FI_PROVIDER=^foo,bar" will prevent any providers
with the names "foo" or "bar" from being registered.
Providers which are not registered will not appear in fi_getinfo
results.
Applications which need a specific set of providers should implement
their own filtering of fi_getinfo\[aq]s results rather than relying on
these environment variables in a production setting.
.SS prov_version
.PP
Version information for the fabric provider.

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_getinfo 3 "2015\-02\-28" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_getinfo 3 "2015\-03\-19" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_getinfo / fi_freeinfo - Obtain / free fabric interface information
@ -328,11 +328,6 @@ FI_REMOTE_SIGNAL flag on data transfer operations.
Support requires marking outbound data transfers as signaled and
handling incoming transfers appropriately.
.PP
\f[I]FI_REMOTE_COMPLETE\f[] : Indicates that the endpoint support the
FI_REMOTE_COMPLETE flag on data transfer operations.
Support requires marking outbound data transfers as using remote
completions and responding to incoming transfers appropriately.
.PP
\f[I]FI_CANCEL\f[] : Indicates that the user desires the ability to
cancel outstanding data transfer operations.
If FI_CANCEL is not set, a provider may optimize code paths with the
@ -368,7 +363,7 @@ FI_NAMED_RX_CTX, FI_DIRECTED_RECV, FI_READ, FI_WRITE, FI_RECV, FI_SEND,
FI_REMOTE_READ, and FI_REMOTE_WRITE.
.PP
Secondary capabilities: FI_DYNAMIC_MR, FI_MULTI_RECV, FI_SOURCE,
FI_CANCEL, FI_FENCE, FI_REMOTE_COMPLETE
FI_CANCEL, FI_FENCE
.SH MODE
.PP
The operational mode bits are used to convey requirements that an

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_domain.3

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_endpoint.3

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_endpoint.3

Просмотреть файл

@ -1,4 +1,4 @@
.TH fabric 7 "2014\-12\-15" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fabric 7 "2015\-03\-16" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
Fabric Interface Library
@ -32,7 +32,7 @@ All fabric hardware devices and their software drivers are required to
support this framework.
Devices and the drivers that plug into the libfabric framework are
referred to as fabric providers, or simply providers.
Provider details may be found in fi_prov.
Provider details may be found in \f[C]fi_provider\f[](7).
.PP
\f[I]Fabric Interfaces\f[] : The second component is a set of
communication operations.
@ -41,6 +41,7 @@ can support.
It is not required that providers implement all the interfaces that are
defined; however, providers clearly indicate which interfaces they do
support.
.SH FABRIC INTERFACES
.PP
The fabric interfaces are designed such that they are cohesive and not
simply a union of disjoint interfaces.
@ -167,70 +168,11 @@ Atomic operations include well-known functionality, such as atomic-add
and compare-and-swap, plus several other pre-defined calls.
Unlike other data transfer interfaces, atomic operations are aware of
the data formatting at the target memory region.
.SH PROVIDER REQUIREMENTS
.PP
Libfabric provides a general framework for supporting multiple types of
fabric objects and their related interfaces.
Fabric providers have a large amount of flexibility in selecting which
components they are able and willing to support, based on specific
hardware constraints.
To assist in the development of applications, libfabric specifies the
following requirements that must be met by any fabric provider, if
requested by an application.
(Note that the instantiation of a specific fabric object is subject to
application configuration parameters and need not meet these
requirements).
.IP \[bu] 2
A fabric provider must support at least one endpoint type.
.IP \[bu] 2
All endpoints must support the message queue data transfer interface.
.IP \[bu] 2
An endpoint that advertises support for a specific endpoint capability
must support the corresponding data transfer interface.
.IP \[bu] 2
Endpoints must support operations to send and receive data for any data
transfer operations that they support.
.IP \[bu] 2
Connectionless endpoints must support all relevant data transfer
routines.
(send / recv / write / read / etc.)
.IP \[bu] 2
Connectionless endpoints must support the CM interface getname.
.IP \[bu] 2
Connectionless endpoints that support multicast operations must support
the CM interfaces join and leave.
.IP \[bu] 2
Connection-oriented interfaces must support the CM interfaces getname,
getpeer, connect, listen, accept, reject, and shutdown.
.IP \[bu] 2
All endpoints must support all relevant \[aq]msg\[aq] data transfer
routines.
(sendmsg / recvmsg / writemsg / readmsg / etc.)
.IP \[bu] 2
Access domains must support opening address vector maps and tables.
.IP \[bu] 2
Address vectors associated with domains that may be identified using IP
addresses must support FI_SOCKADDR_IN and FI_SOCKADDR_IN6 input formats.
.IP \[bu] 2
Address vectors must support FI_ADDR, FI_ADDR_INDEX, and FI_AV output
formats.
.IP \[bu] 2
Access domains must support opening completion queues and counters.
.IP \[bu] 2
Completion queues must support the FI_CQ_FORMAT_CONTEXT and
FI_CQ_FORMAT_MSG formats.
.IP \[bu] 2
Event queues associated with tagged message transfers must support the
FI_CQ_FORMAT_TAGGED format.
.IP \[bu] 2
A provider is expected to be forward compatible, and must be able to be
compiled against expanded \f[C]fi_xxx_ops\f[] structures that define new
functions added after the provider was written.
Any unknown functions must be set to NULL.
.SH SEE ALSO
.PP
\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3),
\f[C]fi_av\f[](3), \f[C]fi_eq\f[](3), \f[C]fi_cq\f[](3),
\f[C]fi_cntr\f[](3), \f[C]fi_mr\f[](3)
\f[C]fi_provider\f[](7), \f[C]fi_getinfo\f[](3),
\f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3), \f[C]fi_av\f[](3),
\f[C]fi_eq\f[](3), \f[C]fi_cq\f[](3), \f[C]fi_cntr\f[](3),
\f[C]fi_mr\f[](3)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -0,0 +1,107 @@
.TH fi_provider 7 "2015\-03\-16" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
Fabric Interface Providers
.SH OVERVIEW
.PP
Conceptually, a fabric provider may be viewed as a local hardware NIC
driver, though a provider is not limited by this definition.
The first component of libfabric is a general purpose framework that is
capable of handling different types of fabric hardware.
All fabric hardware devices and their software drivers are required to
support this framework.
Devices and the drivers that plug into the libfabric framework are
referred to as fabric providers, or simply providers.
.PP
This distribution of libfabric contains the following providers
(although more may be available via run-time plugins):
.PP
\f[I]PSM\f[] : High-speed InfiniBand networking from Intel.
See \f[C]fi_psm\f[](7) for more information.
.PP
\f[I]Sockets\f[] : A general purpose provider that can be used on any
network that supports TCP/UDP sockets.
This provider is not intended to provide performance improvements over
regular TCP/UDP sockets, but rather to allow developers to write, test,
and debug application code even on platforms that do not have high-speed
networking.
See \f[C]fi_sockets\f[](7) for more information.
.PP
\f[I]usNIC\f[] : Ultra low latency Ethernet networking over Cisco
userspace VIC adapters.
See \f[C]fi_usnic\f[](7) for more information.
.PP
\f[I]Verbs\f[] : This provider uses the Linux Verbs API for network
transport.
Application performance is, obviously, expected to be similar to that of
the native Linux Verbs API.
Analogous to the Sockets provider, the Verbs provider is intended to
enable developers to write, test, and debug application code on
platforms that only have Linux Verbs-based networking.
See \f[C]fi_verbs\f[](7) for more information.
.SH PROVIDER REQUIREMENTS
.PP
Libfabric provides a general framework for supporting multiple types of
fabric objects and their related interfaces.
Fabric providers have a large amount of flexibility in selecting which
components they are able and willing to support, based on specific
hardware constraints.
To assist in the development of applications, libfabric specifies the
following requirements that must be met by any fabric provider, if
requested by an application.
(Note that the instantiation of a specific fabric object is subject to
application configuration parameters and need not meet these
requirements).
.IP \[bu] 2
A fabric provider must support at least one endpoint type.
.IP \[bu] 2
All endpoints must support the message queue data transfer interface.
.IP \[bu] 2
An endpoint that advertises support for a specific endpoint capability
must support the corresponding data transfer interface.
.IP \[bu] 2
Endpoints must support operations to send and receive data for any data
transfer operations that they support.
.IP \[bu] 2
Connectionless endpoints must support all relevant data transfer
routines.
(send / recv / write / read / etc.)
.IP \[bu] 2
Connectionless endpoints must support the CM interface getname.
.IP \[bu] 2
Connectionless endpoints that support multicast operations must support
the CM interfaces join and leave.
.IP \[bu] 2
Connection-oriented interfaces must support the CM interfaces getname,
getpeer, connect, listen, accept, reject, and shutdown.
.IP \[bu] 2
All endpoints must support all relevant \[aq]msg\[aq] data transfer
routines.
(sendmsg / recvmsg / writemsg / readmsg / etc.)
.IP \[bu] 2
Access domains must support opening address vector maps and tables.
.IP \[bu] 2
Address vectors associated with domains that may be identified using IP
addresses must support FI_SOCKADDR_IN and FI_SOCKADDR_IN6 input formats.
.IP \[bu] 2
Address vectors must support FI_ADDR, FI_ADDR_INDEX, and FI_AV output
formats.
.IP \[bu] 2
Access domains must support opening completion queues and counters.
.IP \[bu] 2
Completion queues must support the FI_CQ_FORMAT_CONTEXT and
FI_CQ_FORMAT_MSG formats.
.IP \[bu] 2
Event queues associated with tagged message transfers must support the
FI_CQ_FORMAT_TAGGED format.
.IP \[bu] 2
A provider is expected to be forward compatible, and must be able to be
compiled against expanded \f[C]fi_xxx_ops\f[] structures that define new
functions added after the provider was written.
Any unknown functions must be set to NULL.
.SH SEE ALSO
.PP
\f[C]fi_psm\f[](7), \f[C]fi_sockets\f[](7), \f[C]fi_usnic\f[](7),
\f[C]fi_verbs\f[](7)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -0,0 +1,24 @@
.TH fi_psm 7 "2015\-03\-16" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
The PSM Fabric Provider
.SH OVERVIEW
.PP
\&...Whatever the PSM provider maintainer wants to put here...
.PP
Suggestions:
.IP \[bu] 2
Document what is working
.IP \[bu] 2
Document what has been tested
.IP \[bu] 2
Document what is known to NOT be working
.IP \[bu] 2
Document any other things app developers and end users should know about
this provider (e.g., run-time tunable parameters, differences in
behavior between this and other providers, etc.)
.SH SEE ALSO
.PP
\f[C]fabric\f[](7), \f[C]fi_provider\f[](7)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -0,0 +1,24 @@
.TH fi_sockets 7 "2015\-03\-16" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
The Sockets Fabric Provider
.SH OVERVIEW
.PP
\&...Whatever the sockets provider maintainer wants to put here...
.PP
Suggestions:
.IP \[bu] 2
Document what is working
.IP \[bu] 2
Document what has been tested
.IP \[bu] 2
Document what is known to NOT be working
.IP \[bu] 2
Document any other things app developers and end users should know about
this provider (e.g., run-time tunable parameters, differences in
behavior between this and other providers, etc.)
.SH SEE ALSO
.PP
\f[C]fabric\f[](7), \f[C]fi_provider\f[](7)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -0,0 +1,150 @@
.TH fi_usnic 7 "2015\-03\-24" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
The usNIC Fabric Provider
.SH OVERVIEW
.PP
The \f[I]usnic\f[] provider is designed to run over the Cisco VIC
(virtualized NIC) hardware on Cisco UCS servers.
It utilizes the Cisco usNIC (userspace NIC) capabilities of the VIC to
enable ultra low latency and other offload capabilities on Ethernet
networks.
.SH RELEASE NOTES
.PP
The following capabilities are currently supported via the
\f[I]usnic\f[] libfabric provider:
.IP \[bu] 2
The \f[I]usnic\f[] libfabric provider requires the use of the "libnl"
library.
.IP \[bu] 2
There are two versions of libnl generally available: v1 and v3; the
usnic provider can use either version.
.IP \[bu] 2
If you are building libfabric/the usnic provider from source, you will
need to have the libnl header files available (e.g., if you are
installing libnl from RPM or other packaging system, install the
"-devel" versions of the package).
.IP \[bu] 2
If you have libnl (either v1 or v3) installed in a non-standard location
(e.g., not in /usr/lib or /usr/lib64), you may need to tell
libfabric\[aq]s configure where to find libnl via the
\f[C]--with-libnl=DIR\f[] command line option (where DIR is the
installation prefix of the libnl package).
.IP \[bu] 2
The most common way to use the libfabric usnic provider is via an MPI
implementation that uses libfabric (and the usnic provider) as a lower
layer transport.
MPI applications do not need to know anything about libfabric or usnic
in this use case -- the MPI implementation hides all these details from
the application.
.IP \[bu] 2
If you are writing applications directly to the libfabric API:
.IP \[bu] 2
\f[I]FI_EP_DGRAM\f[] endpoints are the best supported method of
utilizing the usNIC interface.
Specifically, the \f[I]FI_EP_DGRAM\f[] endpoint type has been
extensively tested as the underlying layer for Open MPI\[aq]s
\f[I]usnic\f[] BTL.
.IP \[bu] 2
\f[I]FI_EP_MSG\f[] and \f[I]FI_EP_RDM\f[] endpoints are implemented, but
are only lightly tested.
It is likely that there are still some bugs in these endpoint types.
.IP \[bu] 2
Other capabilities, such as the tag matching interface and RDMA
operations, are not yet implemented.
.IP \[bu] 2
The usnic libfabric provider supports extensions that provide
information and functionality beyond the standard libfabric interface.
See the "USNIC EXTENSIONS" section, below.
.SH USNIC EXTENSIONS
.PP
The usnic libfabric provider exports extensions for additional VIC,
usNIC, and Ethernet capabilities not provided by the standard libfabric
interface.
.PP
These extensions are available via the "fi_ext_usnic.h" header file.
.PP
The following is an example of how to utilize the usnic "fabric getinfo"
extension, which returns IP and SR-IOV information about a usNIC
interface obtained from the \f[C]fi_getinfo\f[](3) function.
.IP
.nf
\f[C]
#include\ <stdio.h>
#include\ <rdma/fabric.h>
/*\ The\ usNIC\ extensions\ are\ all\ in\ the
\ \ \ rdma/fi_ext_usnic.h\ header\ */
#include\ <rdma/fi_ext_usnic.h>
int\ main(int\ argc,\ char\ *argv[])\ {
\ \ \ \ struct\ fi_info\ *info;
\ \ \ \ struct\ fi_info\ *info_list;
\ \ \ \ struct\ fi_info\ hints\ =\ {0};
\ \ \ \ struct\ fi_ep_attr\ ep_attr\ =\ {0};
\ \ \ \ struct\ fi_fabric_attr\ fabric_attr\ =\ {0};
\ \ \ \ fabric_attr.prov_name\ =\ "usnic";
\ \ \ \ ep_attr.type\ =\ FI_EP_DGRAM;
\ \ \ \ hints.caps\ =\ FI_MSG;
\ \ \ \ hints.mode\ =\ FI_LOCAL_MR\ |\ FI_MSG_PREFIX;
\ \ \ \ hints.addr_format\ =\ FI_SOCKADDR;
\ \ \ \ hints.ep_attr\ =\ &ep_attr;
\ \ \ \ hints.fabric_attr\ =\ &fabric_attr;
\ \ \ \ /*\ Find\ all\ usnic\ providers\ */
\ \ \ \ fi_getinfo(FI_VERSION(1,\ 0),\ NULL,\ 0,\ 0,\ &hints,\ &info_list);
\ \ \ \ for\ (info\ =\ info_list;\ NULL\ !=\ info;\ info\ =\ info->next)\ {
\ \ \ \ \ \ \ \ /*\ Open\ the\ fabric\ on\ the\ interface\ */
\ \ \ \ \ \ \ \ struct\ fid_fabric\ *fabric;
\ \ \ \ \ \ \ \ fi_fabric(info->fabric_attr,\ &fabric,\ NULL);
\ \ \ \ \ \ \ \ /*\ Pass\ FI_USNIC_FABRIC_OPS_1\ to\ get\ usnic\ ops
\ \ \ \ \ \ \ \ \ \ \ on\ the\ fabric\ */
\ \ \ \ \ \ \ \ struct\ fi_usnic_ops_fabric\ *usnic_fabric_ops;
\ \ \ \ \ \ \ \ fi_open_ops(&fabric->fid,\ FI_USNIC_FABRIC_OPS_1,\ 0,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ (void\ **)\ &usnic_fabric_ops,\ NULL);
\ \ \ \ \ \ \ \ /*\ Now\ use\ the\ returned\ usnic\ ops\ structure\ to\ call
\ \ \ \ \ \ \ \ \ \ \ usnic\ extensions.\ \ The\ following\ extension\ queries
\ \ \ \ \ \ \ \ \ \ \ some\ IP\ and\ SR-IOV\ characteristics\ about\ the
\ \ \ \ \ \ \ \ \ \ \ usNIC\ device.\ */
\ \ \ \ \ \ \ \ struct\ fi_usnic_info\ usnic_info;
\ \ \ \ \ \ \ \ usnic_fabric_ops->getinfo(FI_EXT_USNIC_INFO_VERSION,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ fabric,\ &usnic_info);
\ \ \ \ \ \ \ \ printf("Fabric\ interface\ %s\ is\ %s:\\n"
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "\\tNetmask:\ \ 0x%08x\\n\\tLink\ speed:\ %d\\n"
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "\\tSR-IOV\ VFs:\ %d\\n\\tQPs\ per\ SR-IOV\ VF:\ %d\\n"
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ "\\tCQs\ per\ SR-IOV\ VF:\ %d\\n",
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ info->fabric_attr->name,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ usnic_info.ui.v1.ui_ifname,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ usnic_info.ui.v1.ui_netmask_be,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ usnic_info.ui.v1.ui_link_speed,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ usnic_info.ui.v1.ui_num_vf,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ usnic_info.ui.v1.ui_qp_per_vf,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ usnic_info.ui.v1.ui_cq_per_vf);
\ \ \ \ \ \ \ \ fi_close(&fabric->fid);
\ \ \ \ }
\ \ \ \ fi_freeinfo(info_list);
\ \ \ \ return\ 0;
}
\f[]
.fi
.PP
Note that other usnic extensions are defined for other fabric objects.
The second argument to \f[C]fi_open_ops\f[](3) is used to identify both
the fid type and the extension family.
For example, \f[I]FI_USNIC_AV_OPS_1\f[] can be used in conjunction with
an \f[C]fi_av\f[] fid to obtain usnic extensions for address vectors.
.PP
See fi_ext_usnic.h for more details.
.SH SEE ALSO
.PP
\f[C]fabric\f[](7), \f[C]fi_open_ops\f[](3), \f[C]fi_provider\f[](7)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -0,0 +1,24 @@
.TH fi_verbs 7 "2015\-03\-16" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
The Verbs Fabric Provider
.SH OVERVIEW
.PP
\&...Whatever the Verbs provider maintainer wants to put here...
.PP
Suggestions:
.IP \[bu] 2
Document what is working
.IP \[bu] 2
Document what has been tested
.IP \[bu] 2
Document what is known to NOT be working
.IP \[bu] 2
Document any other things app developers and end users should know about
this provider (e.g., run-time tunable parameters, differences in
behavior between this and other providers, etc.)
.SH SEE ALSO
.PP
\f[C]fabric\f[](7), \f[C]fi_provider\f[](7)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -1,4 +1,4 @@
dnl Configury specific to the libfabrics PSM provider
dnl Configury specific to the libfabric PSM provider
dnl Called to configure this provider
dnl

Просмотреть файл

@ -58,7 +58,7 @@ extern "C" {
FI_RMA | FI_MULTI_RECV | \
FI_READ | FI_WRITE | FI_SEND | FI_RECV | \
FI_REMOTE_READ | FI_REMOTE_WRITE | \
FI_REMOTE_COMPLETE | FI_REMOTE_SIGNAL | \
FI_REMOTE_SIGNAL | \
FI_CANCEL | FI_TRIGGER | \
FI_DYNAMIC_MR | \
PSMX_CAP_EXT)
@ -85,8 +85,6 @@ enum psmx_context_type {
PSMX_TRECV_CONTEXT,
PSMX_WRITE_CONTEXT,
PSMX_READ_CONTEXT,
PSMX_INJECT_CONTEXT,
PSMX_INJECT_WRITE_CONTEXT,
PSMX_REMOTE_WRITE_CONTEXT,
PSMX_REMOTE_READ_CONTEXT,
};
@ -490,8 +488,6 @@ struct psmx_fid_ep {
uint64_t caps;
struct fi_context nocomp_send_context;
struct fi_context nocomp_recv_context;
struct fi_context sendimm_context;
struct fi_context writeimm_context;
size_t min_multi_recv;
};

Просмотреть файл

@ -1493,6 +1493,7 @@ static int psmx_atomic_compwritevalid(struct fid_ep *ep,
}
struct fi_ops_atomic psmx_atomic_ops = {
.size = sizeof(struct fi_ops_atomic),
.write = psmx_atomic_write,
.writev = psmx_atomic_writev,
.writemsg = psmx_atomic_writemsg,

Просмотреть файл

@ -294,6 +294,8 @@ static struct fi_ops psmx_fi_ops = {
static struct fi_ops_av psmx_av_ops = {
.size = sizeof(struct fi_ops_av),
.insert = psmx_av_insert,
.insertsvc = fi_no_av_insertsvc,
.insertsym = fi_no_av_insertsym,
.remove = psmx_av_remove,
.lookup = psmx_av_lookup,
.straddr = psmx_av_straddr,

Просмотреть файл

@ -343,6 +343,7 @@ static struct fi_ops psmx_fi_ops = {
.close = psmx_cntr_close,
.bind = fi_no_bind,
.control = psmx_cntr_control,
.ops_open = fi_no_ops_open,
};
static struct fi_ops_cntr psmx_cntr_ops = {

Просмотреть файл

@ -349,16 +349,6 @@ int psmx_cq_poll_mq(struct psmx_fid_cq *cq, struct psmx_fid_domain *domain,
tmp_cntr = tmp_ep->read_cntr;
break;
case PSMX_INJECT_CONTEXT:
tmp_cntr = tmp_ep->send_cntr;
free(fi_context);
break;
case PSMX_INJECT_WRITE_CONTEXT:
tmp_cntr = tmp_ep->write_cntr;
free(fi_context);
break;
case PSMX_SEND_CONTEXT:
case PSMX_TSEND_CONTEXT:
tmp_cq = tmp_ep->send_cq;
@ -546,7 +536,7 @@ static ssize_t psmx_cq_readfrom(struct fid_cq *cq, void *buf, size_t count,
}
}
return read_count;
return read_count ? read_count : -FI_EAGAIN;
}
static ssize_t psmx_cq_read(struct fid_cq *cq, void *buf, size_t count)
@ -726,6 +716,7 @@ static struct fi_ops psmx_fi_ops = {
.close = psmx_cq_close,
.bind = fi_no_bind,
.control = psmx_cq_control,
.ops_open = fi_no_ops_open,
};
static struct fi_ops_cq psmx_cq_ops = {

Просмотреть файл

@ -75,6 +75,7 @@ static struct fi_ops psmx_fi_ops = {
.close = psmx_domain_close,
.bind = fi_no_bind,
.control = fi_no_control,
.ops_open = fi_no_ops_open,
};
static struct fi_ops_domain psmx_domain_ops = {
@ -82,6 +83,7 @@ static struct fi_ops_domain psmx_domain_ops = {
.av_open = psmx_av_open,
.cq_open = psmx_cq_open,
.endpoint = psmx_ep_open,
.scalable_ep = fi_no_scalable_ep,
.cntr_open = psmx_cntr_open,
.poll_open = psmx_poll_open,
.stx_ctx = psmx_stx_ctx,

Просмотреть файл

@ -276,6 +276,7 @@ static struct fi_ops psmx_fi_ops = {
.close = psmx_ep_close,
.bind = psmx_ep_bind,
.control = psmx_ep_control,
.ops_open = fi_no_ops_open,
};
static struct fi_ops_ep psmx_ep_ops = {
@ -325,10 +326,6 @@ int psmx_ep_open(struct fid_domain *domain, struct fi_info *info,
PSMX_CTXT_EP(&ep_priv->nocomp_send_context) = ep_priv;
PSMX_CTXT_TYPE(&ep_priv->nocomp_recv_context) = PSMX_NOCOMP_RECV_CONTEXT;
PSMX_CTXT_EP(&ep_priv->nocomp_recv_context) = ep_priv;
PSMX_CTXT_TYPE(&ep_priv->sendimm_context) = PSMX_INJECT_CONTEXT;
PSMX_CTXT_EP(&ep_priv->sendimm_context) = ep_priv;
PSMX_CTXT_TYPE(&ep_priv->writeimm_context) = PSMX_INJECT_WRITE_CONTEXT;
PSMX_CTXT_EP(&ep_priv->writeimm_context) = ep_priv;
if (ep_cap & FI_TAGGED)
ep_priv->ep.tagged = &psmx_tagged_ops;

Просмотреть файл

@ -312,6 +312,8 @@ static struct fi_ops psmx_fabric_fi_ops = {
static struct fi_ops_fabric psmx_fabric_ops = {
.size = sizeof(struct fi_ops_fabric),
.domain = psmx_domain_open,
.passive_ep = fi_no_passive_ep,
.eq_open = fi_no_eq_open,
.wait_open = psmx_wait_open,
};
@ -399,11 +401,9 @@ PSM_INI
int check_version;
int err;
fi_log_init();
psmx_env.name_server = psmx_get_int_env("OFI_PSM_NAME_SERVER", 1);
psmx_env.am_msg = psmx_get_int_env("OFI_PSM_AM_MSG", 0);
psmx_env.tagged_rma = psmx_get_int_env("OFI_PSM_TAGGED_RMA", 0);
psmx_env.tagged_rma = psmx_get_int_env("OFI_PSM_TAGGED_RMA", 1);
psmx_env.warning = psmx_get_int_env("OFI_PSM_WARNING", 1);
psmx_env.uuid = getenv("OFI_PSM_UUID");
if (!psmx_env.uuid)

Просмотреть файл

@ -199,6 +199,7 @@ static struct fi_ops psmx_fi_ops = {
.close = psmx_mr_close,
.bind = psmx_mr_bind,
.control = fi_no_control,
.ops_open = fi_no_ops_open,
};
static void psmx_mr_normalize_iov(struct iovec *iov, size_t *count)

Просмотреть файл

@ -42,7 +42,6 @@ ssize_t _psmx_recv(struct fid_ep *ep, void *buf, size_t len,
psm_mq_req_t psm_req;
uint64_t psm_tag, psm_tagsel;
struct fi_context *fi_context;
int user_fi_context = 0;
int err;
int recv_flag = 0;
size_t idx;
@ -99,7 +98,6 @@ ssize_t _psmx_recv(struct fid_ep *ep, void *buf, size_t len,
return -FI_EINVAL;
fi_context = context;
user_fi_context = 1;
if (flags & FI_MULTI_RECV) {
struct psmx_multi_recv *req;
@ -131,7 +129,7 @@ ssize_t _psmx_recv(struct fid_ep *ep, void *buf, size_t len,
if (err != PSM_OK)
return psmx_errno(err);
if (user_fi_context)
if (fi_context == context)
PSMX_CTXT_REQ(fi_context) = psm_req;
return 0;
@ -201,7 +199,6 @@ ssize_t _psmx_send(struct fid_ep *ep, const void *buf, size_t len,
psm_mq_req_t psm_req;
uint64_t psm_tag;
struct fi_context * fi_context;
int user_fi_context = 0;
int err;
size_t idx;
@ -246,17 +243,22 @@ ssize_t _psmx_send(struct fid_ep *ep, const void *buf, size_t len,
psm_tag = ep_priv->domain->psm_epid | PSMX_MSG_BIT;
if (flags & FI_INJECT) {
fi_context = malloc(sizeof(*fi_context) + len);
if (!fi_context)
return -FI_ENOMEM;
if (len > PSMX_INJECT_SIZE)
return -FI_EMSGSIZE;
memcpy((void *)fi_context + sizeof(*fi_context), buf, len);
buf = (void *)fi_context + sizeof(*fi_context);
err = psm_mq_send(ep_priv->domain->psm_mq, psm_epaddr, send_flag,
psm_tag, buf, len);
PSMX_CTXT_TYPE(fi_context) = PSMX_INJECT_CONTEXT;
PSMX_CTXT_EP(fi_context) = ep_priv;
if (err != PSM_OK)
return psmx_errno(err);
if (ep_priv->send_cntr)
psmx_cntr_inc(ep_priv->send_cntr);
return 0;
}
else if (ep_priv->send_cq_event_flag && !(flags & FI_COMPLETION) && !context) {
if (ep_priv->send_cq_event_flag && !(flags & FI_COMPLETION) && !context) {
fi_context = &ep_priv->nocomp_send_context;
}
else {
@ -264,13 +266,10 @@ ssize_t _psmx_send(struct fid_ep *ep, const void *buf, size_t len,
return -FI_EINVAL;
fi_context = context;
if (fi_context != &ep_priv->sendimm_context) {
user_fi_context = 1;
PSMX_CTXT_TYPE(fi_context) = PSMX_SEND_CONTEXT;
PSMX_CTXT_USER(fi_context) = (void *)buf;
PSMX_CTXT_EP(fi_context) = ep_priv;
}
}
err = psm_mq_isend(ep_priv->domain->psm_mq, psm_epaddr, send_flag,
psm_tag, buf, len, (void *)fi_context, &psm_req);
@ -278,7 +277,7 @@ ssize_t _psmx_send(struct fid_ep *ep, const void *buf, size_t len,
if (err != PSM_OK)
return psmx_errno(err);
if (user_fi_context)
if (fi_context == context)
PSMX_CTXT_REQ(fi_context) = psm_req;
return 0;

Просмотреть файл

@ -551,7 +551,7 @@ static ssize_t _psmx_send2(struct fid_ep *ep, const void *buf, size_t len,
req->cq_flags = FI_SEND | FI_MSG;
if ((ep_priv->send_cq_event_flag && !(flags & FI_COMPLETION)) ||
(context == &ep_priv->sendimm_context))
(flags & FI_INJECT))
req->no_event = 1;
args[0].u32w0 = PSMX_AM_REQ_SEND | (msg_size == len ? PSMX_AM_EOM : 0);
@ -639,7 +639,7 @@ static ssize_t psmx_inject2(struct fid_ep *ep, const void *buf, size_t len,
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
/* TODO: optimize it & guarantee buffered */
return _psmx_send2(ep, buf, len, NULL, dest_addr, &ep_priv->sendimm_context,
return _psmx_send2(ep, buf, len, NULL, dest_addr, NULL,
ep_priv->flags | FI_INJECT);
}

Просмотреть файл

@ -677,6 +677,9 @@ ssize_t _psmx_write(struct fid_ep *ep, const void *buf, size_t len,
addr, key, context, flags, data);
if (flags & FI_INJECT) {
if (len > PSMX_INJECT_SIZE)
return -FI_EMSGSIZE;
req = malloc(sizeof(*req) + len);
if (!req)
return -FI_ENOMEM;
@ -685,7 +688,6 @@ ssize_t _psmx_write(struct fid_ep *ep, const void *buf, size_t len,
memcpy((void *)req + sizeof(*req), (void *)buf, len);
buf = (void *)req + sizeof(*req);
PSMX_CTXT_TYPE(&req->fi_context) = PSMX_INJECT_WRITE_CONTEXT;
req->no_event = 1;
}
else {
@ -833,6 +835,7 @@ static ssize_t psmx_writedata(struct fid_ep *ep, const void *buf, size_t len, vo
}
struct fi_ops_rma psmx_rma_ops = {
.size = sizeof(struct fi_ops_rma),
.read = psmx_read,
.readv = psmx_readv,
.readmsg = psmx_readmsg,

Просмотреть файл

@ -41,7 +41,6 @@ ssize_t _psmx_tagged_recv(struct fid_ep *ep, void *buf, size_t len,
psm_mq_req_t psm_req;
uint64_t psm_tag, psm_tagsel;
struct fi_context *fi_context;
int user_fi_context = 0;
int err;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
@ -89,7 +88,6 @@ ssize_t _psmx_tagged_recv(struct fid_ep *ep, void *buf, size_t len,
return -FI_EINVAL;
fi_context = context;
user_fi_context= 1;
PSMX_CTXT_TYPE(fi_context) = PSMX_TRECV_CONTEXT;
PSMX_CTXT_USER(fi_context) = buf;
PSMX_CTXT_EP(fi_context) = ep_priv;
@ -98,10 +96,11 @@ ssize_t _psmx_tagged_recv(struct fid_ep *ep, void *buf, size_t len,
err = psm_mq_irecv(ep_priv->domain->psm_mq,
psm_tag, psm_tagsel, 0, /* flags */
buf, len, (void *)fi_context, &psm_req);
if (err != PSM_OK)
return psmx_errno(err);
if (user_fi_context)
if (fi_context == context)
PSMX_CTXT_REQ(fi_context) = psm_req;
return 0;
@ -360,7 +359,6 @@ ssize_t _psmx_tagged_send(struct fid_ep *ep, const void *buf, size_t len,
psm_mq_req_t psm_req;
uint64_t psm_tag;
struct fi_context *fi_context;
int user_fi_context = 0;
int err;
size_t idx;
@ -412,17 +410,22 @@ ssize_t _psmx_tagged_send(struct fid_ep *ep, const void *buf, size_t len,
psm_tag = tag & (~ep_priv->domain->reserved_tag_bits);
if (flags & FI_INJECT) {
fi_context = malloc(sizeof(*fi_context) + len);
if (!fi_context)
return -FI_ENOMEM;
if (len > PSMX_INJECT_SIZE)
return -FI_EMSGSIZE;
memcpy((void *)fi_context + sizeof(*fi_context), buf, len);
buf = (void *)fi_context + sizeof(*fi_context);
err = psm_mq_send(ep_priv->domain->psm_mq, psm_epaddr, 0,
psm_tag, buf, len);
PSMX_CTXT_TYPE(fi_context) = PSMX_INJECT_CONTEXT;
PSMX_CTXT_EP(fi_context) = ep_priv;
if (err != PSM_OK)
return psmx_errno(err);
if (ep_priv->send_cntr)
psmx_cntr_inc(ep_priv->send_cntr);
return 0;
}
else if (ep_priv->send_cq_event_flag && !(flags & FI_COMPLETION) && !context) {
if (ep_priv->send_cq_event_flag && !(flags & FI_COMPLETION) && !context) {
fi_context = &ep_priv->nocomp_send_context;
}
else {
@ -430,13 +433,10 @@ ssize_t _psmx_tagged_send(struct fid_ep *ep, const void *buf, size_t len,
return -FI_EINVAL;
fi_context = context;
if (fi_context != &ep_priv->sendimm_context) {
user_fi_context = 1;
PSMX_CTXT_TYPE(fi_context) = PSMX_TSEND_CONTEXT;
PSMX_CTXT_USER(fi_context) = (void *)buf;
PSMX_CTXT_EP(fi_context) = ep_priv;
}
}
err = psm_mq_isend(ep_priv->domain->psm_mq, psm_epaddr, 0,
psm_tag, buf, len, (void*)fi_context, &psm_req);
@ -444,7 +444,7 @@ ssize_t _psmx_tagged_send(struct fid_ep *ep, const void *buf, size_t len,
if (err != PSM_OK)
return psmx_errno(err);
if (user_fi_context)
if (fi_context == context)
PSMX_CTXT_REQ(fi_context) = psm_req;
return 0;
@ -589,30 +589,26 @@ ssize_t psmx_tagged_inject_no_flag_av_map(struct fid_ep *ep, const void *buf, si
{
struct psmx_fid_ep *ep_priv;
psm_epaddr_t psm_epaddr;
psm_mq_req_t psm_req;
uint64_t psm_tag;
struct fi_context *fi_context;
int err;
if (len > PSMX_INJECT_SIZE)
return -FI_EMSGSIZE;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
psm_epaddr = (psm_epaddr_t) dest_addr;
psm_tag = tag & (~ep_priv->domain->reserved_tag_bits);
fi_context = malloc(sizeof(*fi_context) + len);
if (!fi_context)
return -FI_ENOMEM;
memcpy((void *)fi_context + sizeof(*fi_context), buf, len);
buf = (void *)fi_context + sizeof(*fi_context);
PSMX_CTXT_TYPE(fi_context) = PSMX_INJECT_CONTEXT;
PSMX_CTXT_EP(fi_context) = ep_priv;
err = psm_mq_isend(ep_priv->domain->psm_mq, psm_epaddr, 0,
psm_tag, buf, len, (void*)fi_context, &psm_req);
err = psm_mq_send(ep_priv->domain->psm_mq, psm_epaddr, 0, psm_tag, buf, len);
if (err != PSM_OK)
return psmx_errno(err);
if (ep_priv->send_cntr)
psmx_cntr_inc(ep_priv->send_cntr);
return 0;
}
ssize_t psmx_tagged_inject_no_flag_av_table(struct fid_ep *ep, const void *buf, size_t len,
@ -621,12 +617,13 @@ ssize_t psmx_tagged_inject_no_flag_av_table(struct fid_ep *ep, const void *buf,
struct psmx_fid_ep *ep_priv;
struct psmx_fid_av *av;
psm_epaddr_t psm_epaddr;
psm_mq_req_t psm_req;
uint64_t psm_tag;
struct fi_context *fi_context;
int err;
size_t idx;
if (len > PSMX_INJECT_SIZE)
return -FI_EMSGSIZE;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
av = ep_priv->av;
@ -637,20 +634,15 @@ ssize_t psmx_tagged_inject_no_flag_av_table(struct fid_ep *ep, const void *buf,
psm_epaddr = av->psm_epaddrs[idx];
psm_tag = tag & (~ep_priv->domain->reserved_tag_bits);
fi_context = malloc(sizeof(*fi_context) + len);
if (!fi_context)
return -FI_ENOMEM;
memcpy((void *)fi_context + sizeof(*fi_context), buf, len);
buf = (void *)fi_context + sizeof(*fi_context);
PSMX_CTXT_TYPE(fi_context) = PSMX_INJECT_CONTEXT;
PSMX_CTXT_EP(fi_context) = ep_priv;
err = psm_mq_isend(ep_priv->domain->psm_mq, psm_epaddr, 0,
psm_tag, buf, len, (void*)fi_context, &psm_req);
err = psm_mq_send(ep_priv->domain->psm_mq, psm_epaddr, 0, psm_tag, buf, len);
if (err != PSM_OK)
return psmx_errno(err);
if (ep_priv->send_cntr)
psmx_cntr_inc(ep_priv->send_cntr);
return 0;
}
static ssize_t psmx_tagged_send(struct fid_ep *ep, const void *buf, size_t len,

Просмотреть файл

@ -1,4 +1,4 @@
dnl Configury specific to the libfabrics sockets provider
dnl Configury specific to the libfabric sockets provider
dnl Called to configure this provider
dnl

Просмотреть файл

@ -60,7 +60,7 @@
#define SOCK_EP_MAX_MSG_SZ (1<<23)
#define SOCK_EP_MAX_INJECT_SZ ((1<<8) - 1)
#define SOCK_EP_MAX_BUFF_RECV (1<<20)
#define SOCK_EP_MAX_BUFF_RECV (1<<24)
#define SOCK_EP_MAX_ORDER_RAW_SZ SOCK_EP_MAX_MSG_SZ
#define SOCK_EP_MAX_ORDER_WAR_SZ SOCK_EP_MAX_MSG_SZ
#define SOCK_EP_MAX_ORDER_WAW_SZ SOCK_EP_MAX_MSG_SZ
@ -89,7 +89,7 @@
#define SOCK_TAG_SIZE (sizeof(uint64_t))
#define SOCK_PEP_LISTENER_TIMEOUT (10000)
#define SOCK_CM_COMM_TIMEOUT (5000)
#define SOCK_CM_COMM_TIMEOUT (2000)
#define SOCK_EP_MAX_RETRY (5)
#define SOCK_EP_MAX_CM_DATA_SZ (256)
@ -98,8 +98,8 @@
FI_DIRECTED_RECV | FI_MULTI_RECV | \
FI_SOURCE | FI_READ | FI_WRITE | FI_RECV | FI_SEND | \
FI_REMOTE_READ | FI_REMOTE_WRITE | \
FI_COMPLETION | FI_REMOTE_SIGNAL | FI_REMOTE_COMPLETE | \
FI_MORE | FI_CANCEL | FI_FENCE)
FI_COMPLETION | FI_REMOTE_SIGNAL | \
FI_MORE | FI_CANCEL | FI_FENCE | FI_REMOTE_COMPLETE)
#define SOCK_EP_MSG_CAP SOCK_EP_RDM_CAP
@ -107,16 +107,15 @@
FI_NAMED_RX_CTX | FI_DIRECTED_RECV | \
FI_MULTI_RECV | FI_SOURCE | FI_RECV | FI_SEND | \
FI_COMPLETION | FI_REMOTE_SIGNAL | \
FI_REMOTE_COMPLETE | FI_MORE | FI_CANCEL | \
FI_MORE | FI_CANCEL | FI_REMOTE_COMPLETE | \
FI_FENCE)
#define SOCK_DEF_OPS (FI_SEND | FI_RECV )
#define SOCK_EP_MSG_ORDER (FI_ORDER_RAR | FI_ORDER_RAW | FI_ORDER_RAS| \
FI_ORDER_WAR | FI_ORDER_WAW | FI_ORDER_WAS | \
FI_ORDER_SAR | FI_ORDER_SAW | FI_ORDER_SAS)
#define SOCK_MODE (0)
#define SOCK_NO_COMPLETION (1ULL << 60)
#define SOCK_COMM_BUF_SZ (SOCK_EP_MAX_MSG_SZ)
#define SOCK_COMM_THRESHOLD (128 * 1024)
@ -124,20 +123,26 @@
#define SOCK_MAJOR_VERSION 1
#define SOCK_MINOR_VERSION 0
#define SOCK_INJECT_OK(_flgs) ((_flgs) & FI_INJECT)
struct sock_service_entry {
int service;
struct dlist_entry entry;
};
struct sock_fabric {
struct fid_fabric fab_fid;
atomic_t ref;
struct dlist_entry service_list;
fastlock_t lock;
};
struct sock_conn {
int sock_fd;
struct sockaddr addr;
struct sockaddr_in addr;
struct sock_pe_entry *rx_pe_entry;
struct sock_pe_entry *tx_pe_entry;
struct ringbuf inbuf;
struct ringbuf outbuf;
struct sock_ep *ep;
};
struct sock_conn_map {
@ -146,7 +151,6 @@ struct sock_conn_map {
int size;
struct sock_domain *domain;
fastlock_t lock;
struct sockaddr_storage curr_addr;
};
struct sock_domain {
@ -155,7 +159,6 @@ struct sock_domain {
struct sock_fabric *fab;
fastlock_t lock;
atomic_t ref;
short ep_count;
struct sock_eq *eq;
struct sock_eq *mr_eq;
@ -164,11 +167,6 @@ struct sock_domain {
struct index_map mr_idm;
struct sock_pe *pe;
struct sock_conn_map r_cmap;
pthread_t listen_thread;
int listening;
char service[NI_MAXSERV];
int signal_fds[2];
struct sockaddr_storage src_addr;
};
struct sock_cntr {
@ -208,8 +206,7 @@ struct sock_mr {
struct sock_av_addr {
struct sockaddr_storage addr;
uint8_t valid;
uint16_t rem_ep_id;
uint8_t reserved[5];
uint8_t reserved[7];
};
struct sock_av_table_hdr {
@ -309,9 +306,9 @@ struct sock_op_send {
uint64_t flags;
uint64_t context;
uint64_t dest_addr;
struct sock_conn *conn;
uint64_t buf;
struct sock_ep *ep;
struct sock_conn *conn;
};
struct sock_op_tsend {
@ -319,10 +316,10 @@ struct sock_op_tsend {
uint64_t flags;
uint64_t context;
uint64_t dest_addr;
struct sock_conn *conn;
uint64_t tag;
uint64_t buf;
struct sock_ep *ep;
struct sock_conn *conn;
uint64_t tag;
};
union sock_iov {
@ -383,22 +380,30 @@ struct sock_cm_entry {
int sock;
int do_listen;
int signal_fds[2];
uint64_t next_msg_id;
fastlock_t lock;
int shutdown_received;
pthread_t listener_thread;
struct dlist_entry msg_list;
};
struct sock_conn_listener {
int sock;
int do_listen;
int signal_fds[2];
pthread_t listener_thread;
char service[NI_MAXSERV];
};
struct sock_ep {
struct fid_ep ep;
size_t fclass;
uint64_t op_flags;
uint8_t connected;
char reserved[1];
uint8_t tx_shared;
uint8_t rx_shared;
uint16_t ep_id;
uint16_t rem_ep_id;
uint16_t buffered_len;
uint16_t min_multi_recv;
@ -430,10 +435,11 @@ struct sock_ep {
struct sockaddr_in *dest_addr;
struct sockaddr_in cm_addr;
fid_t peer_fid;
uint64_t peer_fid;
uint16_t key;
int is_disabled;
struct sock_cm_entry cm;
struct sock_conn_listener listener;
};
struct sock_pep {
@ -452,7 +458,8 @@ struct sock_rx_entry {
uint8_t is_busy;
uint8_t is_claimed;
uint8_t is_complete;
uint8_t reserved[5];
uint8_t is_tagged;
uint8_t reserved[3];
uint64_t used;
uint64_t total_len;
@ -481,7 +488,8 @@ struct sock_rx_ctx {
uint8_t rem_write_cq_event;
uint16_t buffered_len;
uint16_t min_multi_recv;
uint8_t reserved[7];
uint16_t num_left;
uint8_t reserved[5];
uint64_t addr;
struct sock_comp comp;
@ -545,8 +553,8 @@ struct sock_msg_hdr{
uint8_t op_type;
uint8_t rx_id;
uint8_t dest_iov_len;
uint16_t ep_id;
uint16_t pe_entry_id;
uint8_t reserved[2];
uint64_t flags;
uint64_t msg_len;
@ -683,6 +691,7 @@ struct sock_pe{
int num_free_entries;
struct sock_pe_entry pe_table[SOCK_PE_MAX_ENTRIES];
fastlock_t lock;
fastlock_t list_lock;
struct dlist_entry free_list;
struct dlist_entry busy_list;
@ -722,7 +731,10 @@ struct sock_cq {
};
struct sock_cm_msg_list_entry {
size_t msg_len;
uint64_t msg_len;
uint8_t retry;
uint8_t reserved[7];
uint64_t timestamp_ms;
struct sockaddr_in addr;
struct dlist_entry entry;
char msg[0];
@ -732,13 +744,11 @@ struct sock_conn_hdr {
uint8_t type;
uint8_t reserved[3];
int32_t s_port;
fid_t c_fid;
fid_t s_fid;
uint64_t msg_id;
};
struct sock_conn_req {
struct sock_conn_hdr hdr;
uint16_t ep_id;
struct fi_info info;
struct sockaddr_in src_addr;
struct sockaddr_in dest_addr;
@ -761,6 +771,7 @@ enum {
SOCK_CONN_ACCEPT,
SOCK_CONN_REJECT,
SOCK_CONN_SHUTDOWN,
SOCK_CONN_ACK
};
int sock_verify_info(struct fi_info *hints);
@ -777,12 +788,12 @@ int sock_msg_verify_ep_attr(struct fi_ep_attr *ep_attr, struct fi_tx_attr *tx_at
struct fi_info *sock_fi_info(enum fi_ep_type ep_type,
struct fi_info *hints, void *src_addr, void *dest_addr);
int sock_rdm_getinfo(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info);
int sock_dgram_getinfo(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info);
int sock_msg_getinfo(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info);
int sock_msg_fi_info(void *src_addr, void *dest_addr, struct fi_info *hints,
struct fi_info **info);
int sock_dgram_fi_info(void *src_addr, void *dest_addr, struct fi_info *hints,
struct fi_info **info);
int sock_rdm_fi_info(void *src_addr, void *dest_addr, struct fi_info *hints,
struct fi_info **info);
void free_fi_info(struct fi_info *info);
int sock_msg_getinfo(uint32_t version, const char *node, const char *service,
@ -790,6 +801,9 @@ int sock_msg_getinfo(uint32_t version, const char *node, const char *service,
int sock_domain(struct fid_fabric *fabric, struct fi_info *info,
struct fid_domain **dom, void *context);
void sock_fabric_add_service(struct sock_fabric *fab, int service);
void sock_fabric_remove_service(struct sock_fabric *fab, int service);
int sock_fabric_check_service(struct sock_fabric *fab, int service);
int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info,
@ -867,7 +881,18 @@ void sock_tx_ctx_start(struct sock_tx_ctx *tx_ctx);
void sock_tx_ctx_write(struct sock_tx_ctx *tx_ctx, const void *buf, size_t len);
void sock_tx_ctx_commit(struct sock_tx_ctx *tx_ctx);
void sock_tx_ctx_abort(struct sock_tx_ctx *tx_ctx);
void sock_tx_ctx_write_op_send(struct sock_tx_ctx *tx_ctx,
struct sock_op *op, uint64_t flags, uint64_t context,
uint64_t dest_addr, uint64_t buf, struct sock_ep *ep,
struct sock_conn *conn);
void sock_tx_ctx_write_op_tsend(struct sock_tx_ctx *tx_ctx,
struct sock_op *op, uint64_t flags, uint64_t context,
uint64_t dest_addr, uint64_t buf, struct sock_ep *ep,
struct sock_conn *conn, uint64_t tag);
void sock_tx_ctx_read_op_send(struct sock_tx_ctx *tx_ctx,
struct sock_op *op, uint64_t *flags, uint64_t *context,
uint64_t *dest_addr, uint64_t *buf, struct sock_ep **ep,
struct sock_conn **conn);
int sock_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr,
struct fid_poll **pollset);
@ -883,25 +908,30 @@ int sock_av_open(struct fid_domain *domain, struct fi_av_attr *attr,
fi_addr_t _sock_av_lookup(struct sock_av *av, struct sockaddr *addr);
fi_addr_t sock_av_get_fiaddr(struct sock_av *av, struct sock_conn *conn);
fi_addr_t sock_av_lookup_key(struct sock_av *av, int key);
struct sock_conn *sock_av_lookup_addr(struct sock_av *av, fi_addr_t addr);
struct sock_conn *sock_av_lookup_addr(struct sock_ep *ep,
struct sock_av *av, fi_addr_t addr);
int sock_av_compare_addr(struct sock_av *av, fi_addr_t addr1, fi_addr_t addr2);
uint16_t sock_av_lookup_ep_id(struct sock_av *av, fi_addr_t addr);
int sock_compare_addr(struct sockaddr_in *addr1,
struct sockaddr_in *addr2);
struct sock_conn *sock_conn_map_lookup_key(struct sock_conn_map *conn_map,
uint16_t key);
uint16_t sock_conn_map_connect(struct sock_domain *dom,
uint16_t sock_conn_map_connect(struct sock_ep *ep,
struct sock_domain *dom,
struct sock_conn_map *map,
struct sockaddr_in *addr);
uint16_t sock_conn_map_lookup(struct sock_conn_map *map,
struct sockaddr_in *addr);
uint16_t sock_conn_map_match_or_connect(struct sock_domain *dom,
uint16_t sock_conn_map_match_or_connect(struct sock_ep *ep,
struct sock_domain *dom,
struct sock_conn_map *map,
struct sockaddr_in *addr);
int sock_conn_listen(struct sock_domain *domain);
int sock_conn_listen(struct sock_ep *ep);
int sock_conn_map_clear_pe_entry(struct sock_conn *conn_entry, uint16_t key);
void sock_conn_map_destroy(struct sock_conn_map *cmap);
void sock_set_sockopts(int sock);
int fd_set_nonblock(int fd);
int sock_conn_map_init(struct sock_conn_map *map, int init_size);
struct sock_pe *sock_pe_init(struct sock_domain *domain);
void sock_pe_add_tx_ctx(struct sock_pe *pe, struct sock_tx_ctx *ctx);
@ -917,7 +947,8 @@ struct sock_rx_entry *sock_rx_new_entry(struct sock_rx_ctx *rx_ctx);
struct sock_rx_entry *sock_rx_new_buffered_entry(struct sock_rx_ctx *rx_ctx,
size_t len);
struct sock_rx_entry *sock_rx_get_entry(struct sock_rx_ctx *rx_ctx,
uint64_t addr, uint64_t tag);
uint64_t addr, uint64_t tag,
uint8_t op_type);
size_t sock_rx_avail_len(struct sock_rx_entry *rx_entry);
void sock_rx_release_entry(struct sock_rx_entry *rx_entry);

Просмотреть файл

@ -78,12 +78,10 @@ static ssize_t sock_ep_tx_atomic(struct fid_ep *ep,
sock_ep = container_of(ep, struct sock_ep, ep);
tx_ctx = sock_ep->tx_ctx;
break;
case FI_CLASS_TX_CTX:
tx_ctx = container_of(ep, struct sock_tx_ctx, fid.ctx);
sock_ep = tx_ctx->ep;
break;
default:
SOCK_LOG_ERROR("Invalid EP type\n");
return -FI_EINVAL;
@ -96,7 +94,7 @@ static ssize_t sock_ep_tx_atomic(struct fid_ep *ep,
if (sock_ep->connected) {
conn = sock_ep_lookup_conn(sock_ep);
} else {
conn = sock_av_lookup_addr(tx_ctx->av, msg->addr);
conn = sock_av_lookup_addr(sock_ep, tx_ctx->av, msg->addr);
}
if (!conn)
@ -104,7 +102,7 @@ static ssize_t sock_ep_tx_atomic(struct fid_ep *ep,
src_len = 0;
datatype_sz = fi_datatype_size(msg->datatype);
if (SOCK_INJECT_OK(flags)) {
if (flags & FI_INJECT) {
for (i=0; i< msg->iov_count; i++) {
src_len += (msg->msg_iov[i].count * datatype_sz);
}
@ -114,7 +112,7 @@ static ssize_t sock_ep_tx_atomic(struct fid_ep *ep,
total_len = msg->iov_count * sizeof(union sock_iov);
}
total_len += (sizeof(struct sock_op_send) +
total_len += (sizeof(tx_op) +
(msg->rma_iov_count * sizeof(union sock_iov)) +
(result_count * sizeof (union sock_iov)));
@ -125,7 +123,7 @@ static ssize_t sock_ep_tx_atomic(struct fid_ep *ep,
}
flags |= tx_ctx->attr.op_flags;
memset(&tx_op, 0, sizeof(struct sock_op));
memset(&tx_op, 0, sizeof(tx_op));
tx_op.op = SOCK_OP_ATOMIC;
tx_op.dest_iov_len = msg->rma_iov_count;
tx_op.atomic.op = msg->op;
@ -133,25 +131,20 @@ static ssize_t sock_ep_tx_atomic(struct fid_ep *ep,
tx_op.atomic.res_iov_len = result_count;
tx_op.atomic.cmp_iov_len = compare_count;
if (SOCK_INJECT_OK(flags))
if (flags & FI_INJECT)
tx_op.src_iov_len = src_len;
else
tx_op.src_iov_len = msg->iov_count;
sock_tx_ctx_write(tx_ctx, &tx_op, sizeof(struct sock_op));
sock_tx_ctx_write(tx_ctx, &flags, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->context, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->addr, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &conn, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->msg_iov[0].addr, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &sock_ep, sizeof(uint64_t));
sock_tx_ctx_write_op_send(tx_ctx, &tx_op, flags, (uintptr_t) msg->context,
msg->addr, (uintptr_t) msg->msg_iov[0].addr, sock_ep, conn);
if (flags & FI_REMOTE_CQ_DATA) {
sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(uint64_t));
}
src_len = 0;
if (SOCK_INJECT_OK(flags)) {
if (flags & FI_INJECT) {
for (i=0; i< msg->iov_count; i++) {
sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].addr,
msg->msg_iov[i].count * datatype_sz);
@ -159,10 +152,10 @@ static ssize_t sock_ep_tx_atomic(struct fid_ep *ep,
}
} else {
for (i = 0; i< msg->iov_count; i++) {
tx_iov.ioc.addr = (uint64_t)msg->msg_iov[i].addr;
tx_iov.ioc.addr = (uintptr_t) msg->msg_iov[i].addr;
tx_iov.ioc.count = msg->msg_iov[i].count;
tx_iov.ioc.key = (uint64_t)msg->desc[i];
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
tx_iov.ioc.key = (uintptr_t) msg->desc[i];
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
src_len += (tx_iov.ioc.count * datatype_sz);
}
}
@ -173,7 +166,7 @@ static ssize_t sock_ep_tx_atomic(struct fid_ep *ep,
tx_iov.ioc.addr = msg->rma_iov[i].addr;
tx_iov.ioc.key = msg->rma_iov[i].key;
tx_iov.ioc.count = msg->rma_iov[i].count;
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
dst_len += (tx_iov.ioc.count * datatype_sz);
}
@ -185,9 +178,9 @@ static ssize_t sock_ep_tx_atomic(struct fid_ep *ep,
dst_len = 0;
for (i = 0; i< result_count; i++) {
tx_iov.ioc.addr = (uint64_t)resultv[i].addr;
tx_iov.ioc.addr = (uintptr_t) resultv[i].addr;
tx_iov.ioc.count = resultv[i].count;
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
dst_len += (tx_iov.ioc.count * datatype_sz);
}
@ -199,9 +192,9 @@ static ssize_t sock_ep_tx_atomic(struct fid_ep *ep,
dst_len = 0;
for (i = 0; i< compare_count; i++) {
tx_iov.ioc.addr = (uint64_t)comparev[i].addr;
tx_iov.ioc.addr = (uintptr_t) comparev[i].addr;
tx_iov.ioc.count = comparev[i].count;
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
dst_len += (tx_iov.ioc.count * datatype_sz);
}
@ -311,7 +304,7 @@ static ssize_t sock_ep_atomic_inject(struct fid_ep *ep, const void *buf, size_t
msg.op = op;
msg.data = 0;
return sock_ep_atomic_writemsg(ep, &msg, FI_INJECT);
return sock_ep_atomic_writemsg(ep, &msg, FI_INJECT | SOCK_NO_COMPLETION);
}
static ssize_t sock_ep_atomic_readwritemsg(struct fid_ep *ep,

Просмотреть файл

@ -49,35 +49,30 @@
#include "sock.h"
#include "sock_util.h"
fi_addr_t sock_av_lookup_key(struct sock_av *av, int key)
{
int i, idx;
int i;
struct sock_av_addr *av_addr;
struct sock_conn_map *cmap;
for (i = 0; i < IDX_MAX_INDEX; i++) {
av_addr = idm_lookup(&av->addr_idm, i);
if (!av_addr)
continue;
idx = av_addr - &av->table[0];
if (!av->key[idx]) {
av->key[idx] = sock_conn_map_lookup(
av->cmap,
(struct sockaddr_in*)&av_addr->addr);
if (!av->key[idx]) {
continue;
}
}
if (av->key[idx] == key + 1) {
cmap = av->cmap;
for (i = 0; i < av->table_hdr->stored; i++) {
av_addr = &av->table[i];
if (sock_compare_addr(&cmap->table[key].addr,
(struct sockaddr_in*)&av_addr->addr)) {
SOCK_LOG_INFO("LOOKUP: (%d->%d)\n", key, i);
return i;
}
}
SOCK_LOG_INFO("Reverse-lookup failed: %d\n", key);
SOCK_LOG_INFO("Reverse-LOOKUP failed: %d, %s:%d\n", key,
inet_ntoa(cmap->table[key].addr.sin_addr),
ntohs(cmap->table[key].addr.sin_port));
return FI_ADDR_NOTAVAIL;
}
int sock_av_compare_addr(struct sock_av *av,
fi_addr_t addr1, fi_addr_t addr2)
{
@ -100,7 +95,8 @@ int sock_av_compare_addr(struct sock_av *av,
sizeof(struct sockaddr_in));
}
struct sock_conn *sock_av_lookup_addr(struct sock_av *av,
struct sock_conn *sock_av_lookup_addr(struct sock_ep *ep,
struct sock_av *av,
fi_addr_t addr)
{
int idx;
@ -123,7 +119,7 @@ struct sock_conn *sock_av_lookup_addr(struct sock_av *av,
idx = av_addr - &av->table[0];
if (!av->key[idx]) {
av->key[idx] = sock_conn_map_match_or_connect(
av->domain, av->cmap,
ep, av->domain, av->cmap,
(struct sockaddr_in*)&av_addr->addr);
if (!av->key[idx]) {
SOCK_LOG_ERROR("failed to match or connect to addr %"
@ -135,40 +131,33 @@ struct sock_conn *sock_av_lookup_addr(struct sock_av *av,
return sock_conn_map_lookup_key(av->cmap, av->key[idx]);
}
uint16_t sock_av_lookup_ep_id(struct sock_av *av, fi_addr_t addr)
static inline void sock_av_report_success(struct sock_av *av, void *context,
int num_done, uint64_t flags)
{
int index = ((uint64_t)addr & av->mask);
struct sock_av_addr *av_addr;
struct fi_eq_entry eq_entry;
if (index >= av->table_hdr->stored || index < 0) {
return AF_INET;
if (!av->eq)
return;
eq_entry.fid = &av->av_fid.fid;
eq_entry.context = context;
eq_entry.data = num_done;
sock_eq_report_event(av->eq, FI_AV_COMPLETE,
&eq_entry, sizeof(eq_entry), flags);
}
if (!av->cmap) {
SOCK_LOG_ERROR("EP with no AV bound\n");
return 0;
}
av_addr = idm_lookup(&av->addr_idm, index);
return av_addr->rem_ep_id;
}
static inline void sock_av_report_success(struct sock_av *av,
int *index, uint64_t flags)
static inline void sock_av_report_error(struct sock_av *av, void *context)
{
if (av->eq)
sock_eq_report_event(av->eq, FI_COMPLETION,
index, sizeof(int), flags);
}
if (!av->eq)
return;
static inline void sock_av_report_error(struct sock_av *av, void *context,
uint64_t flags, int *index)
{
if (av->eq)
sock_eq_report_error(av->eq, &av->av_fid.fid,
context, -FI_EINVAL, -FI_EINVAL, NULL);
sock_av_report_success(av, index, flags);
}
/*
 * Check whether an address may be used with this address vector.
 *
 * Returns 1 when addr->sin_family is AF_INET and 0 for any other family.
 */
static int sock_av_is_valid_address(struct sockaddr_in *addr)
{
	if (addr->sin_family != AF_INET)
		return 0;

	return 1;
}
static int sock_check_table_in(struct sock_av *_av, struct sockaddr_in *addr,
@ -179,7 +168,6 @@ static int sock_check_table_in(struct sock_av *_av, struct sockaddr_in *addr,
char sa_ip[INET_ADDRSTRLEN];
struct sock_av_addr *av_addr;
size_t new_count, table_sz;
uint16_t rem_ep_id;
if ((_av->attr.flags & FI_EVENT) && !_av->eq)
return -FI_ENOEQ;
@ -187,33 +175,34 @@ static int sock_check_table_in(struct sock_av *_av, struct sockaddr_in *addr,
if (_av->attr.flags & FI_READ) {
for (i = 0; i < count; i++) {
for (j = 0; j < _av->table_hdr->stored; j++) {
if (!sock_av_is_valid_address(&addr[i])) {
if (fi_addr)
fi_addr[i] = FI_ADDR_NOTAVAIL;
sock_av_report_error(_av, context);
continue;
}
av_addr = &_av->table[j];
rem_ep_id = ((struct sockaddr_in*)&addr[i])->sin_family;
((struct sockaddr_in*)&addr[i])->sin_family = AF_INET;
if ((memcmp(&av_addr->addr, &addr[i],
sizeof(struct sockaddr_in)) == 0) &&
av_addr->rem_ep_id == rem_ep_id) {
if (memcmp(&av_addr->addr, &addr[i],
sizeof(struct sockaddr_in)) == 0) {
SOCK_LOG_INFO("Found addr in shared av\n");
if (idm_set(&_av->addr_idm, _av->key[j], av_addr) < 0) {
if (fi_addr)
fi_addr[i] = FI_ADDR_NOTAVAIL;
sock_av_report_error(
_av, context, flags,
count > 1 ? &i : &index);
sock_av_report_error(_av, context);
continue;
}
if (fi_addr)
fi_addr[i] = (fi_addr_t)j;
sock_av_report_success(
_av, count > 1 ? &i : &index, flags);
ret++;
}
}
}
sock_av_report_success(_av, context, ret, flags);
return ret;
}
@ -242,8 +231,12 @@ static int sock_check_table_in(struct sock_av *_av, struct sockaddr_in *addr,
}
}
rem_ep_id = ((struct sockaddr_in*)&addr[i])->sin_family;
((struct sockaddr_in*)&addr[i])->sin_family = AF_INET;
if (!sock_av_is_valid_address(&addr[i])) {
if (fi_addr)
fi_addr[i] = FI_ADDR_NOTAVAIL;
sock_av_report_error(_av, context);
continue;
}
av_addr = &_av->table[_av->table_hdr->stored];
memcpy(sa_ip, inet_ntoa((&addr[i])->sin_addr), INET_ADDRSTRLEN);
@ -252,24 +245,21 @@ static int sock_check_table_in(struct sock_av *_av, struct sockaddr_in *addr,
ntohs(((struct sockaddr_in*)&addr[i])->sin_port));
memcpy(&av_addr->addr, &addr[i], sizeof(struct sockaddr_in));
av_addr->rem_ep_id = rem_ep_id;
if (idm_set(&_av->addr_idm, _av->table_hdr->stored, av_addr) < 0) {
if (fi_addr)
fi_addr[i] = FI_ADDR_NOTAVAIL;
sock_av_report_error(
_av, context, flags,
count > 1 ? &i : &index);
sock_av_report_error(_av, context);
continue;
}
if (fi_addr)
fi_addr[i] = (fi_addr_t)_av->table_hdr->stored;
sock_av_report_success(_av, count > 1 ? &i : &index, flags);
av_addr->valid = 1;
_av->table_hdr->stored++;
ret++;
}
sock_av_report_success(_av, context, ret, flags);
return ret;
}
@ -324,10 +314,8 @@ static int _sock_av_insertsvc(struct fid_av *av, const char *node,
ret = getaddrinfo(node, service, &sock_hints, &result);
if (ret) {
if (_av->eq) {
sock_eq_report_error(_av->eq, &_av->av_fid.fid,
context, -FI_EINVAL, -FI_EINVAL, NULL);
sock_eq_report_event(_av->eq, FI_COMPLETION,
&index, sizeof(int), flags);
sock_av_report_error(_av, context);
sock_av_report_success(_av, context, 0, flags);
}
return -ret;
}

Просмотреть файл

@ -260,8 +260,10 @@ static struct fi_ops_cntr sock_cntr_ops = {
static struct fi_ops sock_cntr_fi_ops = {
.size = sizeof(struct fi_ops),
.control = sock_cntr_control,
.close = sock_cntr_close,
.bind = fi_no_bind,
.control = sock_cntr_control,
.ops_open = fi_no_ops_open,
};
static int sock_cntr_verify_attr(struct fi_cntr_attr *attr)

Просмотреть файл

@ -58,21 +58,15 @@
static ssize_t sock_comm_send_socket(struct sock_conn *conn, const void *buf, size_t len)
{
ssize_t ret;
size_t rem = len;
size_t offset = 0, done_len = 0;
while(rem > 0) {
len = MIN(rem, SOCK_COMM_BUF_SZ);
ret = send(conn->sock_fd, (char *)buf + offset, len, 0);
if (ret <= 0)
break;
done_len += ret;
rem -= ret;
offset += ret;
ret = write(conn->sock_fd, buf, len);
if (ret < 0) {
SOCK_LOG_INFO("write %s\n", strerror(errno));
ret = 0;
}
SOCK_LOG_INFO("WROTE %lu on wire\n", done_len);
return done_len;
SOCK_LOG_INFO("wrote to network: %lu\n", ret);
return ret;
}
ssize_t sock_comm_flush(struct sock_conn *conn)
@ -111,9 +105,10 @@ ssize_t sock_comm_send(struct sock_conn *conn, const void *buf, size_t len)
used = rbused(&conn->outbuf);
if (used == sock_comm_flush(conn)) {
return sock_comm_send_socket(conn, buf, len);
} else
} else {
return 0;
}
}
if (rbavail(&conn->outbuf) < len) {
ret = sock_comm_flush(conn);
@ -124,7 +119,7 @@ ssize_t sock_comm_send(struct sock_conn *conn, const void *buf, size_t len)
ret = MIN(rbavail(&conn->outbuf), len);
rbwrite(&conn->outbuf, buf, ret);
rbcommit(&conn->outbuf);
SOCK_LOG_INFO("Buffered %lu\n", ret);
SOCK_LOG_INFO("buffered %lu\n", ret);
return ret;
}
@ -132,11 +127,13 @@ ssize_t sock_comm_recv_socket(struct sock_conn *conn, void *buf, size_t len)
{
ssize_t ret;
ret = recv(conn->sock_fd, buf, len, 0);
if (ret <= 0)
return 0;
ret = read(conn->sock_fd, buf, len);
if (ret < 0) {
SOCK_LOG_INFO("read %s\n", strerror(errno));
ret = 0;
}
SOCK_LOG_INFO("READ from wire: %lu\n", ret);
SOCK_LOG_INFO("read from network: %lu\n", ret);
return ret;
}
@ -144,12 +141,12 @@ ssize_t sock_comm_recv_buffer(struct sock_conn *conn)
{
int ret;
size_t endlen;
endlen = conn->inbuf.size -
(conn->inbuf.wpos & conn->inbuf.size_mask);
if ((ret = sock_comm_recv_socket(conn, (char*) conn->inbuf.buf +
endlen = conn->inbuf.size - (conn->inbuf.wpos & conn->inbuf.size_mask);
ret = sock_comm_recv_socket(conn,(char*) conn->inbuf.buf +
(conn->inbuf.wpos & conn->inbuf.size_mask),
endlen)) <= 0)
endlen);
if (ret <= 0)
return 0;
conn->inbuf.wpos += ret;
@ -157,8 +154,8 @@ ssize_t sock_comm_recv_buffer(struct sock_conn *conn)
if (ret != endlen)
return ret;
if ((ret = sock_comm_recv_socket(conn, conn->inbuf.buf,
rbavail(&conn->inbuf))) <= 0)
ret = sock_comm_recv_socket(conn, conn->inbuf.buf, rbavail(&conn->inbuf));
if (ret <= 0)
return 0;
conn->inbuf.wpos += ret;
@ -186,7 +183,7 @@ ssize_t sock_comm_recv(struct sock_conn *conn, void *buf, size_t len)
ret = 0;
sock_comm_recv_buffer(conn);
}
SOCK_LOG_INFO("Read %lu from buffer\n", ret + read_len);
SOCK_LOG_INFO("read from buffer: %lu\n", ret + read_len);
return ret + read_len;
}
@ -203,7 +200,6 @@ ssize_t sock_comm_peek(struct sock_conn *conn, void *buf, size_t len)
int sock_comm_buffer_init(struct sock_conn *conn)
{
int optval;
uint64_t flags;
socklen_t size = SOCK_COMM_BUF_SZ;
socklen_t optlen = sizeof(socklen_t);
@ -212,10 +208,7 @@ int sock_comm_buffer_init(struct sock_conn *conn)
&optval, sizeof optval))
SOCK_LOG_ERROR("setsockopt failed\n");
flags = fcntl(conn->sock_fd, F_GETFL, 0);
if (fcntl(conn->sock_fd, F_SETFL, flags | O_NONBLOCK))
SOCK_LOG_ERROR("fcntl failed\n");
fd_set_nonblock(conn->sock_fd);
rbinit(&conn->inbuf, SOCK_COMM_BUF_SZ);
rbinit(&conn->outbuf, SOCK_COMM_BUF_SZ);

Просмотреть файл

@ -53,12 +53,12 @@
#include "sock.h"
#include "sock_util.h"
static int sock_conn_map_init(struct sock_conn_map *map, int init_size)
int sock_conn_map_init(struct sock_conn_map *map, int init_size)
{
map->table = (struct sock_conn*)calloc(init_size,
sizeof(struct sock_conn));
map->table = calloc(init_size, sizeof(*map->table));
if (!map->table)
return -FI_ENOMEM;
map->used = 0;
map->size = init_size;
return 0;
@ -66,14 +66,15 @@ static int sock_conn_map_init(struct sock_conn_map *map, int init_size)
static int sock_conn_map_increase(struct sock_conn_map *map, int new_size)
{
void *_table;
if (map->used + new_size > map->size) {
void *_table = realloc(map->table, map->size * sizeof(struct
sock_conn));
_table = realloc(map->table, new_size * sizeof(*map->table));
if (!_table)
return -FI_ENOMEM;
map->size = MAX(map->size, new_size) * 2;
map->table = (struct sock_conn*) _table;
map->size = new_size;
map->table = _table;
}
return 0;
}
@ -85,8 +86,8 @@ void sock_conn_map_destroy(struct sock_conn_map *cmap)
cmap->used = cmap->size = 0;
}
struct sock_conn *sock_conn_map_lookup_key(struct sock_conn_map *conn_map,
uint16_t key)
struct sock_conn *
sock_conn_map_lookup_key(struct sock_conn_map *conn_map, uint16_t key)
{
if (key > conn_map->used) {
SOCK_LOG_ERROR("requested key is larger than conn_map size\n");
@ -97,29 +98,20 @@ struct sock_conn *sock_conn_map_lookup_key(struct sock_conn_map *conn_map,
return &conn_map->table[key - 1];
}
#define SOCK_ADDR_IN_PTR(sa)((struct sockaddr_in *)(sa))
#define SOCK_ADDR_IN_FAMILY(sa)SOCK_ADDR_IN_PTR(sa)->sin_family
#define SOCK_ADDR_IN_PORT(sa)SOCK_ADDR_IN_PTR(sa)->sin_port
#define SOCK_ADDR_IN_ADDR(sa)SOCK_ADDR_IN_PTR(sa)->sin_addr
static int sock_compare_addr(struct sockaddr_in *addr1,
int sock_compare_addr(struct sockaddr_in *addr1,
struct sockaddr_in *addr2)
{
if ((SOCK_ADDR_IN_ADDR(addr1).s_addr ==
SOCK_ADDR_IN_ADDR(addr2).s_addr) &&
(SOCK_ADDR_IN_PORT(addr1) == SOCK_ADDR_IN_PORT(addr2)))
return 1;
return 0;
return ((addr1->sin_addr.s_addr == addr2->sin_addr.s_addr) &&
(addr1->sin_port == addr2->sin_port));
}
uint16_t sock_conn_map_lookup(struct sock_conn_map *map,
struct sockaddr_in *addr)
{
int i;
struct sockaddr_in *entry;
for (i = 0; i < map->used; i++) {
entry = (struct sockaddr_in *)&(map->table[i].addr);
if (sock_compare_addr(entry, addr)) {
if (sock_compare_addr(&map->table[i].addr, addr)) {
return i + 1;
}
}
@ -128,9 +120,11 @@ uint16_t sock_conn_map_lookup(struct sock_conn_map *map,
static int sock_conn_map_insert(struct sock_conn_map *map,
struct sockaddr_in *addr,
struct sock_ep *ep,
int conn_fd)
{
int index;
if (map->size == map->used) {
if (sock_conn_map_increase(map, map->size * 2)) {
return 0;
@ -138,20 +132,49 @@ static int sock_conn_map_insert(struct sock_conn_map *map,
}
index = map->used;
memcpy(&map->table[index].addr, addr, sizeof *addr);
map->table[index].addr = *addr;
map->table[index].sock_fd = conn_fd;
map->table[index].ep = ep;
sock_comm_buffer_init(&map->table[index]);
map->used++;
return index + 1;
}
uint16_t sock_conn_map_connect(struct sock_domain *dom,
/*
 * Put descriptor fd into non-blocking mode.
 *
 * The current file status flags are fetched with F_GETFL and written back
 * with O_NONBLOCK added, so flags that are already set are preserved.
 *
 * Returns 0 on success, or a negative errno value when either fcntl()
 * call fails.
 */
int fd_set_nonblock(int fd)
{
	int flags, err;

	flags = fcntl(fd, F_GETFL, 0);
	if (flags < 0)
		goto fail;	/* never feed -1 back into F_SETFL */

	if (fcntl(fd, F_SETFL, flags | O_NONBLOCK))
		goto fail;

	return 0;

fail:
	/* Save errno before logging: the log call may overwrite it. */
	err = errno;
	SOCK_LOG_ERROR("fcntl failed\n");
	return -err;
}
/*
 * Apply the common socket options to descriptor sock: SO_REUSEADDR and
 * TCP_NODELAY are switched on, then the descriptor is made non-blocking.
 *
 * A failing setsockopt() call is logged and otherwise ignored; the result
 * of fd_set_nonblock() is not examined.
 */
void sock_set_sockopts(int sock)
{
	int enable = 1;

	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable)) != 0)
		SOCK_LOG_ERROR("setsockopt reuseaddr failed\n");

	if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &enable, sizeof(enable)) != 0)
		SOCK_LOG_ERROR("setsockopt nodelay failed\n");

	fd_set_nonblock(sock);
}
uint16_t sock_conn_map_connect(struct sock_ep *ep,
struct sock_domain *dom,
struct sock_conn_map *map,
struct sockaddr_in *addr)
{
int conn_fd, optval, ret;
char sa_ip[INET_ADDRSTRLEN];
unsigned short reply;
int conn_fd, optval = 0, ret;
char use_conn;
struct timeval tv;
socklen_t optlen;
uint64_t flags;
@ -163,21 +186,10 @@ uint16_t sock_conn_map_connect(struct sock_domain *dom,
return 0;
}
optval = 1;
if (setsockopt(conn_fd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof optval))
SOCK_LOG_ERROR("setsockopt failed\n");
sock_set_sockopts(conn_fd);
memcpy(sa_ip, inet_ntoa(addr->sin_addr), INET_ADDRSTRLEN);
SOCK_LOG_INFO("Connecting to: %s:%d\n",
sa_ip, ntohs(((struct sockaddr_in*)addr)->sin_port));
flags = fcntl(conn_fd, F_GETFL, 0);
if (fcntl(conn_fd, F_SETFL, flags | O_NONBLOCK))
SOCK_LOG_ERROR("fcntl failed\n");
fastlock_acquire(&map->lock);
memcpy(&map->curr_addr, addr, sizeof(struct sockaddr_in));
fastlock_release(&map->lock);
SOCK_LOG_INFO("Connecting to: %s:%d\n", inet_ntoa(addr->sin_addr),
ntohs(addr->sin_port));
if (connect(conn_fd, (struct sockaddr *) addr, sizeof *addr) < 0) {
if (errno == EINPROGRESS) {
@ -188,63 +200,55 @@ uint16_t sock_conn_map_connect(struct sock_domain *dom,
FD_SET(conn_fd, &fds);
if (select(conn_fd+1, NULL, &fds, NULL, &tv) > 0) {
optlen = sizeof(int);
getsockopt(conn_fd, SOL_SOCKET, SO_ERROR, &optval, &optlen);
getsockopt(conn_fd, SOL_SOCKET, SO_ERROR,
&optval, &optlen);
if (optval) {
SOCK_LOG_ERROR("failed to connect %d - %s\n", optval,
strerror(optval));
ret = 0;
SOCK_LOG_ERROR("failed to connect %d - %s\n",
optval, strerror(optval));
goto err;
}
} else {
SOCK_LOG_ERROR("Timeout or error to connect %d - %s\n", optval,
strerror(optval));
ret = 0;
SOCK_LOG_ERROR("Timeout or error to connect %d - %s\n",
optval, strerror(optval));
goto err;
}
} else {
SOCK_LOG_ERROR("Error connecting %d - %s\n", errno,
strerror(errno));
ret = 0;
goto err;
}
}
fd_set_nonblock(conn_fd);
flags = fcntl(conn_fd, F_GETFL, 0);
flags &= (~O_NONBLOCK);
if (fcntl(conn_fd, F_SETFL, flags))
SOCK_LOG_ERROR("fcntl failed\n");
ret = send(conn_fd,
&((struct sockaddr_in*)&dom->src_addr)->sin_port,
sizeof(unsigned short), 0);
if (ret != sizeof(unsigned short)) {
ret = send(conn_fd, &((struct sockaddr_in*) ep->src_addr)->sin_port,
sizeof(((struct sockaddr_in*) ep->src_addr)->sin_port), 0);
if (ret != sizeof(((struct sockaddr_in*) ep->src_addr)->sin_port)) {
SOCK_LOG_ERROR("Cannot exchange port\n");
ret = 0;
goto err;
}
ret = recv(conn_fd,
&reply, sizeof(unsigned short), 0);
if (ret != sizeof(unsigned short)) {
ret = recv(conn_fd, &use_conn, sizeof(use_conn), 0);
if (ret != sizeof(use_conn)) {
SOCK_LOG_ERROR("Cannot exchange port: %d\n", ret);
ret = 0;
goto err;
}
reply = ntohs(reply);
SOCK_LOG_INFO("Connect response: %d\n", ntohs(reply));
SOCK_LOG_INFO("Connect response: %d\n", use_conn);
if (reply == 0) {
if (use_conn) {
sock_set_sockopts(conn_fd);
fastlock_acquire(&map->lock);
ret = sock_conn_map_insert(map, addr, conn_fd);
ret = sock_conn_map_insert(map, addr, ep, conn_fd);
fastlock_release(&map->lock);
} else {
ret = 0;
close(conn_fd);
SOCK_LOG_INFO("waiting for an accept\n");
while (!ret) {
for (ret = 0; !ret; ) {
fastlock_acquire(&map->lock);
ret = sock_conn_map_lookup(map, addr);
fastlock_release(&map->lock);
@ -256,10 +260,11 @@ uint16_t sock_conn_map_connect(struct sock_domain *dom,
err:
close(conn_fd);
return ret;
return 0;
}
uint16_t sock_conn_map_match_or_connect(struct sock_domain *dom,
uint16_t sock_conn_map_match_or_connect(struct sock_ep *ep,
struct sock_domain *dom,
struct sock_conn_map *map,
struct sockaddr_in *addr)
{
@ -269,50 +274,123 @@ uint16_t sock_conn_map_match_or_connect(struct sock_domain *dom,
fastlock_release(&map->lock);
if (!index)
index = sock_conn_map_connect(dom, map, addr);
index = sock_conn_map_connect(ep, dom, map, addr);
return index;
}
static void *_sock_conn_listen(void *arg)
{
struct sock_domain *domain = (struct sock_domain*) arg;
struct sock_conn_map *map = &domain->r_cmap;
uint16_t index;
int conn_fd, ret;
char tmp, use_conn;
socklen_t addr_size;
struct sockaddr_in remote;
struct pollfd poll_fds[2];
struct sock_ep *ep = (struct sock_ep *)arg;
struct sock_conn_listener *listener = &ep->listener;
struct sock_conn_map *map = &ep->domain->r_cmap;
poll_fds[0].fd = listener->sock;
poll_fds[1].fd = listener->signal_fds[1];
poll_fds[0].events = poll_fds[1].events = POLLIN;
while (listener->do_listen) {
if (poll(poll_fds, 2, -1) > 0) {
if (poll_fds[1].revents & POLLIN) {
ret = read(listener->signal_fds[1], &tmp, 1);
if (ret != 1) {
SOCK_LOG_ERROR("Invalid signal\n");
goto err;
}
continue;
}
} else {
goto err;
}
addr_size = sizeof(remote);
conn_fd = accept(listener->sock, (struct sockaddr *) &remote, &addr_size);
SOCK_LOG_INFO("CONN: accepted conn-req: %d\n", conn_fd);
if (conn_fd < 0) {
SOCK_LOG_ERROR("failed to accept: %d\n", errno);
goto err;
}
SOCK_LOG_INFO("ACCEPT: %s, %d\n", inet_ntoa(remote.sin_addr),
ntohs(remote.sin_port));
ret = recv(conn_fd, &remote.sin_port, sizeof(remote.sin_port), 0);
if (ret != sizeof(remote.sin_port))
SOCK_LOG_ERROR("Cannot exchange port\n");
SOCK_LOG_INFO("Remote port: %d\n", ntohs(remote.sin_port));
fastlock_acquire(&map->lock);
index = sock_conn_map_lookup(map, &remote);
if (!index) {
sock_conn_map_insert(map, &remote, ep, conn_fd);
use_conn = 1;
} else {
use_conn = 0;
}
fastlock_release(&map->lock);
ret = send(conn_fd, &use_conn, sizeof(use_conn), 0);
if (ret != sizeof(use_conn))
SOCK_LOG_ERROR("Cannot exchange port\n");
if (!use_conn) {
shutdown(conn_fd, SHUT_RDWR);
close(conn_fd);
}
}
err:
close(listener->sock);
SOCK_LOG_INFO("Listener thread exited\n");
return NULL;
}
int sock_conn_listen(struct sock_ep *ep)
{
struct addrinfo *s_res = NULL, *p;
struct addrinfo hints;
int optval, flags;
int listen_fd = 0, conn_fd, ret;
struct sockaddr_in remote;
int listen_fd = 0, ret;
socklen_t addr_size;
struct pollfd poll_fds[2];
struct sockaddr_in addr;
char sa_ip[INET_ADDRSTRLEN], tmp;
unsigned short port, response;
uint16_t index;
struct sock_conn_listener *listener = &ep->listener;
struct sock_domain *domain = ep->domain;
memset(&hints, 0, sizeof(hints));
hints.ai_family = AF_INET;
hints.ai_socktype = SOCK_STREAM;
hints.ai_flags = AI_PASSIVE;
ret = getaddrinfo(NULL, domain->service, &hints, &s_res);
if (ret) {
SOCK_LOG_ERROR("no available AF_INET address, service %s, %s\n",
domain->service, gai_strerror(ret));
return NULL;
if (getnameinfo((void*)ep->src_addr, sizeof (*ep->src_addr),
NULL, 0, listener->service,
sizeof(listener->service), NI_NUMERICSERV)) {
SOCK_LOG_ERROR("could not resolve src_addr\n");
return -FI_EINVAL;
}
SOCK_LOG_INFO("Binding listener thread to port: %s\n", domain->service);
if (!sock_fabric_check_service(domain->fab, atoi(listener->service))) {
memset(listener->service, 0, NI_MAXSERV);
((struct sockaddr_in*)ep->src_addr)->sin_port = 0;
}
ret = getaddrinfo(NULL, listener->service, &hints, &s_res);
if (ret) {
SOCK_LOG_ERROR("no available AF_INET address, service %s, %s\n",
listener->service, gai_strerror(ret));
return -FI_EINVAL;
}
SOCK_LOG_INFO("Binding listener thread to port: %s\n", listener->service);
for (p = s_res; p; p = p->ai_next) {
listen_fd = socket(p->ai_family, p->ai_socktype, p->ai_protocol);
if (listen_fd >= 0) {
flags = fcntl(listen_fd, F_GETFL, 0);
if (fcntl(listen_fd, F_SETFL, flags | O_NONBLOCK))
SOCK_LOG_ERROR("fcntl failed\n");
optval = 1;
if (setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &optval,
sizeof optval))
SOCK_LOG_ERROR("setsockopt failed\n");
sock_set_sockopts(listen_fd);
if (!bind(listen_fd, s_res->ai_addr, s_res->ai_addrlen))
break;
@ -323,111 +401,39 @@ static void *_sock_conn_listen(void *arg)
freeaddrinfo(s_res);
if (listen_fd < 0) {
SOCK_LOG_ERROR("failed to listen to port: %s\n", domain->service);
SOCK_LOG_ERROR("failed to listen to port: %s\n",
listener->service);
goto err;
}
if (atoi(domain->service) == 0) {
addr_size = sizeof(struct sockaddr_in);
if (atoi(listener->service) == 0) {
addr_size = sizeof(addr);
if (getsockname(listen_fd, (struct sockaddr *) &addr, &addr_size))
goto err;
snprintf(domain->service, sizeof domain->service, "%d",
snprintf(listener->service, sizeof listener->service, "%d",
ntohs(addr.sin_port));
SOCK_LOG_INFO("Bound to port: %s\n", domain->service);
SOCK_LOG_INFO("Bound to port: %s\n", listener->service);
}
if (listen(listen_fd, 0)) {
SOCK_LOG_ERROR("failed to listen socket: %d\n", errno);
SOCK_LOG_ERROR("failed to listen socket: %s\n", strerror(errno));
goto err;
}
((struct sockaddr_in*)&(domain->src_addr))->sin_port =
htons(atoi(domain->service));
domain->listening = 1;
((struct sockaddr_in *) (ep->src_addr))->sin_port =
htons(atoi(listener->service));
listener->do_listen = 1;
listener->sock = listen_fd;
poll_fds[0].fd = listen_fd;
poll_fds[1].fd = domain->signal_fds[1];
poll_fds[0].events = poll_fds[1].events = POLLIN;
while(domain->listening) {
if (poll(poll_fds, 2, -1) > 0) {
if (poll_fds[1].revents & POLLIN) {
ret = read(domain->signal_fds[1], &tmp, 1);
if (ret != 1) {
SOCK_LOG_ERROR("Invalid signal\n");
goto err;
}
continue;
}
} else
sock_fabric_add_service(domain->fab, atoi(listener->service));
if (socketpair(AF_UNIX, SOCK_STREAM, 0, listener->signal_fds) < 0)
goto err;
addr_size = sizeof(struct sockaddr_in);
conn_fd = accept(listen_fd, (struct sockaddr *)&remote, &addr_size);
SOCK_LOG_INFO("CONN: accepted conn-req: %d\n", conn_fd);
if (conn_fd < 0) {
SOCK_LOG_ERROR("failed to accept: %d\n", errno);
goto err;
}
addr_size = sizeof(struct sockaddr_in);
if (getpeername(conn_fd, (struct sockaddr *) &remote, &addr_size)) {
SOCK_LOG_ERROR("Failed to do getpeername\n");
goto err;
}
memcpy(sa_ip, inet_ntoa(remote.sin_addr), INET_ADDRSTRLEN);
SOCK_LOG_INFO("ACCEPT: %s, %d\n", sa_ip, ntohs(remote.sin_port));
ret = recv(conn_fd, &port, sizeof(port), 0);
if (ret != sizeof(port))
SOCK_LOG_ERROR("Cannot exchange port\n");
remote.sin_port = port;
SOCK_LOG_INFO("Remote port: %d\n", ntohs(port));
fastlock_acquire(&map->lock);
index = sock_conn_map_lookup(map, &remote);
response = (index) ? 1 : 0;
if (response == 0) {
if (sock_compare_addr((struct sockaddr_in*)&map->curr_addr,
&remote)) {
ret = memcmp(&domain->src_addr, &remote,
sizeof(struct sockaddr_in));
if (ret > 0 ||
(ret == 0 && atoi(domain->service) > port)) {
response = 1;
SOCK_LOG_INFO("Rejecting accept\n");
}
}
}
fastlock_release(&map->lock);
ret = send(conn_fd, &response, sizeof(response), 0);
if (ret != sizeof(response))
SOCK_LOG_ERROR("Cannot exchange port\n");
if (!response) {
fastlock_acquire(&map->lock);
sock_conn_map_insert(map, &remote, conn_fd);
fastlock_release(&map->lock);
} else
close(conn_fd);
}
close(listen_fd);
return NULL;
fd_set_nonblock(listener->signal_fds[1]);
return pthread_create(&listener->listener_thread, 0,
_sock_conn_listen, ep);
err:
if (listen_fd > 0)
if (listen_fd >= 0)
close(listen_fd);
perror("listening thread failed");
return NULL;
}
int sock_conn_listen(struct sock_domain *domain)
{
sock_conn_map_init(&domain->r_cmap, 128); /* TODO: init cmap size */
pthread_create(&domain->listen_thread, 0, _sock_conn_listen, domain);
return 0;
return -FI_EINVAL;
}

Просмотреть файл

@ -119,10 +119,9 @@ static ssize_t _sock_cq_write(struct sock_cq *cq, fi_addr_t addr,
}
rbwrite(&cq->addr_rb, &addr, sizeof(fi_addr_t));
rbwrite(&cq->addr_rb, &addr, sizeof(addr));
rbcommit(&cq->addr_rb);
rbfdwrite(&cq->cq_rbfd, buf, len);
rbfdcommit(&cq->cq_rbfd);
ret = len;
@ -162,7 +161,7 @@ static int sock_cq_report_context(struct sock_cq *cq, fi_addr_t addr,
struct sock_pe_entry *pe_entry)
{
struct fi_cq_entry cq_entry;
cq_entry.op_context = (void*)pe_entry->context;
cq_entry.op_context = (void *) (uintptr_t) pe_entry->context;
return _sock_cq_write(cq, addr, &cq_entry, sizeof(cq_entry));
}
@ -170,7 +169,7 @@ static int sock_cq_report_msg(struct sock_cq *cq, fi_addr_t addr,
struct sock_pe_entry *pe_entry)
{
struct fi_cq_msg_entry cq_entry;
cq_entry.op_context = (void*)pe_entry->context;
cq_entry.op_context = (void *) (uintptr_t) pe_entry->context;
cq_entry.flags = pe_entry->flags;
cq_entry.len = pe_entry->data_len;
return _sock_cq_write(cq, addr, &cq_entry, sizeof(cq_entry));
@ -180,10 +179,10 @@ static int sock_cq_report_data(struct sock_cq *cq, fi_addr_t addr,
struct sock_pe_entry *pe_entry)
{
struct fi_cq_data_entry cq_entry;
cq_entry.op_context = (void*)pe_entry->context;
cq_entry.op_context = (void *) (uintptr_t) pe_entry->context;
cq_entry.flags = pe_entry->flags;
cq_entry.len = pe_entry->data_len;
cq_entry.buf = (void*)pe_entry->buf;
cq_entry.buf = (void *) (uintptr_t) pe_entry->buf;
cq_entry.data = pe_entry->data;
return _sock_cq_write(cq, addr, &cq_entry, sizeof(cq_entry));
}
@ -192,10 +191,10 @@ static int sock_cq_report_tagged(struct sock_cq *cq, fi_addr_t addr,
struct sock_pe_entry *pe_entry)
{
struct fi_cq_tagged_entry cq_entry;
cq_entry.op_context = (void*)pe_entry->context;
cq_entry.op_context = (void *) (uintptr_t) pe_entry->context;
cq_entry.flags = pe_entry->flags;
cq_entry.len = pe_entry->data_len;
cq_entry.buf = (void*)pe_entry->buf;
cq_entry.buf = (void *) (uintptr_t) pe_entry->buf;
cq_entry.data = pe_entry->data;
cq_entry.tag = pe_entry->tag;
return _sock_cq_write(cq, addr, &cq_entry, sizeof(cq_entry));
@ -236,7 +235,7 @@ static inline ssize_t sock_cq_rbuf_read(struct sock_cq *cq, void *buf,
rbfdread(&cq->cq_rbfd, buf, cq_entry_len * count);
for (i = 0; i < count; i++) {
rbread(&cq->addr_rb, &addr, sizeof(fi_addr_t));
rbread(&cq->addr_rb, &addr, sizeof(addr));
if (src_addr)
src_addr[i] = addr;
}
@ -247,7 +246,7 @@ ssize_t sock_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count,
fi_addr_t *src_addr, const void *cond, int timeout)
{
int ret = 0;
int64_t threshold;
size_t threshold;
struct sock_cq *sock_cq;
uint64_t start_ms = 0, end_ms = 0;
ssize_t cq_entry_len, avail;
@ -256,7 +255,7 @@ ssize_t sock_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count,
cq_entry_len = sock_cq->cq_entry_size;
if (sock_cq->attr.wait_cond == FI_CQ_COND_THRESHOLD) {
threshold = MIN((int64_t)cond, count);
threshold = MIN((uintptr_t) cond, count);
}else{
threshold = count;
}
@ -277,7 +276,7 @@ ssize_t sock_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count,
fastlock_release(&sock_cq->lock);
if (ret == 0 && timeout >= 0) {
if (fi_gettime_ms() >= end_ms)
return -FI_ETIMEDOUT;
return -FI_EAGAIN;
}
}while (ret == 0);
} else {
@ -289,7 +288,7 @@ ssize_t sock_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count,
src_addr, cq_entry_len);
fastlock_release(&sock_cq->lock);
}
return ret;
return (ret == 0 || ret == -FI_ETIMEDOUT) ? -FI_EAGAIN : ret;
}
ssize_t sock_cq_sread(struct fid_cq *cq, void *buf, size_t len,
@ -301,9 +300,7 @@ ssize_t sock_cq_sread(struct fid_cq *cq, void *buf, size_t len,
ssize_t sock_cq_readfrom(struct fid_cq *cq, void *buf, size_t count,
fi_addr_t *src_addr)
{
int ret;
ret = sock_cq_sreadfrom(cq, buf, count, src_addr, NULL, 0);
return (ret == -FI_ETIMEDOUT) ? 0 : ret;
return sock_cq_sreadfrom(cq, buf, count, src_addr, NULL, 0);
}
ssize_t sock_cq_read(struct fid_cq *cq, void *buf, size_t count)
@ -311,29 +308,25 @@ ssize_t sock_cq_read(struct fid_cq *cq, void *buf, size_t count)
return sock_cq_readfrom(cq, buf, count, NULL);
}
ssize_t sock_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf,
uint64_t flags)
{
ssize_t num_read;
struct sock_cq *sock_cq;
ssize_t ret;
sock_cq = container_of(cq, struct sock_cq, cq_fid);
num_read = 0;
if (sock_cq->domain->progress_mode == FI_PROGRESS_MANUAL)
sock_cq_progress(sock_cq);
fastlock_acquire(&sock_cq->lock);
while (rbused(&sock_cq->cqerr_rb) >= sizeof(struct fi_cq_err_entry)) {
rbread(&sock_cq->cqerr_rb,
(char*)buf +sizeof(struct fi_cq_err_entry) * num_read,
sizeof(struct fi_cq_err_entry));
num_read++;
if (rbused(&sock_cq->cqerr_rb) >= sizeof(struct fi_cq_err_entry)) {
rbread(&sock_cq->cqerr_rb, buf, sizeof(*buf));
ret = 1;
} else {
ret = -FI_EAGAIN;
}
fastlock_release(&sock_cq->lock);
return num_read;
return ret;
}
ssize_t sock_cq_write(struct fid_cq *cq, const void *buf, size_t len)
@ -391,6 +384,7 @@ int sock_cq_close(struct fid *fid)
}
struct fi_ops_cq sock_cq_ops = {
.size = sizeof(struct fi_ops_cq),
.read = sock_cq_read,
.readfrom = sock_cq_readfrom,
.readerr = sock_cq_readerr,
@ -437,8 +431,10 @@ static int sock_cq_control(struct fid *fid, int command, void *arg)
struct fi_ops sock_cq_fi_ops = {
.size = sizeof(struct fi_ops),
.control = sock_cq_control,
.close = sock_cq_close,
.bind = fi_no_bind,
.control = sock_cq_control,
.ops_open = fi_no_ops_open,
};
static int sock_cq_verify_attr(struct fi_cq_attr *attr)
@ -598,7 +594,7 @@ int sock_cq_report_error(struct sock_cq *cq, struct sock_pe_entry *entry,
struct fi_cq_err_entry err_entry;
fastlock_acquire(&cq->lock);
if (rbavail(&cq->cqerr_rb) < sizeof(struct fi_cq_err_entry)) {
if (rbavail(&cq->cqerr_rb) < sizeof(err_entry)) {
ret = -FI_ENOSPC;
goto out;
}
@ -611,15 +607,15 @@ int sock_cq_report_error(struct sock_cq *cq, struct sock_pe_entry *entry,
err_entry.flags = entry->flags;
err_entry.data = entry->data;
err_entry.tag = entry->tag;
err_entry.op_context = (void*)entry->context;
err_entry.op_context = (void *) (uintptr_t) entry->context;
if (entry->type == SOCK_PE_RX) {
err_entry.buf = (void*)entry->pe.rx.rx_iov[0].iov.addr;
err_entry.buf = (void *) (uintptr_t) entry->pe.rx.rx_iov[0].iov.addr;
}else {
err_entry.buf = (void*)entry->pe.tx.data.tx_iov[0].src.iov.addr;
err_entry.buf = (void *) (uintptr_t) entry->pe.tx.data.tx_iov[0].src.iov.addr;
}
rbwrite(&cq->cqerr_rb, &err_entry, sizeof(struct fi_cq_err_entry));
rbwrite(&cq->cqerr_rb, &err_entry, sizeof(err_entry));
rbcommit(&cq->cqerr_rb);
ret = 0;

Просмотреть файл

@ -61,6 +61,7 @@ struct sock_rx_ctx *sock_rx_ctx_alloc(const struct fi_rx_attr *attr, void *conte
rx_ctx->ctx.fid.fclass = FI_CLASS_RX_CTX;
rx_ctx->ctx.fid.context = context;
rx_ctx->num_left = attr->size;
rx_ctx->attr = *attr;
return rx_ctx;
}
@ -156,3 +157,40 @@ void sock_tx_ctx_abort(struct sock_tx_ctx *tx_ctx)
fastlock_release(&tx_ctx->wlock);
}
/*
 * Queue a send operation on a TX context.
 *
 * The fields are handed to sock_tx_ctx_write() one at a time, in this fixed
 * order: op, flags, context, dest_addr, buf, ep pointer, conn pointer.
 * sock_tx_ctx_read_op_send() consumes them in the same order, so the two
 * functions must be kept in step.
 *
 * Note that ep and conn are stored as pointer values (sizeof a pointer),
 * not as copies of the structures they point to.
 */
void sock_tx_ctx_write_op_send(struct sock_tx_ctx *tx_ctx,
		struct sock_op *op, uint64_t flags, uint64_t context,
		uint64_t dest_addr, uint64_t buf, struct sock_ep *ep,
		struct sock_conn *conn)
{
	/* operation header */
	sock_tx_ctx_write(tx_ctx, op, sizeof(*op));

	/* fixed-width scalar fields */
	sock_tx_ctx_write(tx_ctx, &flags, sizeof(flags));
	sock_tx_ctx_write(tx_ctx, &context, sizeof(context));
	sock_tx_ctx_write(tx_ctx, &dest_addr, sizeof(dest_addr));
	sock_tx_ctx_write(tx_ctx, &buf, sizeof(buf));

	/* owning endpoint and connection, stored by pointer value */
	sock_tx_ctx_write(tx_ctx, &ep, sizeof(ep));
	sock_tx_ctx_write(tx_ctx, &conn, sizeof(conn));
}
/*
 * Queue a tagged send operation on a TX context.
 *
 * Emits exactly what sock_tx_ctx_write_op_send() emits for the common
 * fields, followed by the 64-bit tag as the final field.
 */
void sock_tx_ctx_write_op_tsend(struct sock_tx_ctx *tx_ctx,
		struct sock_op *op, uint64_t flags, uint64_t context,
		uint64_t dest_addr, uint64_t buf, struct sock_ep *ep,
		struct sock_conn *conn, uint64_t tag)
{
	/* common send fields first ... */
	sock_tx_ctx_write_op_send(tx_ctx, op, flags, context,
				  dest_addr, buf, ep, conn);

	/* ... then the tag */
	sock_tx_ctx_write(tx_ctx, &tag, sizeof(tag));
}
/*
 * Pull one send operation back out of a TX context.
 *
 * Reads from tx_ctx->rbfd, via rbfdread(), the fields in the order
 * op, flags, context, dest_addr, buf, ep pointer, conn pointer -- the same
 * order sock_tx_ctx_write_op_send() writes them. Each output argument
 * receives the corresponding value; ep and conn receive pointer values.
 */
void sock_tx_ctx_read_op_send(struct sock_tx_ctx *tx_ctx,
		struct sock_op *op, uint64_t *flags, uint64_t *context,
		uint64_t *dest_addr, uint64_t *buf, struct sock_ep **ep,
		struct sock_conn **conn)
{
	/* operation header */
	rbfdread(&tx_ctx->rbfd, op, sizeof(*op));

	/* fixed-width scalar fields */
	rbfdread(&tx_ctx->rbfd, flags, sizeof(*flags));
	rbfdread(&tx_ctx->rbfd, context, sizeof(*context));
	rbfdread(&tx_ctx->rbfd, dest_addr, sizeof(*dest_addr));
	rbfdread(&tx_ctx->rbfd, buf, sizeof(*buf));

	/* owning endpoint and connection, stored by pointer value */
	rbfdread(&tx_ctx->rbfd, ep, sizeof(*ep));
	rbfdread(&tx_ctx->rbfd, conn, sizeof(*conn));
}

Просмотреть файл

@ -46,6 +46,7 @@ const struct fi_domain_attr sock_domain_attr = {
.threading = FI_THREAD_SAFE,
.control_progress = FI_PROGRESS_AUTO,
.data_progress = FI_PROGRESS_AUTO,
.resource_mgmt = FI_RM_ENABLED,
.mr_key_size = sizeof(uint16_t),
.cq_data_size = sizeof(uint64_t),
.ep_cnt = SOCK_EP_MAX_EP_CNT,
@ -100,6 +101,17 @@ int sock_verify_domain_attr(struct fi_domain_attr *attr)
return -FI_ENODATA;
}
switch (attr->resource_mgmt){
case FI_RM_UNSPEC:
case FI_RM_DISABLED:
case FI_RM_ENABLED:
break;
default:
SOCK_LOG_INFO("Resource mgmt not supported!\n");
return -FI_ENODATA;
}
if(attr->cq_data_size > sock_domain_attr.cq_data_size)
return -FI_ENODATA;
@ -118,27 +130,11 @@ int sock_verify_domain_attr(struct fi_domain_attr *attr)
static int sock_dom_close(struct fid *fid)
{
struct sock_domain *dom;
void *res;
int ret;
char c = 0;
dom = container_of(fid, struct sock_domain, dom_fid.fid);
if (atomic_get(&dom->ref)) {
return -FI_EBUSY;
}
dom->listening = 0;
ret = write(dom->signal_fds[0], &c, 1);
if (ret != 1) {
SOCK_LOG_ERROR("Failed to signal\n");
return -FI_EINVAL;
}
if (pthread_join(dom->listen_thread, &res)) {
SOCK_LOG_ERROR("could not join listener thread, errno = %d\n", errno);
return -FI_EBUSY;
}
if (dom->r_cmap.size)
sock_conn_map_destroy(&dom->r_cmap);
fastlock_destroy(&dom->r_cmap.lock);
@ -244,7 +240,7 @@ struct sock_mr *sock_mr_verify_key(struct sock_domain *domain, uint16_t key,
struct sock_mr *sock_mr_verify_desc(struct sock_domain *domain, void *desc,
void *buf, size_t len, uint64_t access)
{
uint64_t key = (uint64_t)desc;
uint64_t key = (uintptr_t) desc;
return sock_mr_verify_key(domain, key, buf, len, access);
}
@ -291,7 +287,7 @@ static int sock_regattr(struct fid *fid, const struct fi_mr_attr *attr,
if (idm_set(&dom->mr_idm, key, _mr) < 0)
goto err;
_mr->mr_fid.key = key;
_mr->mr_fid.mem_desc = (void *)key;
_mr->mr_fid.mem_desc = (void *) (uintptr_t) key;
fastlock_release(&dom->lock);
_mr->iov_count = attr->iov_count;
@ -421,9 +417,11 @@ static struct fi_ops_mr sock_dom_mr_ops = {
int sock_domain(struct fid_fabric *fabric, struct fi_info *info,
struct fid_domain **dom, void *context)
{
int ret, flags;
struct sock_domain *sock_domain;
struct sock_fabric *fab;
int ret;
fab = container_of(fabric, struct sock_fabric, fab_fid);
if(info && info->domain_attr){
ret = sock_verify_domain_attr(info->domain_attr);
if(ret)
@ -437,16 +435,8 @@ int sock_domain(struct fid_fabric *fabric, struct fi_info *info,
fastlock_init(&sock_domain->lock);
atomic_init(&sock_domain->ref, 0);
if(info && info->src_addr) {
if (getnameinfo(info->src_addr, info->src_addrlen, NULL, 0,
sock_domain->service, sizeof(sock_domain->service),
NI_NUMERICSERV)) {
SOCK_LOG_ERROR("could not resolve src_addr\n");
goto err;
}
if (info) {
sock_domain->info = *info;
memcpy(&sock_domain->src_addr, info->src_addr,
sizeof(struct sockaddr_in));
} else {
SOCK_LOG_ERROR("invalid fi_info\n");
goto err;
@ -470,21 +460,13 @@ int sock_domain(struct fid_fabric *fabric, struct fi_info *info,
goto err;
}
sock_domain->ep_count = AF_INET;
sock_domain->r_cmap.domain = sock_domain;
fastlock_init(&sock_domain->r_cmap.lock);
if(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_domain->signal_fds) < 0)
if (sock_conn_map_init(&sock_domain->r_cmap, 128))
goto err;
flags = fcntl(sock_domain->signal_fds[1], F_GETFL, 0);
if (fcntl(sock_domain->signal_fds[1], F_SETFL, flags | O_NONBLOCK))
SOCK_LOG_ERROR("fcntl failed\n");
sock_conn_listen(sock_domain);
while(!(volatile int)sock_domain->listening)
pthread_yield();
sock_domain->r_cmap.domain = sock_domain;
fastlock_init(&sock_domain->r_cmap.lock);
sock_domain->fab = fab;
*dom = &sock_domain->dom_fid;
return 0;

Просмотреть файл

@ -56,7 +56,7 @@ extern const struct fi_fabric_attr sock_fabric_attr;
const struct fi_tx_attr sock_stx_attr = {
.caps = SOCK_EP_RDM_CAP,
.op_flags = SOCK_DEF_OPS,
.op_flags = 0,
.msg_order = SOCK_EP_MSG_ORDER,
.inject_size = SOCK_EP_MAX_INJECT_SZ,
.size = SOCK_EP_TX_SZ,
@ -65,7 +65,7 @@ const struct fi_tx_attr sock_stx_attr = {
const struct fi_rx_attr sock_srx_attr = {
.caps = SOCK_EP_RDM_CAP,
.op_flags = SOCK_DEF_OPS,
.op_flags = 0,
.msg_order = SOCK_EP_MSG_ORDER,
.total_buffered_recv = SOCK_EP_MAX_BUFF_RECV,
.size = SOCK_EP_MAX_MSG_SZ,
@ -321,7 +321,7 @@ static int sock_ctx_control(struct fid *fid, int command, void *arg)
*(uint64_t *) arg = tx_ctx->attr.op_flags;
break;
case FI_SETOPSFLAG:
tx_ctx->attr.op_flags = (uint64_t)arg;
tx_ctx->attr.op_flags = *(uint64_t *) arg;
break;
case FI_ENABLE:
ep = container_of(fid, struct fid_ep, fid);
@ -339,7 +339,7 @@ static int sock_ctx_control(struct fid *fid, int command, void *arg)
*(uint64_t *) arg = rx_ctx->attr.op_flags;
break;
case FI_SETOPSFLAG:
rx_ctx->attr.op_flags = (uint64_t)arg;
rx_ctx->attr.op_flags = *(uint64_t *) arg;
break;
case FI_ENABLE:
ep = container_of(fid, struct fid_ep, fid);
@ -357,7 +357,7 @@ static int sock_ctx_control(struct fid *fid, int command, void *arg)
*(uint64_t *) arg = tx_ctx->attr.op_flags;
break;
case FI_SETOPSFLAG:
tx_ctx->attr.op_flags = (uint64_t)arg;
tx_ctx->attr.op_flags = *(uint64_t *) arg;
break;
default:
return -FI_ENOSYS;
@ -376,6 +376,7 @@ static struct fi_ops sock_ctx_ops = {
.close = sock_ctx_close,
.bind = sock_ctx_bind,
.control = sock_ctx_control,
.ops_open = fi_no_ops_open,
};
static int sock_ctx_getopt(fid_t fid, int level, int optname,
@ -440,7 +441,7 @@ static ssize_t sock_rx_ctx_cancel(struct sock_rx_ctx *rx_ctx, void *context)
if (rx_entry->is_busy)
continue;
if ((uint64_t)context == rx_entry->context) {
if ((uintptr_t) context == rx_entry->context) {
dlist_remove(&rx_entry->entry);
sock_rx_release_entry(rx_entry);
ret = 0;
@ -480,6 +481,57 @@ static ssize_t sock_ep_cancel(fid_t fid, void *context)
return sock_rx_ctx_cancel(rx_ctx, context);
}
/*
 * rx_size_left handler for socket endpoints and receive contexts.
 *
 * Resolves the receive context behind `ep` from its fid class:
 *   - FI_CLASS_EP: the rx_ctx owned by the enclosing sock_ep;
 *   - FI_CLASS_RX_CTX / FI_CLASS_SRX_CTX: `ep` is embedded in the
 *     sock_rx_ctx itself.
 *
 * Returns that context's num_left counter, or -FI_EINVAL (after logging)
 * for any other fid class.
 */
static ssize_t sock_rx_size_left(struct fid_ep *ep)
{
	struct sock_ep *owner;
	struct sock_rx_ctx *rx;

	if (ep->fid.fclass == FI_CLASS_EP) {
		owner = container_of(ep, struct sock_ep, ep);
		rx = owner->rx_ctx;
	} else if (ep->fid.fclass == FI_CLASS_RX_CTX ||
		   ep->fid.fclass == FI_CLASS_SRX_CTX) {
		rx = container_of(ep, struct sock_rx_ctx, ctx);
	} else {
		SOCK_LOG_ERROR("Invalid ep type\n");
		return -FI_EINVAL;
	}

	return rx->num_left;
}
/*
 * tx_size_left handler for socket endpoints and transmit contexts.
 *
 * Resolves the transmit context behind `ep` from its fid class:
 *   - FI_CLASS_EP: the tx_ctx owned by the enclosing sock_ep;
 *   - FI_CLASS_TX_CTX: `ep` is embedded in the sock_tx_ctx (fid.ctx).
 *
 * Returns rbfdavail() of the context's ring buffer divided by
 * SOCK_EP_TX_ENTRY_SZ, sampled while holding the context's wlock, or
 * -FI_EINVAL (after logging) for any other fid class.
 */
static ssize_t sock_tx_size_left(struct fid_ep *ep)
{
	struct sock_ep *owner;
	struct sock_tx_ctx *tx;
	ssize_t entries;

	if (ep->fid.fclass == FI_CLASS_EP) {
		owner = container_of(ep, struct sock_ep, ep);
		tx = owner->tx_ctx;
	} else if (ep->fid.fclass == FI_CLASS_TX_CTX) {
		tx = container_of(ep, struct sock_tx_ctx, fid.ctx);
	} else {
		SOCK_LOG_ERROR("Invalid EP type\n");
		return -FI_EINVAL;
	}

	/* take the writer lock so the sample is not torn by a concurrent write */
	fastlock_acquire(&tx->wlock);
	entries = rbfdavail(&tx->rbfd)/SOCK_EP_TX_ENTRY_SZ;
	fastlock_release(&tx->wlock);

	return entries;
}
struct fi_ops_ep sock_ctx_ep_ops = {
.size = sizeof(struct fi_ops_ep),
.cancel = sock_ep_cancel,
@ -487,8 +539,8 @@ struct fi_ops_ep sock_ctx_ep_ops = {
.setopt = sock_ctx_setopt,
.tx_ctx = fi_no_tx_ctx,
.rx_ctx = fi_no_rx_ctx,
.rx_size_left = fi_no_rx_size_left,
.tx_size_left = fi_no_tx_size_left,
.rx_size_left = sock_rx_size_left,
.tx_size_left = sock_tx_size_left,
};
static int sock_ep_close(struct fid *fid)
@ -535,18 +587,33 @@ static int sock_ep_close(struct fid *fid)
sock_ep->cm.do_listen = 0;
if (write(sock_ep->cm.signal_fds[0], &c, 1) != 1)
if (write(sock_ep->cm.signal_fds[0], &c, 1) != 1) {
SOCK_LOG_INFO("Failed to signal\n");
}
if (sock_ep->cm.listener_thread &&
pthread_join(sock_ep->cm.listener_thread, NULL)) {
SOCK_LOG_INFO("pthread join failed\n");
SOCK_LOG_ERROR("pthread join failed (%d)\n", errno);
}
close(sock_ep->cm.signal_fds[0]);
close(sock_ep->cm.signal_fds[1]);
}
sock_ep->listener.do_listen = 0;
if (write(sock_ep->listener.signal_fds[0], &c, 1) != 1) {
SOCK_LOG_INFO("Failed to signal\n");
}
if (pthread_join(sock_ep->listener.listener_thread, NULL)) {
SOCK_LOG_ERROR("pthread join failed (%d)\n", errno);
}
close(sock_ep->listener.signal_fds[0]);
close(sock_ep->listener.signal_fds[1]);
sock_fabric_remove_service(sock_ep->domain->fab,
atoi(sock_ep->listener.service));
atomic_dec(&sock_ep->domain->ref);
free(sock_ep);
return 0;
@ -802,9 +869,8 @@ static int sock_ep_control(struct fid *fid, int command, void *arg)
case FI_GETOPSFLAG:
*(uint64_t *) arg = ep->op_flags;
break;
case FI_SETOPSFLAG:
ep->op_flags = (uint64_t)arg;
ep->op_flags = *(uint64_t *) arg;
break;
case FI_ENABLE:
ep_fid = container_of(fid, struct fid_ep, fid);
@ -1026,8 +1092,8 @@ struct fi_ops_ep sock_ep_ops ={
.setopt = sock_ep_setopt,
.tx_ctx = sock_ep_tx_ctx,
.rx_ctx = sock_ep_rx_ctx,
.rx_size_left = fi_no_rx_size_left,
.tx_size_left = fi_no_tx_size_left,
.rx_size_left = sock_rx_size_left,
.tx_size_left = sock_tx_size_left,
};
static int sock_verify_tx_attr(const struct fi_tx_attr *attr)
@ -1122,51 +1188,53 @@ int sock_srx_ctx(struct fid_domain *domain,
return 0;
}
struct fi_info *sock_fi_info(enum fi_ep_type ep_type,
struct fi_info *hints, void *src_addr, void *dest_addr)
struct fi_info *sock_fi_info(enum fi_ep_type ep_type, struct fi_info *hints,
void *src_addr, void *dest_addr)
{
struct fi_info *_info = fi_allocinfo();
if (!_info)
struct fi_info *info;
info = fi_allocinfo();
if (!info)
return NULL;
_info->src_addr = calloc(1, sizeof(struct sockaddr_in));
_info->dest_addr = calloc(1, sizeof(struct sockaddr_in));
_info->mode = SOCK_MODE;
_info->addr_format = FI_SOCKADDR_IN;
_info->dest_addrlen =_info->src_addrlen = sizeof(struct sockaddr_in);
info->src_addr = calloc(1, sizeof(struct sockaddr_in));
info->mode = SOCK_MODE;
info->addr_format = FI_SOCKADDR_IN;
if (src_addr) {
memcpy(_info->src_addr, src_addr, sizeof(struct sockaddr_in));
memcpy(info->src_addr, src_addr, sizeof(struct sockaddr_in));
info->src_addrlen = sizeof(struct sockaddr_in);
}
if (dest_addr) {
memcpy(_info->dest_addr, dest_addr, sizeof(struct sockaddr_in));
info->dest_addr = calloc(1, sizeof(struct sockaddr_in));
info->dest_addrlen = sizeof(struct sockaddr_in);
memcpy(info->dest_addr, dest_addr, sizeof(struct sockaddr_in));
}
if (hints) {
if (hints->caps)
_info->caps = hints->caps;
info->caps = hints->caps;
if (hints->ep_attr)
*(_info->ep_attr) = *(hints->ep_attr);
*(info->ep_attr) = *(hints->ep_attr);
if (hints->tx_attr)
*(_info->tx_attr) = *(hints->tx_attr);
*(info->tx_attr) = *(hints->tx_attr);
if (hints->rx_attr)
*(_info->rx_attr) = *(hints->rx_attr);
*(info->rx_attr) = *(hints->rx_attr);
}
_info->ep_attr->type = ep_type;
*(_info->domain_attr) = sock_domain_attr;
*(_info->fabric_attr) = sock_fabric_attr;
info->ep_attr->type = ep_type;
*(info->domain_attr) = sock_domain_attr;
*(info->fabric_attr) = sock_fabric_attr;
_info->domain_attr->name = strdup(sock_dom_name);
_info->fabric_attr->name = strdup(sock_fab_name);
_info->fabric_attr->prov_name = strdup(sock_prov_name);
info->domain_attr->name = strdup(sock_dom_name);
info->fabric_attr->name = strdup(sock_fab_name);
info->fabric_attr->prov_name = strdup(sock_prov_name);
return _info;
return info;
}
int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info,
@ -1222,24 +1290,22 @@ int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info,
*ep = sock_ep;
fastlock_acquire(&sock_dom->lock);
sock_ep->ep_id = sock_dom->ep_count++;
fastlock_release(&sock_dom->lock);
if (info) {
sock_ep->ep_type = info->ep_attr->type;
sock_ep->info.caps = info->caps;
sock_ep->info.addr_format = FI_SOCKADDR_IN;
if (info->ep_attr) {
sock_ep->ep_type = info->ep_attr->type;
sock_ep->ep_attr.tx_ctx_cnt = info->ep_attr->tx_ctx_cnt;
sock_ep->ep_attr.rx_ctx_cnt = info->ep_attr->rx_ctx_cnt;
}
if (info->src_addr) {
sock_ep->src_addr = calloc(1, sizeof(struct sockaddr_in));
memcpy(sock_ep->src_addr, info->src_addr,
sizeof(struct sockaddr_in));
((struct sockaddr_in*)sock_ep->src_addr)->sin_port =
htons(atoi(sock_dom->service));
((struct sockaddr_in*)sock_ep->src_addr)->sin_family =
sock_ep->ep_id;
}
if (info->dest_addr) {
@ -1318,6 +1384,10 @@ int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info,
memcpy(&sock_ep->info, info, sizeof(struct fi_info));
}
sock_ep->domain = sock_dom;
if (sock_conn_listen(sock_ep))
goto err;
if (sock_ep->ep_type == FI_EP_MSG) {
dlist_init(&sock_ep->cm.msg_list);
if (socketpair(AF_UNIX, SOCK_STREAM, 0,
@ -1329,20 +1399,19 @@ int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info,
SOCK_LOG_ERROR("fcntl failed");
}
sock_ep->domain = sock_dom;
atomic_inc(&sock_dom->ref);
return 0;
err:
free(sock_ep);
return -FI_EAVAIL;
return -FI_EINVAL;
}
struct sock_conn *sock_ep_lookup_conn(struct sock_ep *ep)
{
if (!ep->key) {
ep->key = sock_conn_map_match_or_connect(
ep->domain, &ep->domain->r_cmap, ep->dest_addr);
ep, ep->domain, &ep->domain->r_cmap, ep->dest_addr);
if (!ep->key) {
SOCK_LOG_ERROR("failed to match or connect to addr\n");
errno = EINVAL;

Просмотреть файл

@ -70,7 +70,7 @@ const struct fi_ep_attr sock_dgram_ep_attr = {
const struct fi_tx_attr sock_dgram_tx_attr = {
.caps = SOCK_EP_DGRAM_CAP,
.op_flags = SOCK_DEF_OPS,
.op_flags = 0,
.msg_order = SOCK_EP_MSG_ORDER,
.inject_size = SOCK_EP_MAX_INJECT_SZ,
.size = SOCK_EP_TX_SZ,
@ -79,7 +79,7 @@ const struct fi_tx_attr sock_dgram_tx_attr = {
const struct fi_rx_attr sock_dgram_rx_attr = {
.caps = SOCK_EP_DGRAM_CAP,
.op_flags = SOCK_DEF_OPS,
.op_flags = 0,
.msg_order = SOCK_EP_MSG_ORDER,
.total_buffered_recv = SOCK_EP_MAX_BUFF_RECV,
.size = SOCK_EP_RX_SZ,
@ -94,9 +94,6 @@ static int sock_dgram_verify_rx_attr(const struct fi_rx_attr *attr)
if ((attr->caps | SOCK_EP_DGRAM_CAP) != SOCK_EP_DGRAM_CAP)
return -FI_ENODATA;
if ((attr->op_flags | SOCK_EP_DGRAM_CAP) != SOCK_EP_DGRAM_CAP)
return -FI_ENODATA;
if ((attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER)
return -FI_ENODATA;
@ -120,9 +117,6 @@ static int sock_dgram_verify_tx_attr(const struct fi_tx_attr *attr)
if ((attr->caps | SOCK_EP_DGRAM_CAP) != SOCK_EP_DGRAM_CAP)
return -FI_ENODATA;
if ((attr->op_flags | SOCK_EP_DGRAM_CAP) != SOCK_EP_DGRAM_CAP)
return -FI_ENODATA;
if ((attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER)
return -FI_ENODATA;
@ -184,230 +178,20 @@ int sock_dgram_verify_ep_attr(struct fi_ep_attr *ep_attr,
return 0;
}
static struct fi_info *sock_dgram_fi_info(struct fi_info *hints,
void *src_addr, void *dest_addr)
int sock_dgram_fi_info(void *src_addr, void *dest_addr, struct fi_info *hints,
struct fi_info **info)
{
struct fi_info *_info = sock_fi_info(FI_EP_DGRAM, hints,
src_addr, dest_addr);
if (!_info)
return NULL;
*info = sock_fi_info(FI_EP_DGRAM, hints, src_addr, dest_addr);
if (!*info)
return -FI_ENOMEM;
_info->caps = SOCK_EP_DGRAM_CAP;
*(_info->tx_attr) = sock_dgram_tx_attr;
*(_info->rx_attr) = sock_dgram_rx_attr;
*(_info->ep_attr) = sock_dgram_ep_attr;
*(*info)->tx_attr = sock_dgram_tx_attr;
*(*info)->rx_attr = sock_dgram_rx_attr;
*(*info)->ep_attr = sock_dgram_ep_attr;
_info->caps |= (_info->rx_attr->caps | _info->tx_attr->caps);
return _info;
}
int sock_dgram_getinfo(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
int ret;
int udp_sock = 0;
socklen_t len;
struct fi_info *_info;
struct addrinfo sock_hints;
struct addrinfo *result = NULL, *result_ptr = NULL;
struct sockaddr_in *src_addr = NULL, *dest_addr = NULL;
char sa_ip[INET_ADDRSTRLEN];
char hostname[HOST_NAME_MAX];
if (!info)
return -FI_EINVAL;
*info = NULL;
if (version != FI_VERSION(SOCK_MAJOR_VERSION,
SOCK_MINOR_VERSION))
return -FI_ENODATA;
if (hints) {
if ((SOCK_EP_DGRAM_CAP | hints->caps) != SOCK_EP_DGRAM_CAP) {
SOCK_LOG_INFO(
"Cannot support requested options!\n");
return -FI_ENODATA;
}
ret = sock_dgram_verify_rx_attr(hints->rx_attr);
if (ret)
return ret;
ret = sock_dgram_verify_tx_attr(hints->tx_attr);
if (ret)
return ret;
}
memset(&sock_hints, 0, sizeof(struct addrinfo));
sock_hints.ai_family = AF_INET;
sock_hints.ai_socktype = SOCK_STREAM;
if (flags & FI_NUMERICHOST)
sock_hints.ai_flags |= AI_NUMERICHOST;
if ((flags & FI_SOURCE) || !node) {
if (!node) {
gethostname(hostname, HOST_NAME_MAX);
}
ret = getaddrinfo(node ? node : hostname, service,
&sock_hints, &result_ptr);
if (ret != 0) {
ret = -FI_ENODATA;
SOCK_LOG_INFO("getaddrinfo failed!\n");
goto err;
}
result = result_ptr;
while (result) {
if (result->ai_family == AF_INET &&
result->ai_addrlen == sizeof(struct sockaddr_in))
break;
result = result->ai_next;
}
if (!result) {
SOCK_LOG_ERROR("getaddrinfo failed\n");
ret = -FI_EINVAL;
goto err;
}
src_addr = calloc(1, sizeof(struct sockaddr_in));
if (!src_addr) {
ret = -FI_ENOMEM;
goto err;
}
memcpy(src_addr, result->ai_addr, result->ai_addrlen);
freeaddrinfo(result_ptr);
result_ptr = NULL;
} else {
ret = getaddrinfo(node, service, &sock_hints, &result_ptr);
if (ret != 0) {
ret = -FI_ENODATA;
SOCK_LOG_INFO("getaddrinfo failed!\n");
goto err;
}
result = result_ptr;
while (result) {
if (result->ai_family == AF_INET &&
result->ai_addrlen == sizeof(struct sockaddr_in))
break;
result = result->ai_next;
}
if (!result) {
SOCK_LOG_ERROR("getaddrinfo failed\n");
ret = -FI_EINVAL;
goto err;
}
dest_addr = calloc(1, sizeof(struct sockaddr_in));
if (!dest_addr) {
ret = -FI_ENOMEM;
goto err;
}
memcpy(dest_addr, result->ai_addr, result->ai_addrlen);
udp_sock = socket(AF_INET, SOCK_DGRAM, 0);
if (udp_sock < 0) {
ret = -FI_ENOMEM;
goto err;
}
ret = connect(udp_sock, result->ai_addr,
result->ai_addrlen);
if ( ret != 0) {
SOCK_LOG_ERROR("Failed to create udp socket\n");
ret = -FI_ENODATA;
goto err;
}
len = sizeof(struct sockaddr_in);
src_addr = calloc(1, sizeof(struct sockaddr_in));
if (!src_addr) {
ret = -FI_ENOMEM;
goto err;
}
ret = getsockname(udp_sock, (struct sockaddr*)src_addr, &len);
if (ret != 0) {
SOCK_LOG_ERROR("getsockname failed\n");
ret = -FI_ENODATA;
goto err;
}
close(udp_sock);
udp_sock = 0;
freeaddrinfo(result_ptr);
result_ptr = NULL;
}
if (hints && hints->src_addr) {
if(hints->src_addrlen != sizeof(struct sockaddr_in)){
SOCK_LOG_ERROR("Sockets provider requires src_addrlen to be sizeof(struct sockaddr_in); got %zu\n",
hints->src_addrlen);
ret = -FI_ENODATA;
goto err;
}
memcpy(src_addr, hints->src_addr, hints->src_addrlen);
}
if (hints && hints->dest_addr) {
if (!dest_addr) {
dest_addr = calloc(1, sizeof(struct sockaddr_in));
if (!dest_addr) {
ret = -FI_ENOMEM;
goto err;
}
}
if(hints->dest_addrlen != sizeof(struct sockaddr_in)){
SOCK_LOG_ERROR("Sockets provider requires dest_addrlen to be sizeof(struct sockaddr_in); got %zu\n",
hints->dest_addrlen);
ret = -FI_ENODATA;
goto err;
}
memcpy(dest_addr, hints->dest_addr, hints->dest_addrlen);
}
if (dest_addr) {
memcpy(sa_ip, inet_ntoa(dest_addr->sin_addr), INET_ADDRSTRLEN);
SOCK_LOG_INFO("dest_addr: family: %d, IP is %s\n",
((struct sockaddr_in*)dest_addr)->sin_family, sa_ip);
}
if (src_addr) {
memcpy(sa_ip, inet_ntoa(src_addr->sin_addr), INET_ADDRSTRLEN);
SOCK_LOG_INFO("src_addr: family: %d, IP is %s\n",
((struct sockaddr_in*)src_addr)->sin_family, sa_ip);
}
_info = sock_dgram_fi_info(hints, src_addr, dest_addr);
if (!_info) {
ret = -FI_ENOMEM;
goto err;
}
if (src_addr)
free(src_addr);
if (dest_addr)
free(dest_addr);
*info = _info;
(*info)->caps = SOCK_EP_DGRAM_CAP|
(*info)->rx_attr->caps | (*info)->tx_attr->caps;
return 0;
err:
if (udp_sock > 0)
close(udp_sock);
if (src_addr)
free(src_addr);
if (dest_addr)
free(dest_addr);
if (result_ptr)
freeaddrinfo(result_ptr);
SOCK_LOG_ERROR("fi_getinfo failed\n");
return ret;
}
int sock_dgram_endpoint(struct fid_domain *domain, struct fi_info *info,

Просмотреть файл

@ -72,7 +72,7 @@ const struct fi_ep_attr sock_msg_ep_attr = {
const struct fi_tx_attr sock_msg_tx_attr = {
.caps = SOCK_EP_MSG_CAP,
.op_flags = SOCK_DEF_OPS,
.op_flags = 0,
.msg_order = SOCK_EP_MSG_ORDER,
.inject_size = SOCK_EP_MAX_INJECT_SZ,
.size = SOCK_EP_TX_SZ,
@ -81,14 +81,14 @@ const struct fi_tx_attr sock_msg_tx_attr = {
const struct fi_rx_attr sock_msg_rx_attr = {
.caps = SOCK_EP_MSG_CAP,
.op_flags = SOCK_DEF_OPS,
.op_flags = 0,
.msg_order = SOCK_EP_MSG_ORDER,
.total_buffered_recv = SOCK_EP_MAX_BUFF_RECV,
.size = SOCK_EP_RX_SZ,
.iov_limit = SOCK_EP_MAX_IOV_LIMIT,
};
static int sock_msg_verify_rx_attr(const struct fi_rx_attr *attr)
int sock_msg_verify_rx_attr(const struct fi_rx_attr *attr)
{
if (!attr)
return 0;
@ -96,9 +96,6 @@ static int sock_msg_verify_rx_attr(const struct fi_rx_attr *attr)
if ((attr->caps | SOCK_EP_MSG_CAP) != SOCK_EP_MSG_CAP)
return -FI_ENODATA;
if ((attr->op_flags | SOCK_EP_MSG_CAP) != SOCK_EP_MSG_CAP)
return -FI_ENODATA;
if ((attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER)
return -FI_ENODATA;
@ -114,7 +111,7 @@ static int sock_msg_verify_rx_attr(const struct fi_rx_attr *attr)
return 0;
}
static int sock_msg_verify_tx_attr(const struct fi_tx_attr *attr)
int sock_msg_verify_tx_attr(const struct fi_tx_attr *attr)
{
if (!attr)
return 0;
@ -122,9 +119,6 @@ static int sock_msg_verify_tx_attr(const struct fi_tx_attr *attr)
if ((attr->caps | SOCK_EP_MSG_CAP) != SOCK_EP_MSG_CAP)
return -FI_ENODATA;
if ((attr->op_flags | SOCK_EP_MSG_CAP) != SOCK_EP_MSG_CAP)
return -FI_ENODATA;
if ((attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER)
return -FI_ENODATA;
@ -186,230 +180,20 @@ int sock_msg_verify_ep_attr(struct fi_ep_attr *ep_attr,
return 0;
}
static struct fi_info *sock_msg_fi_info(struct fi_info *hints,
void *src_addr, void *dest_addr)
int sock_msg_fi_info(void *src_addr, void *dest_addr, struct fi_info *hints,
struct fi_info **info)
{
struct fi_info *_info = sock_fi_info(FI_EP_MSG, hints,
src_addr, dest_addr);
if (!_info)
return NULL;
*info = sock_fi_info(FI_EP_MSG, hints, src_addr, dest_addr);
if (!*info)
return -FI_ENOMEM;
_info->caps = SOCK_EP_MSG_CAP;
*(_info->tx_attr) = sock_msg_tx_attr;
*(_info->rx_attr) = sock_msg_rx_attr;
*(_info->ep_attr) = sock_msg_ep_attr;
*(*info)->tx_attr = sock_msg_tx_attr;
*(*info)->rx_attr = sock_msg_rx_attr;
*(*info)->ep_attr = sock_msg_ep_attr;
_info->caps |= (_info->rx_attr->caps | _info->tx_attr->caps);
return _info;
}
int sock_msg_getinfo(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
int ret;
int udp_sock = 0;
socklen_t len;
struct fi_info *_info;
struct addrinfo sock_hints;
struct addrinfo *result = NULL, *result_ptr = NULL;
struct sockaddr_in *src_addr = NULL, *dest_addr = NULL;
char sa_ip[INET_ADDRSTRLEN];
char hostname[HOST_NAME_MAX];
if (!info)
return -FI_EINVAL;
*info = NULL;
if (version != FI_VERSION(SOCK_MAJOR_VERSION,
SOCK_MINOR_VERSION))
return -FI_ENODATA;
if (hints) {
if ((SOCK_EP_MSG_CAP | hints->caps) != SOCK_EP_MSG_CAP) {
SOCK_LOG_INFO(
"Cannot support requested options!\n");
return -FI_ENODATA;
}
ret = sock_msg_verify_rx_attr(hints->rx_attr);
if (ret)
return ret;
ret = sock_msg_verify_tx_attr(hints->tx_attr);
if (ret)
return ret;
}
memset(&sock_hints, 0, sizeof(struct addrinfo));
sock_hints.ai_family = AF_INET;
sock_hints.ai_socktype = SOCK_STREAM;
if (flags & FI_NUMERICHOST)
sock_hints.ai_flags |= AI_NUMERICHOST;
if ((flags & FI_SOURCE) || !node) {
if (!node) {
gethostname(hostname, HOST_NAME_MAX);
}
ret = getaddrinfo(node ? node : hostname, service,
&sock_hints, &result_ptr);
if (ret != 0) {
ret = -FI_ENODATA;
SOCK_LOG_INFO("getaddrinfo failed!\n");
goto err;
}
result = result_ptr;
while (result) {
if (result->ai_family == AF_INET &&
result->ai_addrlen == sizeof(struct sockaddr_in))
break;
result = result->ai_next;
}
if (!result) {
SOCK_LOG_ERROR("getaddrinfo failed\n");
ret = -FI_EINVAL;
goto err;
}
src_addr = calloc(1, sizeof(struct sockaddr_in));
if (!src_addr) {
ret = -FI_ENOMEM;
goto err;
}
memcpy(src_addr, result->ai_addr, result->ai_addrlen);
freeaddrinfo(result_ptr);
result_ptr = NULL;
} else {
ret = getaddrinfo(node, service, &sock_hints, &result_ptr);
if (ret != 0) {
ret = -FI_ENODATA;
SOCK_LOG_INFO("getaddrinfo failed!\n");
goto err;
}
result = result_ptr;
while (result) {
if (result->ai_family == AF_INET &&
result->ai_addrlen == sizeof(struct sockaddr_in))
break;
result = result->ai_next;
}
if (!result) {
SOCK_LOG_ERROR("getaddrinfo failed\n");
ret = -FI_EINVAL;
goto err;
}
dest_addr = calloc(1, sizeof(struct sockaddr_in));
if (!dest_addr) {
ret = -FI_ENOMEM;
goto err;
}
memcpy(dest_addr, result->ai_addr, result->ai_addrlen);
udp_sock = socket(AF_INET, SOCK_DGRAM, 0);
if (udp_sock < 0) {
ret = -FI_ENOMEM;
goto err;
}
ret = connect(udp_sock, result->ai_addr,
result->ai_addrlen);
if ( ret != 0) {
SOCK_LOG_ERROR("Failed to create udp socket\n");
ret = -FI_ENODATA;
goto err;
}
len = sizeof(struct sockaddr_in);
src_addr = calloc(1, sizeof(struct sockaddr_in));
if (!src_addr) {
ret = -FI_ENOMEM;
goto err;
}
ret = getsockname(udp_sock, (struct sockaddr*)src_addr, &len);
if (ret != 0) {
SOCK_LOG_ERROR("getsockname failed\n");
ret = -FI_ENODATA;
goto err;
}
close(udp_sock);
udp_sock = 0;
freeaddrinfo(result_ptr);
result_ptr = NULL;
}
if (hints && hints->src_addr) {
if (hints->src_addrlen != sizeof(struct sockaddr_in)) {
SOCK_LOG_ERROR("Sockets provider requires src_addrlen to be sizeof(struct sockaddr_in); got %zu\n",
hints->src_addrlen);
ret = -FI_ENODATA;
goto err;
}
memcpy(src_addr, hints->src_addr, hints->src_addrlen);
}
if (hints && hints->dest_addr) {
if (!dest_addr) {
dest_addr = calloc(1, sizeof(struct sockaddr_in));
if (!dest_addr) {
ret = -FI_ENOMEM;
goto err;
}
}
if (hints->dest_addrlen != sizeof(struct sockaddr_in)) {
SOCK_LOG_ERROR("Sockets provider requires dest_addrlen to be sizeof(struct sockaddr_in); got %zu\n",
hints->dest_addrlen);
ret = -FI_ENODATA;
goto err;
}
memcpy(dest_addr, hints->dest_addr, hints->dest_addrlen);
}
if (dest_addr) {
memcpy(sa_ip, inet_ntoa(dest_addr->sin_addr), INET_ADDRSTRLEN);
SOCK_LOG_INFO("dest_addr: family: %d, IP is %s\n",
((struct sockaddr_in*)dest_addr)->sin_family, sa_ip);
}
if (src_addr) {
memcpy(sa_ip, inet_ntoa(src_addr->sin_addr), INET_ADDRSTRLEN);
SOCK_LOG_INFO("src_addr: family: %d, IP is %s\n",
((struct sockaddr_in*)src_addr)->sin_family, sa_ip);
}
_info = sock_msg_fi_info(hints, src_addr, dest_addr);
if (!_info) {
ret = -FI_ENOMEM;
goto err;
}
*info = _info;
if (src_addr)
free(src_addr);
if (dest_addr)
free(dest_addr);
(*info)->caps = SOCK_EP_MSG_CAP |
(*info)->rx_attr->caps | (*info)->tx_attr->caps;
return 0;
err:
if (udp_sock > 0)
close(udp_sock);
if (src_addr)
free(src_addr);
if (dest_addr)
free(dest_addr);
if (result_ptr)
freeaddrinfo(result_ptr);
SOCK_LOG_ERROR("fi_getinfo failed\n");
return ret;
}
static int sock_ep_cm_getname(fid_t fid, void *addr, size_t *addrlen)
@ -425,18 +209,15 @@ static int sock_ep_cm_getname(fid_t fid, void *addr, size_t *addrlen)
*addrlen = MIN(*addrlen, sizeof(struct sockaddr_in));
switch(fid->fclass) {
case FI_CLASS_EP:
case FI_CLASS_SEP:
sock_ep = container_of(fid, struct sock_ep, ep.fid);
memcpy(addr, sock_ep->src_addr, *addrlen);
break;
case FI_CLASS_PEP:
sock_pep = container_of(fid, struct sock_pep, pep.fid);
memcpy(addr, &sock_pep->src_addr, *addrlen);
break;
default:
SOCK_LOG_ERROR("Invalid argument\n");
return -FI_EINVAL;
@ -481,17 +262,18 @@ static int sock_ep_cm_enqueue_msg(struct sock_cm_entry *cm,
int ret = 0;
struct sock_cm_msg_list_entry *list_entry;
fastlock_acquire(&cm->lock);
list_entry = calloc(1, sizeof(struct sock_cm_msg_list_entry) + len);
if (!list_entry) {
ret = -FI_ENOMEM;
goto out;
}
list_entry = calloc(1, sizeof(*list_entry) + len);
if (!list_entry)
return -FI_ENOMEM;
list_entry->msg_len = len;
memcpy(&list_entry->msg, msg, len);
memcpy(&list_entry->addr, addr, sizeof(struct sockaddr_in));
memcpy(&list_entry->msg[0], msg, len);
memcpy(&list_entry->addr, addr, sizeof(*addr));
fastlock_acquire(&cm->lock);
dlist_insert_tail(&list_entry->entry, &cm->msg_list);
fastlock_release(&cm->lock);
ret = write(cm->signal_fds[0], &c, 1);
if (ret != 1) {
SOCK_LOG_INFO("failed to signal\n");
@ -500,91 +282,110 @@ static int sock_ep_cm_enqueue_msg(struct sock_cm_entry *cm,
ret = 0;
SOCK_LOG_INFO("Enqueued CM Msg\n");
}
out:
fastlock_release(&cm->lock);
return ret;
}
static int sock_ep_cm_send_msg(struct sock_cm_entry *cm,
const struct sockaddr_in *addr,
void *msg, size_t len)
{
int ret, retry = 0;
unsigned char response = 0;
struct sockaddr_in from_addr;
socklen_t addr_len;
int ret;
char sa_ip[INET_ADDRSTRLEN] = {0};
memcpy(sa_ip, inet_ntoa(addr->sin_addr), INET_ADDRSTRLEN);
SOCK_LOG_INFO("Sending message to %s:%d\n",
sa_ip, ntohs(addr->sin_port));
SOCK_LOG_INFO("Sending message to %s:%d\n", sa_ip, ntohs(addr->sin_port));
while (retry < SOCK_EP_MAX_RETRY && (volatile int)cm->do_listen) {
ret = sendto(cm->sock, (char *) msg, len, 0,
(struct sockaddr *) addr, sizeof *addr);
SOCK_LOG_INFO("Total Sent: %d\n", ret);
if (ret < 0)
return -1;
ret = fi_poll_fd(cm->sock, SOCK_CM_COMM_TIMEOUT);
retry++;
if (ret <= 0) {
continue;
}
addr_len = sizeof(struct sockaddr_in);
ret = recvfrom(cm->sock, &response, sizeof(response), 0,
(struct sockaddr *) &from_addr, &addr_len);
SOCK_LOG_INFO("Received ACK: %d\n", ret);
if (ret == sizeof(response))
return 0;
}
return -1;
}
static int sock_ep_cm_send_ack(struct sock_cm_entry *cm, struct sockaddr_in *addr)
{
int ack_sent = 0, retry = 0, ret;
unsigned char response = 0;
while(!ack_sent && retry < SOCK_EP_MAX_RETRY &&
(volatile int) cm->do_listen) {
ret = sendto(cm->sock, &response, sizeof(response), 0,
(struct sockaddr *) addr, sizeof *addr);
retry++;
SOCK_LOG_INFO("ack: %d\n", ret);
if (ret == sizeof(response)) {
ack_sent = 1;
break;
}
if (ret == EWOULDBLOCK || ret == EAGAIN)
usleep(SOCK_CM_COMM_TIMEOUT * 1000);
}
return ack_sent;
return (ret == len) ? 0 : -1;
}
static void sock_ep_cm_flush_msg(struct sock_cm_entry *cm)
{
struct dlist_entry *entry;
struct dlist_entry *entry, *next_entry;
struct sock_cm_msg_list_entry *msg_entry;
fastlock_acquire(&cm->lock);
while (!dlist_empty(&cm->msg_list)) {
SOCK_LOG_INFO("Flushing CM message\n");
entry = cm->msg_list.next;
for (entry = cm->msg_list.next; entry != &cm->msg_list;) {
msg_entry = container_of(entry,
struct sock_cm_msg_list_entry, entry);
next_entry = entry->next;
if (msg_entry->timestamp_ms != 0 &&
fi_gettime_ms() - msg_entry->timestamp_ms < SOCK_CM_COMM_TIMEOUT) {
entry = next_entry;
continue;
}
msg_entry->timestamp_ms = fi_gettime_ms();
msg_entry->retry++;
if (msg_entry->retry > SOCK_EP_MAX_RETRY) {
dlist_remove(entry);
free(msg_entry);
entry = next_entry;
continue;
}
if (sock_ep_cm_send_msg(cm, &msg_entry->addr,
&msg_entry->msg, msg_entry->msg_len))
SOCK_LOG_INFO("Failed to send out cm message\n");
entry = next_entry;
}
fastlock_release(&cm->lock);
}
static int sock_ep_cm_send_ack(struct sock_cm_entry *cm, struct sockaddr_in *addr,
uint64_t msg_id)
{
int ret;
struct sock_conn_response conn_response;
memset(&conn_response, 0, sizeof(conn_response));
conn_response.hdr.type = SOCK_CONN_ACK;
conn_response.hdr.msg_id = msg_id;
ret = sendto(cm->sock, &conn_response, sizeof conn_response, 0,
(struct sockaddr *) addr, sizeof *addr);
SOCK_LOG_INFO("Total Sent: %d\n", ret);
sock_ep_cm_flush_msg(cm);
return (ret == sizeof conn_response) ? 0 : -1;
}
static void sock_ep_cm_handle_ack(struct sock_cm_entry *cm,
struct sock_conn_hdr *hdr)
{
struct sock_conn_hdr *msg_hdr;
struct dlist_entry *entry;
struct sock_cm_msg_list_entry *msg_entry;
struct fi_eq_cm_entry cm_entry;
struct sock_ep *sock_ep;
sock_ep = container_of(cm, struct sock_ep, cm);
fastlock_acquire(&cm->lock);
for (entry = cm->msg_list.next; entry != &cm->msg_list;) {
msg_entry = container_of(entry, struct sock_cm_msg_list_entry,
entry);
msg_hdr = (struct sock_conn_hdr*)msg_entry->msg;
if (msg_hdr->msg_id == hdr->msg_id) {
if (msg_hdr->type == SOCK_CONN_SHUTDOWN) {
memset(&cm_entry, 0, sizeof cm_entry);
cm_entry.fid = &sock_ep->ep.fid;
if (sock_ep->cm.shutdown_received)
break;
if (sock_eq_report_event(sock_ep->eq, FI_SHUTDOWN, &cm_entry,
sizeof(cm_entry), 0))
SOCK_LOG_ERROR("Error in writing to EQ\n");
}
dlist_remove(entry);
free(msg_entry);
break;
}
entry = entry->next;
}
fastlock_release(&cm->lock);
}
@ -593,16 +394,14 @@ static void sock_ep_cm_flush_msg(struct sock_cm_entry *cm)
static void *sock_msg_ep_listener_thread(void *data)
{
struct pollfd poll_fds[2];
struct sock_ep *ep = data;
struct sock_ep *ep = (struct sock_ep*)data;
struct sock_conn_response *conn_response;
struct fi_eq_cm_entry *cm_entry;
struct fi_eq_err_entry *cm_err_entry;
struct sockaddr_in from_addr;
socklen_t addr_len;
int ret, user_data_sz, entry_sz;
struct fid_ep *fid_ep;
struct sock_ep *sock_ep;
int ret, user_data_sz, entry_sz, timeout;
char tmp = 0;
ep->cm.sock = sock_ep_cm_create_socket();
@ -625,13 +424,13 @@ static void *sock_msg_ep_listener_thread (void *data)
return NULL;
}
ep->cm.do_listen = 1;
poll_fds[0].fd = ep->cm.sock;
poll_fds[1].fd = ep->cm.signal_fds[1];
poll_fds[0].events = poll_fds[1].events = POLLIN;
while((volatile int)ep->cm.do_listen) {
if (poll(poll_fds, 2, -1) > 0) {
while (*((volatile int*) &ep->cm.do_listen)) {
timeout = dlist_empty(&ep->cm.msg_list) ? -1 : SOCK_CM_COMM_TIMEOUT;
if ((ret = poll(poll_fds, 2, timeout)) > 0) {
if (poll_fds[1].revents & POLLIN) {
ret = read(ep->cm.signal_fds[1], &tmp, 1);
if (ret != 1) {
@ -641,11 +440,16 @@ static void *sock_msg_ep_listener_thread (void *data)
sock_ep_cm_flush_msg(&ep->cm);
continue;
}
} else {
if (ret == 0) {
sock_ep_cm_flush_msg(&ep->cm);
continue;
} else {
break;
}
}
addr_len = sizeof(struct sockaddr_in);
addr_len = sizeof(from_addr);
ret = recvfrom(ep->cm.sock, (char*) conn_response,
sizeof(*conn_response) + SOCK_EP_MAX_CM_DATA_SZ,
0, (struct sockaddr *) &from_addr, &addr_len);
@ -654,35 +458,36 @@ static void *sock_msg_ep_listener_thread (void *data)
SOCK_LOG_INFO("Total received: %d\n", ret);
if (ret < sizeof(*conn_response) ||
!sock_ep_cm_send_ack(&ep->cm, &from_addr))
if (ret < sizeof(*conn_response))
continue;
user_data_sz = ret - sizeof(*conn_response);
if (conn_response->hdr.type != SOCK_CONN_ACK)
sock_ep_cm_send_ack(&ep->cm, &from_addr, conn_response->hdr.msg_id);
user_data_sz = ret - sizeof(*conn_response);
switch (conn_response->hdr.type) {
case SOCK_CONN_ACK:
SOCK_LOG_INFO("Received SOCK_CONN_ACK\n");
sock_ep_cm_handle_ack(&ep->cm, &conn_response->hdr);
break;
case SOCK_CONN_ACCEPT:
SOCK_LOG_INFO("Received SOCK_CONN_ACCEPT\n");
entry_sz = sizeof(*cm_entry) + user_data_sz;
memset(cm_entry, 0, sizeof *cm_entry);
cm_entry->fid = conn_response->hdr.c_fid;
cm_entry->fid = &ep->ep.fid;
memcpy(&ep->cm_addr, &from_addr, sizeof(from_addr));
memcpy(&cm_entry->data, &conn_response->user_data,
user_data_sz);
fid_ep = container_of(conn_response->hdr.c_fid,
struct fid_ep, fid);
sock_ep = container_of(fid_ep, struct sock_ep, ep);
if (sock_ep->is_disabled ||
sock_ep->cm.shutdown_received)
if (ep->is_disabled || ep->cm.shutdown_received)
break;
sock_ep->peer_fid = conn_response->hdr.s_fid;
sock_ep->connected = 1;
((struct sockaddr_in*)sock_ep->dest_addr)->sin_port =
ep->connected = 1;
((struct sockaddr_in*) ep->dest_addr)->sin_port =
conn_response->hdr.s_port;
sock_ep_enable(&ep->ep);
@ -690,16 +495,10 @@ static void *sock_msg_ep_listener_thread (void *data)
entry_sz, 0))
SOCK_LOG_ERROR("Error in writing to EQ\n");
break;
case SOCK_CONN_REJECT:
SOCK_LOG_INFO("Received SOCK_CONN_REJECT\n");
fid_ep = container_of(conn_response->hdr.c_fid,
struct fid_ep, fid);
sock_ep = container_of(fid_ep, struct sock_ep, ep);
if (sock_ep->is_disabled ||
sock_ep->cm.shutdown_received)
if (ep->is_disabled || ep->cm.shutdown_received)
break;
cm_err_entry = calloc(1, sizeof(*cm_err_entry) + user_data_sz);
@ -709,14 +508,14 @@ static void *sock_msg_ep_listener_thread (void *data)
}
memset(cm_err_entry, 0, sizeof(*cm_err_entry) + user_data_sz);
cm_err_entry->fid = conn_response->hdr.c_fid;
cm_err_entry->fid = &ep->ep.fid;
cm_err_entry->err = -FI_ECONNREFUSED;
if (user_data_sz > 0)
memcpy(cm_err_entry->err_data,
&conn_response->user_data, user_data_sz);
if (sock_eq_report_event(sock_ep->eq, FI_ECONNREFUSED,
if (sock_eq_report_event(ep->eq, FI_ECONNREFUSED,
cm_err_entry,
sizeof(*cm_err_entry) +
user_data_sz, 0))
@ -727,27 +526,21 @@ static void *sock_msg_ep_listener_thread (void *data)
case SOCK_CONN_SHUTDOWN:
SOCK_LOG_INFO("Received SOCK_CONN_SHUTDOWN\n");
entry_sz = sizeof(*cm_entry) + user_data_sz;
entry_sz = sizeof(*cm_entry);
memset(cm_entry, 0, sizeof *cm_entry);
cm_entry->fid = conn_response->hdr.c_fid;
cm_entry->fid = &ep->ep.fid;
memcpy(&cm_entry->data, &conn_response->user_data,
user_data_sz);
fid_ep = container_of(conn_response->hdr.c_fid,
struct fid_ep, fid);
sock_ep = container_of(fid_ep, struct sock_ep, ep);
if (sock_ep->cm.shutdown_received)
if (ep->cm.shutdown_received)
break;
sock_ep->cm.shutdown_received = 1;
ep->cm.shutdown_received = 1;
if (sock_eq_report_event(ep->eq, FI_SHUTDOWN, cm_entry,
entry_sz, 0))
SOCK_LOG_ERROR("Error in writing to EQ\n");
goto out;
default:
SOCK_LOG_ERROR("Invalid event\n");
SOCK_LOG_ERROR("Invalid event: %d\n", conn_response->hdr.type);
break;
}
}
@ -773,28 +566,23 @@ static int sock_ep_cm_connect(struct fid_ep *ep, const void *addr,
if (!_eq || paramlen > SOCK_EP_MAX_CM_DATA_SZ)
return -FI_EINVAL;
req = (struct sock_conn_req*)calloc(1, sizeof(*req) + paramlen);
req = calloc(1, sizeof(*req) + paramlen);
if (!req)
return -FI_ENOMEM;
_ep->rem_ep_id = ((struct sockaddr *)addr)->sa_family;
((struct sockaddr_in*)_ep->src_addr)->sin_port =
htons(atoi(_ep->domain->service));
((struct sockaddr *) addr)->sa_family = AF_INET;
req->hdr.type = SOCK_CONN_REQ;
req->ep_id = _ep->ep_id;
req->hdr.c_fid = &ep->fid;
req->hdr.s_fid = 0;
req->hdr.msg_id = _ep->cm.next_msg_id++;
req->info = _ep->info;
memcpy(&req->src_addr, _ep->src_addr, sizeof(struct sockaddr_in));
memcpy(&req->dest_addr, _ep->info.dest_addr, sizeof(struct sockaddr_in));
memcpy(&req->src_addr, _ep->src_addr, sizeof(req->src_addr));
memcpy(&req->dest_addr, _ep->info.dest_addr, sizeof(req->dest_addr));
req->tx_attr = *_ep->info.tx_attr;
req->rx_attr = *_ep->info.rx_attr;
req->ep_attr = *_ep->info.ep_attr;
req->domain_attr = *_ep->info.domain_attr;
req->fabric_attr = *_ep->info.fabric_attr;
if (param && paramlen)
memcpy(&req->user_data, param, paramlen);
@ -831,8 +619,7 @@ static int sock_ep_cm_accept(struct fid_ep *ep, const void *param, size_t paraml
if (_ep->is_disabled || _ep->cm.shutdown_received)
return -FI_EINVAL;
response = (struct sock_conn_response*)calloc(1,
sizeof(*response) + paramlen);
response = calloc(1, sizeof(*response) + paramlen);
if (!response)
return -FI_ENOMEM;
@ -843,18 +630,16 @@ static int sock_ep_cm_accept(struct fid_ep *ep, const void *param, size_t paraml
return -FI_EINVAL;
}
memcpy(&response->hdr, &req->hdr, sizeof(struct sock_conn_hdr));
memcpy(&response->hdr, &req->hdr, sizeof(response->hdr));
if (param && paramlen)
memcpy(&response->user_data, param, paramlen);
addr = &req->from_addr;
memcpy(&_ep->cm_addr, addr, sizeof(struct sockaddr_in));
_ep->peer_fid = req->hdr.c_fid;
memcpy(&_ep->cm_addr, addr, sizeof(*addr));
_ep->rem_ep_id = req->ep_id;
response->hdr.type = SOCK_CONN_ACCEPT;
response->hdr.s_fid = &ep->fid;
response->hdr.s_port = htons(atoi(_ep->domain->service));
req->hdr.msg_id = _ep->cm.next_msg_id++;
response->hdr.s_port = htons(atoi(_ep->listener.service));
if (sock_ep_cm_enqueue_msg(&_ep->cm, addr, response,
sizeof (*response) + paramlen)) {
@ -883,9 +668,8 @@ int sock_ep_cm_shutdown(struct fid_ep *ep, uint64_t flags)
_ep = container_of(ep, struct sock_ep, ep);
memset(&response, 0, sizeof(response));
response.hdr.c_fid = _ep->peer_fid;
response.hdr.s_fid = &ep->fid;
response.hdr.type = SOCK_CONN_SHUTDOWN;
response.hdr.msg_id = _ep->cm.next_msg_id++;
if (sock_ep_cm_enqueue_msg(&_ep->cm, &_ep->cm_addr, &response,
sizeof response)) {
@ -960,8 +744,9 @@ int sock_msg_ep(struct fid_domain *domain, struct fi_info *info,
if (ret)
return ret;
endpoint->cm.do_listen = 1;
if (pthread_create(&endpoint->cm.listener_thread, NULL,
sock_msg_ep_listener_thread, (void *)endpoint)) {
sock_msg_ep_listener_thread, endpoint)) {
SOCK_LOG_ERROR("Couldn't create listener thread\n");
return -FI_EINVAL;
}
@ -1047,15 +832,12 @@ static void *sock_pep_listener_thread (void *data)
{
struct sock_pep *pep = (struct sock_pep *)data;
struct sock_conn_req *conn_req = NULL;
struct sock_conn_response *conn_response = NULL;
struct fi_eq_cm_entry *cm_entry;
struct sockaddr_in from_addr;
struct pollfd poll_fds[2];
struct fid_ep *fid_ep;
struct sock_ep *sock_ep;
socklen_t addr_len;
int ret, user_data_sz, entry_sz;
int ret = 0, user_data_sz, entry_sz, timeout;
char tmp = 0;
SOCK_LOG_INFO("Starting listener thread for PEP: %p\n", pep);
@ -1068,8 +850,9 @@ static void *sock_pep_listener_thread (void *data)
poll_fds[0].fd = pep->cm.sock;
poll_fds[1].fd = pep->cm.signal_fds[1];
poll_fds[0].events = poll_fds[1].events = POLLIN;
while((volatile int)pep->cm.do_listen) {
if (poll(poll_fds, 2, -1) > 0) {
while(*((volatile int*)&pep->cm.do_listen)) {
timeout = dlist_empty(&pep->cm.msg_list) ? -1 : SOCK_CM_COMM_TIMEOUT;
if (poll(poll_fds, 2, timeout) > 0) {
if (poll_fds[1].revents & POLLIN) {
ret = read(pep->cm.signal_fds[1], &tmp, 1);
if (ret != 1)
@ -1077,10 +860,14 @@ static void *sock_pep_listener_thread (void *data)
sock_ep_cm_flush_msg(&pep->cm);
continue;
}
} else {
if (ret == 0) {
sock_ep_cm_flush_msg(&pep->cm);
continue;
} else {
break;
}
}
if (conn_req == NULL) {
conn_req = calloc(1, sizeof(*conn_req) + SOCK_EP_MAX_CM_DATA_SZ);
@ -1094,26 +881,35 @@ static void *sock_pep_listener_thread (void *data)
ret = recvfrom(pep->cm.sock, (char*)conn_req,
sizeof(*conn_req) + SOCK_EP_MAX_CM_DATA_SZ, 0,
(struct sockaddr *) &from_addr, &addr_len);
SOCK_LOG_INFO("Total received: %d\n", ret);
if (ret <= 0)
continue;
memcpy(&conn_req->from_addr, &from_addr, sizeof(struct sockaddr_in));
SOCK_LOG_INFO("CM msg received: %d\n", ret);
memset(cm_entry, 0, sizeof *cm_entry);
if (conn_req->hdr.type != SOCK_CONN_ACK)
sock_ep_cm_send_ack(&pep->cm, &from_addr, conn_req->hdr.msg_id);
switch (conn_req->hdr.type) {
case SOCK_CONN_ACK:
SOCK_LOG_INFO("Received SOCK_CONN_ACK\n");
sock_ep_cm_handle_ack(&pep->cm, &conn_req->hdr);
break;
case SOCK_CONN_REQ:
SOCK_LOG_INFO("Received SOCK_CONN_REQ\n");
user_data_sz = ret - sizeof(*conn_req);
entry_sz = sizeof(*cm_entry) + user_data_sz;
if (ret < sizeof(*conn_req) ||
!sock_ep_cm_send_ack(&pep->cm, &from_addr)) {
if (ret < sizeof(*conn_req)) {
SOCK_LOG_ERROR("Invalid connection request\n");
break;
}
cm_entry->fid = &pep->pep.fid;
cm_entry->info = sock_ep_msg_process_info(conn_req);
cm_entry->info->connreq = (fi_connreq_t) conn_req;
@ -1128,24 +924,17 @@ static void *sock_pep_listener_thread (void *data)
case SOCK_CONN_SHUTDOWN:
SOCK_LOG_INFO("Received SOCK_CONN_SHUTDOWN\n");
conn_response = (struct sock_conn_response*)conn_req;
entry_sz = sizeof(*cm_entry);
cm_entry->fid = conn_response->hdr.c_fid;
fid_ep = container_of(conn_response->hdr.c_fid,
struct fid_ep, fid);
sock_ep = container_of(fid_ep, struct sock_ep, ep);
if (sock_ep->cm.shutdown_received)
break;
cm_entry->fid = &pep->pep.fid;
sock_ep->cm.shutdown_received = 1;
if (sock_eq_report_event(sock_ep->eq, FI_SHUTDOWN, cm_entry,
if (sock_eq_report_event(pep->eq, FI_SHUTDOWN, cm_entry,
entry_sz, 0))
SOCK_LOG_ERROR("Error in writing to EQ\n");
break;
default:
SOCK_LOG_ERROR("Invalid event\n");
SOCK_LOG_ERROR("Invalid event: %d\n", conn_req->hdr.type);
goto out;
}
}
@ -1261,7 +1050,7 @@ static int sock_pep_reject(struct fid_pep *pep, fi_connreq_t connreq,
addr = &req->from_addr;
response->hdr.type = SOCK_CONN_REJECT;
response->hdr.s_fid = NULL;
req->hdr.msg_id = _pep->cm.next_msg_id++;
if (sock_ep_cm_enqueue_msg(&_pep->cm, addr, req,
sizeof(struct sock_conn_response))) {
@ -1304,44 +1093,38 @@ int sock_msg_sep(struct fid_domain *domain, struct fi_info *info,
int sock_msg_passive_ep(struct fid_fabric *fabric, struct fi_info *info,
struct fid_pep **pep, void *context)
{
int ret = -FI_EINVAL, flags;
int ret = 0;
struct sock_pep *_pep;
char hostname[HOST_NAME_MAX];
struct addrinfo sock_hints;
struct addrinfo *result = NULL;
struct addrinfo hints, *result;
if (info) {
ret = sock_verify_info(info);
if (ret) {
SOCK_LOG_INFO("Cannot support requested options!\n");
return -FI_EINVAL;
return ret;
}
}
_pep = (struct sock_pep*)calloc(1, sizeof(*_pep));
_pep = calloc(1, sizeof(*_pep));
if (!_pep)
return -FI_ENOMEM;
if (info) {
if (info->src_addr) {
memcpy(&_pep->src_addr, info->src_addr,
sizeof(struct sockaddr_in));
memcpy(&_pep->src_addr, info->src_addr, info->src_addrlen);
} else {
gethostname(hostname, HOST_NAME_MAX);
memset(&hints, 0, sizeof(hints));
hints.ai_family = AF_INET;
hints.ai_socktype = SOCK_STREAM;
memset(&sock_hints, 0, sizeof(struct addrinfo));
sock_hints.ai_family = AF_INET;
sock_hints.ai_socktype = SOCK_STREAM;
ret = getaddrinfo(hostname, NULL, &sock_hints, &result);
if (ret != 0) {
ret = FI_EINVAL;
ret = getaddrinfo("localhost", NULL, &hints, &result);
if (ret) {
ret = -FI_EINVAL;
SOCK_LOG_INFO("getaddrinfo failed!\n");
goto err;
}
memcpy(&_pep->src_addr, result->ai_addr, result->ai_addrlen);
freeaddrinfo(result);
result = NULL;
}
_pep->info = *info;
} else {
@ -1349,12 +1132,13 @@ int sock_msg_passive_ep(struct fid_fabric *fabric, struct fi_info *info,
goto err;
}
if(socketpair(AF_UNIX, SOCK_STREAM, 0, _pep->cm.signal_fds) < 0)
ret = socketpair(AF_UNIX, SOCK_STREAM, 0, _pep->cm.signal_fds);
if (ret) {
ret = -errno;
goto err;
}
flags = fcntl(_pep->cm.signal_fds[1], F_GETFL, 0);
if (fcntl(_pep->cm.signal_fds[1], F_SETFL, flags | O_NONBLOCK))
SOCK_LOG_ERROR("fcntl failed");
fd_set_nonblock(_pep->cm.signal_fds[1]);
dlist_init(&_pep->cm.msg_list);
_pep->pep.fid.fclass = FI_CLASS_PEP;
@ -1367,8 +1151,6 @@ int sock_msg_passive_ep(struct fid_fabric *fabric, struct fi_info *info,
*pep = &_pep->pep;
return 0;
err:
if (result)
freeaddrinfo(result);
free(_pep);
return ret;
}

Просмотреть файл

@ -71,7 +71,7 @@ const struct fi_ep_attr sock_rdm_ep_attr = {
const struct fi_tx_attr sock_rdm_tx_attr = {
.caps = SOCK_EP_RDM_CAP,
.op_flags = SOCK_DEF_OPS,
.op_flags = 0,
.msg_order = SOCK_EP_MSG_ORDER,
.inject_size = SOCK_EP_MAX_INJECT_SZ,
.size = SOCK_EP_TX_SZ,
@ -80,7 +80,7 @@ const struct fi_tx_attr sock_rdm_tx_attr = {
const struct fi_rx_attr sock_rdm_rx_attr = {
.caps = SOCK_EP_RDM_CAP,
.op_flags = SOCK_DEF_OPS,
.op_flags = 0,
.msg_order = SOCK_EP_MSG_ORDER,
.total_buffered_recv = SOCK_EP_MAX_BUFF_RECV,
.size = SOCK_EP_RX_SZ,
@ -97,11 +97,6 @@ static int sock_rdm_verify_rx_attr(const struct fi_rx_attr *attr)
return -FI_ENODATA;
}
if ((attr->op_flags | SOCK_EP_RDM_CAP) != SOCK_EP_RDM_CAP) {
SOCK_LOG_INFO("Unsupported rx op_flags\n");
return -FI_ENODATA;
}
if ((attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER) {
SOCK_LOG_INFO("Unsuported rx message order\n");
return -FI_ENODATA;
@ -135,11 +130,6 @@ static int sock_rdm_verify_tx_attr(const struct fi_tx_attr *attr)
return -FI_ENODATA;
}
if ((attr->op_flags | SOCK_EP_RDM_CAP) != SOCK_EP_RDM_CAP) {
SOCK_LOG_INFO("Unsupported rx op_flags\n");
return -FI_ENODATA;
}
if ((attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER) {
SOCK_LOG_INFO("Unsupported tx message order\n");
return -FI_ENODATA;
@ -227,232 +217,20 @@ int sock_rdm_verify_ep_attr(struct fi_ep_attr *ep_attr,
return 0;
}
static struct fi_info *sock_rdm_fi_info(struct fi_info *hints,
void *src_addr, void *dest_addr)
int sock_rdm_fi_info(void *src_addr, void *dest_addr, struct fi_info *hints,
struct fi_info **info)
{
struct fi_info *_info = sock_fi_info(FI_EP_RDM, hints,
src_addr, dest_addr);
if (!_info)
return NULL;
*info = sock_fi_info(FI_EP_RDM, hints, src_addr, dest_addr);
if (!*info)
return -FI_ENOMEM;
_info->caps = SOCK_EP_RDM_CAP;
*(_info->tx_attr) = sock_rdm_tx_attr;
*(_info->rx_attr) = sock_rdm_rx_attr;
*(_info->ep_attr) = sock_rdm_ep_attr;
*(*info)->tx_attr = sock_rdm_tx_attr;
*(*info)->rx_attr = sock_rdm_rx_attr;
*(*info)->ep_attr = sock_rdm_ep_attr;
_info->caps |= (_info->rx_attr->caps | _info->tx_attr->caps);
return _info;
}
int sock_rdm_getinfo(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
int ret;
int udp_sock = 0;
socklen_t len;
struct fi_info *_info;
struct addrinfo sock_hints;
struct addrinfo *result = NULL, *result_ptr = NULL;
struct sockaddr_in *src_addr = NULL, *dest_addr = NULL;
char sa_ip[INET_ADDRSTRLEN];
char hostname[HOST_NAME_MAX];
if (!info)
return -FI_EINVAL;
*info = NULL;
if (version != FI_VERSION(SOCK_MAJOR_VERSION,
SOCK_MINOR_VERSION)) {
SOCK_LOG_INFO("Unsupported version\n");
return -FI_ENODATA;
}
if (hints) {
if ((SOCK_EP_RDM_CAP | hints->caps) != SOCK_EP_RDM_CAP) {
SOCK_LOG_INFO("Unsupported capabilities\n");
return -FI_ENODATA;
}
ret = sock_rdm_verify_rx_attr(hints->rx_attr);
if (ret)
return ret;
ret = sock_rdm_verify_tx_attr(hints->tx_attr);
if (ret)
return ret;
}
memset(&sock_hints, 0, sizeof(struct addrinfo));
sock_hints.ai_family = AF_INET;
sock_hints.ai_socktype = SOCK_STREAM;
if (flags & FI_NUMERICHOST)
sock_hints.ai_flags |= AI_NUMERICHOST;
if ((flags & FI_SOURCE) || !node) {
if (!node) {
gethostname(hostname, HOST_NAME_MAX);
}
ret = getaddrinfo(node ? node : hostname, service,
&sock_hints, &result_ptr);
if (ret != 0) {
ret = -FI_ENODATA;
SOCK_LOG_INFO("getaddrinfo failed!\n");
goto err;
}
result = result_ptr;
while (result) {
if (result->ai_family == AF_INET &&
result->ai_addrlen == sizeof(struct sockaddr_in))
break;
result = result->ai_next;
}
if (!result) {
SOCK_LOG_ERROR("getaddrinfo failed\n");
ret = -FI_EINVAL;
goto err;
}
src_addr = calloc(1, sizeof(struct sockaddr_in));
if (!src_addr) {
ret = -FI_ENOMEM;
goto err;
}
memcpy(src_addr, result->ai_addr, result->ai_addrlen);
freeaddrinfo(result_ptr);
result_ptr = NULL;
} else {
ret = getaddrinfo(node, service, &sock_hints, &result_ptr);
if (ret != 0) {
ret = -FI_ENODATA;
SOCK_LOG_INFO("getaddrinfo failed!\n");
goto err;
}
result = result_ptr;
while (result) {
if (result->ai_family == AF_INET &&
result->ai_addrlen == sizeof(struct sockaddr_in))
break;
result = result->ai_next;
}
if (!result) {
SOCK_LOG_ERROR("getaddrinfo failed\n");
ret = -FI_EINVAL;
goto err;
}
dest_addr = calloc(1, sizeof(struct sockaddr_in));
if (!dest_addr) {
ret = -FI_ENOMEM;
goto err;
}
memcpy(dest_addr, result->ai_addr, result->ai_addrlen);
udp_sock = socket(AF_INET, SOCK_DGRAM, 0);
if (udp_sock < 0) {
ret = -FI_ENOMEM;
goto err;
}
ret = connect(udp_sock, result->ai_addr,
result->ai_addrlen);
if ( ret != 0) {
SOCK_LOG_ERROR("Failed to create udp socket\n");
ret = -FI_ENODATA;
goto err;
}
len = sizeof(struct sockaddr_in);
src_addr = calloc(1, sizeof(struct sockaddr_in));
if (!src_addr) {
ret = -FI_ENOMEM;
goto err;
}
ret = getsockname(udp_sock, (struct sockaddr*)src_addr, &len);
if (ret != 0) {
SOCK_LOG_ERROR("getsockname failed\n");
ret = -FI_ENODATA;
goto err;
}
close(udp_sock);
udp_sock = 0;
freeaddrinfo(result_ptr);
result_ptr = NULL;
}
if (hints && hints->src_addr) {
if(hints->src_addrlen != sizeof(struct sockaddr_in)){
SOCK_LOG_ERROR("Sockets provider requires src_addrlen to be sizeof(struct sockaddr_in); got %zu\n",
hints->src_addrlen);
ret = -FI_ENODATA;
goto err;
}
memcpy(src_addr, hints->src_addr, hints->src_addrlen);
}
if (hints && hints->dest_addr) {
if (!dest_addr) {
dest_addr = calloc(1, sizeof(struct sockaddr_in));
if (!dest_addr) {
ret = -FI_ENOMEM;
goto err;
}
}
if(hints->dest_addrlen != sizeof(struct sockaddr_in)){
SOCK_LOG_ERROR("Sockets provider requires dest_addrlen to be sizeof(struct sockaddr_in); got %zu\n",
hints->dest_addrlen);
ret = -FI_ENODATA;
goto err;
}
memcpy(dest_addr, hints->dest_addr, hints->dest_addrlen);
}
if (dest_addr) {
memcpy(sa_ip, inet_ntoa(dest_addr->sin_addr), INET_ADDRSTRLEN);
SOCK_LOG_INFO("dest_addr: family: %d, IP is %s\n",
((struct sockaddr_in*)dest_addr)->sin_family, sa_ip);
}
if (src_addr) {
memcpy(sa_ip, inet_ntoa(src_addr->sin_addr), INET_ADDRSTRLEN);
SOCK_LOG_INFO("src_addr: family: %d, IP is %s\n",
((struct sockaddr_in*)src_addr)->sin_family, sa_ip);
}
_info = sock_rdm_fi_info(hints, src_addr, dest_addr);
if (!_info) {
ret = -FI_ENOMEM;
goto err;
}
*info = _info;
if (src_addr)
free(src_addr);
if (dest_addr)
free(dest_addr);
(*info)->caps = SOCK_EP_RDM_CAP |
(*info)->rx_attr->caps | (*info)->tx_attr->caps;
return 0;
err:
if (udp_sock > 0)
close(udp_sock);
if (src_addr)
free(src_addr);
if (dest_addr)
free(dest_addr);
if (result_ptr)
freeaddrinfo(result_ptr);
SOCK_LOG_ERROR("fi_getinfo failed\n");
return ret;
}
int sock_rdm_endpoint(struct fid_domain *domain, struct fi_info *info,

Просмотреть файл

@ -56,7 +56,6 @@ ssize_t sock_eq_sread(struct fid_eq *eq, uint32_t *event, void *buf, size_t len,
struct sock_eq_entry *entry;
sock_eq = container_of(eq, struct sock_eq, eq);
if (!dlistfd_empty(&sock_eq->err_list)) {
return -FI_EAVAIL;
}
@ -64,11 +63,12 @@ ssize_t sock_eq_sread(struct fid_eq *eq, uint32_t *event, void *buf, size_t len,
if (dlistfd_empty(&sock_eq->list)) {
if(!timeout) {
SOCK_LOG_INFO("Nothing to read from eq!\n");
return 0;
return -FI_EAGAIN;
}
ret = dlistfd_wait_avail(&sock_eq->list, timeout);
if (ret <= 0)
return ret;
return (ret == 0 || ret == -FI_ETIMEDOUT) ?
-FI_EAGAIN : ret;
}
fastlock_acquire(&sock_eq->lock);
@ -91,7 +91,7 @@ ssize_t sock_eq_sread(struct fid_eq *eq, uint32_t *event, void *buf, size_t len,
out:
fastlock_release(&sock_eq->lock);
return ret;
return (ret == 0 || ret == -FI_ETIMEDOUT) ? -FI_EAGAIN : ret;
}
@ -110,10 +110,9 @@ ssize_t sock_eq_readerr(struct fid_eq *eq, struct fi_eq_err_entry *buf,
struct sock_eq_entry *entry;
sock_eq = container_of(eq, struct sock_eq, eq);
fastlock_acquire(&sock_eq->lock);
if (dlistfd_empty(&sock_eq->err_list)) {
ret = 0;
ret = -FI_EAGAIN;
goto out;
}
@ -130,57 +129,53 @@ ssize_t sock_eq_readerr(struct fid_eq *eq, struct fi_eq_err_entry *buf,
out:
fastlock_release(&sock_eq->lock);
return ret;
return (ret == 0) ? -FI_EAGAIN : ret;
}
ssize_t sock_eq_report_event(struct sock_eq *sock_eq, uint32_t event,
const void *buf, size_t len, uint64_t flags)
{
int ret;
struct sock_eq_entry *entry = calloc(1, len +
sizeof(struct sock_eq_entry));
struct sock_eq_entry *entry;
entry = calloc(1, len + sizeof(*entry));
if (!entry)
return -FI_ENOMEM;
fastlock_acquire(&sock_eq->lock);
entry->type = event;
entry->len = len;
entry->flags = flags;
ret = entry->len;
memcpy(entry->event, buf, len);
dlistfd_insert_tail(&entry->entry, &sock_eq->list);
fastlock_acquire(&sock_eq->lock);
dlistfd_insert_tail(&entry->entry, &sock_eq->list);
if (sock_eq->signal)
sock_wait_signal(sock_eq->waitset);
fastlock_release(&sock_eq->lock);
return ret;
return 0;
}
ssize_t sock_eq_report_error(struct sock_eq *sock_eq, fid_t fid, void *context,
int err, int prov_errno, void *err_data)
{
struct fi_eq_err_entry *err_entry;
struct sock_eq_entry *entry = calloc(1, sizeof(struct fi_eq_err_entry) +
sizeof(struct sock_eq_entry));
struct sock_eq_entry *entry;
entry = calloc(1, sizeof(*err_entry) + sizeof(*entry));
if (!entry)
return -FI_ENOMEM;
fastlock_acquire(&sock_eq->lock);
err_entry = (struct fi_eq_err_entry *) entry->event;
err_entry->fid = fid;
err_entry->context = context;
err_entry->err = err;
err_entry->prov_errno = prov_errno;
err_entry->err_data = err_data;
entry->len = sizeof(struct fi_eq_err_entry);
dlistfd_insert_tail(&entry->entry, &sock_eq->err_list);
entry->len = sizeof(*err_entry);
fastlock_acquire(&sock_eq->lock);
dlistfd_insert_tail(&entry->entry, &sock_eq->err_list);
if (sock_eq->signal)
sock_wait_signal(sock_eq->waitset);
fastlock_release(&sock_eq->lock);
return 0;
}
@ -189,12 +184,15 @@ static ssize_t sock_eq_write(struct fid_eq *eq, uint32_t event,
const void *buf, size_t len, uint64_t flags)
{
struct sock_eq *sock_eq;
sock_eq = container_of(eq, struct sock_eq, eq);
int ret;
sock_eq = container_of(eq, struct sock_eq, eq);
if (!(sock_eq->attr.flags & FI_WRITE))
return -FI_EINVAL;
return sock_eq_report_event(sock_eq, event, buf, len, flags);
ret = sock_eq_report_event(sock_eq, event, buf, len, flags);
return ret ? ret : len;
}
const char * sock_eq_strerror(struct fid_eq *eq, int prov_errno,
@ -217,8 +215,8 @@ static struct fi_ops_eq sock_eq_ops = {
int sock_eq_fi_close(struct fid *fid)
{
struct sock_eq *sock_eq;
sock_eq = container_of(fid, struct sock_eq, eq);
sock_eq = container_of(fid, struct sock_eq, eq);
dlistfd_head_free(&sock_eq->list);
dlistfd_head_free(&sock_eq->err_list);
fastlock_destroy(&sock_eq->lock);
@ -245,18 +243,15 @@ int sock_eq_control(struct fid *fid, int command, void *arg)
case FI_WAIT_FD:
memcpy(arg, &eq->list.fd[LIST_READ_FD], sizeof(int));
break;
case FI_WAIT_SET:
case FI_WAIT_MUTEX_COND:
sock_wait_get_obj(eq->waitset, arg);
break;
default:
ret = -FI_EINVAL;
break;
}
break;
default:
ret = -FI_EINVAL;
break;
@ -303,13 +298,12 @@ static struct fi_eq_attr _sock_eq_def_attr ={
int sock_eq_openwait(struct sock_eq *eq, const char *service)
{
SOCK_LOG_INFO("enter\n");
struct addrinfo *s_res = NULL, *p;
struct addrinfo *s_res, *p;
struct addrinfo hints;
int optval, ret;
int ret;
if (eq->wait_fd > 0 && !strncmp((char *)&eq->service, service, NI_MAXSERV))
{
SOCK_LOG_INFO("enter\n");
if (eq->wait_fd > 0 && !strncmp((char *) &eq->service, service, NI_MAXSERV)) {
SOCK_LOG_INFO("eq already opened for the service %s\n", service);
return 0;
}
@ -331,20 +325,17 @@ int sock_eq_openwait(struct sock_eq *eq, const char *service)
}
for (p = s_res; p; p = p->ai_next) {
eq->wait_fd = socket(p->ai_family, p->ai_socktype,
p->ai_protocol);
if (eq->wait_fd >= 0) {
optval = 1;
if (setsockopt(eq->wait_fd, SOL_SOCKET, SO_REUSEADDR, &optval,
sizeof optval))
SOCK_LOG_ERROR("setsockopt failed\n");
eq->wait_fd = socket(p->ai_family, p->ai_socktype, p->ai_protocol);
if (eq->wait_fd < 0)
continue;
sock_set_sockopts(eq->wait_fd);
if (!bind(eq->wait_fd, s_res->ai_addr, s_res->ai_addrlen))
break;
close(eq->wait_fd);
eq->wait_fd = -1;
}
}
freeaddrinfo(s_res);
if (eq->wait_fd < 0) {
@ -352,12 +343,8 @@ int sock_eq_openwait(struct sock_eq *eq, const char *service)
return -FI_EINVAL;
}
if (fcntl(eq->wait_fd, F_SETFL, O_NONBLOCK))
SOCK_LOG_ERROR("fcntl failed");
memcpy(&eq->service, service, NI_MAXSERV);
SOCK_LOG_INFO("open udp successfully\n");
return 0;
}
@ -372,12 +359,11 @@ int sock_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
if (ret)
return ret;
sock_eq = (struct sock_eq *)calloc(1, sizeof(struct sock_eq));
sock_eq = calloc(1, sizeof(*sock_eq));
if (!sock_eq)
return -FI_ENOMEM;
sock_eq->sock_fab = container_of(fabric, struct sock_fabric, fab_fid);
sock_eq->eq.fid.fclass = FI_CLASS_EQ;
sock_eq->eq.fid.context = context;
sock_eq->eq.fid.ops = &sock_eq_fi_ops;
@ -401,7 +387,6 @@ int sock_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
atomic_inc(&sock_eq->sock_fab->ref);
switch (sock_eq->attr.wait_obj) {
case FI_WAIT_NONE:
case FI_WAIT_UNSPEC:
sock_eq->signal = 0;
@ -409,7 +394,6 @@ int sock_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
case FI_WAIT_FD:
sock_eq->signal = 0;
break;
case FI_WAIT_MUTEX_COND:
wait_attr.flags = 0;
wait_attr.wait_obj = FI_WAIT_MUTEX_COND;
@ -418,17 +402,14 @@ int sock_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
goto err2;
sock_eq->signal = 1;
break;
case FI_WAIT_SET:
if (!attr) {
ret = -FI_EINVAL;
goto err2;
}
sock_eq->waitset = attr->wait_set;
sock_eq->signal = 1;
break;
default:
break;
}

Просмотреть файл

@ -36,6 +36,11 @@
#include <stdlib.h>
#include <string.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <limits.h>
#include "prov.h"
@ -75,6 +80,7 @@ int sock_verify_fabric_attr(struct fi_fabric_attr *attr)
int sock_verify_info(struct fi_info *hints)
{
uint64_t caps;
enum fi_ep_type ep_type;
int ret;
@ -85,16 +91,19 @@ int sock_verify_info(struct fi_info *hints)
switch (ep_type) {
case FI_EP_UNSPEC:
case FI_EP_MSG:
caps = SOCK_EP_MSG_CAP;
ret = sock_msg_verify_ep_attr(hints->ep_attr,
hints->tx_attr,
hints->rx_attr);
break;
case FI_EP_DGRAM:
caps = SOCK_EP_DGRAM_CAP;
ret = sock_dgram_verify_ep_attr(hints->ep_attr,
hints->tx_attr,
hints->rx_attr);
break;
case FI_EP_RDM:
caps = SOCK_EP_RDM_CAP;
ret = sock_rdm_verify_ep_attr(hints->ep_attr,
hints->tx_attr,
hints->rx_attr);
@ -105,6 +114,11 @@ int sock_verify_info(struct fi_info *hints)
if (ret)
return ret;
if ((caps | hints->caps) != caps) {
SOCK_LOG_INFO("Unsupported capabilities\n");
return -FI_ENODATA;
}
switch (hints->addr_format) {
case FI_FORMAT_UNSPEC:
case FI_SOCKADDR:
@ -142,6 +156,7 @@ static int sock_fabric_close(fid_t fid)
return -FI_EBUSY;
}
fastlock_destroy(&fab->lock);
free(fab);
return 0;
}
@ -166,6 +181,9 @@ static int sock_fabric(struct fi_fabric_attr *attr,
if (!fab)
return -FI_ENOMEM;
fastlock_init(&fab->lock);
dlist_init(&fab->service_list);
fab->fab_fid.fid.fclass = FI_CLASS_FABRIC;
fab->fab_fid.fid.context = context;
fab->fab_fid.fid.ops = &sock_fab_fi_ops;
@ -175,66 +193,227 @@ static int sock_fabric(struct fi_fabric_attr *attr,
return 0;
}
/*
 * Walk fab->service_list and return the first entry whose service field
 * equals 'service', or NULL when no entry matches.
 *
 * This function takes no lock itself; the callers visible here hold
 * fab->lock around the call.
 */
static struct sock_service_entry *sock_fabric_find_service(struct sock_fabric *fab,
							   int service)
{
	struct dlist_entry *head = &fab->service_list;
	struct dlist_entry *node = head->next;
	struct sock_service_entry *candidate;

	while (node != head) {
		candidate = container_of(node, struct sock_service_entry, entry);
		if (candidate->service == service)
			return candidate;
		node = node->next;
	}

	return NULL;
}
/*
 * Report whether 'service' is absent from the fabric's service list.
 *
 * Returns 1 when no entry for 'service' exists and 0 when one does.
 * The lookup is done under fab->lock.
 */
int sock_fabric_check_service(struct sock_fabric *fab, int service)
{
	int not_registered;

	fastlock_acquire(&fab->lock);
	not_registered = (sock_fabric_find_service(fab, service) == NULL);
	fastlock_release(&fab->lock);

	return not_registered;
}
/*
 * Append a new entry for 'service' to the tail of the fabric's service
 * list, under fab->lock.
 *
 * If the entry cannot be allocated the function returns without adding
 * anything and without reporting the failure.
 */
void sock_fabric_add_service(struct sock_fabric *fab, int service)
{
	struct sock_service_entry *rec = calloc(1, sizeof(*rec));

	if (rec) {
		rec->service = service;

		fastlock_acquire(&fab->lock);
		dlist_insert_tail(&rec->entry, &fab->service_list);
		fastlock_release(&fab->lock);
	}
}
/*
 * Remove and free the entry for 'service' from the fabric's service list,
 * under fab->lock.
 *
 * Removing a service that is not registered is a no-op:
 * sock_fabric_find_service() returns NULL in that case, so the result must
 * be checked before it is unlinked and freed.
 */
void sock_fabric_remove_service(struct sock_fabric *fab, int service)
{
	struct sock_service_entry *service_entry;

	fastlock_acquire(&fab->lock);
	service_entry = sock_fabric_find_service(fab, service);
	if (service_entry) {
		dlist_remove(&service_entry->entry);
		free(service_entry);
	}
	fastlock_release(&fab->lock);
}
/*
 * Determine the local address the system would use to reach dest_addr.
 *
 * A temporary UDP socket is connect()ed to dest_addr and its local name is
 * read back with getsockname() into src_addr. The socket is closed before
 * returning.
 *
 * Returns 0 on success or a negative errno value from the failing
 * socket(), connect() or getsockname() call.
 */
static int sock_get_src_addr(struct sockaddr_in *dest_addr,
			     struct sockaddr_in *src_addr)
{
	int sock, ret;
	socklen_t len;

	sock = socket(AF_INET, SOCK_DGRAM, 0);
	if (sock < 0)
		return -errno;

	len = sizeof(*dest_addr);
	ret = connect(sock, (struct sockaddr *) dest_addr, len);
	if (ret) {
		/* Save errno before logging; the log call may overwrite it. */
		ret = -errno;
		SOCK_LOG_INFO("Failed to connect udp socket\n");
		goto out;
	}

	ret = getsockname(sock, (struct sockaddr *) src_addr, &len);
	if (ret) {
		/* Same ordering as above: capture errno first, then log. */
		ret = -errno;
		SOCK_LOG_INFO("getsockname failed\n");
	}

out:
	close(sock);
	return ret;
}
/*
 * Build an fi_info for one endpoint type from node/service/hints.
 *
 * Address selection:
 *  - FI_SOURCE set: node/service are resolved with getaddrinfo() and used
 *    as the source address; the destination comes from hints->dest_addr
 *    when hints are given.
 *  - FI_SOURCE clear: node/service (if either is given) are resolved and
 *    used as the destination address, otherwise hints->dest_addr is used
 *    when hints are given; the source comes from hints->src_addr when
 *    hints are given.
 *  - If a destination is known but no source, a source address is derived
 *    with sock_get_src_addr(); failure there is not fatal and simply
 *    leaves the source unset.
 *
 * The chosen addresses are handed to the sock_*_fi_info() routine that
 * matches ep_type, which fills *info.
 *
 * Returns the sock_*_fi_info() result, or -FI_ENODATA when name resolution
 * fails or ep_type is not MSG, DGRAM or RDM.
 */
static int sock_ep_getinfo(const char *node, const char *service, uint64_t flags,
			   struct fi_info *hints, enum fi_ep_type ep_type,
			   struct fi_info **info)
{
	struct addrinfo ai, *rai = NULL;
	struct sockaddr_in *src_addr = NULL, *dest_addr = NULL;
	struct sockaddr_in sin;
	int ret;

	memset(&ai, 0, sizeof(ai));
	ai.ai_family = AF_INET;
	ai.ai_socktype = SOCK_STREAM;
	if (flags & FI_NUMERICHOST)
		ai.ai_flags |= AI_NUMERICHOST;

	if (flags & FI_SOURCE) {
		ret = getaddrinfo(node, service, &ai, &rai);
		if (ret) {
			SOCK_LOG_INFO("getaddrinfo failed!\n");
			return -FI_ENODATA;
		}
		/* src_addr points into rai; rai is freed only at the end. */
		src_addr = (struct sockaddr_in *) rai->ai_addr;
		if (hints && hints->dest_addr)
			dest_addr = hints->dest_addr;
	} else {
		if (node || service) {
			ret = getaddrinfo(node, service, &ai, &rai);
			if (ret) {
				SOCK_LOG_INFO("getaddrinfo failed!\n");
				return -FI_ENODATA;
			}
			/* dest_addr points into rai; rai is freed only at the end. */
			dest_addr = (struct sockaddr_in *) rai->ai_addr;
		} else if (hints) {
			/* hints may be NULL; guard it like the other hints uses. */
			dest_addr = hints->dest_addr;
		}

		if (hints && hints->src_addr)
			src_addr = hints->src_addr;
	}

	if (dest_addr && !src_addr) {
		/* Best effort: on failure src_addr simply stays NULL. */
		ret = sock_get_src_addr(dest_addr, &sin);
		if (!ret)
			src_addr = &sin;
	}

	if (src_addr)
		SOCK_LOG_INFO("src_addr: %s\n", inet_ntoa(src_addr->sin_addr));
	if (dest_addr)
		SOCK_LOG_INFO("dest_addr: %s\n", inet_ntoa(dest_addr->sin_addr));

	switch (ep_type) {
	case FI_EP_MSG:
		ret = sock_msg_fi_info(src_addr, dest_addr, hints, info);
		break;
	case FI_EP_DGRAM:
		ret = sock_dgram_fi_info(src_addr, dest_addr, hints, info);
		break;
	case FI_EP_RDM:
		ret = sock_rdm_fi_info(src_addr, dest_addr, hints, info);
		break;
	default:
		ret = -FI_ENODATA;
		break;
	}

	if (rai)
		freeaddrinfo(rai);
	return ret;
}
/*
 * Provider-level getinfo entry point.
 *
 * Visible steps: reject a version other than
 * FI_VERSION(SOCK_MAJOR_VERSION, SOCK_MINOR_VERSION) and hint addresses
 * whose length is not sizeof(struct sockaddr_in) with -FI_ENODATA,
 * validate hints through sock_verify_info(), default node to the local
 * hostname when neither node nor service is supplied, then build the
 * fi_info result either for the endpoint type named in hints->ep_attr or
 * for several endpoint types chained through ->next.
 *
 * NOTE(review): this span appears to interleave two revisions of the
 * body -- one built on sock_rdm/dgram/msg_getinfo() with _info/tmp, one
 * built on sock_ep_getinfo() with cur/tail. Evidence: back-to-back return
 * statements in the FI_EP_MSG case, an "if (NULL != tmp) {" that is never
 * closed, and both sets of locals declared. Confirm which revision is
 * intended before relying on this text as compilable source.
 */
static int sock_getinfo(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
struct fi_info *cur, *tail;
enum fi_ep_type ep_type;
char hostname[HOST_NAME_MAX];
int ret;
struct fi_info *_info, *tmp;
/* Only the exact provider version is accepted. */
if (version != FI_VERSION(SOCK_MAJOR_VERSION, SOCK_MINOR_VERSION))
return -FI_ENODATA;
/* A hinted source address is only checked when FI_SOURCE is not set. */
if (!(flags & FI_SOURCE) && hints && hints->src_addr &&
(hints->src_addrlen != sizeof(struct sockaddr_in)))
return -FI_ENODATA;
/* A hinted destination is checked when node/service are both absent or FI_SOURCE is set. */
if (((!node && !service) || (flags & FI_SOURCE)) &&
hints && hints->dest_addr &&
(hints->dest_addrlen != sizeof(struct sockaddr_in)))
return -FI_ENODATA;
ret = sock_verify_info(hints);
if (ret)
return ret;
/* Nothing supplied at all: use the local hostname as node and set FI_SOURCE. */
if (!node && !service && !hints) {
flags |= FI_SOURCE;
gethostname(hostname, sizeof hostname);
node = hostname;
}
/* No node/service and FI_SOURCE not set: use the local hostname as node. */
if (!node && !service && !(flags & FI_SOURCE)) {
gethostname(hostname, sizeof hostname);
node = hostname;
}
/* An explicit endpoint type in the hints is answered directly. */
if (hints && hints->ep_attr) {
switch (hints->ep_attr->type) {
case FI_EP_RDM:
return sock_rdm_getinfo(version, node, service, flags,
hints, info);
case FI_EP_DGRAM:
return sock_dgram_getinfo(version, node, service, flags,
hints, info);
case FI_EP_MSG:
return sock_msg_getinfo(version, node, service, flags,
hints, info);
/* NOTE(review): unreachable as written -- it directly follows another return. */
return sock_ep_getinfo(node, service, flags, hints,
hints->ep_attr->type, info);
default:
break;
}
}
/* On success, publish the RDM list through *info and leave tmp at its last element. */
ret = sock_rdm_getinfo(version, node, service, flags,
hints, &_info);
if (ret == 0) {
*info = tmp = _info;
while(tmp->next != NULL)
tmp=tmp->next;
} else if (ret == -FI_ENODATA) {
tmp = NULL;
} else
return ret;
/* NOTE(review): on success this overwrites *info and tmp set by the RDM step above. */
ret = sock_dgram_getinfo(version, node, service, flags,
hints, &_info);
if (ret == 0) {
*info = tmp = _info;
while(tmp->next != NULL)
tmp=tmp->next;
} else if (ret == -FI_ENODATA) {
tmp = NULL;
} else
return ret;
ret = sock_msg_getinfo(version, node, service, flags,
hints, &_info);
/* NOTE(review): this brace is never closed within the span. */
if (NULL != tmp) {
tmp->next = _info;
return ret;
/* Loop form: query each endpoint type from FI_EP_MSG to FI_EP_RDM and chain the results. */
*info = tail = NULL;
for (ep_type = FI_EP_MSG; ep_type <= FI_EP_RDM; ep_type++) {
ret = sock_ep_getinfo(node, service, flags,
hints, ep_type, &cur);
if (ret) {
/* -FI_ENODATA skips this endpoint type; any other error aborts. */
if (ret == -FI_ENODATA)
continue;
goto err;
}
/* NOTE(review): _info is not written by the loop; this assignment looks like a leftover of the other revision. */
*info = _info;
if (!*info)
*info = cur;
else
tail->next = cur;
/* Advance tail to the last element of the list just appended. */
for (tail = cur; tail->next; tail = tail->next)
;
}
return 0;
err:
/* Free whatever was collected so far and return no list. */
fi_freeinfo(*info);
*info = NULL;
return ret;
}
@ -255,8 +434,6 @@ SOCKETS_INI
{
char *tmp;
fi_log_init();
tmp = getenv("OFI_SOCK_PROGRESS_YIELD_TIME");
if (tmp)
sock_progress_thread_wait = atoi(tmp);

Просмотреть файл

@ -71,12 +71,10 @@ static ssize_t sock_ep_recvmsg(struct fid_ep *ep, const struct fi_msg *msg,
sock_ep = container_of(ep, struct sock_ep, ep);
rx_ctx = sock_ep->rx_ctx;
break;
case FI_CLASS_RX_CTX:
case FI_CLASS_SRX_CTX:
rx_ctx = container_of(ep, struct sock_rx_ctx, ctx);
break;
default:
SOCK_LOG_ERROR("Invalid ep type\n");
return -FI_EINVAL;
@ -93,22 +91,21 @@ static ssize_t sock_ep_recvmsg(struct fid_ep *ep, const struct fi_msg *msg,
rx_entry->rx_op.dest_iov_len = msg->iov_count;
rx_entry->flags = flags;
rx_entry->context = (uint64_t)msg->context;
rx_entry->context = (uintptr_t) msg->context;
rx_entry->addr = (rx_ctx->attr.caps & FI_DIRECTED_RECV) ?
msg->addr : FI_ADDR_UNSPEC;
rx_entry->data = msg->data;
rx_entry->ignore = 0xFFFFFFFF;
rx_entry->ignore = ~0ULL;
rx_entry->is_tagged = 0;
for (i = 0; i< msg->iov_count; i++) {
rx_entry->iov[i].iov.addr = (uint64_t)msg->msg_iov[i].iov_base;
rx_entry->iov[i].iov.len = (uint64_t)msg->msg_iov[i].iov_len;
rx_entry->iov[i].iov.addr = (uintptr_t) msg->msg_iov[i].iov_base;
rx_entry->iov[i].iov.len = msg->msg_iov[i].iov_len;
rx_entry->total_len += rx_entry->iov[i].iov.len;
}
fastlock_acquire(&rx_ctx->lock);
SOCK_LOG_INFO("New rx_entry: %p (ctx: %p)\n", rx_entry, rx_ctx);
fastlock_acquire(&rx_ctx->lock);
dlist_insert_tail(&rx_entry->entry, &rx_ctx->rx_entry_list);
fastlock_release(&rx_ctx->lock);
return 0;
@ -119,7 +116,7 @@ static ssize_t sock_ep_recv(struct fid_ep *ep, void *buf, size_t len, void *desc
{
struct fi_msg msg;
struct iovec msg_iov;
memset(&msg, 0, sizeof msg);
msg_iov.iov_base = buf;
msg_iov.iov_len = len;
@ -137,7 +134,7 @@ static ssize_t sock_ep_recvv(struct fid_ep *ep, const struct iovec *iov,
void *context)
{
struct fi_msg msg;
memset(&msg, 0, sizeof msg);
msg.msg_iov = iov;
msg.desc = desc;
msg.iov_count = count;
@ -163,12 +160,10 @@ static ssize_t sock_ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
sock_ep = container_of(ep, struct sock_ep, ep);
tx_ctx = sock_ep->tx_ctx;
break;
case FI_CLASS_TX_CTX:
tx_ctx = container_of(ep, struct sock_tx_ctx, fid.ctx);
sock_ep = tx_ctx->ep;
break;
default:
SOCK_LOG_ERROR("Invalid EP type\n");
return -FI_EINVAL;
@ -178,7 +173,7 @@ static ssize_t sock_ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
if (sock_ep->connected) {
conn = sock_ep_lookup_conn(sock_ep);
} else {
conn = sock_av_lookup_addr(tx_ctx->av, msg->addr);
conn = sock_av_lookup_addr(sock_ep, tx_ctx->av, msg->addr);
}
if (!conn)
return -FI_EAGAIN;
@ -191,7 +186,7 @@ static ssize_t sock_ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
tx_op.op = SOCK_OP_SEND;
total_len = 0;
if (SOCK_INJECT_OK(flags)) {
if (flags & FI_INJECT) {
for (i=0; i< msg->iov_count; i++) {
total_len += msg->msg_iov[i].iov_len;
}
@ -213,28 +208,24 @@ static ssize_t sock_ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
goto err;
}
sock_tx_ctx_write(tx_ctx, &tx_op, sizeof(struct sock_op));
sock_tx_ctx_write(tx_ctx, &flags, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->context, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->addr, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &conn, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->msg_iov[0].iov_base, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &sock_ep, sizeof(uint64_t));
sock_tx_ctx_write_op_send(tx_ctx, &tx_op, flags, (uintptr_t) msg->context,
msg->addr, (uintptr_t) msg->msg_iov[0].iov_base,
sock_ep, conn);
if (flags & FI_REMOTE_CQ_DATA) {
sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(msg->data));
}
if (SOCK_INJECT_OK(flags)) {
if (flags & FI_INJECT) {
for (i=0; i< msg->iov_count; i++) {
sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].iov_base,
msg->msg_iov[i].iov_len);
}
} else {
for (i=0; i< msg->iov_count; i++) {
tx_iov.iov.addr = (uint64_t)msg->msg_iov[i].iov_base;
tx_iov.iov.addr = (uintptr_t) msg->msg_iov[i].iov_base;
tx_iov.iov.len = msg->msg_iov[i].iov_len;
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
}
}
@ -252,7 +243,7 @@ static ssize_t sock_ep_send(struct fid_ep *ep, const void *buf, size_t len,
{
struct fi_msg msg;
struct iovec msg_iov;
memset(&msg, 0, sizeof msg);
msg_iov.iov_base = (void*)buf;
msg_iov.iov_len = len;
msg.msg_iov = &msg_iov;
@ -269,6 +260,7 @@ static ssize_t sock_ep_sendv(struct fid_ep *ep, const struct iovec *iov,
void *context)
{
struct fi_msg msg;
memset(&msg, 0, sizeof msg);
msg.msg_iov = iov;
msg.desc = desc;
msg.iov_count = count;
@ -303,13 +295,33 @@ static ssize_t sock_ep_inject(struct fid_ep *ep, const void *buf, size_t len,
struct fi_msg msg;
struct iovec msg_iov;
memset(&msg, 0, sizeof msg);
msg_iov.iov_base = (void*)buf;
msg_iov.iov_len = len;
msg.msg_iov = &msg_iov;
msg.iov_count = 1;
msg.addr = dest_addr;
return sock_ep_sendmsg(ep, &msg, FI_INJECT);
return sock_ep_sendmsg(ep, &msg, FI_INJECT | SOCK_NO_COMPLETION);
}
/*
 * Inject a single buffer together with remote CQ data.
 *
 * Wraps buf/len in a one-element iovec, fills a zeroed fi_msg with the
 * destination address and data, and forwards it to sock_ep_sendmsg() with
 * FI_REMOTE_CQ_DATA | FI_INJECT | SOCK_NO_COMPLETION. Returns whatever
 * sock_ep_sendmsg() returns.
 */
static ssize_t sock_ep_injectdata(struct fid_ep *ep, const void *buf, size_t len,
				  uint64_t data, fi_addr_t dest_addr)
{
	struct iovec single_iov = {
		.iov_base = (void *) buf,
		.iov_len = len,
	};
	struct fi_msg inject_msg;

	memset(&inject_msg, 0, sizeof(inject_msg));
	inject_msg.data = data;
	inject_msg.addr = dest_addr;
	inject_msg.iov_count = 1;
	inject_msg.msg_iov = &single_iov;

	return sock_ep_sendmsg(ep, &inject_msg,
			       FI_REMOTE_CQ_DATA | FI_INJECT | SOCK_NO_COMPLETION);
}
struct fi_ops_msg sock_ep_msg_ops = {
@ -322,6 +334,7 @@ struct fi_ops_msg sock_ep_msg_ops = {
.sendmsg = sock_ep_sendmsg,
.inject = sock_ep_inject,
.senddata = sock_ep_senddata,
.injectdata = sock_ep_injectdata
};
static ssize_t sock_ep_trecvmsg(struct fid_ep *ep,
@ -337,12 +350,10 @@ static ssize_t sock_ep_trecvmsg(struct fid_ep *ep,
sock_ep = container_of(ep, struct sock_ep, ep);
rx_ctx = sock_ep->rx_ctx;
break;
case FI_CLASS_RX_CTX:
case FI_CLASS_SRX_CTX:
rx_ctx = container_of(ep, struct sock_rx_ctx, ctx);
break;
default:
SOCK_LOG_ERROR("Invalid ep type\n");
return -FI_EINVAL;
@ -355,24 +366,27 @@ static ssize_t sock_ep_trecvmsg(struct fid_ep *ep,
return -FI_ENOMEM;
flags |= rx_ctx->attr.op_flags;
flags &= ~FI_MULTI_RECV;
rx_entry->rx_op.op = SOCK_OP_TRECV;
rx_entry->rx_op.dest_iov_len = msg->iov_count;
rx_entry->flags = flags;
rx_entry->context = (uint64_t)msg->context;
rx_entry->context = (uintptr_t) msg->context;
rx_entry->addr = (rx_ctx->attr.caps & FI_DIRECTED_RECV) ?
msg->addr : FI_ADDR_UNSPEC;
rx_entry->data = msg->data;
rx_entry->tag = msg->tag;
rx_entry->ignore = msg->ignore;
rx_entry->is_tagged = 1;
for (i=0; i< msg->iov_count; i++) {
rx_entry->iov[i].iov.addr = (uint64_t)msg->msg_iov[i].iov_base;
rx_entry->iov[i].iov.len = (uint64_t)msg->msg_iov[i].iov_len;
rx_entry->iov[i].iov.addr = (uintptr_t) msg->msg_iov[i].iov_base;
rx_entry->iov[i].iov.len = msg->msg_iov[i].iov_len;
rx_entry->total_len += rx_entry->iov[i].iov.len;
}
fastlock_acquire(&rx_ctx->lock);
SOCK_LOG_INFO("New rx_entry: %p (ctx: %p)\n", rx_entry, rx_ctx);
dlist_insert_tail(&rx_entry->entry, &rx_ctx->rx_entry_list);
fastlock_release(&rx_ctx->lock);
return 0;
@ -384,6 +398,7 @@ static ssize_t sock_ep_trecv(struct fid_ep *ep, void *buf, size_t len, void *des
struct fi_msg_tagged msg;
struct iovec msg_iov;
memset(&msg, 0, sizeof msg);
msg_iov.iov_base = buf;
msg_iov.iov_len = len;
@ -404,6 +419,7 @@ static ssize_t sock_ep_trecvv(struct fid_ep *ep, const struct iovec *iov,
{
struct fi_msg_tagged msg;
memset(&msg, 0, sizeof msg);
msg.msg_iov = iov;
msg.desc = desc;
msg.iov_count = count;
@ -431,12 +447,10 @@ static ssize_t sock_ep_tsendmsg(struct fid_ep *ep,
sock_ep = container_of(ep, struct sock_ep, ep);
tx_ctx = sock_ep->tx_ctx;
break;
case FI_CLASS_TX_CTX:
tx_ctx = container_of(ep, struct sock_tx_ctx, fid.ctx);
sock_ep = tx_ctx->ep;
break;
default:
SOCK_LOG_ERROR("Invalid EP type\n");
return -FI_EINVAL;
@ -446,16 +460,16 @@ static ssize_t sock_ep_tsendmsg(struct fid_ep *ep,
if (sock_ep->connected) {
conn = sock_ep_lookup_conn(sock_ep);
} else {
conn = sock_av_lookup_addr(tx_ctx->av, msg->addr);
conn = sock_av_lookup_addr(sock_ep, tx_ctx->av, msg->addr);
}
if (!conn)
return -FI_EAGAIN;
memset(&tx_op, 0, sizeof(struct sock_op));
memset(&tx_op, 0, sizeof(tx_op));
tx_op.op = SOCK_OP_TSEND;
total_len = 0;
if (SOCK_INJECT_OK(flags)) {
if (flags & FI_INJECT) {
for (i=0; i< msg->iov_count; i++) {
total_len += msg->msg_iov[i].iov_len;
}
@ -477,29 +491,24 @@ static ssize_t sock_ep_tsendmsg(struct fid_ep *ep,
}
flags |= tx_ctx->attr.op_flags;
sock_tx_ctx_write(tx_ctx, &tx_op, sizeof(struct sock_op));
sock_tx_ctx_write(tx_ctx, &flags, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->context, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->addr, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &conn, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->msg_iov[0].iov_base, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &sock_ep, sizeof(uint64_t));
sock_tx_ctx_write_op_tsend(tx_ctx, &tx_op, flags, (uintptr_t) msg->context,
msg->addr, (uintptr_t) msg->msg_iov[0].iov_base,
sock_ep, conn, msg->tag);
if (flags & FI_REMOTE_CQ_DATA) {
sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(msg->data));
}
sock_tx_ctx_write(tx_ctx, &msg->tag, sizeof(uint64_t));
if (SOCK_INJECT_OK(flags)) {
if (flags & FI_INJECT) {
for (i=0; i< msg->iov_count; i++) {
sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].iov_base,
msg->msg_iov[i].iov_len);
}
} else {
for (i=0; i< msg->iov_count; i++) {
tx_iov.iov.addr = (uint64_t)msg->msg_iov[i].iov_base;
tx_iov.iov.addr = (uintptr_t) msg->msg_iov[i].iov_base;
tx_iov.iov.len = msg->msg_iov[i].iov_len;
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
}
}
@ -518,6 +527,7 @@ static ssize_t sock_ep_tsend(struct fid_ep *ep, const void *buf, size_t len,
struct fi_msg_tagged msg;
struct iovec msg_iov;
memset(&msg, 0, sizeof msg);
msg_iov.iov_base = (void*)buf;
msg_iov.iov_len = len;
msg.msg_iov = &msg_iov;
@ -535,6 +545,8 @@ static ssize_t sock_ep_tsendv(struct fid_ep *ep, const struct iovec *iov,
uint64_t tag, void *context)
{
struct fi_msg_tagged msg;
memset(&msg, 0, sizeof msg);
msg.msg_iov = iov;
msg.desc = desc;
msg.iov_count = count;
@ -551,6 +563,7 @@ static ssize_t sock_ep_tsenddata(struct fid_ep *ep, const void *buf, size_t len,
struct fi_msg_tagged msg;
struct iovec msg_iov;
memset(&msg, 0, sizeof msg);
msg_iov.iov_base = (void*)buf;
msg_iov.iov_len = len;
msg.msg_iov = &msg_iov;
@ -570,15 +583,37 @@ static ssize_t sock_ep_tinject(struct fid_ep *ep, const void *buf, size_t len,
struct fi_msg_tagged msg;
struct iovec msg_iov;
memset(&msg, 0, sizeof msg);
msg_iov.iov_base = (void*)buf;
msg_iov.iov_len = len;
msg.msg_iov = &msg_iov;
msg.iov_count = 1;
msg.addr = dest_addr;
msg.tag = tag;
return sock_ep_tsendmsg(ep, &msg, FI_INJECT);
return sock_ep_tsendmsg(ep, &msg, FI_INJECT | SOCK_NO_COMPLETION);
}
/*
 * Inject a single tagged buffer together with remote CQ data.
 *
 * Wraps buf/len in a one-element iovec, fills a zeroed fi_msg_tagged with
 * the destination address, data and tag, and forwards it to
 * sock_ep_tsendmsg() with FI_REMOTE_CQ_DATA | FI_INJECT |
 * SOCK_NO_COMPLETION. Returns whatever sock_ep_tsendmsg() returns.
 */
static ssize_t sock_ep_tinjectdata(struct fid_ep *ep, const void *buf, size_t len,
				   uint64_t data, fi_addr_t dest_addr, uint64_t tag)
{
	struct iovec single_iov = {
		.iov_base = (void *) buf,
		.iov_len = len,
	};
	struct fi_msg_tagged tagged_msg;

	memset(&tagged_msg, 0, sizeof(tagged_msg));
	tagged_msg.tag = tag;
	tagged_msg.data = data;
	tagged_msg.addr = dest_addr;
	tagged_msg.iov_count = 1;
	tagged_msg.msg_iov = &single_iov;

	return sock_ep_tsendmsg(ep, &tagged_msg,
				FI_REMOTE_CQ_DATA | FI_INJECT | SOCK_NO_COMPLETION);
}
static ssize_t sock_ep_tsearch(struct fid_ep *ep, uint64_t *tag, uint64_t ignore,
uint64_t flags, fi_addr_t *src_addr, size_t *len,
void *context)
@ -647,6 +682,7 @@ struct fi_ops_tagged sock_ep_tagged = {
.sendmsg = sock_ep_tsendmsg,
.inject = sock_ep_tinject,
.senddata = sock_ep_tsenddata,
.injectdata = sock_ep_tinjectdata,
.search = sock_ep_tsearch,
};

Просмотреть файл

@ -108,8 +108,7 @@ static void sock_pe_release_entry(struct sock_pe *pe,
{
dlist_remove(&pe_entry->ctx_entry);
if (pe_entry->type == SOCK_PE_TX)
{
if (pe_entry->type == SOCK_PE_TX) {
if (pe_entry->conn->tx_pe_entry == pe_entry)
pe_entry->conn->tx_pe_entry = NULL;
} else {
@ -159,8 +158,7 @@ static struct sock_pe_entry *sock_pe_acquire_entry(struct sock_pe *pe)
static void sock_pe_report_tx_completion(struct sock_pe_entry *pe_entry)
{
int ret1 = 0, ret2 = 0;
if (!(pe_entry->flags & FI_INJECT)) {
if (!(pe_entry->flags & SOCK_NO_COMPLETION)) {
if (pe_entry->comp->send_cq &&
(!pe_entry->comp->send_cq_event ||
(pe_entry->comp->send_cq_event &&
@ -192,7 +190,7 @@ static void sock_pe_report_rx_completion(struct sock_pe_entry *pe_entry)
if (pe_entry->comp->recv_cq &&
(!pe_entry->comp->recv_cq_event ||
(pe_entry->comp->recv_cq_event &&
(pe_entry->msg_hdr.flags & FI_COMPLETION))))
(pe_entry->flags & FI_COMPLETION))))
ret1 = pe_entry->comp->recv_cq->report_completion(
pe_entry->comp->recv_cq, pe_entry->addr,
pe_entry);
@ -264,7 +262,7 @@ static void sock_pe_report_remote_write(struct sock_rx_ctx *rx_ctx,
static void sock_pe_report_write_completion(struct sock_pe_entry *pe_entry)
{
if (!(pe_entry->flags & FI_INJECT)) {
if (!(pe_entry->flags & SOCK_NO_COMPLETION)) {
sock_pe_report_tx_completion(pe_entry);
if (pe_entry->comp->write_cq &&
@ -310,7 +308,7 @@ static void sock_pe_report_remote_read(struct sock_rx_ctx *rx_ctx,
static void sock_pe_report_read_completion(struct sock_pe_entry *pe_entry)
{
if (!(pe_entry->flags & FI_INJECT)) {
if (!(pe_entry->flags & SOCK_NO_COMPLETION)) {
sock_pe_report_tx_completion(pe_entry);
if (pe_entry->comp->read_cq &&
@ -366,7 +364,7 @@ static void sock_pe_progress_pending_ack(struct sock_pe *pe,
for (i = 0; i < pe_entry->msg_hdr.dest_iov_len; i++) {
if (sock_pe_send_field(
pe_entry,
(char*)pe_entry->pe.rx.rx_iov[i].iov.addr,
(char *) (uintptr_t) pe_entry->pe.rx.rx_iov[i].iov.addr,
pe_entry->pe.rx.rx_iov[i].iov.len, len))
return;
len += pe_entry->pe.rx.rx_iov[i].iov.len;
@ -413,13 +411,6 @@ static void sock_pe_send_response(struct sock_pe *pe,
response->msg_hdr.msg_len = htonll(response->msg_hdr.msg_len);
response->msg_hdr.rx_id = pe_entry->msg_hdr.rx_id;
if (pe_entry->ep && pe_entry->ep->connected)
response->msg_hdr.ep_id = pe_entry->ep->rem_ep_id;
else
response->msg_hdr.ep_id =
sock_av_lookup_ep_id(rx_ctx->av, pe_entry->addr);
response->msg_hdr.ep_id = htons(response->msg_hdr.ep_id);
pe->pe_atomic = NULL;
pe_entry->done_len = 0;
pe_entry->pe.rx.pending_send = 1;
@ -490,7 +481,7 @@ static int sock_pe_handle_read_complete(struct sock_pe *pe,
for (i=0; i < waiting_entry->pe.tx.tx_op.dest_iov_len; i++) {
if (sock_pe_recv_field(
pe_entry,
(char*)waiting_entry->pe.tx.data.tx_iov[i].dst.iov.addr,
(char *) (uintptr_t) waiting_entry->pe.tx.data.tx_iov[i].dst.iov.addr,
waiting_entry->pe.tx.data.tx_iov[i].dst.iov.len, len))
return 0;
len += waiting_entry->pe.tx.data.tx_iov[i].dst.iov.len;
@ -549,7 +540,7 @@ static int sock_pe_handle_atomic_complete(struct sock_pe *pe,
for (i=0; i < waiting_entry->pe.tx.tx_op.atomic.res_iov_len; i++) {
if (sock_pe_recv_field(
pe_entry,
(char*)waiting_entry->pe.tx.data.tx_iov[i].res.ioc.addr,
(char *) (uintptr_t) waiting_entry->pe.tx.data.tx_iov[i].res.ioc.addr,
waiting_entry->pe.tx.data.tx_iov[i].res.ioc.count * datatype_sz,
len))
return 0;
@ -586,12 +577,12 @@ static int sock_pe_process_rx_read(struct sock_pe *pe, struct sock_rx_ctx *rx_ct
mr = sock_mr_verify_key(rx_ctx->domain,
pe_entry->pe.rx.rx_iov[i].iov.key,
(void*)pe_entry->pe.rx.rx_iov[i].iov.addr,
(void *) (uintptr_t) pe_entry->pe.rx.rx_iov[i].iov.addr,
pe_entry->pe.rx.rx_iov[i].iov.len,
FI_REMOTE_READ);
if (!mr) {
SOCK_LOG_ERROR("Remote memory access error: %p, %lu, %" PRIu64 "\n",
(void*)pe_entry->pe.rx.rx_iov[i].iov.addr,
(void *) (uintptr_t) pe_entry->pe.rx.rx_iov[i].iov.addr,
pe_entry->pe.rx.rx_iov[i].iov.len,
pe_entry->pe.rx.rx_iov[i].iov.key);
sock_pe_send_response(pe, rx_ctx, pe_entry, 0,
@ -643,12 +634,12 @@ static int sock_pe_process_rx_write(struct sock_pe *pe, struct sock_rx_ctx *rx_c
if ((len - pe_entry->done_len) == pe_entry->pe.rx.rx_iov[i].iov.addr) {
mr = sock_mr_verify_key(rx_ctx->domain,
pe_entry->pe.rx.rx_iov[i].iov.key,
(void*)pe_entry->pe.rx.rx_iov[i].iov.addr,
(void *) (uintptr_t) pe_entry->pe.rx.rx_iov[i].iov.addr,
pe_entry->pe.rx.rx_iov[i].iov.len,
FI_REMOTE_WRITE);
if (!mr) {
SOCK_LOG_ERROR("Remote memory access error: %p, %lu, %" PRIu64 "\n",
(void*)pe_entry->pe.rx.rx_iov[i].iov.addr,
(void *) (uintptr_t) pe_entry->pe.rx.rx_iov[i].iov.addr,
pe_entry->pe.rx.rx_iov[i].iov.len,
pe_entry->pe.rx.rx_iov[i].iov.key);
sock_pe_send_response(pe, rx_ctx, pe_entry, 0,
@ -661,7 +652,7 @@ static int sock_pe_process_rx_write(struct sock_pe *pe, struct sock_rx_ctx *rx_c
}
if (sock_pe_recv_field(pe_entry,
(void*)pe_entry->pe.rx.rx_iov[i].iov.addr,
(void *) (uintptr_t) pe_entry->pe.rx.rx_iov[i].iov.addr,
pe_entry->pe.rx.rx_iov[i].iov.len, len))
return 0;
len += pe_entry->pe.rx.rx_iov[i].iov.len;
@ -1058,12 +1049,12 @@ static int sock_pe_process_rx_atomic(struct sock_pe *pe, struct sock_rx_ctx *rx_
for (i = 0; i < pe_entry->pe.rx.rx_op.dest_iov_len; i++) {
mr = sock_mr_verify_key(rx_ctx->domain,
pe_entry->pe.rx.rx_iov[i].ioc.key,
(void*)pe_entry->pe.rx.rx_iov[i].ioc.addr,
(void *) (uintptr_t) pe_entry->pe.rx.rx_iov[i].ioc.addr,
pe_entry->pe.rx.rx_iov[i].ioc.count * datatype_sz,
FI_REMOTE_WRITE);
if (!mr) {
SOCK_LOG_ERROR("Remote memory access error: %p, %lu, %" PRIu64 "\n",
(void*)pe_entry->pe.rx.rx_iov[i].ioc.addr,
(void *) (uintptr_t) pe_entry->pe.rx.rx_iov[i].ioc.addr,
pe_entry->pe.rx.rx_iov[i].ioc.count * datatype_sz,
pe_entry->pe.rx.rx_iov[i].ioc.key);
sock_pe_send_response(pe, rx_ctx, pe_entry, 0,
@ -1085,7 +1076,7 @@ static int sock_pe_process_rx_atomic(struct sock_pe *pe, struct sock_rx_ctx *rx_
for (i = 0; i < pe_entry->pe.rx.rx_op.dest_iov_len; i++) {
for (j = 0; j < pe_entry->pe.rx.rx_iov[i].ioc.count; j++) {
sock_pe_update_atomic((char *) &pe_entry->pe.rx.atomic_cmp[0] + offset,
(char *)pe_entry->pe.rx.rx_iov[i].ioc.addr + j * datatype_sz,
(char *) (uintptr_t) pe_entry->pe.rx.rx_iov[i].ioc.addr + j * datatype_sz,
(char *) &pe_entry->pe.rx.atomic_src[0] + offset,
pe_entry->pe.rx.rx_op.atomic.datatype,
pe_entry->pe.rx.rx_op.atomic.op);
@ -1113,7 +1104,6 @@ err:
return -FI_EINVAL;
}
int sock_pe_progress_buffered_rx(struct sock_rx_ctx *rx_ctx)
{
struct dlist_entry *entry;
@ -1135,7 +1125,8 @@ int sock_pe_progress_buffered_rx(struct sock_rx_ctx *rx_ctx)
continue;
rx_posted = sock_rx_get_entry(rx_ctx, rx_buffered->addr,
rx_buffered->tag);
rx_buffered->tag,
rx_buffered->is_tagged);
if (!rx_posted)
continue;
@ -1158,10 +1149,9 @@ int sock_pe_progress_buffered_rx(struct sock_rx_ctx *rx_ctx)
dst_offset = used_len;
len = MIN(rx_posted->iov[i].iov.len, rem);
pe_entry.buf = (uint64_t)
(char*)rx_posted->iov[i].iov.addr + dst_offset;
memcpy((char*)rx_posted->iov[i].iov.addr + dst_offset,
(char*)rx_buffered->iov[0].iov.addr + offset, len);
pe_entry.buf = rx_posted->iov[i].iov.addr + dst_offset;
memcpy((char *) (uintptr_t) rx_posted->iov[i].iov.addr + dst_offset,
(char *) (uintptr_t) rx_buffered->iov[0].iov.addr + offset, len);
offset += len;
rem -= len;
dst_offset = used_len = 0;
@ -1176,7 +1166,8 @@ int sock_pe_progress_buffered_rx(struct sock_rx_ctx *rx_ctx)
pe_entry.pe.rx.rx_iov[0].iov.addr = rx_posted->iov[0].iov.addr;
pe_entry.type = SOCK_PE_RX;
pe_entry.comp = rx_buffered->comp;
pe_entry.flags = 0;
pe_entry.flags = rx_posted->flags;
pe_entry.flags &= ~FI_MULTI_RECV;
if (rx_posted->flags & FI_MULTI_RECV) {
if (sock_rx_avail_len(rx_posted) < rx_ctx->min_multi_recv) {
@ -1198,8 +1189,11 @@ int sock_pe_progress_buffered_rx(struct sock_rx_ctx *rx_ctx)
dlist_remove(&rx_buffered->entry);
sock_rx_release_entry(rx_buffered);
if (pe_entry.flags & FI_MULTI_RECV)
if ((!(rx_posted->flags & FI_MULTI_RECV) ||
(pe_entry.flags & FI_MULTI_RECV))) {
sock_rx_release_entry(rx_posted);
rx_ctx->num_left++;
}
}
return 0;
}
@ -1229,14 +1223,12 @@ static int sock_pe_process_rx_send(struct sock_pe *pe, struct sock_rx_ctx *rx_ct
}
if (pe_entry->done_len == len && !pe_entry->pe.rx.rx_entry) {
data_len = pe_entry->msg_hdr.msg_len - len;
/* progress buffered recvs, if any */
fastlock_acquire(&rx_ctx->lock);
sock_pe_progress_buffered_rx(rx_ctx);
rx_entry = sock_rx_get_entry(rx_ctx, pe_entry->addr, pe_entry->tag);
rx_entry = sock_rx_get_entry(rx_ctx, pe_entry->addr, pe_entry->tag, pe_entry->msg_hdr.op_type);
SOCK_LOG_INFO("Consuming posted entry: %p\n", rx_entry);
if (!rx_entry) {
@ -1255,6 +1247,13 @@ static int sock_pe_process_rx_send(struct sock_pe *pe, struct sock_rx_ctx *rx_ct
rx_entry->ignore = 0;
rx_entry->comp = pe_entry->comp;
pe_entry->context = rx_entry->context;
if (pe_entry->msg_hdr.flags & FI_REMOTE_CQ_DATA)
rx_entry->flags |= FI_REMOTE_CQ_DATA;
if (pe_entry->msg_hdr.op_type == SOCK_OP_TSEND) {
rx_entry->is_tagged = 1;
}
}
fastlock_release(&rx_ctx->lock);
pe_entry->context = rx_entry->context;
@ -1278,14 +1277,13 @@ static int sock_pe_process_rx_send(struct sock_pe *pe, struct sock_rx_ctx *rx_ct
offset = used;
data_len = MIN(rx_entry->iov[i].iov.len - used, rem);
ret = sock_comm_recv(pe_entry->conn,
(char *)rx_entry->iov[i].iov.addr + offset,
(char *) (uintptr_t) rx_entry->iov[i].iov.addr + offset,
data_len);
if (ret <= 0)
return ret;
if (!pe_entry->buf)
pe_entry->buf = (uint64_t)
((char *)rx_entry->iov[i].iov.addr + offset);
pe_entry->buf = rx_entry->iov[i].iov.addr + offset;
rem -= ret;
used = 0;
pe_entry->done_len += ret;
@ -1294,6 +1292,14 @@ static int sock_pe_process_rx_send(struct sock_pe *pe, struct sock_rx_ctx *rx_ct
return 0;
}
pe_entry->is_complete = 1;
rx_entry->is_complete = 1;
rx_entry->is_busy = 0;
pe_entry->flags = rx_entry->flags;
if (pe_entry->msg_hdr.flags & FI_REMOTE_CQ_DATA)
pe_entry->flags |= FI_REMOTE_CQ_DATA;
pe_entry->flags &= ~FI_MULTI_RECV;
fastlock_acquire(&rx_ctx->lock);
if (rx_entry->flags & FI_MULTI_RECV) {
if (sock_rx_avail_len(rx_entry) < rx_ctx->min_multi_recv) {
@ -1306,10 +1312,6 @@ static int sock_pe_process_rx_send(struct sock_pe *pe, struct sock_rx_ctx *rx_ct
}
fastlock_release(&rx_ctx->lock);
pe_entry->is_complete = 1;
rx_entry->is_complete = 1;
rx_entry->is_busy = 0;
/* report error, if any */
if (rem) {
SOCK_LOG_ERROR("Not enough space in posted recv buffer\n");
@ -1328,8 +1330,12 @@ out:
if (!rx_entry->is_buffered &&
(!(rx_entry->flags & FI_MULTI_RECV) ||
(pe_entry->flags & FI_MULTI_RECV)))
(pe_entry->flags & FI_MULTI_RECV))) {
sock_rx_release_entry(rx_entry);
fastlock_acquire(&rx_ctx->lock);
rx_ctx->num_left++;
fastlock_release(&rx_ctx->lock);
}
return ret;
}
@ -1346,48 +1352,37 @@ static int sock_pe_process_recv(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx,
goto out;
}
/* process rx entry */
switch (pe_entry->msg_hdr.op_type) {
case SOCK_OP_SEND:
case SOCK_OP_TSEND:
ret = sock_pe_process_rx_send(pe, rx_ctx, pe_entry);
break;
case SOCK_OP_WRITE:
ret = sock_pe_process_rx_write(pe, rx_ctx, pe_entry);
break;
case SOCK_OP_READ:
ret = sock_pe_process_rx_read(pe, rx_ctx, pe_entry);
break;
case SOCK_OP_ATOMIC:
ret = sock_pe_process_rx_atomic(pe, rx_ctx, pe_entry);
break;
case SOCK_OP_SEND_COMPLETE:
ret = sock_pe_handle_ack(pe, pe_entry);
break;
case SOCK_OP_WRITE_COMPLETE:
ret = sock_pe_handle_write_complete(pe, pe_entry);
break;
case SOCK_OP_READ_COMPLETE:
ret = sock_pe_handle_read_complete(pe, pe_entry);
break;
case SOCK_OP_ATOMIC_COMPLETE:
ret = sock_pe_handle_atomic_complete(pe, pe_entry);
break;
case SOCK_OP_WRITE_ERROR:
case SOCK_OP_READ_ERROR:
case SOCK_OP_ATOMIC_ERROR:
ret = sock_pe_handle_ack(pe, pe_entry);
break;
default:
ret = -FI_ENOSYS;
SOCK_LOG_ERROR("Operation not supported\n");
@ -1420,7 +1415,6 @@ static int sock_pe_peek_hdr(struct sock_pe *pe,
msg_hdr->msg_len = ntohll(msg_hdr->msg_len);
msg_hdr->flags = ntohll(msg_hdr->flags);
msg_hdr->pe_entry_id = ntohs(msg_hdr->pe_entry_id);
msg_hdr->ep_id = ntohs(msg_hdr->ep_id);
SOCK_LOG_INFO("PE RX (Hdr peek): MsgLen: %" PRIu64 ", TX-ID: %d, Type: %d\n",
msg_hdr->msg_len, msg_hdr->rx_id, msg_hdr->op_type);
@ -1430,9 +1424,6 @@ static int sock_pe_peek_hdr(struct sock_pe *pe,
static int sock_pe_read_hdr(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx,
struct sock_pe_entry *pe_entry)
{
int match;
struct sock_ep *ep;
struct dlist_entry *entry;
struct sock_msg_hdr *msg_hdr;
struct sock_conn *conn = pe_entry->conn;
@ -1450,26 +1441,6 @@ static int sock_pe_read_hdr(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx,
if (msg_hdr->rx_id != rx_ctx->rx_id)
return -1;
if (rx_ctx->ctx.fid.fclass == FI_CLASS_SRX_CTX) {
match = 0;
for (entry = rx_ctx->ep_list.next;
entry != &rx_ctx->ep_list; entry = entry->next) {
ep = container_of(entry, struct sock_ep, rx_ctx_entry);
if (ep->ep_id == msg_hdr->ep_id) {
match = 1;
break;
}
}
if (!match)
return -1;
} else {
if (msg_hdr->ep_id != rx_ctx->ep->ep_id) {
SOCK_LOG_INFO("Mismatch: %d:%d\n",
msg_hdr->ep_id,rx_ctx->ep->ep_id);
return -1;
}
}
if (sock_pe_recv_field(pe_entry, (void*)msg_hdr,
sizeof(struct sock_msg_hdr), 0)) {
SOCK_LOG_ERROR("Failed to recv header\n");
@ -1479,7 +1450,6 @@ static int sock_pe_read_hdr(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx,
msg_hdr->msg_len = ntohll(msg_hdr->msg_len);
msg_hdr->flags = ntohll(msg_hdr->flags);
msg_hdr->pe_entry_id = ntohs(msg_hdr->pe_entry_id);
msg_hdr->ep_id = ntohs(msg_hdr->ep_id);
pe_entry->pe.rx.header_read = 1;
pe_entry->flags = msg_hdr->flags;
@ -1528,7 +1498,7 @@ static int sock_pe_progress_tx_atomic(struct sock_pe *pe,
datatype_sz = fi_datatype_size(pe_entry->pe.tx.tx_op.atomic.datatype);
for (i=0; i < pe_entry->pe.tx.tx_op.atomic.cmp_iov_len; i++) {
if (sock_pe_send_field(pe_entry,
(void*)pe_entry->pe.tx.data.tx_iov[i].cmp.ioc.addr,
(void *) (uintptr_t) pe_entry->pe.tx.data.tx_iov[i].cmp.ioc.addr,
pe_entry->pe.tx.data.tx_iov[i].cmp.ioc.count *
datatype_sz, len))
return 0;
@ -1536,7 +1506,7 @@ static int sock_pe_progress_tx_atomic(struct sock_pe *pe,
}
/* data */
if (SOCK_INJECT_OK(pe_entry->flags)) {
if (pe_entry->flags & FI_INJECT) {
if (sock_pe_send_field(pe_entry,
&pe_entry->pe.tx.data.inject[0],
pe_entry->pe.tx.tx_op.src_iov_len, len))
@ -1544,9 +1514,8 @@ static int sock_pe_progress_tx_atomic(struct sock_pe *pe,
len += pe_entry->pe.tx.tx_op.src_iov_len;
} else {
for (i=0; i < pe_entry->pe.tx.tx_op.src_iov_len; i++) {
if (sock_pe_send_field(
pe_entry,
(void*)pe_entry->pe.tx.data.tx_iov[i].src.ioc.addr,
if (sock_pe_send_field(pe_entry,
(void *) (uintptr_t) pe_entry->pe.tx.data.tx_iov[i].src.ioc.addr,
pe_entry->pe.tx.data.tx_iov[i].src.ioc.count *
datatype_sz, len))
return 0;
@ -1594,7 +1563,7 @@ static int sock_pe_progress_tx_write(struct sock_pe *pe,
len += dest_iov_len;
/* data */
if (SOCK_INJECT_OK(pe_entry->flags)) {
if (pe_entry->flags & FI_INJECT) {
if (sock_pe_send_field(pe_entry, &pe_entry->pe.tx.data.inject[0],
pe_entry->pe.tx.tx_op.src_iov_len, len))
return 0;
@ -1605,7 +1574,7 @@ static int sock_pe_progress_tx_write(struct sock_pe *pe,
for (i=0; i < pe_entry->pe.tx.tx_op.src_iov_len; i++) {
if (sock_pe_send_field(
pe_entry,
(void*)pe_entry->pe.tx.data.tx_iov[i].src.iov.addr,
(void *) (uintptr_t) pe_entry->pe.tx.data.tx_iov[i].src.iov.addr,
pe_entry->pe.tx.data.tx_iov[i].src.iov.len, len))
return 0;
len += pe_entry->pe.tx.data.tx_iov[i].src.iov.len;
@ -1683,7 +1652,7 @@ static int sock_pe_progress_tx_send(struct sock_pe *pe,
len += SOCK_CQ_DATA_SIZE;
}
if (SOCK_INJECT_OK(pe_entry->flags)) {
if (pe_entry->flags & FI_INJECT) {
if (sock_pe_send_field(pe_entry, pe_entry->pe.tx.data.inject,
pe_entry->pe.tx.tx_op.src_iov_len, len))
return 0;
@ -1692,9 +1661,8 @@ static int sock_pe_progress_tx_send(struct sock_pe *pe,
} else {
pe_entry->data_len = 0;
for (i = 0; i < pe_entry->pe.tx.tx_op.src_iov_len; i++) {
if (sock_pe_send_field(
pe_entry,
(void*)pe_entry->pe.tx.data.tx_iov[i].src.iov.addr,
if (sock_pe_send_field(pe_entry,
(void *) (uintptr_t) pe_entry->pe.tx.data.tx_iov[i].src.iov.addr,
pe_entry->pe.tx.data.tx_iov[i].src.iov.len, len))
return 0;
len += pe_entry->pe.tx.data.tx_iov[i].src.iov.len;
@ -1753,24 +1721,19 @@ static int sock_pe_progress_tx_entry(struct sock_pe *pe,
}
switch (pe_entry->msg_hdr.op_type) {
case SOCK_OP_SEND:
case SOCK_OP_TSEND:
ret = sock_pe_progress_tx_send(pe, pe_entry, conn);
break;
case SOCK_OP_WRITE:
ret = sock_pe_progress_tx_write(pe, pe_entry, conn);
break;
case SOCK_OP_READ:
ret = sock_pe_progress_tx_read(pe, pe_entry, conn);
break;
case SOCK_OP_ATOMIC:
ret = sock_pe_progress_tx_atomic(pe, pe_entry, conn);
break;
default:
ret = -FI_ENOSYS;
SOCK_LOG_ERROR("Operation not supported\n");
@ -1788,11 +1751,7 @@ static int sock_pe_progress_rx_pe_entry(struct sock_pe *pe,
if (pe_entry->pe.rx.pending_send) {
sock_pe_progress_pending_ack(pe, pe_entry);
if (pe_entry->is_complete) {
sock_pe_release_entry(pe, pe_entry);
SOCK_LOG_INFO("[%p] RX done\n", pe_entry);
}
return 0;
goto out;
}
if (!pe_entry->pe.rx.header_read) {
@ -1808,6 +1767,7 @@ static int sock_pe_progress_rx_pe_entry(struct sock_pe *pe,
return ret;
}
out:
if (pe_entry->is_complete) {
sock_pe_release_entry(pe, pe_entry);
SOCK_LOG_INFO("[%p] RX done\n", pe_entry);
@ -1821,13 +1781,14 @@ static int sock_pe_new_rx_entry(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx,
{
int ret;
struct sock_pe_entry *pe_entry;
pe_entry = sock_pe_acquire_entry(pe);
if (!pe_entry) {
SOCK_LOG_INFO("Cannot get PE entry\n");
return 0;
}
memset(&pe_entry->pe.rx, 0, sizeof(struct sock_rx_pe_entry));
memset(&pe_entry->pe.rx, 0, sizeof(pe_entry->pe.rx));
pe_entry->conn = conn;
pe_entry->type = SOCK_PE_RX;
@ -1851,10 +1812,7 @@ static int sock_pe_new_rx_entry(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx,
SOCK_LOG_INFO("Inserting rx_entry to PE entry %p, conn: %p\n",
pe_entry, pe_entry->conn);
/* link to tracking list in rx_ctx */
dlist_init(&pe_entry->ctx_entry);
dlist_insert_tail(&pe_entry->ctx_entry, &rx_ctx->pe_entry_list);
ret = sock_pe_progress_rx_pe_entry(pe, pe_entry, rx_ctx);
return ret;
}
@ -1872,8 +1830,8 @@ static int sock_pe_new_tx_entry(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx)
return 0;
}
memset(&pe_entry->pe.tx, 0, sizeof(struct sock_tx_pe_entry));
memset(&pe_entry->msg_hdr, 0, sizeof(struct sock_msg_hdr));
memset(&pe_entry->pe.tx, 0, sizeof(pe_entry->pe.tx));
memset(&pe_entry->msg_hdr, 0, sizeof(pe_entry->msg_hdr));
pe_entry->type = SOCK_PE_TX;
pe_entry->is_complete = 0;
@ -1882,25 +1840,24 @@ static int sock_pe_new_tx_entry(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx)
pe_entry->ep = tx_ctx->ep;
pe_entry->pe.tx.tx_ctx = tx_ctx;
dlist_init(&pe_entry->ctx_entry);
dlist_insert_tail(&pe_entry->ctx_entry, &tx_ctx->pe_entry_list);
/* fill in PE tx entry */
memset(&pe_entry->msg_hdr, 0, sizeof(struct sock_msg_hdr));
msg_hdr = &pe_entry->msg_hdr;
msg_hdr->msg_len = sizeof(struct sock_msg_hdr);
msg_hdr->msg_len = sizeof(*msg_hdr);
msg_hdr->pe_entry_id = PE_INDEX(pe, pe_entry);
SOCK_LOG_INFO("New TX on PE entry %p (%d)\n",
pe_entry, msg_hdr->pe_entry_id);
rbfdread(&tx_ctx->rbfd, &pe_entry->pe.tx.tx_op, sizeof(struct sock_op));
rbfdread(&tx_ctx->rbfd, &pe_entry->flags, sizeof(uint64_t));
rbfdread(&tx_ctx->rbfd, &pe_entry->context, sizeof(uint64_t));
rbfdread(&tx_ctx->rbfd, &pe_entry->addr, sizeof(uint64_t));
rbfdread(&tx_ctx->rbfd, &pe_entry->conn, sizeof(uint64_t));
rbfdread(&tx_ctx->rbfd, &pe_entry->buf, sizeof(uint64_t));
rbfdread(&tx_ctx->rbfd, &ep, sizeof(uint64_t));
sock_tx_ctx_read_op_send(tx_ctx, &pe_entry->pe.tx.tx_op,
&pe_entry->flags, &pe_entry->context, &pe_entry->addr,
&pe_entry->buf, &ep, &pe_entry->conn);
if (pe_entry->pe.tx.tx_op.op == SOCK_OP_TSEND) {
rbfdread(&tx_ctx->rbfd, &pe_entry->tag, sizeof(pe_entry->tag));
msg_hdr->msg_len += sizeof(pe_entry->tag);
}
if (ep && tx_ctx->fid.stx.fid.fclass == FI_CLASS_STX_CTX)
pe_entry->comp = &ep->comp;
@ -1908,116 +1865,91 @@ static int sock_pe_new_tx_entry(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx)
pe_entry->comp = &tx_ctx->comp;
if (pe_entry->flags & FI_REMOTE_CQ_DATA) {
rbfdread(&tx_ctx->rbfd, &pe_entry->data, SOCK_CQ_DATA_SIZE);
msg_hdr->msg_len += SOCK_CQ_DATA_SIZE;
}
if (pe_entry->pe.tx.tx_op.op == SOCK_OP_TSEND) {
rbfdread(&tx_ctx->rbfd, &pe_entry->tag, SOCK_TAG_SIZE);
msg_hdr->msg_len += SOCK_TAG_SIZE;
rbfdread(&tx_ctx->rbfd, &pe_entry->data, sizeof(pe_entry->data));
msg_hdr->msg_len += sizeof(pe_entry->data);
}
msg_hdr->op_type = pe_entry->pe.tx.tx_op.op;
switch (pe_entry->pe.tx.tx_op.op) {
case SOCK_OP_SEND:
case SOCK_OP_TSEND:
if (SOCK_INJECT_OK(pe_entry->flags)) {
if (pe_entry->flags & FI_INJECT) {
rbfdread(&tx_ctx->rbfd, &pe_entry->pe.tx.data.inject[0],
pe_entry->pe.tx.tx_op.src_iov_len);
msg_hdr->msg_len += pe_entry->pe.tx.tx_op.src_iov_len;
} else {
/* read src iov(s)*/
for (i = 0; i < pe_entry->pe.tx.tx_op.src_iov_len; i++) {
rbfdread(&tx_ctx->rbfd, &pe_entry->pe.tx.data.tx_iov[i].src,
sizeof(union sock_iov));
sizeof(pe_entry->pe.tx.data.tx_iov[i].src));
msg_hdr->msg_len += pe_entry->pe.tx.data.tx_iov[i].src.iov.len;
}
}
break;
case SOCK_OP_WRITE:
if (SOCK_INJECT_OK(pe_entry->flags)) {
if (pe_entry->flags & FI_INJECT) {
rbfdread(&tx_ctx->rbfd, &pe_entry->pe.tx.data.inject[0],
pe_entry->pe.tx.tx_op.src_iov_len);
msg_hdr->msg_len += pe_entry->pe.tx.tx_op.src_iov_len;
} else {
/* read src iov(s)*/
for (i = 0; i < pe_entry->pe.tx.tx_op.src_iov_len; i++) {
rbfdread(&tx_ctx->rbfd, &pe_entry->pe.tx.data.tx_iov[i].src,
sizeof(union sock_iov));
sizeof(pe_entry->pe.tx.data.tx_iov[i].src));
msg_hdr->msg_len += pe_entry->pe.tx.data.tx_iov[i].src.iov.len;
}
}
/* read dst iov(s)*/
for (i = 0; i < pe_entry->pe.tx.tx_op.dest_iov_len; i++) {
rbfdread(&tx_ctx->rbfd, &pe_entry->pe.tx.data.tx_iov[i].dst,
sizeof(union sock_iov));
sizeof(pe_entry->pe.tx.data.tx_iov[i].dst));
}
msg_hdr->msg_len += sizeof(union sock_iov) *
pe_entry->pe.tx.tx_op.dest_iov_len;
msg_hdr->msg_len += sizeof(union sock_iov) * i;
break;
case SOCK_OP_READ:
/* read src iov(s)*/
for (i = 0; i < pe_entry->pe.tx.tx_op.src_iov_len; i++) {
rbfdread(&tx_ctx->rbfd, &pe_entry->pe.tx.data.tx_iov[i].src,
sizeof(union sock_iov));
sizeof(pe_entry->pe.tx.data.tx_iov[i].src));
}
msg_hdr->msg_len += sizeof(union sock_iov) *
pe_entry->pe.tx.tx_op.src_iov_len;
msg_hdr->msg_len += sizeof(union sock_iov) * i;
/* read dst iov(s)*/
for (i = 0; i <pe_entry->pe.tx.tx_op.dest_iov_len; i++) {
rbfdread(&tx_ctx->rbfd, &pe_entry->pe.tx.data.tx_iov[i].dst,
sizeof(union sock_iov));
sizeof(pe_entry->pe.tx.data.tx_iov[i].dst));
}
break;
case SOCK_OP_ATOMIC:
msg_hdr->msg_len += sizeof(struct sock_op);
datatype_sz = fi_datatype_size(pe_entry->pe.tx.tx_op.atomic.datatype);
if (SOCK_INJECT_OK(pe_entry->flags)) {
if (pe_entry->flags & FI_INJECT) {
rbfdread(&tx_ctx->rbfd, &pe_entry->pe.tx.data.inject[0],
pe_entry->pe.tx.tx_op.src_iov_len);
msg_hdr->msg_len += pe_entry->pe.tx.tx_op.src_iov_len;
} else {
/* read src ioc(s)*/
for (i = 0; i < pe_entry->pe.tx.tx_op.src_iov_len; i++) {
rbfdread(&tx_ctx->rbfd, &pe_entry->pe.tx.data.tx_iov[i].src,
sizeof(union sock_iov));
msg_hdr->msg_len +=
(pe_entry->pe.tx.data.tx_iov[i].src.ioc.count * datatype_sz);
sizeof(pe_entry->pe.tx.data.tx_iov[i].src));
msg_hdr->msg_len += datatype_sz *
pe_entry->pe.tx.data.tx_iov[i].src.ioc.count;
}
}
/* read dst ioc(s)*/
for (i = 0; i < pe_entry->pe.tx.tx_op.dest_iov_len; i++) {
rbfdread(&tx_ctx->rbfd, &pe_entry->pe.tx.data.tx_iov[i].dst,
sizeof(union sock_iov));
sizeof(pe_entry->pe.tx.data.tx_iov[i].dst));
}
msg_hdr->msg_len += sizeof(union sock_iov) *
pe_entry->pe.tx.tx_op.dest_iov_len;
msg_hdr->msg_len += sizeof(union sock_iov) * i;
/* read result ioc(s)*/
for (i = 0; i < pe_entry->pe.tx.tx_op.atomic.res_iov_len; i++) {
rbfdread(&tx_ctx->rbfd, &pe_entry->pe.tx.data.tx_iov[i].res,
sizeof(union sock_iov));
sizeof(pe_entry->pe.tx.data.tx_iov[i].res));
}
/* read comp ioc(s)*/
for (i = 0; i < pe_entry->pe.tx.tx_op.atomic.cmp_iov_len; i++) {
rbfdread(&tx_ctx->rbfd, &pe_entry->pe.tx.data.tx_iov[i].cmp,
sizeof(union sock_iov));
msg_hdr->msg_len += (pe_entry->pe.tx.data.tx_iov[i].cmp.ioc.count *
datatype_sz);
sizeof(pe_entry->pe.tx.data.tx_iov[i].cmp));
msg_hdr->msg_len += datatype_sz *
pe_entry->pe.tx.data.tx_iov[i].cmp.ioc.count;
}
break;
default:
SOCK_LOG_ERROR("Invalid operation type\n");
return -FI_EINVAL;
@ -2032,10 +1964,8 @@ static int sock_pe_new_tx_entry(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx)
if (tx_ctx->av) {
msg_hdr->rx_id = (uint16_t) SOCK_GET_RX_ID(pe_entry->addr,
tx_ctx->av->rx_ctx_bits);
msg_hdr->ep_id = sock_av_lookup_ep_id(tx_ctx->av, pe_entry->addr);
} else {
msg_hdr->rx_id = 0;
msg_hdr->ep_id = ((ep != NULL) ? ep->rem_ep_id : 0);
}
msg_hdr->dest_iov_len = pe_entry->pe.tx.tx_op.dest_iov_len;
@ -2043,38 +1973,37 @@ static int sock_pe_new_tx_entry(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx)
pe_entry->total_len = msg_hdr->msg_len;
msg_hdr->msg_len = htonll(msg_hdr->msg_len);
msg_hdr->pe_entry_id = htons(msg_hdr->pe_entry_id);
msg_hdr->ep_id = htons(msg_hdr->ep_id);
return sock_pe_progress_tx_entry(pe, tx_ctx, pe_entry);
}
void sock_pe_add_tx_ctx(struct sock_pe *pe, struct sock_tx_ctx *ctx)
{
fastlock_acquire(&pe->lock);
fastlock_acquire(&pe->list_lock);
dlistfd_insert_tail(&ctx->pe_entry, &pe->tx_list);
fastlock_release(&pe->lock);
fastlock_release(&pe->list_lock);
SOCK_LOG_INFO("TX ctx added to PE\n");
}
void sock_pe_add_rx_ctx(struct sock_pe *pe, struct sock_rx_ctx *ctx)
{
fastlock_acquire(&pe->lock);
fastlock_acquire(&pe->list_lock);
dlistfd_insert_tail(&ctx->pe_entry, &pe->rx_list);
fastlock_release(&pe->lock);
fastlock_release(&pe->list_lock);
SOCK_LOG_INFO("RX ctx added to PE\n");
}
void sock_pe_remove_tx_ctx(struct sock_tx_ctx *tx_ctx)
{
fastlock_acquire(&tx_ctx->domain->pe->lock);
fastlock_acquire(&tx_ctx->domain->pe->list_lock);
dlist_remove(&tx_ctx->pe_entry);
fastlock_release(&tx_ctx->domain->pe->lock);
fastlock_release(&tx_ctx->domain->pe->list_lock);
}
void sock_pe_remove_rx_ctx(struct sock_rx_ctx *rx_ctx)
{
fastlock_acquire(&rx_ctx->domain->pe->lock);
fastlock_acquire(&rx_ctx->domain->pe->list_lock);
dlist_remove(&rx_ctx->pe_entry);
fastlock_release(&rx_ctx->domain->pe->lock);
fastlock_release(&rx_ctx->domain->pe->list_lock);
}
int sock_pe_progress_rx_ep(struct sock_pe *pe, struct sock_ep *ep,
@ -2082,7 +2011,7 @@ int sock_pe_progress_rx_ep(struct sock_pe *pe, struct sock_ep *ep,
{
struct sock_conn *conn;
struct sock_conn_map *map;
int i, ret = 0, data_avail;
int i, ret, data_avail;
map = &ep->domain->r_cmap;
assert(map != NULL);
@ -2093,6 +2022,9 @@ int sock_pe_progress_rx_ep(struct sock_pe *pe, struct sock_ep *ep,
if (rbused(&conn->outbuf))
sock_comm_flush(conn);
if (ep != conn->ep)
continue;
data_avail = 0;
if (rbused(&conn->inbuf) > 0) {
data_avail = 1;
@ -2127,7 +2059,6 @@ int sock_pe_progress_rx_ctx(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx)
if (fastlock_acquire(&pe->lock))
return 0;
/* progress buffered recvs */
fastlock_acquire(&rx_ctx->lock);
sock_pe_progress_buffered_rx(rx_ctx);
fastlock_release(&rx_ctx->lock);
@ -2150,7 +2081,6 @@ int sock_pe_progress_rx_ctx(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx)
goto out;
}
/* progress rx_ctx in PE table */
for (entry = rx_ctx->pe_entry_list.next;
entry != &rx_ctx->pe_entry_list;) {
@ -2177,18 +2107,14 @@ int sock_pe_progress_tx_ctx(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx)
if (fastlock_acquire(&pe->lock))
return 0;
/* check tx_ctx rbuf */
fastlock_acquire(&tx_ctx->rlock);
if (!rbfdempty(&tx_ctx->rbfd) &&
pe->num_free_entries > SOCK_PE_MIN_ENTRIES) {
/* new TX PE entry */
ret = sock_pe_new_tx_entry(pe, tx_ctx);
if (ret < 0) {
}
fastlock_release(&tx_ctx->rlock);
if (ret < 0)
goto out;
}
}
fastlock_release(&tx_ctx->rlock);
/* progress tx_ctx in PE table */
for (entry = tx_ctx->pe_entry_list.next;
@ -2225,14 +2151,15 @@ static void *sock_pe_progress_thread(void *data)
struct sock_pe *pe = (struct sock_pe *)data;
SOCK_LOG_INFO("Progress thread started\n");
while (pe->do_progress) {
while (*((volatile int*)&pe->do_progress)) {
/* FIXME */
if (sock_progress_thread_wait) {
pthread_yield();
usleep(sock_progress_thread_wait * 1000);
}
/* progress tx */
fastlock_acquire(&pe->list_lock);
if (!dlistfd_empty(&pe->tx_list)) {
for (entry = pe->tx_list.list.next;
entry != &pe->tx_list.list; entry = entry->next) {
@ -2240,14 +2167,13 @@ static void *sock_pe_progress_thread(void *data)
pe_entry);
ret = sock_pe_progress_tx_ctx(pe, tx_ctx);
if (ret < 0) {
SOCK_LOG_ERROR(
"failed to progress TX\n");
SOCK_LOG_ERROR("failed to progress TX\n");
fastlock_release(&pe->list_lock);
return NULL;
}
}
}
/* progress rx */
if (!dlistfd_empty(&pe->rx_list)) {
for (entry = pe->rx_list.list.next;
entry != &pe->rx_list.list; entry = entry->next) {
@ -2255,20 +2181,20 @@ static void *sock_pe_progress_thread(void *data)
pe_entry);
ret = sock_pe_progress_rx_ctx(pe, rx_ctx);
if (ret < 0) {
SOCK_LOG_ERROR(
"failed to progress RX\n");
SOCK_LOG_ERROR("failed to progress RX\n");
fastlock_release(&pe->list_lock);
return NULL;
}
}
}
fastlock_release(&pe->list_lock);
}
SOCK_LOG_INFO("Progress thread terminated\n");
return NULL;
}
static void sock_pe_init_table(
struct sock_pe *pe)
static void sock_pe_init_table(struct sock_pe *pe)
{
int i;
@ -2288,7 +2214,9 @@ static void sock_pe_init_table(
struct sock_pe *sock_pe_init(struct sock_domain *domain)
{
struct sock_pe *pe = calloc(1, sizeof(struct sock_pe));
struct sock_pe *pe;
pe = calloc(1, sizeof(*pe));
if (!pe)
return NULL;
@ -2296,6 +2224,7 @@ struct sock_pe *sock_pe_init(struct sock_domain *domain)
dlistfd_head_init(&pe->tx_list);
dlistfd_head_init(&pe->rx_list);
fastlock_init(&pe->lock);
fastlock_init(&pe->list_lock);
pe->domain = domain;
if (domain->progress_mode == FI_PROGRESS_AUTO) {
@ -2310,9 +2239,9 @@ struct sock_pe *sock_pe_init(struct sock_domain *domain)
return pe;
err:
fastlock_destroy(&pe->lock);
dlistfd_head_free(&pe->tx_list);
dlistfd_head_free(&pe->rx_list);
free(pe);
return NULL;
}
@ -2325,10 +2254,9 @@ void sock_pe_finalize(struct sock_pe *pe)
}
fastlock_destroy(&pe->lock);
fastlock_destroy(&pe->list_lock);
dlistfd_head_free(&pe->tx_list);
dlistfd_head_free(&pe->rx_list);
free(pe);
SOCK_LOG_INFO("Progress engine finalize: OK\n");
}

Просмотреть файл

@ -93,7 +93,7 @@ static ssize_t sock_ep_rma_readmsg(struct fid_ep *ep,
if (sock_ep->connected) {
conn = sock_ep_lookup_conn(sock_ep);
} else {
conn = sock_av_lookup_addr(tx_ctx->av, msg->addr);
conn = sock_av_lookup_addr(sock_ep, tx_ctx->av, msg->addr);
}
if (!conn)
@ -115,16 +115,12 @@ static ssize_t sock_ep_rma_readmsg(struct fid_ep *ep,
tx_op.src_iov_len = msg->rma_iov_count;
tx_op.dest_iov_len = msg->iov_count;
sock_tx_ctx_write(tx_ctx, &tx_op, sizeof(struct sock_op));
sock_tx_ctx_write(tx_ctx, &flags, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->context, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->addr, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &conn, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->msg_iov[0].iov_base, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &sock_ep, sizeof(uint64_t));
sock_tx_ctx_write_op_send(tx_ctx, &tx_op, flags, (uintptr_t) msg->context,
msg->addr, (uintptr_t) msg->msg_iov[0].iov_base,
sock_ep, conn);
if (flags & FI_REMOTE_CQ_DATA) {
sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(msg->data));
}
src_len = 0;
@ -132,16 +128,16 @@ static ssize_t sock_ep_rma_readmsg(struct fid_ep *ep,
tx_iov.iov.addr = msg->rma_iov[i].addr;
tx_iov.iov.key = msg->rma_iov[i].key;
tx_iov.iov.len = msg->rma_iov[i].len;
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
src_len += tx_iov.iov.len;
}
dst_len = 0;
for (i = 0; i< msg->iov_count; i++) {
tx_iov.iov.addr = (uint64_t)msg->msg_iov[i].iov_base;
tx_iov.iov.addr = (uintptr_t) msg->msg_iov[i].iov_base;
tx_iov.iov.len = msg->msg_iov[i].iov_len;
tx_iov.iov.key = (uint64_t)msg->desc[i];
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
tx_iov.iov.key = (uintptr_t) msg->desc[i];
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
dst_len += tx_iov.iov.len;
}
@ -168,6 +164,7 @@ static ssize_t sock_ep_rma_read(struct fid_ep *ep, void *buf, size_t len,
struct iovec msg_iov;
struct fi_rma_iov rma_iov;
memset(&msg, 0, sizeof msg);
msg_iov.iov_base = (void*)buf;
msg_iov.iov_len = len;
msg.msg_iov = &msg_iov;
@ -194,6 +191,7 @@ static ssize_t sock_ep_rma_readv(struct fid_ep *ep, const struct iovec *iov,
struct fi_msg_rma msg;
struct fi_rma_iov rma_iov;
memset(&msg, 0, sizeof msg);
msg.msg_iov = iov;
msg.desc = desc;
msg.iov_count = count;
@ -247,7 +245,7 @@ static ssize_t sock_ep_rma_writemsg(struct fid_ep *ep,
if (sock_ep->connected) {
conn = sock_ep_lookup_conn(sock_ep);
} else {
conn = sock_av_lookup_addr(tx_ctx->av, msg->addr);
conn = sock_av_lookup_addr(sock_ep, tx_ctx->av, msg->addr);
}
if (!conn)
@ -259,7 +257,7 @@ static ssize_t sock_ep_rma_writemsg(struct fid_ep *ep,
tx_op.dest_iov_len = msg->rma_iov_count;
total_len = 0;
if (SOCK_INJECT_OK(flags)) {
if (flags & FI_INJECT) {
for (i=0; i< msg->iov_count; i++) {
total_len += msg->msg_iov[i].iov_len;
}
@ -279,20 +277,16 @@ static ssize_t sock_ep_rma_writemsg(struct fid_ep *ep,
goto err;
}
sock_tx_ctx_write(tx_ctx, &tx_op, sizeof(struct sock_op));
sock_tx_ctx_write(tx_ctx, &flags, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->context, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->addr, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &conn, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->msg_iov[0].iov_base, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &sock_ep, sizeof(uint64_t));
sock_tx_ctx_write_op_send(tx_ctx, &tx_op, flags, (uintptr_t) msg->context,
msg->addr, (uintptr_t) msg->msg_iov[0].iov_base,
sock_ep, conn);
if (flags & FI_REMOTE_CQ_DATA) {
sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(msg->data));
}
src_len = 0;
if (SOCK_INJECT_OK(flags)) {
if (flags & FI_INJECT) {
for (i=0; i< msg->iov_count; i++) {
sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].iov_base,
msg->msg_iov[i].iov_len);
@ -300,10 +294,10 @@ static ssize_t sock_ep_rma_writemsg(struct fid_ep *ep,
}
} else {
for (i = 0; i< msg->iov_count; i++) {
tx_iov.iov.addr = (uint64_t)msg->msg_iov[i].iov_base;
tx_iov.iov.addr = (uintptr_t) msg->msg_iov[i].iov_base;
tx_iov.iov.len = msg->msg_iov[i].iov_len;
tx_iov.iov.key = (uint64_t)msg->desc[i];
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
tx_iov.iov.key = (uintptr_t) msg->desc[i];
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
src_len += tx_iov.iov.len;
}
}
@ -313,7 +307,7 @@ static ssize_t sock_ep_rma_writemsg(struct fid_ep *ep,
tx_iov.iov.addr = msg->rma_iov[i].addr;
tx_iov.iov.key = msg->rma_iov[i].key;
tx_iov.iov.len = msg->rma_iov[i].len;
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov));
dst_len += tx_iov.iov.len;
}
@ -340,6 +334,7 @@ static ssize_t sock_ep_rma_write(struct fid_ep *ep, const void *buf,
struct iovec msg_iov;
struct fi_rma_iov rma_iov;
memset(&msg, 0, sizeof msg);
msg_iov.iov_base = (void*)buf;
msg_iov.iov_len = len;
@ -370,6 +365,7 @@ static ssize_t sock_ep_rma_writev(struct fid_ep *ep,
struct fi_msg_rma msg;
struct fi_rma_iov rma_iov;
memset(&msg, 0, sizeof msg);
msg.msg_iov = iov;
msg.desc = desc;
msg.iov_count = count;
@ -426,6 +422,7 @@ static ssize_t sock_ep_rma_inject(struct fid_ep *ep, const void *buf,
struct iovec msg_iov;
struct fi_rma_iov rma_iov;
memset(&msg, 0, sizeof msg);
msg_iov.iov_base = (void*)buf;
msg_iov.iov_len = len;
msg.msg_iov = &msg_iov;
@ -440,7 +437,7 @@ static ssize_t sock_ep_rma_inject(struct fid_ep *ep, const void *buf,
msg.msg_iov = &msg_iov;
msg.addr = dest_addr;
return sock_ep_rma_writemsg(ep, &msg, FI_INJECT);
return sock_ep_rma_writemsg(ep, &msg, FI_INJECT | SOCK_NO_COMPLETION);
}
static ssize_t sock_ep_rma_injectdata(struct fid_ep *ep, const void *buf,
@ -451,6 +448,7 @@ static ssize_t sock_ep_rma_injectdata(struct fid_ep *ep, const void *buf,
struct iovec msg_iov;
struct fi_rma_iov rma_iov;
memset(&msg, 0, sizeof msg);
msg_iov.iov_base = (void*)buf;
msg_iov.iov_len = len;
msg.msg_iov = &msg_iov;
@ -465,7 +463,8 @@ static ssize_t sock_ep_rma_injectdata(struct fid_ep *ep, const void *buf,
msg.msg_iov = &msg_iov;
msg.addr = dest_addr;
msg.data = data;
return sock_ep_rma_writemsg(ep, &msg, FI_INJECT|FI_REMOTE_CQ_DATA);
return sock_ep_rma_writemsg(ep, &msg, FI_INJECT | FI_REMOTE_CQ_DATA |
SOCK_NO_COMPLETION);
}

Просмотреть файл

@ -47,16 +47,23 @@
#include "sock_util.h"
/* FIXME: pool of rx_entry */
struct sock_rx_entry *sock_rx_new_entry(struct sock_rx_ctx *rx_ctx)
{
/* FIXME: pool of rx_entry */
struct sock_rx_entry *rx_entry;
rx_entry = calloc(1, sizeof(struct sock_rx_entry));
rx_entry = calloc(1, sizeof(*rx_entry));
if (!rx_entry)
return NULL;
rx_entry->is_tagged = 0;
SOCK_LOG_INFO("New rx_entry: %p, ctx: %p\n", rx_entry, rx_ctx);
dlist_init(&rx_entry->entry);
fastlock_acquire(&rx_ctx->lock);
rx_ctx->num_left--;
fastlock_release(&rx_ctx->lock);
return rx_entry;
}
@ -66,7 +73,6 @@ void sock_rx_release_entry(struct sock_rx_entry *rx_entry)
free(rx_entry);
}
struct sock_rx_entry *sock_rx_new_buffered_entry(struct sock_rx_ctx *rx_ctx,
size_t len)
{
@ -77,23 +83,25 @@ struct sock_rx_entry *sock_rx_new_buffered_entry(struct sock_rx_ctx *rx_ctx,
return NULL;
}
rx_entry = calloc(1, sizeof(struct sock_rx_entry) + len);
rx_entry = calloc(1, sizeof(*rx_entry) + len);
if (!rx_entry)
return NULL;
SOCK_LOG_INFO("New buffered entry:%p len: %lu, ctx: %p\n",
rx_entry, len, rx_ctx);
rx_entry->is_busy = 1;
rx_entry->is_buffered = 1;
rx_entry->rx_op.dest_iov_len = 1;
rx_entry->iov[0].iov.len = len;
rx_entry->iov[0].iov.addr = (uint64_t)((char*)rx_entry +
sizeof(struct sock_rx_entry));
rx_entry->iov[0].iov.addr = (uintptr_t) (rx_entry + 1);
rx_entry->total_len = len;
rx_ctx->buffered_len += len;
dlist_insert_tail(&rx_entry->entry, &rx_ctx->rx_buffered_list);
rx_entry->is_busy = 1;
rx_entry->is_tagged = 0;
return rx_entry;
}
@ -103,7 +111,8 @@ inline size_t sock_rx_avail_len(struct sock_rx_entry *rx_entry)
}
struct sock_rx_entry *sock_rx_get_entry(struct sock_rx_ctx *rx_ctx,
uint64_t addr, uint64_t tag)
uint64_t addr, uint64_t tag,
uint8_t op_type)
{
struct dlist_entry *entry;
struct sock_rx_entry *rx_entry;
@ -112,22 +121,17 @@ struct sock_rx_entry *sock_rx_get_entry(struct sock_rx_ctx *rx_ctx,
entry != &rx_ctx->rx_entry_list; entry = entry->next) {
rx_entry = container_of(entry, struct sock_rx_entry, entry);
if (rx_entry->is_busy)
if (rx_entry->is_busy || (op_type != rx_entry->is_tagged))
continue;
if (((rx_entry->tag & ~rx_entry->ignore) ==
(tag & ~rx_entry->ignore)) &&
if (((rx_entry->tag & ~rx_entry->ignore) == (tag & ~rx_entry->ignore)) &&
(rx_entry->addr == FI_ADDR_UNSPEC || addr == FI_ADDR_UNSPEC ||
rx_entry->addr == addr ||
(rx_ctx->av &&
!sock_av_compare_addr(rx_ctx->av, addr, rx_entry->addr)))) {
break;
}
}
if (entry == &rx_ctx->rx_entry_list)
rx_entry = NULL;
else
rx_entry->is_busy = 1;
return rx_entry;
}
}
return NULL;
}

Просмотреть файл

@ -74,7 +74,8 @@ int sock_wait_get_obj(struct fid_wait *fid, void *arg)
static int sock_wait_init(struct sock_wait *wait, enum fi_wait_obj type)
{
long flags = 0;
int ret;
wait->type = type;
switch (type) {
@ -82,11 +83,11 @@ static int sock_wait_init(struct sock_wait *wait, enum fi_wait_obj type)
if (socketpair(AF_UNIX, SOCK_STREAM, 0, wait->wobj.fd))
return -errno;
flags = fcntl(wait->wobj.fd[WAIT_READ_FD], F_GETFL, 0);
if (fcntl(wait->wobj.fd[WAIT_READ_FD], F_SETFL, flags | O_NONBLOCK)) {
ret = fd_set_nonblock(wait->wobj.fd[WAIT_READ_FD]);
if (ret) {
close(wait->wobj.fd[WAIT_READ_FD]);
close(wait->wobj.fd[WAIT_WRITE_FD]);
return -errno;
return ret;
}
break;

Просмотреть файл

@ -1,104 +1,95 @@
dnl Configry specific to the libfabrics usNIC provider
dnl
dnl Check for libnl; prefer version 3 instead of version 1. Abort (i.e.,
dnl AC_MSG_ERROR) if neither libnl v1 or v3 can be found.
dnl Copyright (c) 2015, Cisco Systems, Inc. All rights reserved.
dnl
dnl Outputs:
dnl This software is available to you under a choice of one of two
dnl licenses. You may choose to be licensed under the terms of the GNU
dnl General Public License (GPL) Version 2, available from the file
dnl COPYING in the main directory of this source tree, or the
dnl BSD license below:
dnl
dnl - Set $1 to the CPPFLAGS necessary to compile with libnl
dnl - Set $2 to the LIBS necessary to link with libnl
dnl - If $3 is 1, AC_MSG_ERROR (i.e., abort) if neither libnl or
dnl libnl3 can be found
dnl - Set HAVE_LIBNL3 to 1 if libnl3 will be used; 0 if libnl1 will be used
dnl - AC_SUBST $HAVE_LIBNL3
dnl - AC_DEFINE HAVE_LIBNL3
dnl Redistribution and use in source and binary forms, with or
dnl without modification, are permitted provided that the following
dnl conditions are met:
dnl
dnl - Redistributions of source code must retain the above
dnl copyright notice, this list of conditions and the following
dnl disclaimer.
dnl
dnl - Redistributions in binary form must reproduce the above
dnl copyright notice, this list of conditions and the following
dnl disclaimer in the documentation and/or other materials
dnl provided with the distribution.
dnl
dnl THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
dnl "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
dnl LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
dnl FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
dnl COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
dnl INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
dnl BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
dnl LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
dnl CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
dnl LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
dnl ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
dnl POSSIBILITY OF SUCH DAMAGE.
dnl
AC_DEFUN([CHECK_LIBNL3],[
# More libnl v1/v3 sadness: the two versions are not compatible
# and will not work correctly if simultaneously linked into the
# same applications. Unfortunately, they *will* link into the
# same image! On platforms like SLES 12, libibverbs depends on
# libnl-3.so.200 and friends, while a naive implementation of
# our configure logic would link libnl.so.1 to libdaplusnic,
# resulting in both versions in the dependency map at the same
# time. As a coarse fix, just check for libnl-3 first and use
# it if present on the system.
# GROSS: libnl wants us to either use pkg-config (which we
# can't assume is always present) or we need to look in a
# particular directory for the right libnl3 include files. For
# now, just hard code the special path into this logic.
dnl Configury specific to the libfabric usNIC provider
save_CPPFLAGS=$CPPFLAGS
save_LIBS=$LIBS
dnl libnl is sadness, but we have to use it. The majority of this
dnl configure.m4 is just to deal with libnl. :-(
$1="-I/usr/include/libnl3"
CPPFLAGS="$$1 $CPPFLAGS"
AC_MSG_CHECKING([for /usr/include/libnl3])
AS_IF([test -d "/usr/include/libnl3"],
[AC_MSG_RESULT([present])
AC_CHECK_HEADER(
[netlink/version.h],
[AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([[
#include <netlink/netlink.h>
#include <netlink/version.h>
#ifndef LIBNL_VER_MAJ
#error "LIBNL_VER_MAJ not defined!"
#endif
/* to the best of our knowledge, version.h only exists in libnl3 */
#if LIBNL_VER_MAJ < 3
#error "LIBNL_VER_MAJ < 3, this is very unusual"
#endif
]],[[/* empty body */]])],
[HAVE_LIBNL3=1], dnl our program compiled
[HAVE_LIBNL3=0])], dnl our program failed to compile
[HAVE_LIBNL3=0], dnl AC_CHECK_HEADER failed
[#include <netlink/netlink.h>
])],
[AC_MSG_RESULT([missing])
HAVE_LIBNL3=0]) dnl "/usr/include/libnl3" doesn't exist
dnl libnl has two versions: libnl (i.e., version 1) and libnl3.
# nl_recvmsgs_report is a symbol that is only present in v3
AS_IF([test "$HAVE_LIBNL3" -eq 1],
[AC_SEARCH_LIBS([nl_recvmsgs_report], [nl-3],
[# We also need libnl-route-3
AC_SEARCH_LIBS([nl_rtgen_request], [nl-route-3],
[$2="-lnl-3 -lnl-route-3"
HAVE_LIBNL3=1],
[HAVE_LIBNL3=0])],
[HAVE_LIBNL3=0])])
dnl These two versions have many of the same symbols, but they are
dnl incompatible with each other. We can handle this in the C code, but
dnl we must know which version to compile for (i.e., configure must
dnl figure this out). Additionally, if both versions get linked into
dnl the same process, they will disrupt each other's global state, and
dnl Random Bad Things happen. We can't always prevent this -- e.g., if we
dnl link against libnl vX and some other middleware links against libnl vY
dnl (and X != Y), prepare for unpleasantness. You have been warned.
AS_IF([test "$HAVE_LIBNL3" -eq 1],
[AC_MSG_NOTICE([using libnl-3])],
[# restore $1 since we are falling back to libnl (v1)
$1=""
AC_SEARCH_LIBS([nl_connect], [nl],
[$2="-lnl"],
[AC_MSG_WARN([Cannot find libnl-3 nor libnl])
AS_IF([test "$3" = "1"],
[AC_MSG_ERROR([Cannot continue])])
])
AC_MSG_NOTICE([using libnl (v1)])])
dnl As of this writing (March 2015), most Linux distros seem to be
dnl encouraging packages to prefer libnl v3 over libnl v1.
# libnl_utils.h does not include configure-generated config.h,
# so it may not see the HAVE_LIBNL3 #define. Hence, we set
# HAVE_LIBNL3 as both a C preprocessor macro (in case some
# other file includes config.h before libnl_utils.h) and a
# Makefile macro (so that the app can set HAVE_LIBNL3 via
# CPPFLAGS). Also, this macro may be used in multiple
# different libraries; setting HAVE_LIBNL3 both ways lets the
# application choose which way to set it.
AC_SUBST([HAVE_LIBNL3])
AC_DEFINE_UNQUOTED([HAVE_LIBNL3],[$HAVE_LIBNL3],
[set to 1 if should use libnl v3, set to 0 for libnl v11])
dnl libnl wants us to use pkg-config to find CPPFLAGS and LDFLAGS and
dnl LIBS, but pkg-config isn't always available. So we have to test here.
dnl It gets more complicated because libnl changed several things between v1
dnl and v3:
LIBS=$save_LIBS
AS_UNSET([save_LIBS])
CPPFLAGS=$save_CPPFLAGS
AS_UNSET([save_CPPFLAGS])
])
dnl v1:
dnl - Header files (e.g., <netlink/netlink.h>) are in $prefix/include
dnl - Library is in $prefix/lib[64]
dnl - Library is named libnl.<suffix>
dnl v3:
dnl - Header files (e.g., <netlink/netlink.h>) are in $prefix/include/libnl3
dnl *** NOTE: This means that a -I<dir> switch is REQUIRED to find
dnl the libnl3 headers (!)
dnl - Library is in $prefix/lib[64]
dnl - Library is named libnl-3.<suffix>
dnl - We *also* need the libnl-route-3 library
dnl These differing requirements make the configure/m4 tests a bit of
dnl a nightmare. :-(
dnl ---------------------------------------------------------------------------
dnl This configure.m4 script supports the following CLI options:
dnl --with-libnl[=dir]
dnl If specified, look for libnl support. If it is not found,
dnl error/abort configure. If dir is specified, look in that
dnl directory (configure will first look for libnl v3 in that tree, and if
dnl it is not found, look for libnl v1 in that tree). If no dir is
dnl specified, this option is redundant with --with-usnic.
dnl --without-libnl
dnl Do not look for libnl support. This means that the usnic provider
dnl will not be built (since the usnic provider *requires* libnl support).
dnl ---------------------------------------------------------------------------
dnl Called to configure this provider
dnl
@ -111,14 +102,211 @@ AC_DEFUN([FI_USNIC_CONFIGURE],[
# Determine if we can support the usnic provider
usnic_happy=0
AS_IF([test "x$enable_usnic" != "xno"],
[usnic_happy=1
AC_CHECK_HEADER([infiniband/verbs.h], [], [usnic_happy=0])
CHECK_LIBNL3([usnic_libnl_CPPFLAGS],
[usnic_libnl_LIBS], [0])
AC_SUBST(usnic_libnl_CPPFLAGS)
AC_SUBST(usnic_libnl_LIBS)
[AC_CHECK_HEADER([infiniband/verbs.h], [usnic_happy=1])
AS_IF([test $usnic_happy -eq 1],
[USNIC_CHECK_LIBNL_SADNESS])
])
])
AS_IF([test "$usnic_libnl_LIBS" = ""],
dnl
dnl Helper function to parse --with-libnl* options
dnl
dnl $1: variable name
dnl $2: --with-<foo> value
dnl
AC_DEFUN([USNIC_PARSE_WITH],[
    # Split the value of a --with-<name> option into two shell variables:
    #   usnic_want_<name>:     no, yes, or default
    #   usnic_<name>_location: directory prefix to search
    case "$2" in
    no)
        # Nope, don't want it
        usnic_want_$1=no
        ;;
    yes)
        # Yes, definitely want it.  No directory was given, so search
        # the same prefix as the default case; leaving the location
        # empty would make the later directory tests look under "/"
        # and never succeed.
        usnic_want_$1=yes
        usnic_$1_location=/usr
        ;;
    default)
        # Default case -- try and see if we can find it
        usnic_want_$1=default
        usnic_$1_location=/usr
        ;;
    *)
        # Yes, definitely want it -- at a specific location
        usnic_want_$1=yes
        usnic_$1_location="$2"
        ;;
    esac
])
dnl
dnl Shared macro
dnl
AC_DEFUN([USNIC_CHECK_LIBNL_SADNESS],[
    AC_ARG_WITH([libnl],
        [AS_HELP_STRING([--with-libnl(=DIR)],
            [Directory prefix for libnl (typically only necessary if libnl is installed in a location that the compiler/linker will not search by default)])],
        [], [with_libnl=default])

    # The --with options carry two pieces of information: 1) do
    # you want a specific version of libnl, and 2) where that
    # version of libnl lives. For simplicity, let's separate
    # those two pieces of information.
    USNIC_PARSE_WITH([libnl], [$with_libnl])

    # Default to a numeric value (this value gets AC_DEFINEd)
    HAVE_LIBNL3=0

    ###################################################
    # NOTE: We *must* check for libnl3 before libnl.
    ###################################################

    AS_IF([test "$usnic_want_libnl" != "no"],
        [USNIC_CHECK_LIBNL3([$usnic_libnl_location], [usnic_nl])])

    # Only fall back to libnl v1 when the libnl3 probe produced no
    # libraries to link against.
    AS_IF([test "$usnic_want_libnl" != "no" &&
           test "$usnic_nl_LIBS" = ""],
        [USNIC_CHECK_LIBNL([$usnic_libnl_location], [usnic_nl])])

    # An explicit request that cannot be satisfied is fatal.
    AS_IF([test "$usnic_want_libnl" = "yes" &&
           test "$usnic_nl_LIBS" = ""],
        [AC_MSG_WARN([--with-libnl specified, but not found])
         AC_MSG_ERROR([Cannot continue])])

    # Final result
    AC_SUBST([HAVE_LIBNL3])
    AC_DEFINE_UNQUOTED([HAVE_LIBNL3], [$HAVE_LIBNL3],
        [Whether we have libnl or libnl3])

    AC_SUBST([usnic_nl_CPPFLAGS])
    AC_SUBST([usnic_nl_LDFLAGS])
    AC_SUBST([usnic_nl_LIBS])

    # Without any flavor of libnl the usnic provider is marked unhappy.
    AS_IF([test "$usnic_nl_LIBS" = ""],
        [usnic_happy=0])
])
dnl
dnl Check for libnl-3.
dnl
dnl Inputs:
dnl
dnl $1: prefix where to look for libnl-3
dnl $2: var name prefix of _CPPFLAGS and _LDFLAGS and _LIBS
dnl
dnl Outputs:
dnl
dnl - Set $2_CPPFLAGS necessary to compile with libnl-3
dnl - Set $2_LDFLAGS necessary to link with libnl-3
dnl - Set $2_LIBS necessary to link with libnl-3
dnl - Set HAVE_LIBNL3 1 if libnl-3 will be used
dnl
AC_DEFUN([USNIC_CHECK_LIBNL3],[
# Probe for libnl-3 under the given prefix.  The probe runs in stages
# (header directory, libnl-3 itself, libnl-route-3, header version);
# usnic_libnl3_happy tracks whether every stage so far succeeded.
AC_MSG_NOTICE([checking for libnl3])
AC_MSG_CHECKING([for libnl3 prefix])
AC_MSG_RESULT([$1])
AC_MSG_CHECKING([for $1/include/libnl3])
AS_IF([test -d "$1/include/libnl3"],
[usnic_libnl3_happy=1
AC_MSG_RESULT([found])],
[usnic_libnl3_happy=0
AC_MSG_RESULT([not found])])
# Random note: netlink/version.h is only in libnl3 - it is not in libnl.
# Also, nl_recvmsgs_report is only in libnl3.
# CPPFLAGS is temporarily extended with the libnl3 include directory
# and restored from CPPFLAGS_save once the header checks are done.
CPPFLAGS_save=$CPPFLAGS
usnic_tmp_CPPFLAGS="-I$1/include/libnl3"
CPPFLAGS="$usnic_tmp_CPPFLAGS $CPPFLAGS"
AS_IF([test $usnic_libnl3_happy -eq 1],
[FI_CHECK_PACKAGE([$2],
[netlink/version.h],
[nl-3],
[nl_recvmsgs_report],
[],
[$1],
[],
[usnic_libnl3_happy=1],
[usnic_libnl3_happy=0])
# Note that FI_CHECK_PACKAGE is going to add
# -I$dir/include into $2_CPPFLAGS. But because libnl3
# puts the headers in $dir/include/libnl3, we need to
# overwrite $2_CPPFLAGS with -I$dir/include/libnl3. We
# can do this unconditionally; we don't have to check for
# success (checking for success occurs below).
$2_CPPFLAGS=$usnic_tmp_CPPFLAGS])
# If we found libnl-3, we *also* need libnl-route-3
# LIBS and LDFLAGS are saved here and restored right after the
# search, so the search itself leaves no lasting change to them.
LIBS_save=$LIBS
LDFLAGS_save=$LDFLAGS
AS_IF([test "$$2_LDFLAGS" != ""],
[LDFLAGS="$$2_LDFLAGS $LDFLAGS"])
AS_IF([test $usnic_libnl3_happy -eq 1],
[AC_SEARCH_LIBS([nl_rtgen_request],
[nl-route-3],
[usnic_libnl3_happy=1],
[usnic_libnl3_happy=0])])
LIBS=$LIBS_save
LDFLAGS=$LDFLAGS_save
# Just because libnl* is evil, double check that the
# netlink/version.h we found was for libnl3. As far as we
# know, netlink/version.h only first appeared in version
# 3... but let's really be sure.
AS_IF([test $usnic_libnl3_happy -eq 1],
[AC_MSG_CHECKING([to ensure these really are libnl3 headers])
CPPFLAGS="$$2_CPPFLAGS $CPPFLAGS"
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([[
#include <netlink/netlink.h>
#include <netlink/version.h>
#ifndef LIBNL_VER_MAJ
#error "LIBNL_VER_MAJ not defined!"
#endif
/* to the best of our knowledge, version.h only exists in libnl3 */
#if LIBNL_VER_MAJ != 3
#error "LIBNL_VER_MAJ != 3, I am sad"
#endif
]])],
[AC_MSG_RESULT([yes])],
[AC_MSG_RESULT([no])
usnic_libnl3_happy=0]
)])
# Restore the CPPFLAGS value saved before the header probes.
CPPFLAGS=$CPPFLAGS_save
# If we found everything
# The link line is set explicitly to both libraries, and HAVE_LIBNL3
# records that the v3 API will be used.
AS_IF([test $usnic_libnl3_happy -eq 1],
[$2_LIBS="-lnl-3 -lnl-route-3"
HAVE_LIBNL3=1])
])
dnl
dnl Check for libnl.
dnl
dnl Inputs:
dnl
dnl $1: prefix where to look for libnl
dnl $2: var name prefix of _CPPFLAGS and _LDFLAGS and _LIBS
dnl
dnl Outputs:
dnl
dnl - Set $2_CPPFLAGS necessary to compile with libnl
dnl - Set $2_LDFLAGS necessary to link with libnl
dnl - Set $2_LIBS necessary to link with libnl
dnl - Set HAVE_LIBNL3 0 if libnl will be used
dnl
AC_DEFUN([USNIC_CHECK_LIBNL],[
# Probe for libnl v1: the netlink/netlink.h header plus nl_connect in
# the nl library, under the given prefix.  The -lm argument is
# presumably the list of extra libraries for the link test (confirm
# against the FI_CHECK_PACKAGE definition).
AC_MSG_NOTICE([checking for libnl])
FI_CHECK_PACKAGE([$2],
[netlink/netlink.h],
[nl],
[nl_connect],
[-lm],
[$1],
[],
[usnic_libnl_happy=1],
[usnic_libnl_happy=0])
# On success the link line is set explicitly and HAVE_LIBNL3 is
# forced to 0 so that the v1 API is selected.
AS_IF([test $usnic_libnl_happy -eq 1],
[$2_LIBS="-lnl -lm"
HAVE_LIBNL3=0])
])

Просмотреть файл

@ -1,5 +1,6 @@
/*
* Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -33,6 +34,11 @@
#ifndef _FI_EXT_USNIC_H_
#define _FI_EXT_USNIC_H_
/*
* See the fi_usnic.7 man page for information about the usnic provider
* extensions provided in this header.
*/
#include <stdint.h>
#include <net/if.h>
@ -61,7 +67,7 @@ struct fi_usnic_info {
};
/*
* usNIC-specific AV ops
* usNIC-specific fabric ops
*/
#define FI_USNIC_FABRIC_OPS_1 "fabric_ops 1"
struct fi_usnic_ops_fabric {

Просмотреть файл

@ -526,6 +526,8 @@ static struct fi_ops usdf_av_fi_ops = {
static struct fi_ops_av usdf_am_ops_async = {
.size = sizeof(struct fi_ops_av),
.insert = usdf_am_insert_async,
.insertsvc = fi_no_av_insertsvc,
.insertsym = fi_no_av_insertsym,
.remove = usdf_am_remove,
.lookup = usdf_am_lookup,
.straddr = usdf_av_straddr
@ -534,6 +536,8 @@ static struct fi_ops_av usdf_am_ops_async = {
static struct fi_ops_av usdf_am_ops_sync = {
.size = sizeof(struct fi_ops_av),
.insert = usdf_am_insert_sync,
.insertsvc = fi_no_av_insertsvc,
.insertsym = fi_no_av_insertsym,
.remove = usdf_am_remove,
.lookup = usdf_am_lookup,
.straddr = usdf_av_straddr

Просмотреть файл

@ -648,61 +648,81 @@ usdf_cq_close(fid_t fid)
static struct fi_ops_cq usdf_cq_context_ops = {
.size = sizeof(struct fi_ops_cq),
.read = usdf_cq_read_context,
.sread = usdf_cq_sread,
.readfrom = usdf_cq_readfrom_context,
.readerr = usdf_cq_readerr,
.strerror = usdf_cq_strerror
.write = fi_no_cq_write,
.writeerr = fi_no_cq_writeerr,
.sread = usdf_cq_sread,
.sreadfrom = fi_no_cq_sreadfrom,
.strerror = usdf_cq_strerror,
};
static struct fi_ops_cq usdf_cq_context_soft_ops = {
.size = sizeof(struct fi_ops_cq),
.read = usdf_cq_read_context_soft,
.sread = usdf_cq_sread_soft,
.readfrom = usdf_cq_readfrom_context_soft,
.readerr = usdf_cq_readerr,
.strerror = usdf_cq_strerror
.write = fi_no_cq_write,
.writeerr = fi_no_cq_writeerr,
.sread = usdf_cq_sread_soft,
.sreadfrom = fi_no_cq_sreadfrom,
.strerror = usdf_cq_strerror,
};
static struct fi_ops_cq usdf_cq_msg_ops = {
.size = sizeof(struct fi_ops_cq),
.read = usdf_cq_read_msg,
.sread = usdf_cq_sread,
.readfrom = fi_no_cq_readfrom, /* XXX */
.readerr = usdf_cq_readerr,
.strerror = usdf_cq_strerror
.write = fi_no_cq_write,
.writeerr = fi_no_cq_writeerr,
.sread = usdf_cq_sread,
.sreadfrom = fi_no_cq_sreadfrom,
.strerror = usdf_cq_strerror,
};
static struct fi_ops_cq usdf_cq_msg_soft_ops = {
.size = sizeof(struct fi_ops_cq),
.read = usdf_cq_read_msg_soft,
.sread = usdf_cq_sread,
.readfrom = fi_no_cq_readfrom, /* XXX */
.readerr = usdf_cq_readerr,
.strerror = usdf_cq_strerror
.write = fi_no_cq_write,
.writeerr = fi_no_cq_writeerr,
.sread = usdf_cq_sread,
.sreadfrom = fi_no_cq_sreadfrom,
.strerror = usdf_cq_strerror,
};
static struct fi_ops_cq usdf_cq_data_ops = {
.size = sizeof(struct fi_ops_cq),
.read = usdf_cq_read_data,
.sread = usdf_cq_sread,
.readfrom = fi_no_cq_readfrom, /* XXX */
.readerr = usdf_cq_readerr,
.strerror = usdf_cq_strerror
.write = fi_no_cq_write,
.writeerr = fi_no_cq_writeerr,
.sread = usdf_cq_sread,
.sreadfrom = fi_no_cq_sreadfrom,
.strerror = usdf_cq_strerror,
};
static struct fi_ops_cq usdf_cq_data_soft_ops = {
.size = sizeof(struct fi_ops_cq),
.read = usdf_cq_read_data_soft,
.sread = usdf_cq_sread,
.readfrom = fi_no_cq_readfrom, /* XXX */
.readerr = usdf_cq_readerr,
.strerror = usdf_cq_strerror
.write = fi_no_cq_write,
.writeerr = fi_no_cq_writeerr,
.sread = usdf_cq_sread,
.sreadfrom = fi_no_cq_sreadfrom,
.strerror = usdf_cq_strerror,
};
static struct fi_ops usdf_cq_fi_ops = {
.size = sizeof(struct fi_ops),
.close = usdf_cq_close,
.bind = fi_no_bind,
.control = usdf_cq_control,
.ops_open = fi_no_ops_open,
};
/*

Просмотреть файл

@ -189,19 +189,27 @@ static struct fi_ops usdf_fid_ops = {
.size = sizeof(struct fi_ops),
.close = usdf_domain_close,
.bind = usdf_domain_bind,
.control = fi_no_control,
.ops_open = fi_no_ops_open,
};
static struct fi_ops_mr usdf_domain_mr_ops = {
.size = sizeof(struct fi_ops_mr),
.reg = usdf_reg_mr,
.regv = fi_no_mr_regv,
.regattr = fi_no_mr_regattr,
};
static struct fi_ops_domain usdf_domain_ops = {
.size = sizeof(struct fi_ops_domain),
.cq_open = usdf_cq_open,
.av_open = usdf_av_open,
.cq_open = usdf_cq_open,
.endpoint = usdf_endpoint_open,
.scalable_ep = fi_no_scalable_ep,
.cntr_open = fi_no_cntr_open,
.poll_open = fi_no_poll_open,
.stx_ctx = fi_no_stx_context,
.srx_ctx = fi_no_srx_context,
};
int

Просмотреть файл

@ -330,7 +330,12 @@ static struct fi_ops_msg usdf_dgram_prefix_ops = {
static struct fi_ops_cm usdf_cm_dgram_ops = {
.size = sizeof(struct fi_ops_cm),
.getname = fi_no_getname,
.getpeer = fi_no_getpeer,
.connect = fi_no_connect,
.listen = fi_no_listen,
.accept = fi_no_accept,
.reject = fi_no_reject,
.shutdown = fi_no_shutdown,
};

Просмотреть файл

@ -483,10 +483,10 @@ usdf_eq_close(fid_t fid)
static struct fi_ops_eq usdf_eq_ops = {
.size = sizeof(struct fi_ops_eq),
.read = usdf_eq_read,
.sread = fi_no_eq_sread,
.readerr = usdf_eq_readerr,
.write = fi_no_eq_write,
.strerror = usdf_eq_strerror
.sread = fi_no_eq_sread,
.strerror = usdf_eq_strerror,
};
static struct fi_ops usdf_eq_fi_ops = {

Просмотреть файл

@ -329,6 +329,7 @@ usdf_fill_info_dgram(
dattrp->threading = FI_THREAD_UNSPEC;
dattrp->control_progress = FI_PROGRESS_AUTO;
dattrp->data_progress = FI_PROGRESS_MANUAL;
dattrp->resource_mgmt = FI_RM_DISABLED;
/* add to tail of list */
if (*fi_first == NULL) {
@ -434,6 +435,7 @@ usdf_fill_info_msg(
dattrp->threading = FI_THREAD_UNSPEC;
dattrp->control_progress = FI_PROGRESS_AUTO;
dattrp->data_progress = FI_PROGRESS_MANUAL;
dattrp->resource_mgmt = FI_RM_DISABLED;
/* add to tail of list */
if (*fi_first == NULL) {
@ -537,6 +539,7 @@ usdf_fill_info_rdm(
dattrp->threading = FI_THREAD_UNSPEC;
dattrp->control_progress = FI_PROGRESS_AUTO;
dattrp->data_progress = FI_PROGRESS_MANUAL;
dattrp->resource_mgmt = FI_RM_DISABLED;
/* add to tail of list */
if (*fi_first == NULL) {
@ -857,6 +860,7 @@ static struct fi_ops_fabric usdf_ops_fabric = {
.domain = usdf_domain_open,
.passive_ep = usdf_pep_open,
.eq_open = usdf_eq_open,
.wait_open = fi_no_wait_open,
};
static int

Просмотреть файл

@ -56,6 +56,7 @@
#include <rdma/fi_rma.h>
#include <rdma/fi_errno.h>
#include "fi.h"
#include "fi_enosys.h"
#include "usnic_direct.h"
#include "usdf.h"
@ -76,7 +77,10 @@ int usdf_dereg_mr(fid_t fid)
static struct fi_ops usdf_mr_ops = {
.size = sizeof(struct fi_ops),
.close = usdf_dereg_mr
.close = usdf_dereg_mr,
.bind = fi_no_bind,
.control = fi_no_control,
.ops_open = fi_no_ops_open,
};
int

Просмотреть файл

@ -405,7 +405,7 @@ usdf_pep_close(fid_t fid)
usdf_pep_free_cr_lists(pep);
close(pep->pep_sock);
if (&pep->pep_eq != NULL) {
if (pep->pep_eq != NULL) {
atomic_dec(&pep->pep_eq->eq_refcnt);
}
atomic_dec(&pep->pep_fabric->fab_refcnt);
@ -429,6 +429,8 @@ static struct fi_ops_ep usdf_pep_base_ops = {
.setopt = fi_no_setopt,
.tx_ctx = fi_no_tx_ctx,
.rx_ctx = fi_no_rx_ctx,
.rx_size_left = fi_no_rx_size_left,
.tx_size_left = fi_no_tx_size_left,
};
static struct fi_ops_cm usdf_pep_cm_ops = {

Просмотреть файл

@ -248,6 +248,7 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys
#define __vlan_hwaccel_put_tag(a, b, c) __vlan_hwaccel_put_tag(a, c);
#endif /* KERNEL < 3.9.0 */
#ifndef __VMKLNX__
#if ((LINUX_VERSION_CODE <= KERNEL_VERSION(3, 4, 0)) && \
(!RHEL_RELEASE_CODE || RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6, 0)))
#define net_warn_ratelimited(fmt, ...) \
@ -262,6 +263,7 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys
#else
#define enic_pci_dma_mapping_error(pdev, dma) pci_dma_mapping_error(pdev, dma)
#endif /* Kernel version <= 2.6.26 */
#endif
/* Kernel version-specific definitions */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 14))

Просмотреть файл

@ -311,12 +311,14 @@ usd_ib_cmd_create_qp(
int ret;
int n;
uint32_t i;
struct usnic_vnic_barres_info *resources;
ucp = NULL;
resources = NULL;
irp = NULL;
memset(&cmd, 0, sizeof(cmd));
resp = calloc(1, sizeof(*resp) +
RES_TYPE_MAX*sizeof(struct usnic_vnic_barres_info));
resp = calloc(1, sizeof(*resp));
if (resp == NULL) {
usd_err("Failed to allocate memory for create_qp_resp\n");
return -ENOMEM;
@ -325,9 +327,7 @@ usd_ib_cmd_create_qp(
icp = &cmd.ibv_cmd;
icp->command = IB_USER_VERBS_CMD_CREATE_QP;
icp->in_words = sizeof(cmd) / 4;
icp->out_words = (sizeof(*resp) +
RES_TYPE_MAX * sizeof(struct usnic_vnic_barres_info))
/ 4;
icp->out_words = sizeof(*resp) / 4;
icp->response = (uintptr_t) resp;
icp->user_handle = (uintptr_t) qp;
@ -357,6 +357,15 @@ usd_ib_cmd_create_qp(
goto out;
}
ucp->u.v1.resources_len = RES_TYPE_MAX * sizeof(*resources);
resources = calloc(RES_TYPE_MAX, sizeof(*resources));
if (resources == NULL) {
usd_err("unable to allocate resources array\n");
ret = -ENOMEM;
goto out;
}
ucp->u.v1.resources = (u64)(uintptr_t)resources;
/* Issue command to IB driver */
n = write(dev->ud_ib_dev_fd, &cmd, sizeof(cmd));
if (n != sizeof(cmd)) {
@ -385,13 +394,15 @@ usd_ib_cmd_create_qp(
vfip->vi_bar_bus_addr = urp->bar_bus_addr;
vfip->vi_bar_len = urp->bar_len;
if (urp->cmd_version == USNIC_IB_CREATE_QP_VERSION) {
/* got expected version */
if (dev->ud_caps[USD_CAP_MAP_PER_RES] > 0) {
for (i = 0; i < urp->num_barres; i++) {
enum vnic_res_type type = urp->resources[i].type;
for (i = 0; i < MIN(RES_TYPE_MAX, urp->u.v1.num_barres); i++) {
enum vnic_res_type type = resources[i].type;
if (type < RES_TYPE_MAX) {
vfip->barres[type].type = type;
vfip->barres[type].bus_addr = urp->resources[i].bus_addr;
vfip->barres[type].len = urp->resources[i].len;
vfip->barres[type].bus_addr = resources[i].bus_addr;
vfip->barres[type].len = resources[i].len;
}
}
if (vfip->barres[RES_TYPE_WQ].bus_addr == 0) {
@ -420,12 +431,31 @@ usd_ib_cmd_create_qp(
goto out;
}
}
} else if (urp->cmd_version == 0) {
/* special case, old kernel that won't tell us about individual barres
* info but should otherwise work fine */
if (dev->ud_caps[USD_CAP_MAP_PER_RES] != 0) {
/* should not happen, only the presence of never-released kernel
* code should cause this case */
usd_err("USD_CAP_MAP_PER_RES claimed but qp_create cmd_version == 0\n");
ret = -ENXIO;
goto out;
}
} else {
usd_err("unexpected cmd_version (%u)\n", urp->cmd_version);
ret = -ENXIO;
goto out;
}
free(resources);
free(resp);
return 0;
out:
if (irp != NULL) /* indicates successful IB create QP */
usd_ib_cmd_destroy_qp(dev, qp);
free(resources);
free(resp);
return ret;
}

Просмотреть файл

@ -94,8 +94,18 @@ struct usnic_ib_create_qp_cmd_v0 {
struct usnic_ib_create_qp_cmd {
struct usnic_transport_spec spec;
u32 cmd_version;
union {
struct {
/* length in bytes of resources array */
u32 resources_len;
/* ptr to array of struct usnic_vnic_barres_info */
u64 resources;
} v1;
} u;
};
/*
* information of vnic bar resource
*/
@ -128,10 +138,14 @@ struct usnic_ib_create_qp_resp_v0 {
struct usnic_ib_create_qp_resp {
USNIC_IB_CREATE_QP_RESP_V0_FIELDS
/* the above fields end on 4-byte alignment boundary */
u32 cmd_version;
union {
struct {
u32 num_barres;
u32 pad_to_8byte;
struct usnic_vnic_barres_info resources[0];
} v1;
} u;
};
#define USNIC_CTX_RESP_VERSION 1

Просмотреть файл

@ -110,7 +110,6 @@ struct vnic_dev {
struct vnic_intr_coal_timer_info intr_coal_timer_info;
struct devcmd2_controller *devcmd2;
int (*devcmd_rtn)(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, int wait);
struct vnic_gen_stats gen_stats;
};
#define VNIC_MAX_RES_HDR_SIZE \
@ -534,9 +533,11 @@ static int _vnic_dev_cmd2(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
new_posted = (posted + 1) % DEVCMD2_RING_SIZE;
if (new_posted == fetch_index) {
pr_err("wq is full while issuing devcmd2 command %d, "
pr_err("%s: wq is full while issuing devcmd2 command %d, "
"fetch index: %u, posted index: %u\n",
_CMD_N(cmd), fetch_index, posted);
pci_name(vdev->pdev),
_CMD_N(cmd),
fetch_index, posted);
return -EBUSY;
}
@ -573,7 +574,8 @@ static int _vnic_dev_cmd2(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
if (result->error) {
err = -(int) result->error;
if (err != ERR_ECMDUNKNOWN || cmd != CMD_CAPABILITY)
pr_err("Error %d devcmd %d\n",
pr_err("%s:Error %d devcmd %d\n",
pci_name(vdev->pdev),
err, _CMD_N(cmd));
return err;
}
@ -586,7 +588,8 @@ static int _vnic_dev_cmd2(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
}
}
pr_err("Timed out devcmd %d\n", _CMD_N(cmd));
pr_err("%s:Timed out devcmd %d\n", pci_name(vdev->pdev),
_CMD_N(cmd));
return -ETIMEDOUT;
#endif
@ -1806,7 +1809,7 @@ int vnic_dev_classifier(struct vnic_dev *vdev, u8 cmd, u16 *entry, struct filter
return ret;
}
#ifdef ENIC_VXLAN
int vnic_dev_overlay_offload_enable_disable(struct vnic_dev *vdev, u8 overlay,
int vnic_dev_overlay_offload_ctrl(struct vnic_dev *vdev, u8 overlay,
u8 config)
{
u64 a0, a1;
@ -1814,7 +1817,7 @@ int vnic_dev_overlay_offload_enable_disable(struct vnic_dev *vdev, u8 overlay,
int ret = -EINVAL;
a0 = overlay;
a1 = config;
ret = vnic_dev_cmd(vdev, CMD_OVERLAY_OFFLOAD_ENABLE_DISABLE,
ret = vnic_dev_cmd(vdev, CMD_OVERLAY_OFFLOAD_CTRL,
&a0, &a1, wait);
return ret;
@ -1832,4 +1835,16 @@ int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay,
return ret;
}
/*
 * Issue CMD_GET_SUPP_FEATURE_VER for the given feature.
 *
 * The feature id is passed in the first devcmd argument.  On success
 * the value left in that argument by the command is stored through
 * supported_versions and 0 is returned; on failure the error from
 * vnic_dev_cmd() is returned and *supported_versions is not written.
 */
int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev, u8 feature,
	u64 *supported_versions)
{
	const int timeout = 1000;
	u64 arg0 = feature;
	u64 arg1 = 0;
	int err;

	err = vnic_dev_cmd(vdev, CMD_GET_SUPP_FEATURE_VER, &arg0, &arg1,
			   timeout);
	if (err)
		return err;

	*supported_versions = arg0;
	return 0;
}
#endif

Просмотреть файл

@ -264,10 +264,12 @@ int vnic_dev_set_mac_addr(struct vnic_dev *vdev, u8 *mac_addr);
int vnic_dev_classifier(struct vnic_dev *vdev, u8 cmd, u16 *entry,
struct filter *data);
#ifdef ENIC_VXLAN
int vnic_dev_overlay_offload_enable_disable(struct vnic_dev *vdev,
int vnic_dev_overlay_offload_ctrl(struct vnic_dev *vdev,
u8 overlay, u8 config);
int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay,
u16 vxlan_udp_port_number);
int vnic_dev_get_supported_feature_ver(struct vnic_dev *vdev,
u8 feature, u64 *supported_versions);
#endif
#ifndef ENIC_PMD
int vnic_dev_init_devcmdorig(struct vnic_dev *vdev);

Просмотреть файл

@ -479,14 +479,23 @@ enum vnic_devcmd_cmd {
*/
CMD_QP_STATS_CLEAR = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 63),
/* Use this devcmd for agreeing on the highest common version supported
* by both driver and fw for features that need such a facility.
* in: (u64) a0 = feature (driver requests for the supported versions on
* this feature)
* out: (u64) a0 = bitmap of all supported versions for that feature
*/
CMD_GET_SUPP_FEATURE_VER = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 69),
/*
* Enable/Disable overlay offloads on the given vnic
* Control (Enable/Disable) overlay offloads on the given vnic
* in: (u8) a0 = OVERLAY_FEATURE_NVGRE : NVGRE
* a0 = OVERLAY_FEATURE_VXLAN : VxLAN
* in: (u8) a1 = OVERLAY_OFFLOAD_ENABLE : Enable
* a1 = OVERLAY_OFFLOAD_DISABLE : Disable
* in: (u8) a1 = OVERLAY_OFFLOAD_ENABLE : Enable or
* a1 = OVERLAY_OFFLOAD_DISABLE : Disable or
* a1 = OVERLAY_OFFLOAD_ENABLE_V2 : Enable with version 2
*/
CMD_OVERLAY_OFFLOAD_ENABLE_DISABLE =
CMD_OVERLAY_OFFLOAD_CTRL =
_CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 72),
/*
@ -779,6 +788,18 @@ typedef enum {
#define OVERLAY_OFFLOAD_ENABLE 0
#define OVERLAY_OFFLOAD_DISABLE 1
#define OVERLAY_OFFLOAD_ENABLE_V2 2
#define OVERLAY_CFG_VXLAN_PORT_UPDATE 0
/*
* Use this enum to get the supported versions for each of these features
* If you need to use the devcmd_get_supported_feature_version(), add
* the new feature into this enum and install function handler in devcmd.c
*/
typedef enum {
VIC_FEATURE_VXLAN,
VIC_FEATURE_MAX,
} vic_feature_t;
#endif /* _VNIC_DEVCMD_H_ */

Просмотреть файл

@ -86,10 +86,12 @@ struct vnic_rx_stats {
u64 rsvd[16];
};
#ifndef __VMKLNX__
/* Generic statistics */
struct vnic_gen_stats {
u64 dma_map_error;
};
#endif
struct vnic_stats {
struct vnic_tx_stats tx;

Просмотреть файл

@ -102,14 +102,20 @@ static int vnic_wq_alloc_bufs(struct vnic_wq *wq)
wq->ring.desc_size * buf->index;
if (buf->index + 1 == count) {
buf->next = wq->bufs[0];
#ifndef __VMKLNX__
buf->next->prev = buf;
#endif
break;
} else if (j + 1 == VNIC_WQ_BUF_BLK_ENTRIES(count)) {
buf->next = wq->bufs[i + 1];
#ifndef __VMKLNX__
buf->next->prev = buf;
#endif
} else {
buf->next = buf + 1;
#ifndef __VMKLNX__
buf->next->prev = buf;
#endif
buf++;
}
}

Просмотреть файл

@ -88,7 +88,9 @@ struct vnic_wq_buf {
uint8_t cq_entry; /* Gets completion event from hw */
uint8_t desc_skip_cnt; /* Num descs to occupy */
uint8_t compressed_send; /* Both hdr and payload in one desc */
#ifndef __VMKLNX__
struct vnic_wq_buf *prev;
#endif
};
/* Break the vnic_wq_buf allocations into blocks of 32/64 entries */

Просмотреть файл

@ -1,4 +1,4 @@
dnl Configury specific to the libfabrics verbs provider
dnl Configury specific to the libfabric verbs provider
dnl Called to configure this provider
dnl

Просмотреть файл

@ -375,11 +375,6 @@ static int fi_ibv_check_tx_attr(struct fi_tx_attr *attr, struct fi_info *info)
return -FI_ENODATA;
}
if (attr->inject_size > verbs_tx_attr.inject_size) {
VERBS_INFO("Given tx_attr->inject_size exceeds supported size\n");
return -FI_ENODATA;
}
return 0;
}
@ -507,11 +502,14 @@ static int fi_ibv_rai_to_fi(struct rdma_addrinfo *rai, struct fi_info *fi)
return 0;
}
static int fi_ibv_fill_info_attr(struct ibv_context *ctx, struct fi_info *hints,
static int fi_ibv_fill_info_attr(struct ibv_context *ctx, struct ibv_qp *qp,
struct fi_info *hints,
struct fi_info *fi)
{
struct ibv_qp_init_attr qp_init_attr;
struct ibv_device_attr device_attr;
struct ibv_port_attr port_attr;
struct ibv_qp_attr qp_attr;
union ibv_gid gid;
size_t name_len;
int ret;
@ -532,6 +530,17 @@ static int fi_ibv_fill_info_attr(struct ibv_context *ctx, struct fi_info *hints,
return 0;
}
if (qp) {
ret = ibv_query_qp(qp, &qp_attr, IBV_QP_CAP, &qp_init_attr);
if (ret)
return -ret;
fi->tx_attr->inject_size = qp_attr.cap.max_inline_data;
} else {
fi_read_file(FI_CONF_DIR, "def_inline_data",
def_inline_data, sizeof def_inline_data);
fi->tx_attr->inject_size = atoi(def_inline_data);
}
ibv_query_gid(ctx, 1, 0, &gid);
ret = ibv_query_device(ctx, &device_attr);
if (ret)
@ -640,48 +649,9 @@ err:
return ret;
}
static int fi_ibv_getinfo(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info)
static void fi_ibv_msg_ep_qp_init_attr(struct fi_ibv_msg_ep *ep,
struct ibv_qp_init_attr *attr)
{
struct rdma_cm_id *id;
struct rdma_addrinfo *rai;
struct fi_info *fi;
int ret;
ret = fi_ibv_create_ep(node, service, flags, hints, &rai, &id);
if (ret)
return ret;
if (!(fi = fi_allocinfo())) {
ret = -FI_ENOMEM;
goto err;
}
ret = fi_ibv_rai_to_fi(rai, fi);
if (ret)
goto err;
ret = fi_ibv_fill_info_attr(id->verbs, hints, fi);
if (ret)
goto err;
*info = fi;
rdma_destroy_ep(id);
rdma_freeaddrinfo(rai);
return 0;
err:
if (fi)
fi_freeinfo(fi);
rdma_destroy_ep(id);
rdma_freeaddrinfo(rai);
return ret;
}
static int fi_ibv_msg_ep_create_qp(struct fi_ibv_msg_ep *ep)
{
struct ibv_qp_init_attr attr;
/* TODO: serialize access to string buffers */
fi_read_file(FI_CONF_DIR, "def_send_wr",
def_send_wr, sizeof def_send_wr);
@ -691,22 +661,90 @@ static int fi_ibv_msg_ep_create_qp(struct fi_ibv_msg_ep *ep)
def_send_sge, sizeof def_send_sge);
fi_read_file(FI_CONF_DIR, "def_recv_sge",
def_recv_sge, sizeof def_recv_sge);
attr->cap.max_send_wr = atoi(def_send_wr);
attr->cap.max_recv_wr = atoi(def_recv_wr);
attr->cap.max_send_sge = atoi(def_send_sge);
attr->cap.max_recv_sge = atoi(def_recv_sge);
attr->srq = NULL;
attr->qp_type = IBV_QPT_RC;
attr->sq_sig_all = 1;
if (ep) {
attr->cap.max_inline_data = ep->inline_size;
attr->qp_context = ep;
attr->send_cq = ep->scq->cq;
attr->recv_cq = ep->rcq->cq;
} else {
fi_read_file(FI_CONF_DIR, "def_inline_data",
def_inline_data, sizeof def_inline_data);
attr->cap.max_inline_data = atoi(def_inline_data);
attr->qp_context = NULL;
attr->send_cq = attr->recv_cq = NULL;
}
}
attr.cap.max_send_wr = atoi(def_send_wr);
attr.cap.max_recv_wr = atoi(def_recv_wr);
attr.cap.max_send_sge = atoi(def_send_sge);
attr.cap.max_recv_sge = atoi(def_recv_sge);
ep->inline_size = atoi(def_inline_data);
attr.cap.max_inline_data = ep->inline_size;
attr.qp_context = ep;
attr.send_cq = ep->scq->cq;
attr.recv_cq = ep->rcq->cq;
attr.srq = NULL;
attr.qp_type = IBV_QPT_RC;
attr.sq_sig_all = 1;
/*
 * Resolve node/service into an rdma_cm endpoint and describe it in a
 * newly allocated fi_info.
 *
 * When the endpoint is bound to a verbs device, a temporary queue pair
 * is created (using hints->tx_attr->inject_size as the inline size, if
 * hints supply one) and handed to fi_ibv_fill_info_attr().  The queue
 * pair, the endpoint and the address info are always released before
 * returning.
 *
 * Returns 0 and stores the result in *info on success.  On failure a
 * negative fabric error code is returned and *info is not written;
 * -FI_ENODATA means the queue pair could not be created with the
 * requested attributes.
 */
static int fi_ibv_getinfo(uint32_t version, const char *node, const char *service,
			  uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
	struct rdma_cm_id *id;
	struct ibv_qp_init_attr qp_init_attr;
	struct rdma_addrinfo *rai;
	struct fi_info *fi;
	int ret;

	ret = fi_ibv_create_ep(node, service, flags, hints, &rai, &id);
	if (ret)
		return ret;

	if (id->verbs) {
		fi_ibv_msg_ep_qp_init_attr(NULL, &qp_init_attr);
		if (hints && hints->tx_attr)
			qp_init_attr.cap.max_inline_data
				= hints->tx_attr->inject_size;

		ret = rdma_create_qp(id, NULL, &qp_init_attr);
		if (ret) {
			FI_LOG(3, "verbs", "Could not create queue pair with requested attributes\n");
			ret = -FI_ENODATA;
			goto err1;
		}
	}

	if (!(fi = fi_allocinfo())) {
		ret = -FI_ENOMEM;
		goto err1;
	}

	ret = fi_ibv_rai_to_fi(rai, fi);
	if (ret)
		goto err2;

	ret = fi_ibv_fill_info_attr(id->verbs, id->qp, hints, fi);
	if (ret)
		goto err2;

	*info = fi;

	if (id->qp)
		rdma_destroy_qp(id);
	rdma_destroy_ep(id);
	rdma_freeaddrinfo(rai);
	return 0;

err2:
	fi_freeinfo(fi);
err1:
	/*
	 * Test id->qp rather than id->verbs: this label is also reached
	 * when rdma_create_qp() itself failed, in which case there is no
	 * queue pair and rdma_destroy_qp() must not be called.
	 */
	if (id->qp)
		rdma_destroy_qp(id);
	rdma_destroy_ep(id);
	rdma_freeaddrinfo(rai);
	return ret;
}
static int fi_ibv_msg_ep_create_qp(struct fi_ibv_msg_ep *ep)
{
struct ibv_qp_init_attr attr;
fi_ibv_msg_ep_qp_init_attr(ep, &attr);
return rdma_create_qp(ep->id, ep->rcq->domain->pd, &attr) ? -errno : 0;
}
@ -746,164 +784,6 @@ static int fi_ibv_msg_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
return 0;
}
static ssize_t
fi_ibv_msg_ep_recv(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr, void *context)
{
struct fi_ibv_msg_ep *_ep;
struct ibv_recv_wr wr, *bad;
struct ibv_sge sge;
sge.addr = (uintptr_t) buf;
sge.length = (uint32_t) len;
sge.lkey = (uint32_t) (uintptr_t) desc;
wr.wr_id = (uintptr_t) context;
wr.next = NULL;
wr.sg_list = &sge;
wr.num_sge = 1;
_ep = container_of(ep, struct fi_ibv_msg_ep, ep_fid);
return -ibv_post_recv(_ep->id->qp, &wr, &bad);
}
static ssize_t
fi_ibv_msg_ep_recvv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, fi_addr_t src_addr, void *context)
{
struct fi_ibv_msg_ep *_ep;
struct ibv_recv_wr wr, *bad;
struct ibv_sge *sge;
size_t i;
sge = alloca(count * sizeof(struct ibv_sge));
wr.wr_id = (uintptr_t) context;
wr.next = NULL;
wr.sg_list = sge;
wr.num_sge = (int) count;
for (i = 0; i < count; i++) {
sge[i].addr = (uintptr_t) iov[i].iov_base;
sge[i].length = (uint32_t) iov[i].iov_len;
sge[i].lkey = (uint32_t) (uintptr_t) desc[i];
}
_ep = container_of(ep, struct fi_ibv_msg_ep, ep_fid);
return -ibv_post_recv(_ep->id->qp, &wr, &bad);
}
static ssize_t
fi_ibv_msg_ep_send(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, void *context)
{
struct fi_ibv_msg_ep *_ep;
struct ibv_send_wr wr, *bad;
struct ibv_sge sge;
_ep = container_of(ep, struct fi_ibv_msg_ep, ep_fid);
sge.addr = (uintptr_t) buf;
sge.length = (uint32_t) len;
sge.lkey = (uint32_t) (uintptr_t) desc;
wr.wr_id = (uintptr_t) context;
wr.next = NULL;
wr.sg_list = &sge;
wr.num_sge = 1;
wr.opcode = IBV_WR_SEND;
wr.send_flags = (len <= _ep->inline_size) ? IBV_SEND_INLINE : 0;
return -ibv_post_send(_ep->id->qp, &wr, &bad);
}
static ssize_t
fi_ibv_msg_ep_senddata(struct fid_ep *ep, const void *buf, size_t len,
void *desc, uint64_t data, fi_addr_t dest_addr, void *context)
{
struct fi_ibv_msg_ep *_ep;
struct ibv_send_wr wr, *bad;
struct ibv_sge sge;
_ep = container_of(ep, struct fi_ibv_msg_ep, ep_fid);
sge.addr = (uintptr_t) buf;
sge.length = (uint32_t) len;
sge.lkey = (uint32_t) (uintptr_t) desc;
wr.wr_id = (uintptr_t) context;
wr.next = NULL;
wr.sg_list = &sge;
wr.num_sge = 1;
wr.opcode = IBV_WR_SEND_WITH_IMM;
wr.send_flags = (len <= _ep->inline_size) ? IBV_SEND_INLINE : 0;
wr.imm_data = (uint32_t) data;
return -ibv_post_send(_ep->id->qp, &wr, &bad);
}
static ssize_t
fi_ibv_msg_ep_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, fi_addr_t dest_addr, void *context)
{
struct fi_ibv_msg_ep *_ep;
struct ibv_send_wr wr, *bad;
struct ibv_sge *sge;
size_t bytes = 0, i;
sge = alloca(count * sizeof(struct ibv_sge));
wr.wr_id = (uintptr_t) context;
wr.next = NULL;
wr.sg_list = sge;
wr.num_sge = (int) count;
wr.opcode = IBV_WR_SEND;
_ep = container_of(ep, struct fi_ibv_msg_ep, ep_fid);
for (i = 0; i < count; i++) {
sge[i].addr = (uintptr_t) iov[i].iov_base;
sge[i].length = (uint32_t) iov[i].iov_len;
bytes += iov[i].iov_len;
sge[i].lkey = (uint32_t) (uintptr_t) desc[i];
}
wr.send_flags = (bytes <= _ep->inline_size) ? IBV_SEND_INLINE : 0;
return -ibv_post_send(_ep->id->qp, &wr, &bad);
}
static ssize_t
fi_ibv_msg_ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flags)
{
struct fi_ibv_msg_ep *_ep;
struct ibv_send_wr wr, *bad;
struct ibv_sge *sge;
size_t i, len;
_ep = container_of(ep, struct fi_ibv_msg_ep, ep_fid);
wr.num_sge = msg->iov_count;
if (msg->iov_count) {
sge = alloca(sizeof(*sge) * msg->iov_count);
for (len = 0, i = 0; i < msg->iov_count; i++) {
sge[i].addr = (uintptr_t) msg->msg_iov[i].iov_base;
sge[i].length = (uint32_t) msg->msg_iov[i].iov_len;
sge[i].lkey = (uint32_t) (uintptr_t) (msg->desc[i]);
len += sge[i].length;
}
wr.sg_list = sge;
wr.send_flags = (len <= _ep->inline_size) ? IBV_SEND_INLINE : 0;
} else {
wr.send_flags = 0;
}
wr.wr_id = (uintptr_t) msg->context;
wr.next = NULL;
if (flags & FI_REMOTE_CQ_DATA) {
wr.opcode = IBV_WR_SEND_WITH_IMM;
wr.imm_data = (uint32_t) msg->data;
} else {
wr.opcode = IBV_WR_SEND;
}
return -ibv_post_send(_ep->id->qp, &wr, &bad);
}
static ssize_t
fi_ibv_msg_ep_recvmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flags)
{
@ -930,6 +810,135 @@ fi_ibv_msg_ep_recvmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flag
return -ibv_post_recv(_ep->id->qp, &wr, &bad);
}
/*
 * Post a single-buffer receive: wrap buf/len/desc in a one-element
 * fi_msg and forward it to fi_ibv_msg_ep_recvmsg() with no flags.
 */
static ssize_t
fi_ibv_msg_ep_recv(struct fid_ep *ep, void *buf, size_t len,
		void *desc, fi_addr_t src_addr, void *context)
{
	struct iovec vec = { .iov_base = buf, .iov_len = len };
	struct fi_msg req;

	req.context = context;
	req.addr = src_addr;
	req.iov_count = 1;
	req.desc = &desc;
	req.msg_iov = &vec;

	return fi_ibv_msg_ep_recvmsg(ep, &req, 0);
}
/*
 * Post a receive into a caller-supplied iovec array.
 *
 * Packs iov/desc/count into a struct fi_msg and forwards to
 * fi_ibv_msg_ep_recvmsg() with flags == 0.  The designated initializer
 * zeroes every member not named here (such as data) so nothing indeterminate
 * is handed to the callee.
 *
 * Returns whatever fi_ibv_msg_ep_recvmsg() returns.
 */
static ssize_t
fi_ibv_msg_ep_recvv(struct fid_ep *ep, const struct iovec *iov, void **desc,
		size_t count, fi_addr_t src_addr, void *context)
{
	struct fi_msg msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = src_addr,
		.context = context,
	};

	return fi_ibv_msg_ep_recvmsg(ep, &msg, 0);
}
/*
 * Post a send described by a struct fi_msg on the endpoint's queue pair.
 *
 * One ibv_sge is built per msg->msg_iov entry.  Recognized flags:
 *   FI_INJECT         - post with IBV_SEND_INLINE; msg->desc is not read
 *                       and the sge lkeys are set to 0.
 *   FI_REMOTE_CQ_DATA - post IBV_WR_SEND_WITH_IMM carrying msg->data cast
 *                       to uint32_t; otherwise a plain IBV_WR_SEND is used.
 *
 * Returns the negated result of ibv_post_send().
 */
static ssize_t
fi_ibv_msg_ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flags)
{
	struct fi_ibv_msg_ep *_ep;
	struct ibv_send_wr wr, *bad;
	struct ibv_sge *sge;
	size_t i;

	_ep = container_of(ep, struct fi_ibv_msg_ep, ep_fid);

	/* Start from an all-zero request: sg_list stays NULL for an empty
	 * iov, send_flags defaults to 0, and no indeterminate member is
	 * handed to ibv_post_send(). */
	memset(&wr, 0, sizeof(wr));

	wr.num_sge = msg->iov_count;
	if (msg->iov_count) {
		/* scatter/gather list only needs to live until the post returns */
		sge = alloca(sizeof(*sge) * msg->iov_count);
		for (i = 0; i < msg->iov_count; i++) {
			sge[i].addr = (uintptr_t) msg->msg_iov[i].iov_base;
			sge[i].length = (uint32_t) msg->msg_iov[i].iov_len;
			/* desc[] is only dereferenced for non-inject sends */
			sge[i].lkey = (flags & FI_INJECT) ? 0 :
				(uint32_t)(uintptr_t)(msg->desc[i]);
		}
		wr.sg_list = sge;
		if (flags & FI_INJECT)
			wr.send_flags = IBV_SEND_INLINE;
	}

	wr.wr_id = (uintptr_t) msg->context;
	wr.next = NULL;

	if (flags & FI_REMOTE_CQ_DATA) {
		wr.opcode = IBV_WR_SEND_WITH_IMM;
		wr.imm_data = (uint32_t) msg->data;
	} else {
		wr.opcode = IBV_WR_SEND;
	}

	return -ibv_post_send(_ep->id->qp, &wr, &bad);
}
/*
 * Send a single buffer.
 *
 * Wraps buf/len in a one-element iovec and forwards to
 * fi_ibv_msg_ep_sendmsg() with flags == 0.  The fi_msg is built with a
 * designated initializer so that members not named here (such as data) are
 * zero instead of indeterminate.
 *
 * Returns whatever fi_ibv_msg_ep_sendmsg() returns.
 */
static ssize_t
fi_ibv_msg_ep_send(struct fid_ep *ep, const void *buf, size_t len,
		void *desc, fi_addr_t dest_addr, void *context)
{
	struct iovec iov = {
		.iov_base = (void *)buf,	/* iovec has no const-qualified base */
		.iov_len = len,
	};
	struct fi_msg msg = {
		.msg_iov = &iov,
		.desc = &desc,
		.iov_count = 1,
		.addr = dest_addr,
		.context = context,
	};

	return fi_ibv_msg_ep_sendmsg(ep, &msg, 0);
}
/*
 * Send a single buffer together with remote CQ data.
 *
 * Wraps buf/len in a one-element iovec, stores data in the message, and
 * forwards to fi_ibv_msg_ep_sendmsg() with FI_REMOTE_CQ_DATA set.
 *
 * Returns whatever fi_ibv_msg_ep_sendmsg() returns.
 */
static ssize_t
fi_ibv_msg_ep_senddata(struct fid_ep *ep, const void *buf, size_t len,
		void *desc, uint64_t data, fi_addr_t dest_addr, void *context)
{
	struct iovec vec = {
		.iov_base = (void *)buf,
		.iov_len = len,
	};
	struct fi_msg request = {
		.msg_iov = &vec,
		.desc = &desc,
		.iov_count = 1,
		.addr = dest_addr,
		.context = context,
		.data = data,
	};

	return fi_ibv_msg_ep_sendmsg(ep, &request, FI_REMOTE_CQ_DATA);
}
/*
 * Send the contents of a caller-supplied iovec array.
 *
 * Packs iov/desc/count into a struct fi_msg and forwards to
 * fi_ibv_msg_ep_sendmsg() with flags == 0.  The designated initializer
 * zeroes every member not named here (such as data) so nothing indeterminate
 * is handed to the callee.
 *
 * Returns whatever fi_ibv_msg_ep_sendmsg() returns.
 */
static ssize_t
fi_ibv_msg_ep_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
		size_t count, fi_addr_t dest_addr, void *context)
{
	struct fi_msg msg = {
		.msg_iov = iov,
		.desc = desc,
		.iov_count = count,
		.addr = dest_addr,
		.context = context,
	};

	return fi_ibv_msg_ep_sendmsg(ep, &msg, 0);
}
static struct fi_ops_msg fi_ibv_msg_ep_msg_ops = {
.size = sizeof(struct fi_ops_msg),
.recv = fi_ibv_msg_ep_recv,
@ -1020,11 +1029,15 @@ fi_ibv_msg_ep_rma_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
for (len = 0, i = 0; i < msg->iov_count; i++) {
sge[i].addr = (uintptr_t) msg->msg_iov[i].iov_base;
sge[i].length = (uint32_t) msg->msg_iov[i].iov_len;
sge[i].lkey = (uint32_t) (uintptr_t) (msg->desc[i]);
len += sge[i].length;
}
if (!(flags & FI_INJECT)) {
for (i = 0; i < msg->iov_count; i++) {
sge[i].lkey = (uint32_t)(uintptr_t)(msg->desc[i]);
}
}
wr.send_flags = (len <= _ep->inline_size) ? IBV_SEND_INLINE : 0;
wr.send_flags = (flags & FI_INJECT) ? IBV_SEND_INLINE : 0;
}
wr.sg_list = sge;
@ -1272,13 +1285,15 @@ fi_ibv_msg_ep_atomic_writemsg(struct fid_ep *ep,
sge.addr = (uintptr_t) msg->msg_iov->addr;
sge.length = (uint32_t) sizeof(uint64_t);
if (!(flags & FI_INJECT)) {
sge.lkey = (uint32_t) (uintptr_t) msg->desc[0];
}
wr.wr_id = (uintptr_t) msg->context;
wr.next = NULL;
wr.sg_list = &sge;
wr.num_sge = 1;
wr.send_flags = (sge.length <= _ep->inline_size) ? IBV_SEND_INLINE : 0;
wr.send_flags = (flags & FI_INJECT) ? IBV_SEND_INLINE : 0;
wr.send_flags |= IBV_SEND_FENCE;
return -ibv_post_send(_ep->id->qp, &wr, &bad);
@ -1402,7 +1417,9 @@ fi_ibv_msg_ep_atomic_readwritemsg(struct fid_ep *ep,
sge.addr = (uintptr_t) resultv->addr;
sge.length = (uint32_t) sizeof(uint64_t);
if (!(flags & FI_INJECT)) {
sge.lkey = (uint32_t) (uintptr_t) result_desc[0];
}
_ep = container_of(ep, struct fi_ibv_msg_ep, ep_fid);
@ -1528,7 +1545,9 @@ fi_ibv_msg_ep_atomic_compwritemsg(struct fid_ep *ep,
sge.addr = (uintptr_t) resultv->addr;
sge.length = (uint32_t) sizeof(uint64_t);
if (!(flags & FI_INJECT)) {
sge.lkey = (uint32_t) (uintptr_t) result_desc[0];
}
_ep = container_of(ep, struct fi_ibv_msg_ep, ep_fid);
@ -1630,6 +1649,7 @@ static struct fi_ops_atomic fi_ibv_msg_ep_atomic_ops = {
.write = fi_ibv_msg_ep_atomic_write,
.writev = fi_ibv_msg_ep_atomic_writev,
.writemsg = fi_ibv_msg_ep_atomic_writemsg,
.inject = fi_no_atomic_inject,
.readwrite = fi_ibv_msg_ep_atomic_readwrite,
.readwritev = fi_ibv_msg_ep_atomic_readwritev,
.readwritemsg = fi_ibv_msg_ep_atomic_readwritemsg,
@ -1810,6 +1830,8 @@ static struct fi_ops_ep fi_ibv_msg_ep_base_ops = {
.cancel = fi_no_cancel,
.getopt = fi_ibv_msg_ep_getopt,
.setopt = fi_ibv_msg_ep_setopt,
.tx_ctx = fi_no_tx_ctx,
.rx_ctx = fi_no_rx_ctx,
.rx_size_left = fi_no_rx_size_left,
.tx_size_left = fi_no_tx_size_left,
};
@ -1889,6 +1911,14 @@ fi_ibv_open_ep(struct fid_domain *domain, struct fi_info *info,
_ep->ep_fid.rma = &fi_ibv_msg_ep_rma_ops;
_ep->ep_fid.atomic = &fi_ibv_msg_ep_atomic_ops;
if (info->tx_attr) {
_ep->inline_size = info->tx_attr->inject_size;
} else {
fi_read_file(FI_CONF_DIR, "def_inline_data",
def_inline_data, sizeof def_inline_data);
_ep->inline_size = atoi(def_inline_data);
}
*ep = &_ep->ep_fid;
return 0;
err:
@ -1935,7 +1965,7 @@ fi_ibv_eq_cm_getinfo(struct fi_ibv_fabric *fab, struct rdma_cm_event *event)
goto err;
memcpy(fi->dest_addr, rdma_get_peer_addr(event->id), fi->dest_addrlen);
fi_ibv_fill_info_attr(event->id->verbs, NULL, fi);
fi_ibv_fill_info_attr(event->id->verbs, NULL, NULL, fi);
fi->connreq = (fi_connreq_t) event->id;
return fi;
@ -2020,7 +2050,7 @@ fi_ibv_eq_read(struct fid_eq *eq, uint32_t *event,
ret = rdma_get_cm_event(_eq->channel, &cma_event);
if (ret)
return (errno == EAGAIN) ? 0 : -errno;
return -errno;
ret = fi_ibv_eq_cm_process_event(_eq, cma_event, event, entry, len);
rdma_ack_cm_event(cma_event);
@ -2038,12 +2068,12 @@ fi_ibv_eq_sread(struct fid_eq *eq, uint32_t *event,
while (1) {
ret = fi_ibv_eq_read(eq, event, buf, len, flags);
if (ret)
if (ret && (ret != -FI_EAGAIN))
return ret;
ret = fi_poll_fd(_eq->channel->fd, timeout);
if (ret == 0)
return -FI_ETIMEDOUT;
return -FI_EAGAIN;
else if (ret < 0)
return ret;
};
@ -2216,7 +2246,7 @@ fi_ibv_cq_sread(struct fid_cq *cq, void *buf, size_t count, const void *cond,
for (cur = 0; cur < threshold; ) {
ret = _cq->cq_fid.ops->read(cq, buf, count - cur);
if (ret < 0 || !_cq->channel)
if ((ret < 0 && ret != -FI_EAGAIN) || !_cq->channel)
break;
if (ret > 0) {
@ -2234,7 +2264,7 @@ fi_ibv_cq_sread(struct fid_cq *cq, void *buf, size_t count, const void *cond,
}
ret = fi_poll_fd(_cq->channel->fd, timeout);
if (ret == 0)
return -FI_ETIMEDOUT;
return -FI_EAGAIN;
else if (ret < 0)
break;
}
@ -2301,7 +2331,7 @@ static ssize_t fi_ibv_cq_read_context(struct fid_cq *cq, void *buf, size_t count
entry += 1;
}
return i ? i : ret;
return i ? i : (ret ? ret : -FI_EAGAIN);
}
static ssize_t fi_ibv_cq_read_msg(struct fid_cq *cq, void *buf, size_t count)
@ -2330,7 +2360,7 @@ static ssize_t fi_ibv_cq_read_msg(struct fid_cq *cq, void *buf, size_t count)
entry += 1;
}
return i ? i : ret;
return i ? i : (ret ? ret : -FI_EAGAIN);
}
static ssize_t fi_ibv_cq_read_data(struct fid_cq *cq, void *buf, size_t count)
@ -2368,7 +2398,7 @@ static ssize_t fi_ibv_cq_read_data(struct fid_cq *cq, void *buf, size_t count)
entry += 1;
}
return i ? i : ret;
return i ? i : (ret ? ret : -FI_EAGAIN);
}
static const char *
@ -2388,6 +2418,7 @@ static struct fi_ops_cq fi_ibv_cq_context_ops = {
.write = fi_no_cq_write,
.writeerr = fi_no_cq_writeerr,
.sread = fi_ibv_cq_sread,
.sreadfrom = fi_no_cq_sreadfrom,
.strerror = fi_ibv_cq_strerror
};
@ -2399,6 +2430,7 @@ static struct fi_ops_cq fi_ibv_cq_msg_ops = {
.write = fi_no_cq_write,
.writeerr = fi_no_cq_writeerr,
.sread = fi_ibv_cq_sread,
.sreadfrom = fi_no_cq_sreadfrom,
.strerror = fi_ibv_cq_strerror
};
@ -2410,6 +2442,7 @@ static struct fi_ops_cq fi_ibv_cq_data_ops = {
.write = fi_no_cq_write,
.writeerr = fi_no_cq_writeerr,
.sread = fi_ibv_cq_sread,
.sreadfrom = fi_no_cq_sreadfrom,
.strerror = fi_ibv_cq_strerror
};
@ -2675,8 +2708,11 @@ static struct fi_ops_domain fi_ibv_domain_ops = {
.av_open = fi_no_av_open,
.cq_open = fi_ibv_cq_open,
.endpoint = fi_ibv_open_ep,
.scalable_ep = fi_no_scalable_ep,
.cntr_open = fi_no_cntr_open,
.poll_open = fi_no_poll_open,
.stx_ctx = fi_no_stx_context,
.srx_ctx = fi_no_srx_context,
};
static int
@ -2866,6 +2902,5 @@ static struct fi_provider fi_ibv_prov = {
/*
 * Provider entry point whose declaration is supplied by the VERBS_INI macro
 * (defined outside this view).  Calls fi_log_init() and returns the address
 * of the fi_ibv_prov provider descriptor.
 */
VERBS_INI
{
fi_log_init();
return &fi_ibv_prov;
}

Просмотреть файл

@ -224,6 +224,11 @@ int fi_no_endpoint(struct fid_domain *domain, struct fi_info *info,
{
return -FI_ENOSYS;
}
/*
 * Placeholder for providers that do not implement scalable endpoints:
 * ignores every argument and always returns -FI_ENOSYS.
 */
int fi_no_scalable_ep(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **sep, void *context)
{
return -FI_ENOSYS;
}
int fi_no_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
struct fid_cntr **cntr, void *context)
{
@ -270,10 +275,6 @@ int fi_no_mr_regattr(struct fid *fid, const struct fi_mr_attr *attr,
/*
* struct fi_ops_ep
*/
/* Placeholder "enable" operation: ignores ep and always returns -FI_ENOSYS. */
int fi_no_enable(struct fid_ep *ep)
{
return -FI_ENOSYS;
}
ssize_t fi_no_cancel(fid_t fid, void *context)
{
return -FI_ENOSYS;

Просмотреть файл

@ -36,6 +36,7 @@
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -62,6 +63,24 @@ static struct fi_prov *prov_head, *prov_tail;
static volatile int init = 0;
static pthread_mutex_t ini_lock = PTHREAD_MUTEX_INITIALIZER;
static char **prov_name_filters = NULL;
static int prov_filter_negated = 0;
/*
 * Decide whether a provider passes the name filter list.
 *
 * Returns 1 to keep the provider and 0 to skip it.  With no filter list
 * every provider is kept.  Otherwise a provider whose name appears in the
 * list is kept unless the list is negated, and a provider whose name is
 * absent is kept only when the list is negated.
 */
static int filter_provider(struct fi_provider *provider)
{
	char **name;

	/* no filter list configured: keep by default */
	if (prov_name_filters == NULL)
		return 1;

	for (name = prov_name_filters; *name != NULL; ++name) {
		if (strcmp(provider->name, *name) == 0)
			return !prov_filter_negated;
	}

	/* not named in the list */
	return prov_filter_negated ? 1 : 0;
}
static void cleanup_provider(struct fi_provider *provider, void *dlhandle)
{
@ -78,6 +97,7 @@ static int fi_register_provider(struct fi_provider *provider, void *dlhandle)
{
struct fi_prov *prov;
int ret;
int keep;
if (!provider) {
ret = -FI_EINVAL;
@ -98,6 +118,15 @@ static int fi_register_provider(struct fi_provider *provider, void *dlhandle)
goto cleanup;
}
keep = filter_provider(provider);
if (!keep) {
FI_LOG(2, NULL,
"\"%s\" filtered by provider include/exclude list, skipping\n",
provider->name);
ret = -FI_ENODEV;
goto cleanup;
}
prov = fi_getprov(provider->name);
if (prov) {
/* If this provider is older than an already-loaded
@ -156,6 +185,98 @@ static int lib_filter(const struct dirent *entry)
}
#endif
/*
 * Break the string "s" into tokens at any character found in "delim".
 *
 * The result is a NULL-terminated array of pointers into a private copy of
 * "s"; the input string itself is not modified.  Release the result with
 * free_string_array().
 *
 * Returns NULL if either argument is NULL or if memory cannot be allocated.
 */
static char **split_and_alloc(const char *s, const char *delim)
{
	char **tokens;
	char *copy, *cursor;
	const char *p;
	int count, idx;

	if (s == NULL || delim == NULL)
		return NULL;

	copy = strdup(s);
	if (copy == NULL) {
		FI_WARN(NULL, "failed to allocate memory\n");
		return NULL;
	}

	/* one token to start with, plus one more per delimiter character */
	count = 1;
	for (p = copy; *p != '\0'; ++p) {
		if (strchr(delim, *p) != NULL)
			++count;
	}

	/* extra slot holds the terminating NULL pointer */
	tokens = calloc(count + 1, sizeof(*tokens));
	if (tokens == NULL) {
		FI_WARN(NULL, "failed to allocate memory\n");
		free(copy);
		return NULL;
	}

	/* strsep() cuts the copy in place; each token points inside it */
	idx = 0;
	cursor = copy;
	while (cursor != NULL)
		tokens[idx++] = strsep(&cursor, delim);
	assert(idx == count);

	return tokens;
}
/*
 * Release an array produced by split_and_alloc().  Passing NULL is allowed
 * and does nothing.
 */
static void free_string_array(char **s)
{
	if (s == NULL)
		return;

	/* every token points into one strdup'ed buffer that starts at s[0] */
	free(s[0]);
	/* then drop the pointer array itself */
	free(s);
}
/*
 * Read initialization-time settings from the environment.
 *
 * FI_PROVIDER, when set, is a comma-separated list of provider names stored
 * in prov_name_filters.  A leading '^' sets prov_filter_negated and is not
 * part of the first name.
 */
static void fi_parse_ini_env()
{
	const char *filter_spec = getenv("FI_PROVIDER");

	if (filter_spec != NULL) {
		if (filter_spec[0] == '^') {
			prov_filter_negated = 1;
			filter_spec++;
		}

		prov_name_filters = split_and_alloc(filter_spec, ",");
		if (prov_name_filters == NULL) {
			FI_WARN(NULL, "unable to parse FI_PROVIDER env var\n");
		}
	}
}
/*
 * Release the state created by fi_parse_ini_env().
 *
 * The filter list pointer is reset after it is freed so that a later
 * filter_provider() call or a repeated fi_fini_env() sees "no filter"
 * instead of a dangling pointer.
 */
static void fi_fini_env()
{
	free_string_array(prov_name_filters);
	prov_name_filters = NULL;
}
/*
* Initialize the sockets provider last. This will result in it being
* the least preferred provider.
@ -168,6 +289,7 @@ static void fi_ini(void)
goto unlock;
fi_log_init();
fi_parse_ini_env();
#ifdef HAVE_LIBDL
struct dirent **liblist = NULL;
@ -240,6 +362,8 @@ static void __attribute__((destructor)) fi_fini(void)
cleanup_provider(prov->provider, prov->dlhandle);
free(prov);
}
fi_fini_env();
}
static struct fi_prov *fi_getprov(const char *prov_name)

Просмотреть файл

@ -156,7 +156,6 @@ static void fi_tostr_threading(char *buf, enum fi_threading threading)
}
}
static void fi_tostr_order(char *buf, uint64_t flags)
{
IFFLAGSTR(flags, FI_ORDER_RAR);
@ -345,6 +344,30 @@ static void fi_tostr_ep_attr(char *buf, const struct fi_ep_attr *attr, const cha
strcatf(buf, "%s%srx_ctx_cnt: %zd\n", prefix, TAB, attr->rx_ctx_cnt);
}
/*
 * Write a textual form of the resource-management setting rm into buf.
 * Handles FI_RM_UNSPEC, FI_RM_DISABLED and FI_RM_ENABLED; any other value
 * produces "Unknown".
 */
static void fi_tostr_resource_mgmt(char *buf, enum fi_resource_mgmt rm)
{
switch (rm) {
/* presumably each CASEENUMSTR expands to a case that emits the
 * enumerator's name -- macro is defined outside this view, confirm */
CASEENUMSTR(FI_RM_UNSPEC);
CASEENUMSTR(FI_RM_DISABLED);
CASEENUMSTR(FI_RM_ENABLED);
default:
strcatf(buf, "Unknown");
break;
}
}
/*
 * Write a textual form of the address-vector type into buf.
 * Handles FI_AV_UNSPEC, FI_AV_MAP and FI_AV_TABLE; any other value produces
 * "Unknown".
 */
static void fi_tostr_av_type(char *buf, enum fi_av_type type)
{
switch (type) {
/* presumably each CASEENUMSTR expands to a case that emits the
 * enumerator's name -- macro is defined outside this view, confirm */
CASEENUMSTR(FI_AV_UNSPEC);
CASEENUMSTR(FI_AV_MAP);
CASEENUMSTR(FI_AV_TABLE);
default:
strcatf(buf, "Unknown");
break;
}
}
static void fi_tostr_domain_attr(char *buf, const struct fi_domain_attr *attr,
const char *prefix)
{
@ -365,6 +388,12 @@ static void fi_tostr_domain_attr(char *buf, const struct fi_domain_attr *attr,
strcatf(buf, "%s%sdata_progress: ", prefix, TAB);
fi_tostr_progress(buf, attr->data_progress);
strcatf(buf, "\n");
strcatf(buf, "%s%sresouce_mgmt: ", prefix, TAB);
fi_tostr_resource_mgmt(buf, attr->resource_mgmt);
strcatf(buf, "\n");
strcatf(buf, "%s%sav_type: ", prefix, TAB);
fi_tostr_av_type(buf, attr->av_type);
strcatf(buf, "\n");
strcatf(buf, "%s%smr_key_size: %zd\n", prefix, TAB, attr->mr_key_size);
strcatf(buf, "%s%scq_data_size: %zd\n", prefix, TAB, attr->cq_data_size);
@ -422,17 +451,6 @@ static void fi_tostr_info(char *buf, const struct fi_info *info)
fi_tostr_fabric_attr(buf, info->fabric_attr, TAB);
}
/*
 * Write a textual form of the address-vector type into buf.
 * Handles FI_AV_MAP and FI_AV_TABLE only; any other value (FI_AV_UNSPEC
 * included) produces "Unknown".
 */
static void fi_tostr_av_type(char *buf, enum fi_av_type type)
{
switch (type) {
/* presumably each CASEENUMSTR expands to a case that emits the
 * enumerator's name -- macro is defined outside this view, confirm */
CASEENUMSTR(FI_AV_MAP);
CASEENUMSTR(FI_AV_TABLE);
default:
strcatf(buf, "Unknown");
break;
}
}
static void fi_tostr_atomic_type(char *buf, enum fi_datatype type)
{
switch (type) {