1
1
Pull down a new embedded copy of libfabric from
https://github.com/ofiwg/libfabric.
Этот коммит содержится в:
Jeff Squyres 2014-12-19 12:07:17 -08:00
родитель 91b0d03bf2
Коммит e2362988a9
101 изменённых файлов: 16026 добавлений и 4538 удалений

Просмотреть файл

@ -74,10 +74,12 @@ libfabric_usnic_headers = \
libfabric/prov/usnic/src/usdf.h \
libfabric/prov/usnic/src/usdf_av.h \
libfabric/prov/usnic/src/usdf_cm.h \
libfabric/prov/usnic/src/usdf_cq.h \
libfabric/prov/usnic/src/usdf_dgram.h \
libfabric/prov/usnic/src/usdf_endpoint.h \
libfabric/prov/usnic/src/usdf_msg.h \
libfabric/prov/usnic/src/usdf_progress.h \
libfabric/prov/usnic/src/usdf_rdm.h \
libfabric/prov/usnic/src/usdf_timer.h \
libfabric/prov/usnic/src/usnic_direct/cq_desc.h \
libfabric/prov/usnic/src/usnic_direct/cq_enet_desc.h \
@ -126,6 +128,7 @@ libfabric_usnic_sources = \
libfabric/prov/usnic/src/usdf_endpoint.c \
libfabric/prov/usnic/src/usdf_ep_dgram.c \
libfabric/prov/usnic/src/usdf_ep_msg.c \
libfabric/prov/usnic/src/usdf_ep_rdm.c \
libfabric/prov/usnic/src/usdf_eq.c \
libfabric/prov/usnic/src/usdf_fabric.c \
libfabric/prov/usnic/src/usdf_mem.c \
@ -133,6 +136,7 @@ libfabric_usnic_sources = \
libfabric/prov/usnic/src/usdf_pep.c \
libfabric/prov/usnic/src/usdf_progress.c \
libfabric/prov/usnic/src/usdf_timer.c \
libfabric/prov/usnic/src/usdf_rdm.c \
libfabric/prov/usnic/src/usnic_direct/libnl_utils_common.c \
libfabric/prov/usnic/src/usnic_direct/usd_caps.c \
libfabric/prov/usnic/src/usnic_direct/usd_dest.c \

Просмотреть файл

@ -3,3 +3,4 @@ Reese Faucette <rfaucett@cisco.com>
Jeff Squyres <jsquyres@cisco.com>
Jianxin Xiong <jianxin.xiong@intel.com>
Sayantan Sur <sayantan.sur@intel.com>
Xuyang Wang <xuywang@cisco.com>

Просмотреть файл

@ -24,7 +24,7 @@ common_srcs = \
src/enosys.c
# ensure dl-built providers link back to libfabric
linkback = -lfabric -Lsrc/.libs/
linkback = $(top_builddir)/src/libfabric.la
src_libfabric_la_SOURCES = \
include/fi.h \
@ -32,6 +32,7 @@ src_libfabric_la_SOURCES = \
include/fi_indexer.h \
include/fi_list.h \
include/fi_rbuf.h \
include/prov.h \
src/fabric.c \
src/fi_tostr.c \
$(common_srcs)
@ -40,26 +41,35 @@ if HAVE_SOCKETS
_sockets_files = \
prov/sockets/src/sock.h \
prov/sockets/src/sock_av.c \
prov/sockets/src/sock_dgram.c \
prov/sockets/src/sock_dom.c \
prov/sockets/src/sock_eq.c \
prov/sockets/src/sock_cq.c \
prov/sockets/src/sock_cntr.c \
prov/sockets/src/sock_poll.c \
prov/sockets/src/sock_rdm.c \
prov/sockets/src/sock_wait.c \
prov/sockets/src/sock_ep_rdm.c \
prov/sockets/src/sock_ep_dgram.c \
prov/sockets/src/sock_ep_msg.c \
prov/sockets/src/sock_fabric.c \
prov/sockets/src/sock_ep.c \
prov/sockets/src/sock_ctx.c \
prov/sockets/src/sock_rx_entry.c \
prov/sockets/src/sock_progress.c \
prov/sockets/src/sock_comm.c \
prov/sockets/src/sock_conn.c \
prov/sockets/src/sock_msg.c \
prov/sockets/src/sock_rma.c \
prov/sockets/src/sock_atomic.c \
prov/sockets/src/sock_util.c \
prov/sockets/src/sock_util.h \
prov/sockets/src/indexer.c \
prov/sockets/src/list.c \
prov/sockets/src/list.h
prov/sockets/src/indexer.c
if HAVE_SOCKETS_DL
pkglib_LTLIBRARIES += libsockets-fi.la
libsockets_fi_la_SOURCES = $(_sockets_files) $(common_srcs)
libsockets_fi_la_LDFLAGS = -module -avoid-version -shared -export-dynamic $(linkback)
libsockets_fi_la_LIBADD = $(linkback)
libsockets_fi_la_LDFLAGS = -module -avoid-version -shared -export-dynamic
libsockets_fi_la_DEPENDENCIES = $(linkback)
else !HAVE_SOCKETS_DL
src_libfabric_la_SOURCES += $(_sockets_files)
endif !HAVE_SOCKETS_DL
@ -72,7 +82,9 @@ _verbs_files = prov/verbs/src/fi_verbs.c
if HAVE_VERBS_DL
pkglib_LTLIBRARIES += libverbs-fi.la
libverbs_fi_la_SOURCES = $(_verbs_files) $(common_srcs)
libverbs_fi_la_LDFLAGS = -module -avoid-version -shared -export-dynamic -libverbs -lrdmacm $(linkback)
libverbs_fi_la_LIBADD = -libverbs -lrdmacm $(linkback)
libverbs_fi_la_LDFLAGS = -module -avoid-version -shared -export-dynamic
libverbs_fi_la_DEPENDENCIES = $(linkback)
else !HAVE_VERBS_DL
src_libfabric_la_SOURCES += $(_verbs_files)
endif !HAVE_VERBS_DL
@ -149,6 +161,7 @@ _usnic_files = \
prov/usnic/src/usdf_cm.c \
prov/usnic/src/usdf_cm.h \
prov/usnic/src/usdf_cq.c \
prov/usnic/src/usdf_cq.h \
prov/usnic/src/usdf_dgram.c \
prov/usnic/src/usdf_dgram.h \
prov/usnic/src/usdf_domain.c \
@ -156,6 +169,7 @@ _usnic_files = \
prov/usnic/src/usdf_endpoint.h \
prov/usnic/src/usdf_ep_dgram.c \
prov/usnic/src/usdf_ep_msg.c \
prov/usnic/src/usdf_ep_rdm.c \
prov/usnic/src/usdf_eq.c \
prov/usnic/src/usdf_fabric.c \
prov/usnic/src/usdf_mem.c \
@ -164,6 +178,9 @@ _usnic_files = \
prov/usnic/src/usdf_pep.c \
prov/usnic/src/usdf_progress.c \
prov/usnic/src/usdf_progress.h \
prov/usnic/src/usdf_rdm.c \
prov/usnic/src/usdf_rdm.h \
prov/usnic/src/usdf_rudp.h \
prov/usnic/src/usdf_timer.c \
prov/usnic/src/usdf_timer.h
@ -175,8 +192,9 @@ if HAVE_USNIC_DL
pkglib_LTLIBRARIES += libusnic-fi.la
libusnic_fi_la_CPPFLAGS = $(AM_CPPFLAGS) $(_usnic_cppflags)
libusnic_fi_la_SOURCES = $(_usnic_files) $(common_srcs)
libusnic_fi_la_LIBADD = $(linkback)
libusnic_fi_la_LDFLAGS = -module -avoid-version -shared -export-dynamic
libusnic_fi_la_LIBS = $(linkback)
libusnic_fi_la_DEPENDENCIES = $(linkback)
else !HAVE_USNIC_DL
AM_CPPFLAGS += $(_usnic_cppflags)
src_libfabric_la_SOURCES += $(_usnic_files)
@ -188,6 +206,7 @@ if HAVE_PSM
_psm_files = \
prov/psm/src/psm_am.h \
prov/psm/src/psmx.h \
prov/psm/src/psm_am.h \
prov/psm/src/psmx_init.c \
prov/psm/src/psmx_domain.c \
prov/psm/src/psmx_cq.c \
@ -209,7 +228,9 @@ _psm_files = \
if HAVE_PSM_DL
pkglib_LTLIBRARIES += libpsmx-fi.la
libpsmx_fi_la_SOURCES = $(_psm_files) $(common_srcs)
libpsmx_fi_la_LDFLAGS = -module -avoid-version -shared -export-dynamic $(linkback)
libpsmx_fi_la_LIBADD = $(linkback)
libpsmx_fi_la_LDFLAGS = -module -avoid-version -shared -export-dynamic
libpsmx_fi_la_DEPENDENCIES = $(linkback)
else !HAVE_PSM_DL
src_libfabric_la_SOURCES += $(_psm_files)
endif !HAVE_PSM_DL

Просмотреть файл

@ -1,7 +1,7 @@
This README is for userspace RDMA fabric library.
Version Libfabric v0.0.2
Released on 2014-12-09
Released on 2014-12-19
Building
========

Просмотреть файл

@ -33,6 +33,18 @@
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* psm provider is built */
#undef HAVE_PSM
/* psm provider is built as DSO */
#undef HAVE_PSM_DL
/* sockets provider is built */
#undef HAVE_SOCKETS
/* sockets provider is built as DSO */
#undef HAVE_SOCKETS_DL
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
@ -57,6 +69,18 @@
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* usnic provider is built */
#undef HAVE_USNIC
/* usnic provider is built as DSO */
#undef HAVE_USNIC_DL
/* verbs provider is built */
#undef HAVE_VERBS
/* verbs provider is built as DSO */
#undef HAVE_VERBS_DL
/* Define to 1 to enable valgrind annotations */
#undef INCLUDE_VALGRIND

Просмотреть файл

@ -88,7 +88,11 @@ AC_DEFUN([FI_PROVIDER_SETUP],[
],
[AC_MSG_NOTICE([$1 provider disabled])])
# Set conditionals for HAVE_<provider> and HAVE_<provider>_DL
AC_DEFINE_UNQUOTED([HAVE_]m4_translit([$1], [a-z], [A-Z]), $$1_happy, [$1 provider is built])
AC_DEFINE_UNQUOTED([HAVE_]m4_translit([$1], [a-z], [A-Z])[_DL], $$1_dl, [$1 provider is built as DSO])
# Set AM conditionals for HAVE_<provider> and HAVE_<provider>_DL
# as well as AC defines
AM_CONDITIONAL([HAVE_]m4_translit([$1], [a-z], [A-Z]),
[test $$1_happy -eq 1])
AM_CONDITIONAL([HAVE_]m4_translit([$1], [a-z], [A-Z])[_DL],

Просмотреть файл

@ -73,6 +73,8 @@ static inline uint64_t htonll(uint64_t x) { return x; }
static inline uint64_t ntohll(uint64_t x) { return x; }
#endif
/* Size of a struct member, without requiring an instance of the struct. */
#define sizeof_field(type, field) sizeof(((type *)0)->field)
/* Fully parenthesize both the arguments and the selected result operand
 * so MIN/MAX compose safely inside larger expressions (e.g. when an
 * argument is itself a conditional or assignment expression).
 * Note: arguments are still evaluated twice -- do not pass expressions
 * with side effects. */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
@ -176,10 +178,7 @@ static inline int atomic_get(atomic_t *atomic)
#endif // HAVE_ATOMICS
/* non exported symbols */
int fi_init(void);
int fi_read_file(const char *dir, const char *file, char *buf, size_t size);
int fi_poll_fd(int fd, int timeout);
int fi_wait_cond(pthread_cond_t *cond, pthread_mutex_t *mut, int timeout);
@ -191,8 +190,6 @@ size_t fi_datatype_size(enum fi_datatype datatype);
uint64_t fi_tag_bits(uint64_t mem_tag_format);
uint64_t fi_tag_format(uint64_t tag_bits);
int fi_version_register(uint32_t version, struct fi_provider *provider);
#define RDMA_CONF_DIR SYSCONFDIR "/" RDMADIR
#define FI_CONF_DIR RDMA_CONF_DIR "/fabric"

Просмотреть файл

@ -104,7 +104,7 @@ static inline void rbwrite(struct ringbuf *rb, const void *buf, size_t len)
memcpy((char*)rb->buf + (rb->wpos & rb->size_mask), buf, len);
} else {
memcpy((char*)rb->buf + (rb->wpos & rb->size_mask), buf, endlen);
memcpy(rb->buf, buf, len - endlen);
memcpy(rb->buf, (char*)buf + endlen, len - endlen);
}
rb->wpos += len;
}
@ -128,7 +128,7 @@ static inline void rbpeek(struct ringbuf *rb, void *buf, size_t len)
memcpy(buf, (char*)rb->buf + (rb->rcnt & rb->size_mask), len);
} else {
memcpy(buf, (char*)rb->buf + (rb->rcnt & rb->size_mask), endlen);
memcpy(buf, rb->buf, len - endlen);
memcpy((char*)buf + endlen, rb->buf, len - endlen);
}
}

Просмотреть файл

@ -0,0 +1,96 @@
/*
* Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/* prov.h: selects, at preprocessor time, how each provider's constructor
* is declared and invoked by the libfabric core, driven by the
* HAVE_<prov> / HAVE_<prov>_DL results produced by configure. */
#ifndef _PROV_H_
#define _PROV_H_
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <rdma/fi_prov.h>
/* Provider initialization function signature that built-in providers
* must specify. */
#define INI_SIG(name) struct fi_provider* name(void)
/* for each provider defines for three scenarios:
* dl: externally visible ctor with known name (see fi_prov.h)
* built-in: ctor function def, don't export symbols
* not built: no-op call for ctor
*
* For each provider, <PROV>_INI expands to the ctor declaration and
* <PROV>_INIT to the expression the core evaluates to run it
* (NULL when the provider is not built in). In the built-in case the
* bare "<PROV>_INI ;" line below emits the ctor's prototype so the
* core can call it. */
#if (HAVE_VERBS) && (HAVE_VERBS_DL)
# define VERBS_INI FI_EXT_INI
# define VERBS_INIT NULL
#elif (HAVE_VERBS)
# define VERBS_INI INI_SIG(fi_verbs_ini)
# define VERBS_INIT fi_verbs_ini()
VERBS_INI ;
#else
# define VERBS_INIT NULL
#endif
#if (HAVE_PSM) && (HAVE_PSM_DL)
# define PSM_INI FI_EXT_INI
# define PSM_INIT NULL
#elif (HAVE_PSM)
# define PSM_INI INI_SIG(fi_psm_ini)
# define PSM_INIT fi_psm_ini()
PSM_INI ;
#else
# define PSM_INIT NULL
#endif
#if (HAVE_SOCKETS) && (HAVE_SOCKETS_DL)
# define SOCKETS_INI FI_EXT_INI
# define SOCKETS_INIT NULL
#elif (HAVE_SOCKETS)
# define SOCKETS_INI INI_SIG(fi_sockets_ini)
# define SOCKETS_INIT fi_sockets_ini()
SOCKETS_INI ;
#else
# define SOCKETS_INIT NULL
#endif
#if (HAVE_USNIC) && (HAVE_USNIC_DL)
# define USNIC_INI FI_EXT_INI
# define USNIC_INIT NULL
#elif (HAVE_USNIC)
# define USNIC_INI INI_SIG(fi_usnic_ini)
# define USNIC_INIT fi_usnic_ini()
USNIC_INI ;
#else
# define USNIC_INIT NULL
#endif
#endif /* _PROV_H_ */

Просмотреть файл

@ -201,7 +201,7 @@ enum {
FI_PROTO_IB_UD,
FI_PROTO_PSMX,
FI_PROTO_UDP,
FI_PROTO_SOCK_RDS,
FI_PROTO_SOCK_TCP
};
/* Mode bits */
@ -232,6 +232,7 @@ struct fi_rx_attr {
struct fi_ep_attr {
uint32_t protocol;
uint32_t protocol_version;
size_t max_msg_size;
size_t inject_size;
size_t total_buffered_recv;

Просмотреть файл

@ -192,6 +192,20 @@ fi_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
return domain->ops->cntr_open(domain, attr, cntr, context);
}
/* Open a wait set on the domain; thin dispatch to the provider's
* domain ops. Returns the provider's status code. */
static inline int
fi_wait_open(struct fid_domain *domain, struct fi_wait_attr *attr,
struct fid_wait **waitset)
{
return domain->ops->wait_open(domain, attr, waitset);
}
/* Open a poll set on the domain; thin dispatch to the provider's
* domain ops. Returns the provider's status code. */
static inline int
fi_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr,
struct fid_poll **pollset)
{
return domain->ops->poll_open(domain, attr, pollset);
}
static inline int
fi_mr_reg(struct fid_domain *domain, const void *buf, size_t len,
uint64_t access, uint64_t offset, uint64_t requested_key,

Просмотреть файл

@ -53,7 +53,7 @@ enum fi_wait_obj {
FI_WAIT_UNSPEC,
FI_WAIT_SET,
FI_WAIT_FD,
FI_WAIT_MUT_COND, /* pthread mutex & cond */
FI_WAIT_MUTEX_COND, /* pthread mutex & cond */
};
struct fi_wait_attr {
@ -71,13 +71,11 @@ struct fid_wait {
struct fi_ops_wait *ops;
};
struct fi_wait_obj_set {
size_t count;
enum fi_wait_obj wait_obj;
void *obj;
/* Pair of pointers to a pthread mutex and condition variable; used as
* the wait object for FI_WAIT_MUTEX_COND (see enum fi_wait_obj above). */
struct fi_mutex_cond {
pthread_mutex_t *mutex;
pthread_cond_t *cond;
};
/*
* Poll Set
* Allows polling multiple event queues and counters for progress
@ -90,6 +88,10 @@ struct fi_poll_attr {
struct fi_ops_poll {
size_t size;
int (*poll)(struct fid_poll *pollset, void **context, int count);
int (*poll_add)(struct fid_poll *pollset, struct fid *event_fid,
uint64_t flags);
int (*poll_del)(struct fid_poll *pollset, struct fid *event_fid,
uint64_t flags);
};
struct fid_poll {
@ -301,6 +303,17 @@ fi_poll(struct fid_poll *pollset, void **context, int count)
return pollset->ops->poll(pollset, context, count);
}
/* Add event_fid to the poll set; thin dispatch to the provider's
* poll ops. Returns the provider's status code. */
static inline int
fi_poll_add(struct fid_poll *pollset, struct fid *event_fid, uint64_t flags)
{
return pollset->ops->poll_add(pollset, event_fid, flags);
}
/* Remove event_fid from the poll set; thin dispatch to the provider's
* poll ops. Returns the provider's status code. */
static inline int
fi_poll_del(struct fid_poll *pollset, struct fid *event_fid, uint64_t flags)
{
return pollset->ops->poll_del(pollset, event_fid, flags);
}
static inline int
fi_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,

Просмотреть файл

@ -43,31 +43,33 @@ extern "C" {
#endif
/*
* Extension that low-level drivers should add to their .so filename
* (probably via libtool "-release" option). For example a low-level
* driver named "libfoo" should build a plug-in named "libfoo-fi.so".
* Extension that dl-loaded providers should add to their .so filename
* (probably via libtool "-release" option). For example a provider
* driver named "foo" should build a plug-in named "libfoo-fi.so", and
* place it in $prefix/$libdir/libfabric/
*/
#define FI_LIB_EXTENSION "fi"
#define FI_LIB_SUFFIX FI_LIB_EXTENSION ".so"
#define FI_LIB_CLASS_NAME "libfabric"
/*
* Dynamically loaded providers must export the following entry point.
* This is invoked by the libfabric framework when the provider library
* is loaded.
*/
#define FI_EXT_INI \
__attribute__((visibility ("default"))) struct fi_provider* fi_prov_ini(void)
struct fi_provider {
const char *name;
uint32_t version;
uint32_t fi_version;
const char *name;
int (*getinfo)(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info);
int (*fabric)(struct fi_fabric_attr *attr, struct fid_fabric **fabric,
void *context);
void (*cleanup)(void);
};
int fi_register_provider(uint32_t fi_version, struct fi_provider *provider);
static inline int fi_register(struct fi_provider *provider)
{
return fi_register_provider(FI_VERSION(FI_MAJOR_VERSION, FI_MINOR_VERSION),
provider);
}
#ifdef __cplusplus
}
#endif

Просмотреть файл

@ -6,7 +6,6 @@ FABRIC_1.0 {
fi_fabric;
fi_version;
fi_strerror;
fi_register_provider;
fi_tostr;
local: *;
};

Просмотреть файл

@ -1,50 +1,41 @@
%define ver 0.0.2
Name: libfabric
Version: 0.0.2
Release: 1%{?dist}
Summary: Userspace RDMA Fabric Interfaces
Summary: User-space RDMA Fabric Interfaces
Group: System Environment/Libraries
License: GPLv2 or BSD
Url: http://www.github.com/ofiwg/libfabric
Source: http://www.openfabrics.org/downloads/fabrics/%{name}-%{version}.tar.gz
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
Source: http://www.openfabrics.org/downloads/fabrics/%{name}-%{version}.tar.bz2
Prefix: ${_prefix}
%description
libfabric provides a userspace API to access high-performance fabric
libfabric provides a user-space API to access high-performance fabric
services, such as RDMA.
%package devel
Summary: Development files for the libfabric library
Group: System Environment/Libraries
Requires: libfabric = %{version}
%description devel
Development files for the libfabric library.
%package utils
Summary: Examples for the libfabric library
Group: System Environment/Libraries
Requires: %{name} = %{version}-%{release}
%description utils
Example test programs for the libfabric library.
%prep
%setup -q -n %{name}-%{ver}
%setup -q -n %{name}-%{version}
%build
%configure
# defaults: with-dlopen and without-valgrind can be overridden:
%configure %{?_without_dlopen} %{?_with_valgrind}
make %{?_smp_mflags}
%install
rm -rf $RPM_BUILD_ROOT
%makeinstall
rm -rf %{buildroot}
%makeinstall installdirs
# remove unpackaged files from the buildroot
rm -f $RPM_BUILD_ROOT%{_libdir}/*.la
rm -f %{buildroot}%{_libdir}/*.la
%clean
rm -rf $RPM_BUILD_ROOT
rm -rf %{buildroot}
%post -p /sbin/ldconfig
%postun -p /sbin/ldconfig
@ -52,6 +43,7 @@ rm -rf $RPM_BUILD_ROOT
%files
%defattr(-,root,root,-)
%{_libdir}/lib*.so.*
%dir %{_libdir}/libfabric/
%doc AUTHORS COPYING README
%files devel
@ -62,10 +54,6 @@ rm -rf $RPM_BUILD_ROOT
%{_mandir}/man3/*
%{_mandir}/man7/*
%files utils
%defattr(-,root,root,-)
%{_bindir}/*
%{_mandir}/man1/*
%changelog
* Mon Jan 19 2015 Maintainer Name <email@intel.com> 1.0.0
- TODO: Release manager fill this out for initial release

Просмотреть файл

@ -1,50 +1,41 @@
%define ver @VERSION@
Name: libfabric
Version: 0.0.2
Version: @VERSION@
Release: 1%{?dist}
Summary: Userspace RDMA Fabric Interfaces
Summary: User-space RDMA Fabric Interfaces
Group: System Environment/Libraries
License: GPLv2 or BSD
Url: http://www.github.com/ofiwg/libfabric
Source: http://www.openfabrics.org/downloads/fabrics/%{name}-%{version}.tar.gz
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
Source: http://www.openfabrics.org/downloads/fabrics/%{name}-%{version}.tar.bz2
Prefix: ${_prefix}
%description
libfabric provides a userspace API to access high-performance fabric
libfabric provides a user-space API to access high-performance fabric
services, such as RDMA.
%package devel
Summary: Development files for the libfabric library
Group: System Environment/Libraries
Requires: libfabric = %{version}
%description devel
Development files for the libfabric library.
%package utils
Summary: Examples for the libfabric library
Group: System Environment/Libraries
Requires: %{name} = %{version}-%{release}
%description utils
Example test programs for the libfabric library.
%prep
%setup -q -n %{name}-%{ver}
%setup -q -n %{name}-%{version}
%build
%configure
# defaults: with-dlopen and without-valgrind can be overridden:
%configure %{?_without_dlopen} %{?_with_valgrind}
make %{?_smp_mflags}
%install
rm -rf $RPM_BUILD_ROOT
%makeinstall
rm -rf %{buildroot}
%makeinstall installdirs
# remove unpackaged files from the buildroot
rm -f $RPM_BUILD_ROOT%{_libdir}/*.la
rm -f %{buildroot}%{_libdir}/*.la
%clean
rm -rf $RPM_BUILD_ROOT
rm -rf %{buildroot}
%post -p /sbin/ldconfig
%postun -p /sbin/ldconfig
@ -52,6 +43,7 @@ rm -rf $RPM_BUILD_ROOT
%files
%defattr(-,root,root,-)
%{_libdir}/lib*.so.*
%dir %{_libdir}/libfabric/
%doc AUTHORS COPYING README
%files devel
@ -62,10 +54,6 @@ rm -rf $RPM_BUILD_ROOT
%{_mandir}/man3/*
%{_mandir}/man7/*
%files utils
%defattr(-,root,root,-)
%{_bindir}/*
%{_mandir}/man1/*
%changelog
* Mon Jan 19 2015 Maintainer Name <email@intel.com> 1.0.0
- TODO: Release manager fill this out for initial release

Просмотреть файл

@ -1,4 +1,4 @@
.TH fabric 7 "2014\-12\-05" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fabric 7 "2014\-12\-15" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
Fabric Interface Library
@ -106,12 +106,13 @@ Endpoints are configured with specific communication capabilities and
data transfer interfaces.
.PP
\f[I]fi_eq - Event Queue\f[] : Event queues, are used to collect and
report the completion of asynchronous operations.
For example, the completion of a data transfer operation submitted over
a fabric endpoint may write an event to an event queue associated with
the endpoint.
There are multiple types of event queues, and the format of the events
that they report are controlled by applications.
report the completion of asynchronous operations and events.
Event queues report events that are not directly associated with data
transfer operations.
.PP
\f[I]fi_cq - Completion Queue\f[] : Completion queues are
high-performance event queues used to report the completion of data
transfer operations.
.PP
\f[I]fi_cntr - Event Counters\f[] : Event counters are used to report
the number of completed asynchronous operations.
@ -214,15 +215,13 @@ addresses must support FI_SOCKADDR_IN and FI_SOCKADDR_IN6 input formats.
Address vectors must support FI_ADDR, FI_ADDR_INDEX, and FI_AV output
formats.
.IP \[bu] 2
Access domains must support opening event queues and counters.
Access domains must support opening completion queues and counters.
.IP \[bu] 2
Event queues must support the FI_EQ_FORMAT_CONTEXT format.
.IP \[bu] 2
Event queues associated with data transfer completions must support the
FI_EQ_FORMAT_DATA format.
Completion queues must support the FI_CQ_FORMAT_CONTEXT and
FI_CQ_FORMAT_MSG formats.
.IP \[bu] 2
Event queues associated with tagged message transfers must support the
FI_EQ_FORMAT_TAGGED format.
FI_CQ_FORMAT_TAGGED format.
.IP \[bu] 2
A provider is expected to be forward compatible, and must be able to be
compiled against expanded \f[C]fi_xxx_ops\f[] structures that define new
@ -231,6 +230,7 @@ Any unknown functions must be set to NULL.
.SH SEE ALSO
.PP
\f[C]fi_getinfo\f[](3), \f[C]fi_endpoint\f[](3), \f[C]fi_domain\f[](3),
\f[C]fi_av\f[](3), \f[C]fi_eq\f[](3), \f[C]fi_mr\f[](3)
\f[C]fi_av\f[](3), \f[C]fi_eq\f[](3), \f[C]fi_cq\f[](3),
\f[C]fi_cntr\f[](3), \f[C]fi_mr\f[](3)
.SH AUTHORS
OpenFabrics.

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_cntr 3 "2014\-11\-21" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_cntr 3 "2014\-12\-15" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_cntr - Completion and event counter operations
@ -105,7 +105,7 @@ Users may use fi_control to retrieve the underlying wait object
associated with a counter, in order to use it in other system calls.
The following values may be used to specify the type of wait object
associated with a counter: FI_WAIT_NONE, FI_WAIT_UNSPEC, FI_WAIT_SET,
FI_WAIT_FD, and FI_WAIT_MUT_COND.
FI_WAIT_FD, and FI_WAIT_MUTEX_COND.
.IP \[bu] 2
\f[I]FI_WAIT_NONE\f[] : Used to indicate that the user will not block
(wait) for events on the counter.
@ -130,7 +130,7 @@ routines.
However, a provider may signal an FD wait object by marking it as
readable, writable, or with an error.
.IP \[bu] 2
\f[I]FI_WAIT_MUT_COND\f[] : Specifies that the counter should use a
\f[I]FI_WAIT_MUTEX_COND\f[] : Specifies that the counter should use a
pthread mutex and cond variable as a wait object.
.PP
\f[I]wait_set\f[] : If wait_obj is FI_WAIT_SET, this field references a
@ -167,13 +167,7 @@ operational flags associated with the counter.
the low-level wait object associated with the counter.
The format of the wait-object is specified during counter creation,
through the counter attributes.
The fi_cntr_control arg parameter should be an address where a pointer
to the returned wait object will be written.
.PP
\f[I]FI_CNTR_WAIT_MUT_COND\f[] : The counter wait is implemented using a
pthread_mutex_t and pthread_cond_t.
FI_GETWAIT will return two pointers, a reference to pthread_mutex_t *
and pthread_cond_t *, respectively.
See fi_eq.3 for additional details on using control with FI_GETWAIT.
.SS fi_cntr_read
.PP
The fi_cntr_read call returns the current value of the counter.

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_cq 3 "2014\-12\-05" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_cq 3 "2014\-12\-15" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_cq - Completion queue operations
@ -219,7 +219,7 @@ Users may use fi_control to retrieve the underlying wait object
associated with an CQ, in order to use it in other system calls.
The following values may be used to specify the type of wait object
associated with an CQ: FI_WAIT_NONE, FI_WAIT_UNSPEC, FI_WAIT_SET,
FI_WAIT_FD, and FI_WAIT_MUT_COND.
FI_WAIT_FD, and FI_WAIT_MUTEX_COND.
.IP \[bu] 2
\f[I]FI_WAIT_NONE\f[] : Used to indicate that the user will not block
(wait) for completions on the CQ.
@ -247,7 +247,7 @@ routines.
However, a provider may signal an FD wait object by marking it as
readable, writable, or with an error.
.IP \[bu] 2
\f[I]FI_WAIT_MUT_COND\f[] : Specifies that the CQ should use a pthread
\f[I]FI_WAIT_MUTEX_COND\f[] : Specifies that the CQ should use a pthread
mutex and cond variable as a wait object.
.PP
\f[I]signaling_vector\f[] : Indicates which processor core interrupts
@ -302,8 +302,7 @@ The following control commands are usable with an CQ.
the low-level wait object associated with the CQ.
The format of the wait-object is specified during CQ creation, through
the CQ attributes.
The fi_control arg parameter should be an address where a pointer to the
returned wait object will be written.
See fi_eq.3 for additional details on using control with FI_GETWAIT.
.SS fi_cq_read / fi_cq_readfrom
.PP
The fi_cq_read and fi_cq_readfrom operations perform a non-blocking read
@ -394,6 +393,19 @@ Len must be a multiple of the size of the event to insert.
.PP
User events inserted into a CQ with be associated with the source
address FI_ADDR_NOTAVAIL.
.SH COMPLETION FLAGS
.PP
Completion flags provide additional details regarding the completed
operation.
The following completion flags are defined.
.PP
\f[I]FI_REMOTE_CQ_DATA\f[] : This indicates that remote CQ data is
available as part of the completion.
.PP
\f[I]FI_MULTI_RECV\f[] : This flag applies to receive buffers that were
posted with the FI_MULTI_RECV flag set.
This completion flag indicates that the receive buffer referenced by the
completion has been consumed and was released by the provider.
.SH RETURN VALUES
.PP
fi_cq_open : Returns 0 on success.

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_domain 3 "2014\-11\-21" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_domain 3 "2014\-12\-19" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_domain - Open a fabric access domain
@ -160,6 +160,9 @@ For instance, endpoints that share the same event queue or poll set
belong to the same progress domain.
Applications that can allocate endpoint resources to specific threads
can reduce provider locking by using FI_THREAD_PROGRESS.
.PP
\f[I]FI_THREAD_DOMAIN\f[] : A domain serialization model requires
applications to serialize access to all objects belonging to a domain.
.SS Progress Models (control_progress / data_progress)
.PP
Progress is the ability of the underlying implementation to complete
@ -174,7 +177,7 @@ application threads.
.PP
Control progress indicates the method that the provider uses to make
progress on asynchronous control operations.
Control operations are function which do not directly involve the
Control operations are functions which do not directly involve the
transfer of application data between endpoints.
They include address vector, memory registration, and connection
management routines.
@ -248,9 +251,9 @@ the provider.
.PP
The number of outbound command queues optimally supported by the
provider.
For a low-level provider, this represents the number command queues to
the hardware and/or the number of parallel transmit engines effectively
supported by the hardware and caches.
For a low-level provider, this represents the number of command queues
to the hardware and/or the number of parallel transmit engines
effectively supported by the hardware and caches.
Applications which allocate more transmit contexts than this value will
end up sharing underlying resources.
By default, there is a single transmit context associated with each

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_endpoint 3 "2014\-12\-04" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_endpoint 3 "2014\-12\-18" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_endpoint - Fabric endpoint operations
@ -9,8 +9,8 @@ Allocate or close an endpoint.
.RE
.TP
.B fi_ep_bind
Associate an endpoint with an event queue, completion queue, address
vector, or memory region
Associate an endpoint with an event queue, completion queue, counter,
address vector, or memory region
.RS
.RE
.TP
@ -227,14 +227,6 @@ completion of a subsequent operation.
Use of this flag may improve performance by allowing the provider to
avoid writing a completion entry for every operation.
.PP
The use of FI_COMPLETION is often paired with the call fi_sync.
FI_COMPLETION allows the user to suppress completions from being
generated.
In order for the application to ensure that all previous operations have
completed, the application may call fi_sync.
The successful completion of fi_sync indicates that all prior operations
have completed successfully.
.PP
An endpoint may also, or instead, be bound to a fabric counter.
When binding an endpoint to a counter, the following flags may be
specified.
@ -346,6 +338,11 @@ The following option levels and option names and parameters are defined.
\f[I]FI_OPT_MIN_MULTI_RECV - size_t\f[] : Defines the minimum receive
buffer space available when the receive buffer is automatically freed
(see FI_MULTI_RECV).
Modifying this value is only guaranteed to set the minimum buffer space
needed on receives posted after the value has been changed.
It is recommended that applications that want to override the default
MIN_MULTI_RECV value set this option before enabling the corresponding
endpoint.
.SH ENDPOINT ATTRIBUTES
.PP
The fi_ep_attr structure defines the set of attributes associated with
@ -354,7 +351,8 @@ an endpoint.
.nf
\f[C]
struct\ fi_ep_attr\ {
\ \ \ \ uint64_t\ \ protocol;
\ \ \ \ uint32_t\ \ protocol;
\ \ \ \ uint32_t\ \ protocol_version;
\ \ \ \ size_t\ \ \ \ max_msg_size;
\ \ \ \ size_t\ \ \ \ inject_size;
\ \ \ \ size_t\ \ \ \ total_buffered_recv;
@ -376,8 +374,8 @@ A matching protocol must be used by communicating endpoints to ensure
interoperability.
The following protocol values are defined.
Provider specific protocols are also allowed.
Provider specific protocols will be indicated by having the upper 3
bytes of the protocol value set to the vendor OUI.
Provider specific protocols will be indicated by having the upper bit of
the protocol value set to one.
.PP
\f[I]FI_PROTO_UNSPEC\f[] : The protocol is not specified.
This is usually provided as input, with other attributes of the socket
@ -397,6 +395,15 @@ datagram queue pairs.
protocol known as PSM, performance scaled messaging.
PSMX is an extended version of the PSM protocol to support the libfabric
interfaces.
.SS protocol_version - Protocol Version
.PP
Identifies which version of the protocol is employed by the provider.
The protocol version allows providers to extend an existing protocol,
for example by adding support for additional features or functionality,
in a backward compatible manner.
Providers that support different versions of the same protocol should
interoperate, but only when using the capabilities defined for the
lesser version.
.SS max_msg_size - Max Message Size
.PP
Defines the maximum size for an application data transfer as a single
@ -584,7 +591,7 @@ submission.
Number of transmit contexts to associate with the endpoint.
If not specified (0), 1 context will be assigned if the endpoint
supports outbound transfers.
Transmit contexts are independent command queues that may be separately
Transmit contexts are independent transmit queues that may be separately
configured.
Each transmit context may be bound to a separate CQ, and no ordering is
defined between contexts.
@ -637,7 +644,7 @@ and require the application to explicitly create transmit and receive
contexts as described below.
.SS fi_tx_context
.PP
Transmit contexts are independent command queues.
Transmit contexts are independent transmit queues.
Ordering and synchronization between contexts are not defined.
Conceptually a transmit context behaves similar to a send-only endpoint.
A transmit context may be configured with relaxed capabilities, and has
@ -706,7 +713,7 @@ operation.
(scatter-gather elements) that a single posted operation may reference.
.SS fi_rx_context
.PP
Receive contexts are independent command queues for receiving incoming
Receive contexts are independent receive queues for receiving incoming
data.
Ordering and synchronization between contexts are not guaranteed.
Conceptually a receive context behaves similar to a receive-only
@ -797,7 +804,7 @@ processing, with the potential cost of serializing access across
multiple endpoints.
Support for sharable contexts is domain specific.
.PP
Conceptually, sharable contexts are command queues that may be accessed
Conceptually, sharable contexts are transmit queues that may be accessed
by many endpoints.
The use of a shared transmit context is mostly opaque to an application.
Applications must allocate and bind shared transmit contexts to
@ -935,6 +942,13 @@ such data transfers.
Operations that complete in error that are not associated with valid
operational context will use the endpoint context in any error reporting
structures.
.PP
Users can attach both counters and completion queues to an endpoint.
When both counter and completion queue are attached, a successful
completion increments the counter and does not generate a completion
entry in the completion queue.
Operations that complete with an error increment the error counter and
generate a completion event.
.SH RETURN VALUES
.PP
Returns 0 on success.

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_eq 3 "2014\-12\-03" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_eq 3 "2014\-12\-15" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_eq - Event queue operations
@ -144,7 +144,7 @@ routines.
However, a provider may signal an FD wait object by marking it as
readable, writable, or with an error.
.IP \[bu] 2
\f[I]FI_WAIT_MUT_COND\f[] : Specifies that the EQ should use a pthread
\f[I]FI_WAIT_MUTEX_COND\f[] : Specifies that the EQ should use a pthread
mutex and cond variable as a wait object.
.PP
\f[I]signaling_vector\f[] : Indicates which processor core interrupts
@ -176,6 +176,17 @@ The format of the wait-object is specified during EQ creation, through
the EQ attributes.
The fi_control arg parameter should be an address where a pointer to the
returned wait object will be written.
This should be an \[aq]int *\[aq] for FI_WAIT_FD, or \[aq]struct
fi_mutex_cond\[aq] for FI_WAIT_MUTEX_COND.
.IP
.nf
\f[C]
struct\ fi_mutex_cond\ {
\ \ \ \ pthread_mutex_t\ \ \ \ \ *mutex;
\ \ \ \ pthread_cond_t\ \ \ \ \ \ *cond;
};
\f[]
.fi
.SS fi_eq_read
.PP
The fi_eq_read operations performs a non-blocking read of event data

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_fabric 3 "2014\-12\-04" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_fabric 3 "2014\-12\-12" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_fabric - Fabric domain operations
@ -89,6 +89,11 @@ uint64_t flags
\f[I]FI_TYPE_PROTO\f[] : struct fi_ep_attr::protocol field
.PP
\f[I]FI_TYPE_MSG_ORDER\f[] : struct fi_ep_attr::msg_order field
.PP
fi_tostr() will return a pointer to an internal libfabric buffer that
should not be modified, and will be overwritten the next time fi_tostr()
is invoked.
fi_tostr() is not thread safe.
.SH NOTES
.PP
The following resources are associated with fabric domains: access

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_getinfo 3 "2014\-12\-08" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_getinfo 3 "2014\-12\-16" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_getinfo / fi_freeinfo - Obtain / free fabric interface information
@ -158,7 +158,7 @@ When provided as hints, requested values of struct fi_tx_ctx_attr should
be set.
On output, the actual transmit context attributes that can be provided
will be returned.
Output values will be greater than or or equal to the requested input
Output values will be greater than or equal to the requested input
values.
.PP
\f[I]rx_attr - receive context attributes\f[] : Optionally supplied
@ -232,7 +232,7 @@ endpoint as send-only or receive-only.
\f[I]FI_RMA\f[] : Specifies that the endpoint should support RMA read
and write operations.
Endpoints supporting this capability support operations defined by
struct fi_rma_ops.
struct fi_ops_rma.
In the absence of any relevant flags, FI_RMA implies the ability to
initiate and be the target of remote memory reads and writes.
Applications can use the FI_READ, FI_WRITE, FI_REMOTE_READ, and
@ -241,10 +241,10 @@ by an endpoint.
.PP
\f[I]FI_TAGGED\f[] : Specifies that the endpoint should handle tagged
message transfers.
tagged message transfers associate a user-specified key or tag with each
Tagged message transfers associate a user-specified key or tag with each
message that is used for matching purposes at the remote side.
Endpoints supporting this capability support operations defined by
struct fi_tagged_ops.
struct fi_ops_tagged.
In the absence of any relevant flags, FI_TAGGED implies the ability to
send and receive tagged messages.
Applications can use the FI_SEND and FI_RECV flags to optimize an
@ -253,7 +253,7 @@ endpoint as send-only or receive-only.
\f[I]FI_ATOMICS\f[] : Specifies that the endpoint supports some set of
atomic operations.
Endpoints supporting this capability support operations defined by
struct fi_atomic_ops.
struct fi_ops_atomic.
In the absence of any relevant flags, FI_ATOMICS implies the ability to
initiate and be the target of remote atomic reads and writes.
Applications can use the FI_READ, FI_WRITE, FI_REMOTE_READ, and
@ -263,7 +263,7 @@ supported by an endpoint.
\f[I]FI_MULTICAST\f[] : Indicates that the endpoint should support
multicast data transfers.
Endpoints supporting this capability support multicast operations
defined by struct fi_msg_ops, when a multicast address is specified as
defined by struct fi_ops_msg, when a multicast address is specified as
the destination address.
In the absence of any relevant flags, FI_MULTICAST implies the ability
to send and receive messages.
@ -496,7 +496,7 @@ See \f[C]fi_av\f[](3).
specific address format should be selected.
Provider specific addresses may be protocol specific or a vendor
proprietary format.
Applications that select FI_FORMAT_UNSPEC should be prepared to be treat
Applications that select FI_FORMAT_UNSPEC should be prepared to treat
returned addressing data as opaque.
FI_FORMAT_UNSPEC targets apps which make use of an out of band address
exchange.
@ -512,7 +512,7 @@ interfaces examining the sa_family field.
\f[I]FI_SOCKADDR_IN6\f[] : Address is of type sockaddr_in6 (IPv6).
.PP
\f[I]FI_SOCKADDR_IB\f[] : Address is of type sockaddr_ib (defined in
Linux kernel source
Linux kernel source)
.PP
\f[I]FI_ADDR_PSMX\f[] : Address is an Intel proprietary format that is
used with their PSMX (extended performance scaled messaging) protocol.

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_mr 3 "2014-11-14" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_mr 3 "2014\-12\-19" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_mr - Memory region operations
@ -90,7 +90,15 @@ In order to support as broad range of applications as possible, without
unduly affecting their performance, applications that wish to manage
their own local memory registrations may do so by using the memory
registration calls.
Applications may use the FI_LOCAL_MR domain capability bit as a guide.
Applications may use the FI_LOCAL_MR domain mode bit as a guide.
.PP
When the FI_LOCAL_MR mode bit is set, applications must register all
data buffers that will be accessed by the local hardware and provide a
valid mem_desc parameter into applicable data transfer operations.
When FI_LOCAL_MR is zero, applications are not required to register data
buffers before using them for local operations (e.g.
send and receive data buffers), and the mem_desc parameter into data
transfer operations is ignored.
.PP
Providers may support applications registering any range of addresses in
their virtual address space, whether or not those addresses are backed by
@ -177,6 +185,9 @@ Support for user requested keys is provider specific and is determined
by the FI_PROV_MR_ATTR mode bit.
Access domains must be opened with the FI_PROV_MR_ATTR mode cleared in
order to enable support for application selectable MR keys.
The requested_key parameter is ignored for memory registration calls
unless the access flags include either FI_REMOTE_READ or
FI_REMOTE_WRITE.
.PP
Remote RMA and atomic operations indicate the location within a
registered memory region by specifying an address.
@ -221,18 +232,10 @@ struct\ fi_mr_attr\ {
.fi
.SS fi_close
.PP
Fi_close may be used to release all resources associated with a
registering a memory region.
Fi_close is used to release all resources associated with a registering
a memory region.
Once unregistered, further access to the registered memory is not
guaranteed.
For performance reasons, unregistration processing may be done
asynchronously or lazily.
To force all queued unregistration requests to complete, applications
may call fi_sync on the domain.
Upon completion of a domain fi_sync call, all memory regions
unregistered before fi_sync was invoked will have completed, and no
further access to the registered region, either locally or remotely, via
fabric resources will be possible.
.SS fi_mr_desc / fi_mr_key
.PP
The local memory descriptor and remote protection key associated with a

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_poll 3 "2014-11-14" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_poll 3 "2014\-12\-15" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_poll - Polling and wait set operations
@ -37,8 +37,6 @@ int\ fi_wait_open(struct\ fid_domain\ *domain,\ struct\ fi_wait_attr\ *attr,
int\ fi_close(struct\ fid\ *waitset);
int\ fi_control(struct\ fid\ *waitset,\ int\ command,\ void\ *arg);
int\ fi_wait(struct\ fid_wait\ *waitset,\ int\ timeout);
\f[]
.fi
@ -71,19 +69,11 @@ A poll set is defined with the following attributes.
.nf
\f[C]
struct\ fi_poll_attr\ {
\ \ \ \ int\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ mask;\ \ \ \ \ \ /*\ valid\ attr\ fields\ */
\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ \ \ \ \ flags;\ \ \ \ \ /*\ operation\ flags\ */
};
\f[]
.fi
.PP
\f[I]mask\f[] : The mask field is used for forward and backward API
compatibility.
It is used by the application to indicate which fields in the attribute
structure have been set.
For this version of the API, mask should be set to FI_POLL_ATTR_MASK_V1,
indicating that all specified fields have been initialized.
.PP
\f[I]flags\f[] : Flags that set the default operation of the poll set.
The use of this field is reserved and must be set to 0 by the caller.
.SS fi_close
@ -120,29 +110,19 @@ fi_wait_attr.
.nf
\f[C]
struct\ fi_wait_attr\ {
\ \ \ \ int\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ mask;\ \ \ \ \ \ /*\ valid\ attr\ fields\ */
\ \ \ \ enum\ fi_wait_obj\ \ \ \ \ wait_obj;\ \ /*\ requested\ wait\ object\ */
\ \ \ \ uint64_t\ \ \ \ \ \ \ \ \ \ \ \ \ flags;\ \ \ \ \ /*\ operation\ flags\ */
};
\f[]
.fi
.PP
\f[I]mask\f[] : The mask field is used for forward and backward API
compatibility.
It is used by the application to indicate which fields in the attribute
structure have been set.
For this version of the API, mask should be set to FI_WAIT_ATTR_MASK_V1,
indicating that all specified fields have been initialized.
.PP
\f[I]wait_obj\f[] : Wait sets are associated with specific wait
object(s).
Wait objects allow applications to block until the wait object is
signaled, indicating that an event is available to be read.
Users may use fi_control to retrieve the underlying wait object(s)
associated with a wait set, in order to use it in other system calls.
The following values may be used to specify the type of wait object
associated with an wait set: FI_WAIT_UNSPEC, FI_WAIT_FD, and
FI_WAIT_MUT_COND.
associated with a wait set: FI_WAIT_UNSPEC, FI_WAIT_FD, and
FI_WAIT_MUTEX_COND.
.IP \[bu] 2
\f[I]FI_WAIT_UNSPEC\f[] : Specifies that the user will only wait on the
wait set using fabric interface calls, such as fi_wait.
@ -159,7 +139,7 @@ routines.
However, a provider may signal an FD wait object by marking it as
readable, writable, or with an error.
.IP \[bu] 2
\f[I]FI_WAIT_MUT_COND\f[] : Specifies that the wait set should use a
\f[I]FI_WAIT_MUTEX_COND\f[] : Specifies that the wait set should use a
pthread mutex and cond variable as a wait object.
.PP
\f[I]flags\f[] : Flags that set the default operation of the wait set.
@ -169,39 +149,6 @@ The use of this field is reserved and must be set to 0 by the caller.
The fi_close call releases all resources associated with a wait set.
The wait set must not be bound to any other opened resources prior to
being closed.
.SS fi_control
.PP
The fi_control call is used to access provider or implementation
specific details of the wait set.
Access to the wait set should be serialized across all calls when
fi_control is invoked, as it may redirect the implementation of wait set
operations.
The following control commands are usable with a wait set.
.PP
\f[I]FI_GETWAIT (void **)\f[] : This command allows the user to retrieve
the low-level wait object(s) associated with the wait set.
The format of the wait-object is specified during wait set creation,
through the wait set attributes.
The fi_control arg parameter should be an address to a struct
fi_wait_obj_set.
.IP
.nf
\f[C]
struct\ fi_wait_obj_set\ {
\ \ \ \ size_t\ \ \ \ \ \ \ \ \ \ \ \ len;\ \ \ \ \ \ /*\ size\ of\ obj\ array\ entries\ */
\ \ \ \ enum\ fi_wait_obj\ \ wait_obj;\ /*\ type\ of\ wait\ obj\ */
\ \ \ \ void\ \ \ \ \ \ \ \ \ \ \ \ \ *obj;\ \ \ \ \ \ /*\ array\ of\ wait\ objects\ */
};
\f[]
.fi
.PP
On input, len should indicate the size in bytes referenced by the obj
field.
On output, the needed size will be returned.
The underlying wait objects will be returned in the obj array.
If insufficient space is provided, the results will be truncated.
The wait_obj field may be used to identify the format of the wait
objects.
.SS fi_wait
.PP
Waits on a wait set until one or more of its underlying wait objects is

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_trigger 3 "2014-11-14" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_trigger 3 "2014\-12\-10" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.SH NAME
.PP
fi_trigger - Triggered operations
@ -13,8 +13,7 @@ fi_trigger - Triggered operations
.PP
Triggered operations allow an application to queue a data transfer
request that is deferred until a specified condition is met.
It is often used to send a message, but only after receiving all input
data.
A typical use is to send a message only after receiving all input data.
.PP
A triggered operation may be requested by specifying the FI_TRIGGER flag
as part of the operation.
@ -23,7 +22,7 @@ FI_TRIGGER flag.
Such an endpoint is referred to as a triggerable endpoint.
All data transfer operations on a triggerable endpoint are deferred.
.PP
Any data transfer operation is potentially be triggerable, subject to
Any data transfer operation is potentially triggerable, subject to
provider constraints.
Triggerable endpoints are initialized such that only those interfaces
supported by the provider which are triggerable are available.
@ -63,7 +62,7 @@ event type.
.PP
The following trigger events are defined.
.PP
\f[I]FI_TRIGGER_THRESHOL\f[] : This indicates that the data transfer
\f[I]FI_TRIGGER_THRESHOLD\f[] : This indicates that the data transfer
operation will be deferred until an event counter crosses an application
specified threshold value.
The threshold is specified using struct fi_trigger_threshold:

Просмотреть файл

@ -613,7 +613,7 @@ int psmx_am_atomic_handler(psm_am_token_t token, psm_epaddr_t epaddr,
case PSMX_AM_REP_ATOMIC_COMPWRITE:
req = (struct psmx_am_request *)(uintptr_t)args[1].u64;
op_error = (int)args[0].u32w1;
assert(req->atomic.len == len);
assert(op_error || req->atomic.len == len);
if (!op_error)
memcpy(req->atomic.result, src, len);
@ -795,7 +795,6 @@ ssize_t _psmx_atomic_write(struct fid_ep *ep,
size_t idx;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (flags & FI_TRIGGER) {
struct psmx_trigger *trigger;
@ -981,7 +980,6 @@ ssize_t _psmx_atomic_readwrite(struct fid_ep *ep,
size_t idx;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (flags & FI_TRIGGER) {
struct psmx_trigger *trigger;
@ -1167,7 +1165,6 @@ ssize_t _psmx_atomic_compwrite(struct fid_ep *ep,
size_t idx;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (flags & FI_TRIGGER) {
struct psmx_trigger *trigger;

Просмотреть файл

@ -118,9 +118,10 @@ static int psmx_av_insert(struct fid_av *av, const void *addr, size_t count,
{
struct psmx_fid_av *av_priv;
psm_error_t *errors;
int error_count = 0;
int *mask;
int err;
int i;
int i, j;
fi_addr_t *result = NULL;
struct psmx_epaddr_context *epaddr_context;
@ -174,25 +175,38 @@ static int psmx_av_insert(struct fid_av *av, const void *addr, size_t count,
(psm_epaddr_t *) fi_addr, 30*1e9);
for (i=0; i<count; i++){
if (mask[i] && errors[i] == PSM_OK) {
if (!mask[i])
continue;
if (errors[i] == PSM_OK) {
psmx_set_epaddr_context(av_priv->domain,
((psm_epid_t *) addr)[i],
((psm_epaddr_t *) fi_addr)[i]);
}
else {
fi_addr[i] = FI_ADDR_NOTAVAIL;
error_count++;
}
}
free(mask);
free(errors);
if (av_priv->type == FI_AV_TABLE) {
/* NOTE: unresolved addresses are left in the AV table */
if (result) {
for (i=0; i<count; i++)
result[i] = av_priv->last + i;
for (i=0; i<count; i++) {
j = av_priv->last + i;
if ((fi_addr_t)av_priv->psm_epaddrs[j] == FI_ADDR_NOTAVAIL)
result[i] = FI_ADDR_NOTAVAIL;
else
result[i] = j;
}
}
av_priv->last += count;
}
return psmx_errno(err);
return count - error_count;
}
static int psmx_av_remove(struct fid_av *av, fi_addr_t *fi_addr, size_t count,

Просмотреть файл

@ -384,7 +384,7 @@ int psmx_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
break;
case FI_WAIT_FD:
case FI_WAIT_MUT_COND:
case FI_WAIT_MUTEX_COND:
wait_attr.wait_obj = attr->wait_obj;
wait_attr.flags = 0;
err = psmx_wait_open(domain, &wait_attr, (struct fid_wait **)&wait);
@ -394,7 +394,7 @@ int psmx_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
default:
psmx_debug("%s: attr->wait_obj=%d, supported=%d...%d\n", __func__,
attr->wait_obj, FI_WAIT_NONE, FI_WAIT_MUT_COND);
attr->wait_obj, FI_WAIT_NONE, FI_WAIT_MUTEX_COND);
return -FI_EINVAL;
}

Просмотреть файл

@ -84,7 +84,7 @@ struct psmx_cq_event *psmx_cq_create_event(struct psmx_fid_cq *cq,
event->cqe.err.data = data;
event->cqe.err.tag = tag;
event->cqe.err.olen = olen;
event->cqe.err.prov_errno = 0;
event->cqe.err.prov_errno = PSM_INTERNAL_ERR;
goto out;
}
@ -363,7 +363,7 @@ int psmx_cq_poll_mq(struct psmx_fid_cq *cq, struct psmx_fid_domain *domain,
if (mr->domain->rma_ep->remote_write_cntr)
psmx_cntr_inc(mr->domain->rma_ep->remote_write_cntr);
if (!cq || mr->cq == cq)
return 1;
return psm_status.error_code ? -FI_EAVAIL : 1;
continue;
}
@ -375,7 +375,7 @@ int psmx_cq_poll_mq(struct psmx_fid_cq *cq, struct psmx_fid_domain *domain,
if (mr->domain->rma_ep->remote_read_cntr)
psmx_cntr_inc(mr->domain->rma_ep->remote_read_cntr);
if (!cq)
return 1;
return psm_status.error_code ? -FI_EAVAIL : 1;
continue;
}
}
@ -434,7 +434,7 @@ int psmx_cq_poll_mq(struct psmx_fid_cq *cq, struct psmx_fid_domain *domain,
}
if (!cq || tmp_cq == cq)
return 1;
return psm_status.error_code ? -FI_EAVAIL : 1;
}
else if (err == PSM_MQ_NO_COMPLETIONS) {
return 0;
@ -454,7 +454,6 @@ static ssize_t psmx_cq_readfrom(struct fid_cq *cq, void *buf, size_t count,
ssize_t read_count;
cq_priv = container_of(cq, struct psmx_fid_cq, cq);
assert(cq_priv->domain);
if (PSMX_CQ_EMPTY(cq_priv) || !buf) {
ret = psmx_cq_poll_mq(cq_priv, cq_priv->domain,
@ -601,7 +600,7 @@ static ssize_t psmx_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count,
else {
clock_gettime(CLOCK_REALTIME, &ts0);
while (1) {
if (psmx_cq_poll_mq(cq_priv, cq_priv->domain, NULL, 0, NULL) > 0)
if (psmx_cq_poll_mq(cq_priv, cq_priv->domain, NULL, 0, NULL))
break;
/* CQ may be updated asynchronously by the AM handlers */
@ -748,7 +747,7 @@ int psmx_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
break;
case FI_WAIT_FD:
case FI_WAIT_MUT_COND:
case FI_WAIT_MUTEX_COND:
wait_attr.wait_obj = attr->wait_obj;
wait_attr.flags = 0;
err = psmx_wait_open(domain, &wait_attr, (struct fid_wait **)&wait);
@ -758,7 +757,7 @@ int psmx_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
default:
psmx_debug("%s: attr->wait_obj=%d, supported=%d...%d\n", __func__, attr->wait_obj,
FI_WAIT_NONE, FI_WAIT_MUT_COND);
FI_WAIT_NONE, FI_WAIT_MUTEX_COND);
return -FI_EINVAL;
}

Просмотреть файл

@ -54,6 +54,11 @@ static int psmx_domain_close(fid_t fid)
psm_mq_finalize(domain->psm_mq);
#endif
/* workaround for:
* Assertion failure at psm_ep.c:1059: ep->mctxt_master == ep
*/
sleep(1);
err = psm_ep_close(domain->psm_ep, PSM_EP_CLOSE_GRACEFUL,
(int64_t) PSMX_TIME_OUT * 1000000000LL);
if (err != PSM_OK)

Просмотреть файл

@ -32,8 +32,10 @@
#include "psmx.h"
#include "fi.h"
#include "prov.h"
struct psmx_env psmx_env;
volatile int init_count = 0;
static int psmx_reserve_tag_bits(int *caps, uint64_t *max_tag_value)
{
@ -107,6 +109,8 @@ static int psmx_getinfo(uint32_t version, const char *node, const char *service,
uint64_t max_tag_value = 0;
int err = -ENODATA;
psmx_debug("%s\n", __func__);
*info = NULL;
if (psm_ep_num_devunits(&cnt) || !cnt) {
@ -114,25 +118,19 @@ static int psmx_getinfo(uint32_t version, const char *node, const char *service,
return -FI_ENODATA;
}
if (node && !(flags & FI_SOURCE)) {
if (service)
dest_addr = psmx_resolve_name(node, atoi(service));
else
dest_addr = psmx_resolve_name(node, 0);
}
if (node && !(flags & FI_SOURCE))
dest_addr = psmx_resolve_name(node, 0);
if (hints) {
switch (hints->ep_type) {
case FI_EP_UNSPEC:
case FI_EP_RDM:
break;
case FI_EP_MSG:
ep_type = FI_EP_MSG;
break;
default:
psmx_debug("%s: hints->ep_type=%d, supported=%d,%d,%d.\n",
psmx_debug("%s: hints->ep_type=%d, supported=%d,%d.\n",
__func__, hints->ep_type, FI_EP_UNSPEC,
FI_EP_RDM, FI_EP_MSG);
FI_EP_RDM);
goto err_out;
}
@ -279,6 +277,8 @@ static int psmx_fabric(struct fi_fabric_attr *attr,
{
struct psmx_fid_fabric *fabric_priv;
psmx_debug("%s\n", __func__);
if (strncmp(attr->name, "psm", 3))
return -FI_ENODATA;
@ -294,11 +294,21 @@ static int psmx_fabric(struct fi_fabric_attr *attr,
return 0;
}
static void psmx_fini(void)
{
psmx_debug("%s\n", __func__);
if (! --init_count)
psm_finalize();
}
static struct fi_provider psmx_prov = {
.name = "PSM",
.version = FI_VERSION(0, 9),
.fi_version = FI_VERSION(FI_MAJOR_VERSION, FI_MINOR_VERSION),
.getinfo = psmx_getinfo,
.fabric = psmx_fabric,
.cleanup = psmx_fini
};
static int psmx_get_int_env(char *name, int default_value)
@ -320,12 +330,14 @@ static int psmx_get_int_env(char *name, int default_value)
return default_value;
}
static void __attribute__((constructor)) psmx_ini(void)
PSM_INI
{
int major, minor;
int check_version;
int err;
psmx_debug("%s\n", __func__);
psmx_env.name_server = psmx_get_int_env("SFI_PSM_NAME_SERVER", 0);
psmx_env.am_msg = psmx_get_int_env("SFI_PSM_AM_MSG", 0);
psmx_env.tagged_rma = psmx_get_int_env("SFI_PSM_TAGGED_RMA", 0);
@ -342,7 +354,7 @@ static void __attribute__((constructor)) psmx_ini(void)
if (err != PSM_OK) {
fprintf(stderr, "%s: psm_init failed: %s\n", __func__,
psm_error_get_string(err));
return;
return NULL;
}
check_version = psmx_get_int_env("SFI_PSM_VERSION_CHECK", 1);
@ -351,13 +363,10 @@ static void __attribute__((constructor)) psmx_ini(void)
fprintf(stderr, "%s: PSM version mismatch: header %d.%d, library %d.%d.\n",
__func__, PSM_VERNO_MAJOR, PSM_VERNO_MINOR, major, minor);
fprintf(stderr, "\tSet envar SFI_PSM_VERSION_CHECK=0 to bypass version check.\n");
return;
return NULL;
}
(void) fi_register(&psmx_prov);
init_count++;
return (&psmx_prov);
}
static void __attribute__((destructor)) psmx_fini(void)
{
psm_finalize();
}

Просмотреть файл

@ -206,7 +206,6 @@ ssize_t _psmx_send(struct fid_ep *ep, const void *buf, size_t len,
size_t idx;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (flags & FI_TRIGGER) {
struct psmx_trigger *trigger;

Просмотреть файл

@ -491,7 +491,6 @@ static ssize_t _psmx_send2(struct fid_ep *ep, const void *buf, size_t len,
size_t idx;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (!buf)
return -EINVAL;

Просмотреть файл

@ -143,6 +143,8 @@ static struct fi_ops psmx_fi_ops = {
static struct fi_ops_poll psmx_poll_ops = {
.size = sizeof(struct fi_ops_poll),
.poll = psmx_poll_poll,
.poll_add = psmx_poll_add,
.poll_del = psmx_poll_del,
};
int psmx_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr,

Просмотреть файл

@ -450,7 +450,6 @@ ssize_t _psmx_read(struct fid_ep *ep, void *buf, size_t len,
size_t idx;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (flags & FI_TRIGGER) {
struct psmx_trigger *trigger;
@ -619,7 +618,6 @@ ssize_t _psmx_write(struct fid_ep *ep, const void *buf, size_t len,
size_t idx;
ep_priv = container_of(ep, struct psmx_fid_ep, ep);
assert(ep_priv->domain);
if (flags & FI_TRIGGER) {
struct psmx_trigger *trigger;

Просмотреть файл

@ -34,15 +34,10 @@
int psmx_wait_get_obj(struct psmx_fid_wait *wait, void *arg)
{
struct fi_wait_obj_set *wait_obj_set = arg;
void *obj_ptr;
int obj_size = 0;
int obj_type = FI_WAIT_NONE;
int ret_count = 0;
struct {
pthread_mutex_t *mutex;
pthread_cond_t *cond;
} mutex_cond;
struct fi_mutex_cond mutex_cond;
if (!arg)
return -EINVAL;
@ -55,7 +50,7 @@ int psmx_wait_get_obj(struct psmx_fid_wait *wait, void *arg)
obj_ptr = &wait->fd[0];
break;
case FI_WAIT_MUT_COND:
case FI_WAIT_MUTEX_COND:
mutex_cond.mutex = &wait->mutex;
mutex_cond.cond = &wait->cond;
obj_size = sizeof(mutex_cond);
@ -69,14 +64,9 @@ int psmx_wait_get_obj(struct psmx_fid_wait *wait, void *arg)
}
if (obj_size) {
ret_count = 1;
if (wait_obj_set->count)
memcpy(wait_obj_set->obj, obj_ptr, obj_size);
memcpy(arg, obj_ptr, obj_size);
}
wait_obj_set->count = ret_count;
wait_obj_set->wait_obj = obj_type;
return 0;
}
@ -99,7 +89,7 @@ int psmx_wait_wait(struct fid_wait *wait, int timeout)
err = -FI_ETIMEDOUT;
break;
case FI_WAIT_MUT_COND:
case FI_WAIT_MUTEX_COND:
err = fi_wait_cond(&wait_priv->cond,
&wait_priv->mutex, timeout);
break;
@ -127,7 +117,7 @@ void psmx_wait_signal(struct fid_wait *wait)
write(wait_priv->fd[1], &c, 1);
break;
case FI_WAIT_MUT_COND:
case FI_WAIT_MUTEX_COND:
pthread_cond_signal(&wait_priv->cond);
break;
}
@ -182,7 +172,7 @@ static int psmx_wait_init(struct psmx_fid_wait *wait, int type)
}
break;
case FI_WAIT_MUT_COND:
case FI_WAIT_MUTEX_COND:
pthread_mutex_init(&wait->mutex, NULL);
pthread_cond_init(&wait->cond, NULL);
break;
@ -210,14 +200,14 @@ int psmx_wait_open(struct fid_domain *domain, struct fi_wait_attr *attr,
break;
case FI_WAIT_FD:
case FI_WAIT_MUT_COND:
case FI_WAIT_MUTEX_COND:
type = attr->wait_obj;
break;
default:
psmx_debug("%s: attr->wait_obj=%d, supported=%d,%d,%d\n",
__func__, attr->wait_obj, FI_WAIT_UNSPEC,
FI_WAIT_FD, FI_WAIT_MUT_COND);
FI_WAIT_FD, FI_WAIT_MUTEX_COND);
return -FI_EINVAL;
}
}

Просмотреть файл

@ -1,266 +0,0 @@
/*
* Copyright (c) 2014 Intel Corporation, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "list.h"
#define LIST_DEF_NUM_ENTRIES (128)
/*
 * Append _elem to the singly linked queue described by _head/_tail.
 * Fix: the original never advanced _tail on the non-empty path, so every
 * enqueue after the second overwrote the first element's next pointer and
 * silently dropped the intermediate elements.
 */
#define ENQUEUE_LIST(_head, _tail, _elem) do{ \
	(_elem)->next = NULL; \
	if(NULL == (_head)){ \
		(_head) = (_tail) = (_elem); \
	}else{ \
		(_tail)->next = (_elem); \
		(_tail) = (_elem); \
	} \
}while(0)
/*
 * Pop the front element of the _head/_tail queue into _elem (NULL when
 * the queue is empty); clears _tail when the last element is removed.
 */
#define DEQUEUE_LIST(_head, _tail, _elem) do{ \
	if ((_head) == NULL) { \
		(_elem) = NULL; \
	} else { \
		(_elem) = (_head); \
		(_head) = (_head)->next; \
		if ((_head) == NULL) \
			(_tail) = NULL; \
	} \
}while(0)
/* Push an element onto the active queue of its owning list.
 * Returns 0 on success, -1 if element is NULL. */
static int _list_enqueue(list_element_t *element)
{
	list_t *owner;

	if (element == NULL)
		return -1;

	owner = element->list;
	ENQUEUE_LIST(owner->head, owner->tail, element);
	return 0;
}
/* Pop the head of the active queue; NULL when the queue is empty. */
static list_element_t *_list_dequeue(list_t *list)
{
	list_element_t *head_elem;

	DEQUEUE_LIST(list->head, list->tail, head_elem);
	return head_elem;
}
/* Return an element to the free pool of its owning list.
 * Returns 0 on success, -1 if element is NULL. */
static int _list_enqueue_free_list(list_element_t *element)
{
	list_t *owner;

	if (element == NULL)
		return -1;

	owner = element->list;
	ENQUEUE_LIST(owner->free_head, owner->free_tail, element);
	return 0;
}
/* Take an element from the free pool; NULL when the pool is exhausted. */
static list_element_t *_list_dequeue_free_list(list_t *list)
{
	list_element_t *free_elem;

	DEQUEUE_LIST(list->free_head, list->free_tail, free_elem);
	return free_elem;
}
list_t *new_list(size_t length)
{
int i;
list_t *list = (list_t *)malloc(sizeof(list_t) +
length * sizeof(list_element_t));
memset(list, 0, sizeof(list_t) +
length * sizeof(list_element_t));
list->curr_len = 0;
list->max_len = length;
list->head = list->tail = NULL;
list->free_head = list->free_tail = NULL;
if(0 != fastlock_init(&(list->lock)))
goto err;
list_element_t *elements = (list_element_t *)
((char*)list + sizeof(list_t));
for(i=0; i<length; i++){
list_element_t *element = (list_element_t *)((char *)elements +
sizeof(list_element_t) * i);
element->list = list;
if(0 != _list_enqueue_free_list(element))
goto err1;
}
return list;
err1:
fastlock_destroy(&(list->lock));
err:
free(list);
return NULL;
}
/* Destroy the lock and release the list (header plus inline pool). */
void free_list(list_t *list)
{
	fastlock_destroy(&list->lock);
	free(list);
}
/*
 * Queue 'data' on the list, growing the element pool on demand.
 * Returns 0 on success, -1 on failure.
 *
 * Growth fix: the original grew the pool with realloc(list, ...), which
 * may move the list_t -- and the lock currently held -- while every
 * caller still holds the old pointer, and it never set element->list on
 * the new elements before _list_enqueue_free_list() dereferenced it
 * (NULL, since the region was memset to 0).  Grow instead by allocating
 * a separate chunk of elements that back-point to the list.
 * NOTE(review): grown chunks are not tracked, so free_list() does not
 * release them -- a bounded leak, versus the original's undefined
 * behavior on growth.
 */
int enqueue_item(list_t *list, void *data)
{
	int ret;
	list_element_t *elem;

	fastlock_acquire(&(list->lock));

	elem = _list_dequeue_free_list(list);
	if (!elem) {
		int i;
		list_element_t *chunk;

		chunk = malloc(sizeof(list_element_t) * LIST_DEF_NUM_ENTRIES);
		if (!chunk) {
			fastlock_release(&(list->lock));
			return -1;
		}
		memset(chunk, 0, sizeof(list_element_t) * LIST_DEF_NUM_ENTRIES);

		for (i = 0; i < LIST_DEF_NUM_ENTRIES; i++) {
			chunk[i].list = list;
			if (0 != _list_enqueue_free_list(&chunk[i])) {
				fastlock_release(&(list->lock));
				return -1;
			}
		}
		list->max_len += LIST_DEF_NUM_ENTRIES;

		elem = _list_dequeue_free_list(list);
		if (!elem) {
			fastlock_release(&(list->lock));
			return -1;
		}
	}

	elem->next = NULL;
	elem->data = data;
	elem->len = 0;

	ret = _list_enqueue(elem);
	if (!ret)
		list->curr_len++;

	fastlock_release(&(list->lock));
	return ret;
}
/* Pop and return the data pointer at the front of the list; NULL when the
 * list is empty.  The spent element is recycled onto the free queue. */
void *dequeue_item(list_t *list)
{
	void *data = NULL;

	fastlock_acquire(&list->lock);
	if (list->curr_len > 0) {
		list_element_t *element = _list_dequeue(list);
		list->curr_len--;
		data = element->data;
		_list_enqueue_free_list(element);
	}
	fastlock_release(&list->lock);
	return data;
}
/*
 * Return the data pointer at the head of the list WITHOUT removing it;
 * NULL when the list is empty.
 *
 * Bug fix: the previous implementation dequeued the head element instead
 * of peeking at it — the item silently vanished from the list, curr_len
 * was left unchanged (desynchronizing it from the real queue length), and
 * the element leaked because it was never recycled to the free queue.
 */
void *peek_item(list_t *list)
{
	void *data = NULL;

	fastlock_acquire(&(list->lock));
	if (list->curr_len > 0)
		data = list->head->data;
	fastlock_release(&(list->lock));
	return data;
}
/*
 * Unlink the first element whose data pointer equals 'item' and recycle it
 * onto the free queue.  Returns 0 on success, -1 if the item is not found.
 *
 * Bug fix: when the deleted element was the tail of a multi-element list,
 * the tail pointer was set to NULL instead of the preceding element; the
 * next ENQUEUE_LIST on a non-empty head would then dereference the NULL
 * tail.
 */
int delete_item(list_t *list, void *item)
{
	list_element_t *curr, *prev;

	fastlock_acquire(&(list->lock));
	prev = NULL;
	for (curr = list->head; curr != NULL; prev = curr, curr = curr->next) {
		if (curr->data != item)
			continue;

		if (prev == NULL)
			list->head = curr->next;
		else
			prev->next = curr->next;

		if (list->tail == curr)
			list->tail = prev;	/* was NULL: corrupted the queue */

		_list_enqueue_free_list(curr);
		list->curr_len--;
		fastlock_release(&(list->lock));
		return 0;
	}
	fastlock_release(&(list->lock));
	return -1;
}
/* Return 0 if 'item' is currently queued on the list, -1 otherwise. */
int find_item(list_t *list, void *item)
{
	int found = -1;
	list_element_t *node;

	fastlock_acquire(&(list->lock));
	for (node = list->head; node != NULL; node = node->next) {
		if (node->data == item) {
			found = 0;
			break;
		}
	}
	fastlock_release(&(list->lock));
	return found;
}
/* Snapshot of the number of queued items, taken under the list lock. */
ssize_t list_length(list_t *list)
{
	ssize_t snapshot;

	fastlock_acquire(&(list->lock));
	snapshot = (ssize_t)list->curr_len;
	fastlock_release(&(list->lock));
	return snapshot;
}

Просмотреть файл

@ -1,66 +0,0 @@
/*
* Copyright (c) 2014 Intel Corporation, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _LIST_H_
#define _LIST_H_

#include "fi.h"

/* Forward typedef so elements can point back at their owning list. */
typedef struct _list_t list_t;

/* One queue node; nodes are recycled through the list's free queue. */
typedef struct _list_element_t
{
	void *data;			/* user payload */
	size_t len;			/* payload length (set to 0 on enqueue) */
	list_t *list;			/* owning list, used for free-queue recycling */
	struct _list_element_t *next;	/* singly linked chain */
}list_element_t;

/*
 * A lock-protected FIFO of void * items.  Elements are carved from a
 * single allocation trailing the list_t header; idle elements live on the
 * internal free queue.
 */
struct _list_t
{
	list_element_t *head, *tail;		/* active FIFO */
	list_element_t *free_head, *free_tail;	/* recycled elements */
	size_t curr_len;			/* items currently queued */
	size_t max_len;				/* total elements allocated */
	fastlock_t lock;			/* guards all of the above */
};

/* Create a list with 'length' preallocated elements; NULL on failure. */
list_t *new_list(size_t length);
/* Destroy the lock and free the list (elements go with it). */
void free_list(list_t *list);
/* Append 'item'; 0 on success, -1 on failure. */
int enqueue_item(list_t *list, void *item);
/* Return the item at the head of the list; NULL when empty. */
void *peek_item(list_t *list);
/* Remove and return the head item; NULL when empty. */
void *dequeue_item(list_t *list);
/* 0 if 'item' is present, -1 otherwise. */
int find_item(list_t *list, void *item);
/* Unlink 'item'; 0 on success, -1 if not found. */
int delete_item(list_t *list, void *item);
/* Number of items currently queued. */
ssize_t list_length(list_t *list);
#endif /* _LIST_H_ */

Просмотреть файл

@ -45,47 +45,74 @@
#include <rdma/fi_rma.h>
#include <rdma/fi_tagged.h>
#include <rdma/fi_trigger.h>
#include <netdb.h>
#include <fi.h>
#include <fi_enosys.h>
#include <fi_indexer.h>
#include "list.h"
#include <fi_rbuf.h>
#include <fi_list.h>
#ifndef _SOCK_H_
#define _SOCK_H_
#define SOCK_EP_MAX_MSG_SZ (1<<22)
#define SOCK_EP_MAX_INJECT_SZ (1<<12)
#define SOCK_EP_MAX_BUFF_RECV (1<<22)
#define SOCK_EP_MAX_MSG_SZ (1<<23)
#define SOCK_EP_MAX_INJECT_SZ ((1<<8) - 1)
#define SOCK_EP_MAX_BUFF_RECV (1<<23)
#define SOCK_EP_MAX_ORDER_RAW_SZ (0)
#define SOCK_EP_MAX_ORDER_WAR_SZ (0)
#define SOCK_EP_MAX_ORDER_WAW_SZ (0)
#define SOCK_EP_MEM_TAG_FMT (0)
#define SOCK_EP_MSG_ORDER (0)
#define SOCK_EP_MAX_EP_CNT (128)
#define SOCK_EP_MAX_TX_CNT (16)
#define SOCK_EP_MAX_RX_CNT (16)
#define SOCK_EP_MAX_IOV_LIMIT (8)
#define SOCK_EP_MAX_TX_CTX_SZ (1<<12)
#define SOCK_EP_MIN_MULTI_RECV (64)
#define SOCK_EP_MAX_ATOMIC_SZ (512)
#define SOCK_EP_MAX_CTX_BITS (16)
#define SOCK_PE_POLL_TIMEOUT (100000)
#define SOCK_PE_MAX_ENTRIES (128)
#define SOCK_EQ_DEF_SZ (1<<12)
#define SOCK_CQ_DEF_SZ (1<<12)
#define SOCK_EQ_DEF_SZ (1<<8)
#define SOCK_CQ_DEF_SZ (1<<8)
#define SOCK_EP_RDM_CAP (FI_MSG | FI_INJECT | FI_SOURCE | FI_SEND | FI_RECV)
#define SOCK_EP_DGRAM_CAP (FI_MSG | FI_INJECT | FI_SOURCE | FI_SEND | FI_RECV)
#define SOCK_OPS_CAP (FI_INJECT | FI_SEND | FI_RECV )
#define SOCK_CQ_DATA_SIZE (sizeof(uint64_t))
#define SOCK_TAG_SIZE (sizeof(uint64_t))
#define SOCK_EP_RDM_CAP (FI_MSG | FI_RMA | FI_TAGGED | FI_ATOMICS | FI_DYNAMIC_MR | \
FI_NAMED_RX_CTX | FI_BUFFERED_RECV | FI_DIRECTED_RECV | \
FI_INJECT | FI_MULTI_RECV | FI_SOURCE | FI_READ | FI_WRITE | \
FI_RECV | FI_SEND | FI_REMOTE_READ | FI_REMOTE_WRITE | \
FI_REMOTE_CQ_DATA | FI_COMPLETION | FI_REMOTE_SIGNAL | \
FI_REMOTE_COMPLETE | FI_PEEK | FI_CANCEL)
#define SOCK_EP_MSG_CAP SOCK_EP_RDM_CAP
#define SOCK_EP_DGRAM_CAP (FI_MSG | FI_TAGGED | FI_DYNAMIC_MR | \
FI_NAMED_RX_CTX | FI_BUFFERED_RECV | FI_DIRECTED_RECV | \
FI_INJECT | FI_MULTI_RECV | FI_SOURCE | FI_RECV | FI_SEND | \
FI_REMOTE_CQ_DATA | FI_COMPLETION | FI_REMOTE_SIGNAL | \
FI_REMOTE_COMPLETE | FI_PEEK | FI_CANCEL)
#define SOCK_DEF_OPS (FI_SEND | FI_RECV | \
FI_BUFFERED_RECV | FI_READ | FI_WRITE | \
FI_REMOTE_READ | FI_REMOTE_WRITE)
#define SOCK_EP_MSG_ORDER (FI_ORDER_RAR | FI_ORDER_RAW | FI_ORDER_RAS| \
FI_ORDER_WAR | FI_ORDER_WAW | FI_ORDER_WAS | \
FI_ORDER_SAR | FI_ORDER_SAW | FI_ORDER_SAS)
#define SOCK_MODE (0)
#define SOCK_COMM_BUF_SZ (SOCK_EP_MAX_MSG_SZ)
#define SOCK_COMM_THRESHOLD (128 * 1024)
#define SOCK_MAJOR_VERSION 1
#define SOCK_MINOR_VERSION 0
extern const char const sock_fab_name[];
extern const char const sock_dom_name[];
struct sock_fabric{
struct fid_fabric fab_fid;
atomic_t ref;
@ -93,6 +120,18 @@ struct sock_fabric{
struct sock_conn {
int sock_fd;
struct sockaddr addr;
struct sock_pe_entry *rx_pe_entry;
struct sock_pe_entry *tx_pe_entry;
struct ringbuf inbuf;
struct ringbuf outbuf;
};
struct sock_conn_map {
struct sock_conn *table;
int used;
int size;
struct sock_domain *domain;
};
struct sock_domain {
@ -105,132 +144,136 @@ struct sock_domain {
struct sock_eq *eq;
struct sock_eq *mr_eq;
struct sock_pe *pe;
enum fi_progress progress_mode;
struct index_map mr_idm;
struct sock_pe *pe;
struct sock_conn_map u_cmap;
struct sock_conn_map r_cmap;
pthread_t listen_thread;
int listening;
char service[NI_MAXSERV];
};
struct sock_cntr {
struct fid_cntr cntr_fid;
struct sock_domain *dom;
uint64_t value;
uint64_t threshold;
atomic_t ref;
struct fid_cntr cntr_fid;
struct sock_domain *domain;
atomic_t value;
atomic_t threshold;
atomic_t ref;
atomic_t err_cnt;
pthread_cond_t cond;
pthread_mutex_t mut;
pthread_cond_t cond;
pthread_mutex_t mut;
struct fi_cntr_attr attr;
struct dlist_entry rx_list;
struct dlist_entry tx_list;
struct fid_wait *waitset;
int signal;
};
struct sock_mr {
struct fid_mr mr_fid;
struct sock_domain *dom;
uint64_t access;
uint64_t offset;
uint64_t key;
size_t iov_count;
struct iovec mr_iov[1];
struct fid_mr mr_fid;
struct sock_domain *domain;
uint64_t access;
uint64_t offset;
uint64_t key;
uint64_t flags;
size_t iov_count;
struct iovec mr_iov[1];
struct sock_cntr *cntr;
struct sock_cq *cq;
};
struct sock_av_addr {
uint16_t key;
struct sockaddr_storage addr;
};
struct sock_av {
struct fid_av av_fid;
struct sock_domain *dom;
atomic_t ref;
struct fi_av_attr attr;
size_t count;
struct sockaddr_in *table;
struct fid_av av_fid;
struct sock_domain *domain;
atomic_t ref;
struct fi_av_attr attr;
uint64_t mask;
int rx_ctx_bits;
size_t stored;
struct index_map addr_idm;
socklen_t addrlen;
struct sock_conn_map *cmap;
};
struct sock_fid_list {
struct dlist_entry entry;
struct fid *fid;
};
struct sock_poll {
struct fid_poll poll_fid;
struct sock_domain *dom;
struct fid_poll poll_fid;
struct sock_domain *domain;
struct dlist_entry fid_list;
};
struct sock_wait {
struct fid_wait wait_fid;
struct sock_domain *dom;
struct sock_domain *domain;
struct dlist_entry fid_list;
enum fi_wait_obj type;
union {
int fd[2];
struct {
pthread_mutex_t mutex;
pthread_cond_t cond;
};
};
};
enum {
SOCK_REQ_TYPE_SEND,
SOCK_REQ_TYPE_RECV,
SOCK_REQ_TYPE_USER,
};
/* wire protocol */
SOCK_OP_SEND = 0,
SOCK_OP_TSEND = 1,
SOCK_OP_SEND_COMPLETE = 2,
enum{
SOCK_COMM_TYPE_SEND,
SOCK_COMM_TYPE_SENDV,
SOCK_COMM_TYPE_SENDTO,
SOCK_COMM_TYPE_SENDMSG,
SOCK_COMM_TYPE_SENDDATA,
SOCK_COMM_TYPE_SENDDATATO,
};
SOCK_OP_WRITE = 3,
SOCK_OP_WRITE_COMPLETE = 4,
SOCK_OP_WRITE_ERROR = 5,
struct sock_req_item{
int req_type;
int comm_type;
struct sock_ep *ep;
SOCK_OP_READ = 6,
SOCK_OP_READ_COMPLETE = 7,
SOCK_OP_READ_ERROR = 8,
void *context;
uint64_t flags;
uint64_t tag;
uint64_t data;
SOCK_OP_ATOMIC_WRITE = 9,
SOCK_OP_ATOMIC_READ_WRITE = 10,
SOCK_OP_ATOMIC_COMP_WRITE = 11,
size_t done_len;
size_t total_len;
struct sockaddr src_addr;
struct sockaddr addr;
SOCK_OP_ATOMIC_COMPLETE = 12,
SOCK_OP_ATOMIC_ERROR = 13,
union{
struct fi_msg msg;
void *buf;
}item;
};
struct sock_comm_item{
int type;
int is_done;
void *context;
size_t done_len;
size_t total_len;
uint64_t flags;
struct sockaddr addr;
union{
struct fi_msg msg;
void *buf;
}item;
};
enum {
SOCK_OP_SEND,
/* internal */
SOCK_OP_RECV,
SOCK_OP_WRITE,
SOCK_OP_READ,
SOCK_OP_TSEND,
SOCK_OP_TRECV,
SOCK_OP_ATOMIC,
SOCK_OP_SEND_INJECT,
SOCK_OP_TSEND_INJECT,
};
/*
* Transmit context - ring buffer data:
* tx_op + flags + context + dest_addr + [data] + [tag] + tx_iov
* tx_op + flags + context + dest_addr + conn + [data] + [tag] + tx_iov
* 8B 8B 8B 8B 8B 8B 24B+
* data - only present if flags indicate
* tag - only present for TSEND op
*/
struct sock_op {
uint8_t op;
uint8_t src_iov_len;
uint8_t dest_iov_len;
uint8_t op;
uint8_t src_iov_len;
uint8_t dest_iov_len;
union {
struct {
uint8_t op;
uint8_t datatype;
uint8_t res_iov_len;
uint8_t cmp_iov_len;
} atomic;
uint8_t reserved[5];
uint8_t reserved[5];
};
};
@ -240,6 +283,8 @@ struct sock_op_send {
uint64_t context;
uint64_t dest_addr;
struct sock_conn *conn;
uint64_t buf;
struct sock_ep *ep;
};
struct sock_op_tsend {
@ -249,17 +294,13 @@ struct sock_op_tsend {
uint64_t dest_addr;
struct sock_conn *conn;
uint64_t tag;
uint64_t buf;
struct sock_ep *ep;
};
union sock_iov {
struct fi_rma_iov iov;
struct fi_rma_ioc ioc;
};
struct sock_rxtx {
struct ringbuffd rbfd;
fastlock_t wlock;
fastlock_t rlock;
struct fi_rma_iov iov;
struct fi_rma_ioc ioc;
};
struct sock_eq_entry{
@ -278,27 +319,19 @@ struct sock_eq{
struct dlistfd_head list;
struct dlistfd_head err_list;
fastlock_t lock;
struct fid_wait *waitset;
int signal;
};
struct sock_ep {
struct fid_ep ep;
uint8_t enabled;
uint8_t connected;
struct sock_comp {
uint8_t send_cq_event;
uint8_t recv_cq_event;
uint8_t read_cq_event;
uint8_t write_cq_event;
uint8_t rem_read_cq_event;
uint8_t rem_write_cq_event;
int sock_fd;
atomic_t ref;
struct sock_eq *eq;
struct sock_av *av;
struct sock_domain *domain;
char reserved[2];
struct sock_cq *send_cq;
struct sock_cq *recv_cq;
@ -314,6 +347,29 @@ struct sock_ep {
struct sock_cntr *rem_read_cntr;
struct sock_cntr *rem_write_cntr;
struct sock_eq *eq;
};
struct sock_ep {
union{
struct fid_ep ep;
struct fid_sep sep;
struct fid_pep pep;
};
size_t fclass;
uint64_t op_flags;
uint16_t buffered_len;
uint16_t min_multi_recv;
char reserved[4];
atomic_t ref;
struct sock_comp comp;
struct sock_eq *eq;
struct sock_av *av;
struct sock_domain *domain;
struct sock_rx_ctx *rx_ctx;
struct sock_tx_ctx *tx_ctx;
@ -333,35 +389,17 @@ struct sock_ep {
enum fi_ep_type ep_type;
struct sockaddr_in *src_addr;
struct sockaddr_in *dest_addr;
/* TODO: remove */
struct sock_ep *next;
struct sock_ep *prev;
struct sock_ep *alias;
struct sock_ep *base;
list_t *send_list;
list_t *recv_list;
int port_num;
};
struct sock_pep {
struct fid_pep pep;
struct sock_domain *dom;
int sock_fd;
struct sock_eq *eq;
struct sock_cq *send_cq;
struct sock_cq *recv_cq;
uint64_t op_flags;
uint64_t pep_cap;
};
struct sock_rx_entry {
struct sock_op rx_op;
uint8_t is_buffered;
uint8_t is_busy;
uint8_t is_claimed;
uint8_t reserved[5];
uint64_t used;
uint64_t total_len;
uint64_t flags;
uint64_t context;
@ -369,6 +407,7 @@ struct sock_rx_entry {
uint64_t data;
uint64_t tag;
uint64_t ignore;
struct sock_comp *comp;
union sock_iov iov[SOCK_EP_MAX_IOV_LIMIT];
struct dlist_entry entry;
@ -384,26 +423,25 @@ struct sock_rx_ctx {
uint8_t recv_cq_event;
uint8_t rem_read_cq_event;
uint8_t rem_write_cq_event;
uint8_t reserved[1];
uint16_t buffered_len;
uint16_t min_multi_recv;
uint8_t reserved[7];
uint64_t addr;
struct sock_cq *recv_cq;
struct sock_cq *rem_read_cq;
struct sock_cq *rem_write_cq;
struct sock_comp comp;
struct sock_ep *ep;
struct sock_av *av;
struct sock_eq *eq;
struct sock_domain *domain;
struct sock_cntr *recv_cntr;
struct sock_cntr *rem_read_cntr;
struct sock_cntr *rem_write_cntr;
struct dlist_entry cq_entry;
struct dlist_entry pe_entry;
struct dlist_entry cq_entry;
struct dlist_entry cntr_entry;
struct dlist_entry pe_entry_list;
struct dlist_entry rx_entry_list;
struct dlist_entry rx_buffered_list;
struct dlist_entry ep_list;
fastlock_t lock;
@ -411,7 +449,11 @@ struct sock_rx_ctx {
};
struct sock_tx_ctx {
struct fid_ep ctx;
union {
struct fid_ep ctx;
struct fid_stx stx;
};
size_t fclass;
struct ringbuffd rbfd;
fastlock_t wlock;
@ -421,26 +463,17 @@ struct sock_tx_ctx {
uint8_t enabled;
uint8_t progress;
uint8_t send_cq_event;
uint8_t read_cq_event;
uint8_t write_cq_event;
uint8_t reserved[1];
uint64_t addr;
struct sock_cq *send_cq;
struct sock_cq *read_cq;
struct sock_cq *write_cq;
struct sock_comp comp;
struct sock_ep *ep;
struct sock_av *av;
struct sock_eq *eq;
struct sock_domain *domain;
struct sock_cntr *send_cntr;
struct sock_cntr *read_cntr;
struct sock_cntr *write_cntr;
struct dlist_entry cq_entry;
struct dlist_entry pe_entry;
struct dlist_entry cq_entry;
struct dlist_entry cntr_entry;
struct dlist_entry pe_entry_list;
struct dlist_entry ep_list;
@ -455,29 +488,84 @@ struct sock_msg_hdr{
uint8_t version;
uint8_t op_type;
uint16_t rx_id;
uint8_t reserved[4];
uint16_t pe_entry_id;
uint8_t dest_iov_len;
uint8_t reserved[1];
uint64_t src_addr;
uint64_t flags;
uint64_t msg_len;
};
struct sock_msg_send{
struct sock_msg_hdr msg_hdr;
/* data */
/* user data */
/* data */
};
struct sock_msg_tsend{
struct sock_msg_hdr msg_hdr;
uint64_t tag;
/* user data */
/* data */
};
struct sock_rma_write_req {
struct sock_msg_hdr msg_hdr;
/* user data */
/* dst iov(s)*/
/* data */
};
struct sock_atomic_req {
struct sock_msg_hdr msg_hdr;
struct sock_op op;
/* user data */
/* dst ioc(s)*/
/* cmp iov(s) */
/* data */
};
struct sock_msg_response {
struct sock_msg_hdr msg_hdr;
uint16_t pe_entry_id;
uint8_t reserved[6];
};
struct sock_rma_read_req {
struct sock_msg_hdr msg_hdr;
/* src iov(s)*/
};
struct sock_rma_read_response {
struct sock_msg_hdr msg_hdr;
uint16_t pe_entry_id;
uint8_t reserved[6];
/* data */
};
struct sock_atomic_response {
struct sock_msg_hdr msg_hdr;
uint16_t pe_entry_id;
uint8_t reserved[6];
/* data */
};
struct sock_tx_iov {
union sock_iov src;
union sock_iov dst;
union sock_iov res;
union sock_iov cmp;
};
struct sock_tx_pe_entry{
struct sock_op tx_op;
struct sock_comp *comp;
uint8_t header_sent;
uint8_t reserved[7];
uint8_t send_done;
uint8_t reserved[6];
struct sock_tx_ctx *tx_ctx;
union {
struct sock_tx_iov tx_iov[SOCK_EP_MAX_IOV_LIMIT];
char inject_data[SOCK_EP_MAX_INJECT_SZ];
@ -486,8 +574,16 @@ struct sock_tx_pe_entry{
struct sock_rx_pe_entry{
struct sock_op rx_op;
void *raw_data;
struct sock_comp *comp;
uint8_t header_read;
uint8_t pending_send;
uint8_t reserved[6];
struct sock_rx_entry *rx_entry;
struct sock_msg_response response;
union sock_iov rx_iov[SOCK_EP_MAX_IOV_LIMIT];
char atomic_cmp[SOCK_EP_MAX_ATOMIC_SZ];
char atomic_src[SOCK_EP_MAX_ATOMIC_SZ];
};
/* PE entry type */
@ -509,18 +605,43 @@ struct sock_pe_entry{
uint64_t addr;
uint64_t data;
uint64_t tag;
uint64_t buf;
uint8_t type;
uint8_t reserved[7];
uint8_t is_complete;
uint8_t reserved[6];
uint64_t done_len;
uint64_t total_len;
uint64_t data_len;
struct sock_ep *ep;
struct sock_cq *cq;
struct sock_conn *conn;
struct sock_comp *comp;
struct dlist_entry entry;
struct dlist_entry ctx_entry;
};
struct sock_pe{
struct sock_domain *domain;
struct sock_pe_entry pe_table[SOCK_PE_MAX_ENTRIES];
fastlock_t lock;
struct dlist_entry free_list;
struct dlist_entry busy_list;
struct dlistfd_head tx_list;
struct dlistfd_head rx_list;
pthread_t progress_thread;
volatile int do_progress;
struct sock_pe_entry *pe_atomic;
};
typedef int (*sock_cq_report_fn) (struct sock_cq *cq, fi_addr_t addr,
struct sock_pe_entry *pe_entry);
struct sock_cq {
struct fid_cq cq_fid;
struct sock_domain *domain;
@ -533,6 +654,9 @@ struct sock_cq {
struct ringbuf cqerr_rb;
fastlock_t lock;
struct fid_wait *waitset;
int signal;
struct dlist_entry ep_list;
struct dlist_entry rx_list;
struct dlist_entry tx_list;
@ -540,92 +664,149 @@ struct sock_cq {
sock_cq_report_fn report_completion;
};
int sock_verify_info(struct fi_info *hints);
int sock_verify_fabric_attr(struct fi_fabric_attr *attr);
int sock_verify_domain_attr(struct fi_domain_attr *attr);
int sock_rdm_verify_ep_attr(struct fi_ep_attr *ep_attr,
struct fi_tx_attr *tx_attr,
struct fi_rx_attr *rx_attr);
int sock_rdm_verify_ep_attr(struct fi_ep_attr *ep_attr, struct fi_tx_attr *tx_attr,
struct fi_rx_attr *rx_attr);
int sock_dgram_verify_ep_attr(struct fi_ep_attr *ep_attr, struct fi_tx_attr *tx_attr,
struct fi_rx_attr *rx_attr);
int sock_msg_verify_ep_attr(struct fi_ep_attr *ep_attr, struct fi_tx_attr *tx_attr,
struct fi_rx_attr *rx_attr);
struct fi_info *sock_fi_info(enum fi_ep_type ep_type,
struct fi_info *hints, void *src_addr, void *dest_addr);
int sock_rdm_getinfo(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info);
uint64_t flags, struct fi_info *hints, struct fi_info **info);
int sock_dgram_getinfo(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info);
uint64_t flags, struct fi_info *hints, struct fi_info **info);
int sock_msg_getinfo(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info);
void free_fi_info(struct fi_info *info);
int sock_domain(struct fid_fabric *fabric, struct fi_info *info,
struct fid_domain **dom, void *context);
int sock_av_open(struct fid_domain *domain, struct fi_av_attr *attr,
struct fid_av **av, void *context);
fi_addr_t _sock_av_lookup(struct sock_av *av, struct sockaddr *addr);
int sock_av_lookup_addr(struct sock_av *av, fi_addr_t addr,
struct sock_conn **entry);
int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info,
struct sock_ep **ep, void *context, size_t fclass);
int sock_rdm_ep(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context);
int sock_rdm_sep(struct fid_domain *domain, struct fi_info *info,
struct fid_sep **sep, void *context);
int sock_dgram_ep(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context);
int sock_dgram_sep(struct fid_domain *domain, struct fi_info *info,
struct fid_sep **sep, void *context);
int sock_msg_ep(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context);
int sock_msg_sep(struct fid_domain *domain, struct fi_info *info,
struct fid_sep **sep, void *context);
int sock_msg_passive_ep(struct fid_fabric *fabric, struct fi_info *info,
struct fid_pep **pep, void *context);
int sock_stx_ctx(struct fid_domain *domain,
struct fi_tx_attr *attr, struct fid_stx **stx, void *context);
int sock_srx_ctx(struct fid_domain *domain,
struct fi_rx_attr *attr, struct fid_ep **srx, void *context);
int sock_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
struct fid_cq **cq, void *context);
int _sock_cq_report_completion(struct sock_cq *sock_cq, struct sock_req_item *item);
int _sock_cq_report_error(struct sock_cq *sock_cq, struct fi_cq_err_entry *error);
int sock_cq_report_error(struct sock_cq *cq, struct sock_pe_entry *entry,
size_t olen, int err, int prov_errno, void *err_data);
int sock_cq_progress(struct sock_cq *cq);
int sock_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
struct fid_cntr **cntr, void *context);
int sock_cntr_inc(struct sock_cntr *cntr);
int sock_cntr_err_inc(struct sock_cntr *cntr);
int sock_cntr_progress(struct sock_cntr *cntr);
int sock_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
struct fid_eq **eq, void *context);
struct fid_eq **eq, void *context);
ssize_t sock_eq_report_event(struct sock_eq *sock_eq, uint32_t event,
const void *buf, size_t len, uint64_t flags);
ssize_t sock_eq_report_error(struct sock_eq *sock_eq, fid_t fid, void *context,
int err, int prov_errno, void *err_data);
int sock_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
struct fid_cntr **cntr, void *context);
struct sock_mr *sock_mr_verify_key(struct sock_domain *domain, uint16_t key,
void *buf, size_t len, uint64_t access);
struct sock_mr *sock_mr_verify_desc(struct sock_domain *domain, void *desc,
void *buf, size_t len, uint64_t access);
struct sock_mr * sock_mr_get_entry(struct sock_domain *domain, uint16_t key);
int sock_rdm_ep(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context);
int sock_dgram_ep(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context);
int sock_passive_ep(struct fid_fabric *fabric, struct fi_info *info,
struct fid_pep **pep, void *context);
int sock_ep_connect(struct fid_ep *ep, const void *addr,
const void *param, size_t paramlen);
struct sock_rx_ctx *sock_rx_ctx_alloc(struct fi_rx_attr *attr,
void *context);
void sock_rx_ctx_add_ep(struct sock_rx_ctx *rx_ctx, struct sock_ep *ep);
struct sock_rx_ctx *sock_rx_ctx_alloc(struct fi_rx_attr *attr, void *context);
void sock_rx_ctx_free(struct sock_rx_ctx *rx_ctx);
struct sock_tx_ctx *sock_tx_ctx_alloc(struct fi_tx_attr *attr,
void *context);
void sock_tx_ctx_add_ep(struct sock_tx_ctx *tx_ctx, struct sock_ep *ep);
struct sock_tx_ctx *sock_tx_ctx_alloc(struct fi_tx_attr *attr, void *context);
void sock_tx_ctx_free(struct sock_tx_ctx *tx_ctx);
void sock_tx_ctx_start(struct sock_tx_ctx *tx_ctx);
void sock_tx_ctx_write(struct sock_tx_ctx *tx_ctx, const void *buf, size_t len);
void sock_tx_ctx_commit(struct sock_tx_ctx *tx_ctx);
void sock_tx_ctx_abort(struct sock_tx_ctx *tx_ctx);
int sock_tx_ctx_read(struct sock_tx_ctx *tx_ctx, void *buf, size_t len);
int sock_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr,
struct fid_poll **pollset);
struct fid_poll **pollset);
int sock_wait_open(struct fid_domain *domain, struct fi_wait_attr *attr,
struct fid_wait **waitset);
struct fid_wait **waitset);
int sock_wait_open(struct fid_domain *domain, struct fi_wait_attr *attr,
struct fid_wait **waitset);
void sock_wait_signal(struct fid_wait *wait_fid);
int sock_wait_get_obj(struct fid_wait *fid, void *arg);
int sock_wait_close(fid_t fid);
int sock_av_open(struct fid_domain *domain, struct fi_av_attr *attr,
struct fid_av **av, void *context);
fi_addr_t _sock_av_lookup(struct sock_av *av, struct sockaddr *addr);
fi_addr_t sock_av_get_fiaddr(struct sock_av *av, struct sock_conn *conn);
fi_addr_t sock_av_lookup_key(struct sock_av *av, int key);
struct sock_conn *sock_av_lookup_addr(struct sock_av *av, fi_addr_t addr);
struct sock_conn *sock_conn_map_lookup_key(struct sock_conn_map *conn_map,
uint16_t key);
uint16_t sock_conn_map_match_or_connect(struct sock_conn_map *map,
struct sockaddr_in *addr, int match_only);
int sock_conn_listen(struct sock_domain *domain);
int sock_conn_map_clear_pe_entry(struct sock_conn *conn_entry, uint16_t key);
void sock_conn_map_destroy(struct sock_conn_map *cmap);
struct sock_pe *sock_pe_init(struct sock_domain *domain);
int sock_pe_add_tx_ctx(struct sock_pe *pe, struct sock_tx_ctx *ctx);
int sock_pe_add_rx_ctx(struct sock_pe *pe, struct sock_rx_ctx *ctx);
void sock_pe_add_tx_ctx(struct sock_pe *pe, struct sock_tx_ctx *ctx);
void sock_pe_add_rx_ctx(struct sock_pe *pe, struct sock_rx_ctx *ctx);
int sock_pe_progress_rx_ctx(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx);
int sock_pe_progress_tx_ctx(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx);
void sock_pe_finalize(struct sock_pe *pe);
void free_fi_info(struct fi_info *info);
struct sock_rx_entry *sock_rx_new_entry(struct sock_rx_ctx *rx_ctx);
struct sock_rx_entry *sock_rx_new_buffered_entry(struct sock_rx_ctx *rx_ctx,
size_t len);
struct sock_rx_entry *sock_rx_get_entry(struct sock_rx_ctx *rx_ctx,
uint64_t addr, uint64_t tag);
size_t sock_rx_avail_len(struct sock_rx_entry *rx_entry);
void sock_rx_release_entry(struct sock_rx_entry *rx_entry);
int sock_comm_buffer_init(struct sock_conn *conn);
void sock_comm_buffer_finalize(struct sock_conn *conn);
ssize_t sock_comm_send(struct sock_conn *conn, const void *buf, size_t len);
ssize_t sock_comm_recv(struct sock_conn *conn, void *buf, size_t len);
ssize_t sock_comm_flush(struct sock_conn *conn);
#endif

Просмотреть файл

@ -0,0 +1,504 @@
/*
* Copyright (c) 2014 Intel Corporation, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <poll.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <limits.h>
#include "sock.h"
#include "sock_util.h"
/*
 * Common transmit path for all atomic operations (write / fetch / compare).
 * Serializes one request into the TX context's ring buffer in the order
 * the progress engine consumes it:
 *
 *   sock_op, flags, context, dest addr, conn, buf, ep, [cq data],
 *   source data (inline for FI_INJECT, else source iov descriptors),
 *   destination iov(s), result iov(s), compare iov(s)
 *
 * Returns 0 on success, -FI_EAGAIN when the ring lacks space for the whole
 * message (nothing is committed in that case), or -FI_EINVAL on an invalid
 * endpoint class or mismatched source/destination lengths.
 */
static ssize_t sock_ep_tx_atomic(struct fid_ep *ep,
			const struct fi_msg_atomic *msg,
			const struct fi_ioc *comparev, void **compare_desc,
			size_t compare_count, struct fi_ioc *resultv,
			void **result_desc, size_t result_count,
			uint64_t flags, int type)
{
	int i, ret;
	size_t datatype_sz;
	struct sock_op tx_op;
	union sock_iov tx_iov;
	struct sock_conn *conn;
	struct sock_tx_ctx *tx_ctx;
	uint64_t total_len, src_len, dst_len;
	struct sock_ep *sock_ep;

	/* Resolve the TX context whether we were handed an EP or a TX ctx. */
	switch (ep->fid.fclass) {
	case FI_CLASS_EP:
		sock_ep = container_of(ep, struct sock_ep, ep);
		tx_ctx = sock_ep->tx_ctx;
		break;
	case FI_CLASS_TX_CTX:
		tx_ctx = container_of(ep, struct sock_tx_ctx, ctx);
		sock_ep = tx_ctx->ep;
		break;
	default:
		SOCK_LOG_ERROR("Invalid EP type\n");
		return -FI_EINVAL;
	}

	assert(tx_ctx->enabled &&
	       msg->iov_count <= SOCK_EP_MAX_IOV_LIMIT &&
	       msg->rma_iov_count <= SOCK_EP_MAX_IOV_LIMIT);

	conn = sock_av_lookup_addr(tx_ctx->av, msg->addr);
	assert(conn);

	/* Sizing pass: compute how much ring space the request needs. */
	src_len = 0;
	datatype_sz = fi_datatype_size(msg->datatype);
	if (flags & FI_INJECT) {
		for (i = 0; i < msg->iov_count; i++) {
			src_len += (msg->msg_iov[i].count * datatype_sz);
		}
		assert(src_len <= SOCK_EP_MAX_INJECT_SZ);
		total_len = src_len;
	} else {
		total_len = msg->iov_count * sizeof(union sock_iov);
	}

	total_len += (sizeof(struct sock_op_send) +
		      (msg->rma_iov_count * sizeof(union sock_iov)) +
		      (result_count * sizeof(union sock_iov)));

	sock_tx_ctx_start(tx_ctx);
	if (rbfdavail(&tx_ctx->rbfd) < total_len) {
		ret = -FI_EAGAIN;
		goto err;
	}

	flags |= tx_ctx->attr.op_flags;
	memset(&tx_op, 0, sizeof(struct sock_op));
	tx_op.op = type;
	tx_op.dest_iov_len = msg->rma_iov_count;
	tx_op.atomic.op = msg->op;
	tx_op.atomic.datatype = msg->datatype;
	tx_op.atomic.res_iov_len = result_count;
	tx_op.atomic.cmp_iov_len = compare_count;

	/* For inject, the header carries the byte count, not the iov count. */
	if (flags & FI_INJECT)
		tx_op.src_iov_len = src_len;
	else
		tx_op.src_iov_len = msg->iov_count;

	sock_tx_ctx_write(tx_ctx, &tx_op, sizeof(struct sock_op));
	sock_tx_ctx_write(tx_ctx, &flags, sizeof(uint64_t));
	sock_tx_ctx_write(tx_ctx, &msg->context, sizeof(uint64_t));
	sock_tx_ctx_write(tx_ctx, &msg->addr, sizeof(uint64_t));
	sock_tx_ctx_write(tx_ctx, &conn, sizeof(uint64_t));
	sock_tx_ctx_write(tx_ctx, &msg->msg_iov[0].addr, sizeof(uint64_t));
	sock_tx_ctx_write(tx_ctx, &sock_ep, sizeof(uint64_t));

	if (flags & FI_REMOTE_CQ_DATA) {
		sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(uint64_t));
	}

	/*
	 * Bug fix: in the FI_INJECT case src_len still holds the total from
	 * the sizing pass above.  Reset it before the write pass
	 * re-accumulates, otherwise it ends up doubled and the src/dst
	 * length validation below rejects every inject-mode atomic with
	 * -FI_EINVAL.
	 */
	src_len = 0;
	if (flags & FI_INJECT) {
		/* Copy the source data inline into the ring. */
		for (i = 0; i < msg->iov_count; i++) {
			sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].addr,
					  msg->msg_iov[i].count * datatype_sz);
			src_len += (msg->msg_iov[i].count * datatype_sz);
		}
	} else {
		/* Write source iov descriptors; data is read at progress time. */
		for (i = 0; i < msg->iov_count; i++) {
			tx_iov.ioc.addr = (uint64_t)msg->msg_iov[i].addr;
			tx_iov.ioc.count = msg->msg_iov[i].count;
			tx_iov.ioc.key = (uint64_t)msg->desc[i];
			sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
			src_len += (tx_iov.ioc.count * datatype_sz);
		}
	}
	assert(src_len <= SOCK_EP_MAX_ATOMIC_SZ);

	/* Destination (remote) iovs must cover exactly the source length. */
	dst_len = 0;
	for (i = 0; i < msg->rma_iov_count; i++) {
		tx_iov.ioc.addr = msg->rma_iov[i].addr;
		tx_iov.ioc.key = msg->rma_iov[i].key;
		tx_iov.ioc.count = msg->rma_iov[i].count;
		sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
		dst_len += (tx_iov.ioc.count * datatype_sz);
	}

	if (dst_len != src_len) {
		SOCK_LOG_ERROR("Buffer length mismatch\n");
		ret = -FI_EINVAL;
		goto err;
	}

	/* Result iovs (fetch-style ops) must also match the source length. */
	dst_len = 0;
	for (i = 0; i < result_count; i++) {
		tx_iov.ioc.addr = (uint64_t)resultv[i].addr;
		tx_iov.ioc.count = resultv[i].count;
		sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
		dst_len += (tx_iov.ioc.count * datatype_sz);
	}

	if (result_count && (dst_len != src_len)) {
		SOCK_LOG_ERROR("Buffer length mismatch\n");
		ret = -FI_EINVAL;
		goto err;
	}

	/*
	 * NOTE(review): dst_len is intentionally NOT reset before this loop
	 * (original behavior preserved), so when both result and compare
	 * iovs are present the check below compares their combined length
	 * against src_len — verify that is the intended contract.
	 */
	for (i = 0; i < compare_count; i++) {
		tx_iov.ioc.addr = (uint64_t)comparev[i].addr;
		tx_iov.ioc.count = comparev[i].count;
		sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
		dst_len += (tx_iov.ioc.count * datatype_sz);
	}

	if (compare_count && (dst_len != src_len)) {
		SOCK_LOG_ERROR("Buffer length mismatch\n");
		ret = -FI_EINVAL;
		goto err;
	}

	sock_tx_ctx_commit(tx_ctx);
	return 0;

err:
	sock_tx_ctx_abort(tx_ctx);
	return ret;
}
/*
 * fi_atomic writemsg entry point: forwards the caller's fi_msg_atomic to
 * the common TX path with no compare/result buffers (write-only atomic).
 */
static ssize_t sock_ep_atomic_writemsg(struct fid_ep *ep,
const struct fi_msg_atomic *msg, uint64_t flags)
{
return sock_ep_tx_atomic(ep, msg, NULL, NULL, 0,
NULL, NULL, 0, flags, SOCK_OP_ATOMIC_WRITE);
}
/*
 * Single-buffer atomic write: wrap (buf, count) in a one-element
 * fi_msg_atomic describing the remote region (addr, key) and hand it to
 * the writemsg path with no flags.
 */
static ssize_t sock_ep_atomic_write(struct fid_ep *ep,
	const void *buf, size_t count, void *desc,
	fi_addr_t dest_addr, uint64_t addr,
	uint64_t key, enum fi_datatype datatype,
	enum fi_op op, void *context)
{
	struct fi_ioc src_iov = {
		.addr = (void *) buf,
		.count = count,
	};
	struct fi_rma_ioc dst_iov = {
		.addr = addr,
		.key = key,
		.count = count,
	};
	struct fi_msg_atomic msg = {
		.msg_iov = &src_iov,
		.desc = &desc,
		.iov_count = 1,
		.addr = dest_addr,
		.rma_iov = &dst_iov,
		.rma_iov_count = 1,
		.datatype = datatype,
		.op = op,
		.context = context,
		.data = 0,
	};

	return sock_ep_atomic_writemsg(ep, &msg, 0);
}
/*
 * Vector atomic write: forward the caller's iov array unchanged and
 * describe the single remote region at (addr, key).
 *
 * NOTE(review): dst_iov.count is set to the number of iov entries, not
 * the total element count across them; the TX path compares total
 * source/destination element lengths, so this only matches when each
 * iov entry holds one element -- confirm intended usage.
 */
static ssize_t sock_ep_atomic_writev(struct fid_ep *ep,
	const struct fi_ioc *iov, void **desc, size_t count,
	fi_addr_t dest_addr,
	uint64_t addr, uint64_t key,
	enum fi_datatype datatype, enum fi_op op, void *context)
{
	struct fi_rma_ioc dst_iov;
	struct fi_msg_atomic msg;

	dst_iov.addr = addr;
	dst_iov.key = key;
	dst_iov.count = count;

	msg.msg_iov = iov;
	msg.desc = desc;
	msg.iov_count = count;
	msg.addr = dest_addr;
	msg.rma_iov = &dst_iov;
	msg.rma_iov_count = 1;
	msg.datatype = datatype;
	msg.op = op;
	msg.context = context;
	msg.data = 0;

	return sock_ep_atomic_writemsg(ep, &msg, 0);
}
/*
 * Inject a small atomic write: 'count' elements from buf are copied into
 * the TX ring immediately (FI_INJECT), so no completion context applies.
 *
 * Fix: msg is now zero-initialized; the original left msg.desc and
 * msg.context holding indeterminate stack values that the TX path could
 * read.
 */
static ssize_t sock_ep_atomic_inject(struct fid_ep *ep, const void *buf, size_t count,
	fi_addr_t dest_addr, uint64_t addr, uint64_t key,
	enum fi_datatype datatype, enum fi_op op)
{
	struct fi_msg_atomic msg = {0};
	struct fi_ioc msg_iov;
	struct fi_rma_ioc rma_iov;

	msg_iov.addr = (void *) buf;
	msg_iov.count = count;
	msg.msg_iov = &msg_iov;
	msg.iov_count = 1;
	msg.addr = dest_addr;

	rma_iov.addr = addr;
	rma_iov.key = key;
	rma_iov.count = count;
	msg.rma_iov = &rma_iov;
	msg.rma_iov_count = 1;

	msg.datatype = datatype;
	msg.op = op;
	msg.data = 0;

	return sock_ep_atomic_writemsg(ep, &msg, FI_INJECT);
}
/*
 * fi_fetch_atomic msg entry point: apply the operands in msg remotely and
 * return the previous remote contents into resultv; no compare buffers.
 */
static ssize_t sock_ep_atomic_readwritemsg(struct fid_ep *ep,
const struct fi_msg_atomic *msg,
struct fi_ioc *resultv, void **result_desc,
size_t result_count, uint64_t flags)
{
return sock_ep_tx_atomic(ep, msg, NULL, NULL, 0,
resultv, result_desc, result_count, flags,
SOCK_OP_ATOMIC_READ_WRITE);
}
/*
 * Single-buffer fetch-atomic: apply 'count' elements from buf at the
 * remote region (addr, key) and return the previous remote contents in
 * result.
 *
 * Fix: the remote region and the result buffer hold 'count' elements,
 * not 1.  The TX path compares the total source and destination element
 * lengths and fails with -FI_EINVAL on mismatch, so the hardcoded 1
 * broke every request with count > 1.
 */
static ssize_t sock_ep_atomic_readwrite(struct fid_ep *ep,
	const void *buf, size_t count, void *desc,
	void *result, void *result_desc,
	fi_addr_t dest_addr,
	uint64_t addr, uint64_t key,
	enum fi_datatype datatype, enum fi_op op, void *context)
{
	struct fi_msg_atomic msg;
	struct fi_ioc msg_iov;
	struct fi_rma_ioc rma_iov;
	struct fi_ioc resultv;

	msg_iov.addr = (void *) buf;
	msg_iov.count = count;
	msg.msg_iov = &msg_iov;
	msg.desc = &desc;
	msg.iov_count = 1;
	msg.addr = dest_addr;

	rma_iov.addr = addr;
	rma_iov.count = count;	/* was 1: must match source element count */
	rma_iov.key = key;
	msg.rma_iov = &rma_iov;
	msg.rma_iov_count = 1;

	msg.datatype = datatype;
	msg.op = op;
	msg.context = context;

	resultv.addr = result;
	resultv.count = count;	/* was 1: result holds 'count' elements */

	return sock_ep_atomic_readwritemsg(ep, &msg,
		&resultv, &result_desc, 1, 0);
}
/*
 * Vector fetch-atomic: apply the elements in iov[] remotely and return
 * the previous remote contents into resultv[].
 *
 * Fix: the remote region spans the total element count of all source
 * iovs, not 1; the TX path rejects mismatched source/destination
 * element lengths with -FI_EINVAL.
 */
static ssize_t sock_ep_atomic_readwritev(struct fid_ep *ep,
	const struct fi_ioc *iov, void **desc, size_t count,
	struct fi_ioc *resultv, void **result_desc, size_t result_count,
	fi_addr_t dest_addr,
	uint64_t addr, uint64_t key,
	enum fi_datatype datatype, enum fi_op op, void *context)
{
	struct fi_msg_atomic msg;
	struct fi_rma_ioc rma_iov;
	size_t i, total_cnt;

	/* total number of source elements across all iov entries */
	for (i = 0, total_cnt = 0; i < count; i++)
		total_cnt += iov[i].count;

	msg.msg_iov = iov;
	msg.desc = desc;
	msg.iov_count = count;
	msg.addr = dest_addr;

	rma_iov.addr = addr;
	rma_iov.count = total_cnt;	/* was 1: must match source length */
	rma_iov.key = key;
	msg.rma_iov = &rma_iov;
	msg.rma_iov_count = 1;

	msg.datatype = datatype;
	msg.op = op;
	msg.context = context;

	return sock_ep_atomic_readwritemsg(ep, &msg,
		resultv, result_desc, result_count, 0);
}
/*
 * fi_compare_atomic msg entry point: compare-and-swap style atomic with
 * explicit compare and result buffer vectors.
 */
static ssize_t sock_ep_atomic_compwritemsg(struct fid_ep *ep,
const struct fi_msg_atomic *msg,
const struct fi_ioc *comparev, void **compare_desc, size_t compare_count,
struct fi_ioc *resultv, void **result_desc, size_t result_count,
uint64_t flags)
{
return sock_ep_tx_atomic(ep, msg, comparev, compare_desc, compare_count,
resultv, result_desc, result_count, flags,
SOCK_OP_ATOMIC_COMP_WRITE);
}
/*
 * Single-buffer compare-atomic: compare 'count' elements against the
 * remote region (addr, key), conditionally write buf, and return the
 * previous remote contents in result.
 *
 * Fix: the remote region, compare buffer, and result buffer all hold
 * 'count' elements, not 1.  The TX path compares total element lengths
 * and fails with -FI_EINVAL on mismatch, so the hardcoded 1 broke every
 * request with count > 1.
 */
static ssize_t sock_ep_atomic_compwrite(struct fid_ep *ep,
	const void *buf, size_t count, void *desc,
	const void *compare, void *compare_desc,
	void *result, void *result_desc,
	fi_addr_t dest_addr,
	uint64_t addr, uint64_t key,
	enum fi_datatype datatype, enum fi_op op, void *context)
{
	struct fi_msg_atomic msg;
	struct fi_ioc msg_iov;
	struct fi_rma_ioc rma_iov;
	struct fi_ioc resultv;
	struct fi_ioc comparev;

	msg_iov.addr = (void *) buf;
	msg_iov.count = count;
	msg.msg_iov = &msg_iov;
	msg.desc = &desc;
	msg.iov_count = 1;
	msg.addr = dest_addr;

	rma_iov.addr = addr;
	rma_iov.count = count;	/* was 1: must match source element count */
	rma_iov.key = key;
	msg.rma_iov = &rma_iov;
	msg.rma_iov_count = 1;

	msg.datatype = datatype;
	msg.op = op;
	msg.context = context;

	resultv.addr = result;
	resultv.count = count;	/* was 1 */
	comparev.addr = (void *) compare;
	comparev.count = count;	/* was 1 */

	return sock_ep_atomic_compwritemsg(ep, &msg, &comparev, &compare_desc, 1,
		&resultv, &result_desc, 1, 0);
}
/*
 * Vector compare-atomic: compare/write the source iovs against the
 * remote region and return the previous contents into resultv[].
 *
 * Fixes:
 *  - compare_count and result_count were ignored; literal 1 was passed
 *    to the msg path regardless of the caller's vectors.
 *  - the remote region spans the total source element count, not 1
 *    (the TX path rejects mismatched lengths with -FI_EINVAL).
 */
static ssize_t sock_ep_atomic_compwritev(struct fid_ep *ep,
	const struct fi_ioc *iov, void **desc, size_t count,
	const struct fi_ioc *comparev, void **compare_desc, size_t compare_count,
	struct fi_ioc *resultv, void **result_desc, size_t result_count,
	fi_addr_t dest_addr,
	uint64_t addr, uint64_t key,
	enum fi_datatype datatype, enum fi_op op, void *context)
{
	struct fi_msg_atomic msg;
	struct fi_rma_ioc rma_iov;
	size_t i, total_cnt;

	/* total number of source elements across all iov entries */
	for (i = 0, total_cnt = 0; i < count; i++)
		total_cnt += iov[i].count;

	msg.msg_iov = iov;
	msg.desc = desc;
	msg.iov_count = count;
	msg.addr = dest_addr;

	rma_iov.addr = addr;
	rma_iov.count = total_cnt;	/* was 1 */
	rma_iov.key = key;
	msg.rma_iov = &rma_iov;
	msg.rma_iov_count = 1;

	msg.datatype = datatype;
	msg.op = op;
	msg.context = context;

	return sock_ep_atomic_compwritemsg(ep, &msg, comparev, compare_desc,
		compare_count, resultv, result_desc, result_count, 0);
}
/*
 * Report whether (datatype, op) is a supported atomic combination and,
 * if so, how many elements fit in one atomic transfer.
 *
 * Complex and long-double types are unsupported outright; bitwise ops
 * and masked swap are rejected on floating-point types.
 */
static int sock_ep_atomic_valid(struct fid_ep *ep, enum fi_datatype datatype,
	enum fi_op op, size_t *count)
{
	size_t dt_size;
	int is_fp;

	if (datatype == FI_FLOAT_COMPLEX ||
	    datatype == FI_DOUBLE_COMPLEX ||
	    datatype == FI_LONG_DOUBLE ||
	    datatype == FI_LONG_DOUBLE_COMPLEX)
		return -FI_ENOENT;

	is_fp = (datatype == FI_FLOAT || datatype == FI_DOUBLE);
	if (is_fp && (op == FI_BOR || op == FI_BAND ||
		      op == FI_BXOR || op == FI_MSWAP))
		return -FI_ENOENT;

	dt_size = fi_datatype_size(datatype);
	*count = SOCK_EP_MAX_ATOMIC_SZ / dt_size;
	return 0;
}
/*
 * fi_ops_atomic dispatch table exported for sockets-provider endpoints.
 * The same validity checker serves write, fetch, and compare variants.
 */
struct fi_ops_atomic sock_ep_atomic = {
.size = sizeof(struct fi_ops_atomic),
.write = sock_ep_atomic_write,
.writev = sock_ep_atomic_writev,
.writemsg = sock_ep_atomic_writemsg,
.inject = sock_ep_atomic_inject,
.readwrite = sock_ep_atomic_readwrite,
.readwritev = sock_ep_atomic_readwritev,
.readwritemsg = sock_ep_atomic_readwritemsg,
.compwrite = sock_ep_atomic_compwrite,
.compwritev = sock_ep_atomic_compwritev,
.compwritemsg = sock_ep_atomic_compwritemsg,
.writevalid = sock_ep_atomic_valid,
.readwritevalid = sock_ep_atomic_valid,
.compwritevalid = sock_ep_atomic_valid,
};

Просмотреть файл

@ -41,26 +41,111 @@
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <ctype.h>
#include "sock.h"
#include "sock_util.h"
static int sock_at_insert(struct fid_av *av, const void *addr, size_t count,
fi_addr_t *fi_addr, uint64_t flags, void *context)
/*
 * Reverse lookup: find the fi_addr (AV index) whose connection-map key
 * matches 'key'.  Scans every possible index slot; entries without an
 * established connection are lazily matched (match_only mode) against
 * the connection map first.
 *
 * NOTE(review): stored keys are 1-based (0 means "no connection"), hence
 * the comparison against key + 1 -- confirm the caller passes 0-based
 * keys.
 */
fi_addr_t sock_av_lookup_key(struct sock_av *av, int key)
{
int i;
struct sock_av_addr *av_addr;
/* walk all index-map slots; gaps return NULL */
for (i = 0; i < IDX_MAX_INDEX; i++) {
av_addr = idm_lookup(&av->addr_idm, i);
if (!av_addr)
continue;
if (!av_addr->key) {
/* match-only: do not open a new connection during lookup */
av_addr->key = sock_conn_map_match_or_connect(
av->cmap,
(struct sockaddr_in*)&av_addr->addr, 1);
if (!av_addr->key) {
continue;
}
}
if (av_addr->key == key + 1) {
return i;
}
}
SOCK_LOG_INFO("Reverse-lookup failed: %d\n", key);
return FI_ADDR_NOTAVAIL;
}
/*
 * Resolve a local fi_addr into its active socket connection, lazily
 * establishing the TCP connection on first use.
 *
 * Fix: idm_lookup() can return NULL for an unpopulated slot (the reverse
 * lookup path already guards this); check before dereferencing instead
 * of crashing.
 */
struct sock_conn *sock_av_lookup_addr(struct sock_av *av,
	fi_addr_t addr)
{
	int index = ((uint64_t)addr & av->mask);
	struct sock_av_addr *av_addr;

	if (index >= av->stored || index < 0) {
		SOCK_LOG_ERROR("requested rank is larger than av table\n");
		errno = EINVAL;
		return NULL;
	}

	if (!av->cmap) {
		SOCK_LOG_ERROR("EP with no AV bound\n");
		errno = EINVAL;
		return NULL;
	}

	av_addr = idm_lookup(&av->addr_idm, index);
	if (!av_addr) {
		SOCK_LOG_ERROR("failed to match or connect to addr %lu\n", addr);
		errno = EINVAL;
		return NULL;
	}

	if (!av_addr->key) {
		/* 0 key: no connection yet; match or dial one now */
		av_addr->key = sock_conn_map_match_or_connect(av->cmap,
			(struct sockaddr_in*)&av_addr->addr, 0);
		if (!av_addr->key) {
			SOCK_LOG_ERROR("failed to match or connect to addr %lu\n", addr);
			errno = EINVAL;
			return NULL;
		}
	}
	return sock_conn_map_lookup_key(av->cmap, av_addr->key);
}
/*
 * Append 'count' IPv4 addresses to the AV's index map.
 *
 * Fix: each entry now gets its own allocation.  The original allocated
 * one batch array but the AV close path frees the pointer stored in each
 * idm slot individually, which is only a valid free() for element 0 --
 * per-entry allocation makes every stored pointer independently
 * freeable.  Entries that fail insertion are released immediately.
 *
 * Returns the number of addresses inserted, or -ENOMEM on allocation
 * failure (entries inserted before the failure remain in the map).
 */
static int sock_check_table_in(struct sock_av *_av, struct sockaddr_in *addr,
	fi_addr_t *fi_addr, int count)
{
	int i, ret;
	struct sock_av_addr *av_addr;

	for (i = 0, ret = 0; i < count; i++) {
		av_addr = calloc(1, sizeof(*av_addr));
		if (!av_addr)
			return -ENOMEM;
		memcpy(&av_addr->addr, &addr[i], sizeof(struct sockaddr_in));
		if (idm_set(&_av->addr_idm, _av->stored, av_addr) < 0) {
			free(av_addr);
			if (fi_addr)
				fi_addr[i] = FI_ADDR_NOTAVAIL;
			continue;
		}
		if (fi_addr)
			fi_addr[i] = (fi_addr_t)_av->stored;
		_av->stored++;
		ret++;
	}
	return ret;
}
static int sock_av_insert(struct fid_av *av, const void *addr, size_t count,
fi_addr_t *fi_addr, uint64_t flags, void *context)
{
struct sock_av *_av;
_av = container_of(av, struct sock_av, av_fid);
_av->table = calloc(count, sizeof(struct sockaddr_in));
if (!_av->table)
return -ENOMEM;
for (i=0; i<count; i++) {
memcpy(&_av->table[i], &((struct sockaddr_in *)addr)[i], sizeof(struct sockaddr_in));
switch(((struct sockaddr *)addr)->sa_family) {
case AF_INET:
return sock_check_table_in(_av, (struct sockaddr_in *)addr,
fi_addr, count);
default:
SOCK_LOG_ERROR("invalid address type inserted: only IPv4 supported\n");
return -EINVAL;
}
_av->count = count;
return 0;
}
static int sock_at_remove(struct fid_av *av, fi_addr_t *fi_addr, size_t count,
@ -72,15 +157,20 @@ static int sock_at_remove(struct fid_av *av, fi_addr_t *fi_addr, size_t count,
static int sock_at_lookup(struct fid_av *av, fi_addr_t fi_addr, void *addr,
size_t *addrlen)
{
int idx;
idx = (int)(int64_t)fi_addr;
int index;
struct sock_av *_av;
struct sock_av_addr *av_addr;
_av = container_of(av, struct sock_av, av_fid);
if (idx >= _av->count || idx < 0)
index = ((uint64_t)fi_addr & _av->mask);
if (index >= _av->stored || index < 0) {
SOCK_LOG_ERROR("requested address not inserted\n");
return -EINVAL;
memcpy(addr, &_av->table[idx], MIN(*addrlen, sizeof(struct sockaddr_in)));
*addrlen = sizeof(struct sockaddr_in);
}
av_addr = idm_lookup(&_av->addr_idm, index);
addr = &av_addr->addr;
*addrlen = _av->addrlen;
return 0;
}
@ -90,26 +180,76 @@ static const char * sock_at_straddr(struct fid_av *av, const void *addr,
return NULL;
}
static int sock_am_insert(struct fid_av *av, const void *addr, size_t count,
fi_addr_t *fi_addr, uint64_t flags, void *context)
int sock_av_insertsvc(struct fid_av *av, const char *node,
const char *service, fi_addr_t *fi_addr,
uint64_t flags, void *context)
{
const struct sockaddr_in *sin;
struct sockaddr_in *fin;
int i;
int ret;
struct addrinfo sock_hints;
struct addrinfo *result = NULL;
if (flags)
return -FI_EBADFLAGS;
if (sizeof(void *) != sizeof(*sin))
return -FI_ENOSYS;
if (!service) {
SOCK_LOG_ERROR("Port not provided\n");
return -FI_EINVAL;
}
sin = addr;
fin = (struct sockaddr_in *) fi_addr;
for (i = 0; i < count; i++)
memcpy(&fin[i], &sin[i], sizeof(*sin));
memset(&sock_hints, 0, sizeof(struct addrinfo));
sock_hints.ai_family = AF_INET;
sock_hints.ai_socktype = SOCK_STREAM;
return 0;
ret = getaddrinfo(node, service, &sock_hints, &result);
if (ret)
return -ret;
ret = sock_av_insert(av, result->ai_addr, 1, fi_addr, flags, context);
freeaddrinfo(result);
return ret;
}
/*
 * Symmetric insert: expand the {node+i} x {service+j} grid of host/port
 * combinations and insert each through sock_av_insertsvc().  A trailing
 * numeric suffix on 'node' is treated as the variable part of the host
 * name; the remainder is a fixed prefix (zero-padded to the suffix width
 * unless the suffix directly follows a '.').
 *
 * Fix: the output slot for pair (i, j) is i * svccnt + j -- the row
 * stride is the number of services.  The old i * nodecnt + j collided
 * and skipped slots whenever nodecnt != svccnt.  Also tolerate a NULL
 * fi_addr, which sock_av_insert already accepts.
 *
 * Returns the number of addresses successfully inserted.
 */
int sock_av_insertsym(struct fid_av *av, const char *node, size_t nodecnt,
	const char *service, size_t svccnt, fi_addr_t *fi_addr,
	uint64_t flags, void *context)
{
	int ret = 0;
	int var_port, var_host;
	char base_host[FI_NAME_MAX] = {0};
	char tmp_host[FI_NAME_MAX] = {0};
	char tmp_port[FI_NAME_MAX] = {0};
	int hostlen, offset = 0, fmt, i, j;

	if (!node || !service) {
		SOCK_LOG_ERROR("Node/service not provided\n");
		return -FI_EINVAL;
	}

	/* count trailing digits of the host name */
	hostlen = strlen(node);
	while (isdigit(*(node + hostlen - (offset + 1))))
		offset++;

	/* suffix right after a '.' gets no zero padding */
	if (*(node + hostlen - offset) == '.')
		fmt = 0;
	else
		fmt = offset;

	strncpy(base_host, node, hostlen - (offset));
	var_port = atoi(service);
	var_host = atoi(node + hostlen - offset);

	for (i = 0; i < nodecnt; i++) {
		for (j = 0; j < svccnt; j++) {
			sprintf(tmp_host, "%s%0*d", base_host, fmt, var_host + i);
			sprintf(tmp_port, "%d", var_port + j);
			if (sock_av_insertsvc(av, tmp_host, tmp_port,
					      fi_addr ? &fi_addr[i * svccnt + j] : NULL,
					      flags, context) == 1)
				ret++;
		}
	}
	return ret;
}
static int sock_am_remove(struct fid_av *av, fi_addr_t *fi_addr, size_t count,
uint64_t flags)
{
@ -119,8 +259,7 @@ static int sock_am_remove(struct fid_av *av, fi_addr_t *fi_addr, size_t count,
static int sock_am_lookup(struct fid_av *av, fi_addr_t fi_addr, void *addr,
size_t *addrlen)
{
memcpy(addr, &fi_addr, MIN(*addrlen, sizeof(struct sockaddr_in)));
*addrlen = sizeof(struct sockaddr_in);
sock_at_lookup(av, fi_addr, addr, addrlen);
return 0;
}
@ -147,12 +286,20 @@ static int sock_av_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
static int sock_av_close(struct fid *fid)
{
struct sock_av *av;
void *addr;
int i;
av = container_of(fid, struct sock_av, av_fid.fid);
if (atomic_get(&av->ref))
return -FI_EBUSY;
atomic_dec(&av->dom->ref);
for (i=0; i<av->stored; i++) {
addr = idm_clear(&av->addr_idm , i);
if (addr)
free(addr);
}
atomic_dec(&av->domain->ref);
free(av);
return 0;
}
@ -167,7 +314,9 @@ static struct fi_ops sock_av_fi_ops = {
static struct fi_ops_av sock_am_ops = {
.size = sizeof(struct fi_ops_av),
.insert = sock_am_insert,
.insert = sock_av_insert,
.insertsvc = sock_av_insertsvc,
.insertsym = sock_av_insertsym,
.remove = sock_am_remove,
.lookup = sock_am_lookup,
.straddr = sock_am_straddr
@ -175,7 +324,9 @@ static struct fi_ops_av sock_am_ops = {
static struct fi_ops_av sock_at_ops = {
.size = sizeof(struct fi_ops_av),
.insert = sock_at_insert,
.insert = sock_av_insert,
.insertsvc = sock_av_insertsvc,
.insertsym = sock_av_insertsym,
.remove = sock_at_remove,
.lookup = sock_at_lookup,
.straddr = sock_at_straddr
@ -214,11 +365,15 @@ int sock_av_open(struct fid_domain *domain, struct fi_av_attr *attr,
{
struct sock_domain *dom;
struct sock_av *_av;
// int ret;
if (attr->name || attr->flags)
if (attr->flags)
return -FI_ENOSYS;
if (attr->rx_ctx_bits > SOCK_EP_MAX_CTX_BITS) {
SOCK_LOG_ERROR("Invalid rx_ctx_bits\n");
return -EINVAL;
}
dom = container_of(domain, struct sock_domain, dom_fid);
_av = calloc(1, sizeof(*_av));
@ -238,43 +393,26 @@ int sock_av_open(struct fid_domain *domain, struct fi_av_attr *attr,
_av->av_fid.ops = &sock_at_ops;
break;
default:
return -FI_ENOSYS;
goto err;
}
#if 0
if (ret)
return ret;
#endif
atomic_init(&_av->ref, 0);
atomic_inc(&dom->ref);
_av->dom = dom;
_av->domain = dom;
switch (dom->info.addr_format) {
case FI_SOCKADDR_IN:
_av->addrlen = sizeof(struct sockaddr_in);
break;
default:
SOCK_LOG_ERROR("Invalid address format: only IPv4 supported\n");
goto err;
}
_av->rx_ctx_bits = attr->rx_ctx_bits;
_av->mask = ((uint64_t)1<<(64 - attr->rx_ctx_bits + 1))-1;
_av->attr = *attr;
*av = &_av->av_fid;
return 0;
}
/* TODO */
fi_addr_t _sock_av_lookup(struct sock_av *av, struct sockaddr *addr)
{
if (av->attr.type == FI_AV_MAP) {
return (fi_addr_t)addr;
} else {
int i;
struct sockaddr_in *addrin;
addrin = (struct sockaddr_in*)addr;
for (i = 0 ; i < av->count ; i++) {
if (av->table[i].sin_addr.s_addr == addrin->sin_addr.s_addr &&
av->table[i].sin_port == addrin->sin_port)
return (fi_addr_t)i;
}
fprintf(stderr, "[sock] failed to lookup src_addr in av table\n");
}
return FI_ADDR_NOTAVAIL;
}
/* place holder */
int sock_av_lookup_addr(struct sock_av *av, fi_addr_t addr,
struct sock_conn **entry)
{
return -FI_ENOSYS;
err:
free(_av);
return -EINVAL;
}

Просмотреть файл

@ -37,26 +37,55 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <sys/socket.h>
#include <sys/types.h>
#include "sock.h"
const struct fi_cntr_attr sock_cntr_attr = {
.events = FI_CNTR_EVENTS_COMP,
.wait_obj = FI_WAIT_MUTEX_COND,
.wait_set = NULL,
.flags = 0,
};
/*
 * Drive manual progress for every TX and RX context bound to this
 * counter by running each through the domain's progress engine.
 * Always returns 0.
 */
int sock_cntr_progress(struct sock_cntr *cntr)
{
	struct dlist_entry *item;
	struct sock_tx_ctx *tx;
	struct sock_rx_ctx *rx;

	for (item = cntr->tx_list.next; item != &cntr->tx_list;
	     item = item->next) {
		tx = container_of(item, struct sock_tx_ctx, cntr_entry);
		sock_pe_progress_tx_ctx(cntr->domain->pe, tx);
	}

	for (item = cntr->rx_list.next; item != &cntr->rx_list;
	     item = item->next) {
		rx = container_of(item, struct sock_rx_ctx, cntr_entry);
		sock_pe_progress_rx_ctx(cntr->domain->pe, rx);
	}

	return 0;
}
static uint64_t sock_cntr_read(struct fid_cntr *cntr)
{
struct sock_cntr *_cntr;
_cntr = container_of(cntr, struct sock_cntr, cntr_fid);
return _cntr->value;
if (_cntr->domain->progress_mode == FI_PROGRESS_MANUAL)
sock_cntr_progress(_cntr);
return atomic_get(&_cntr->value);
}
int sock_cntr_inc(struct sock_cntr *cntr)
{
pthread_mutex_lock(&cntr->mut);
cntr->value += 1;
if (cntr->value >= cntr->threshold)
fastlock_acquire(&cntr->mut);
atomic_inc(&cntr->value);
if (atomic_get(&cntr->value) >= atomic_get(&cntr->threshold))
pthread_cond_signal(&cntr->cond);
pthread_mutex_unlock(&cntr->mut);
fastlock_release(&cntr->mut);
return 0;
}
@ -72,11 +101,11 @@ static int sock_cntr_add(struct fid_cntr *cntr, uint64_t value)
struct sock_cntr *_cntr;
_cntr = container_of(cntr, struct sock_cntr, cntr_fid);
pthread_mutex_lock(&_cntr->mut);
_cntr->value += value;
if (_cntr->value >= _cntr->threshold)
fastlock_acquire(&_cntr->mut);
atomic_set(&_cntr->value, atomic_get(&_cntr->value) + value);
if (atomic_get(&_cntr->value) >= atomic_get(&_cntr->threshold))
pthread_cond_signal(&_cntr->cond);
pthread_mutex_unlock(&_cntr->mut);
fastlock_release(&_cntr->mut);
return 0;
}
@ -85,26 +114,88 @@ static int sock_cntr_set(struct fid_cntr *cntr, uint64_t value)
struct sock_cntr *_cntr;
_cntr = container_of(cntr, struct sock_cntr, cntr_fid);
pthread_mutex_lock(&_cntr->mut);
_cntr->value = value;
if (_cntr->value >= _cntr->threshold)
fastlock_acquire(&_cntr->mut);
atomic_set(&_cntr->value, value);
if (atomic_get(&_cntr->value) >= atomic_get(&_cntr->threshold))
pthread_cond_signal(&_cntr->cond);
pthread_mutex_unlock(&_cntr->mut);
fastlock_release(&_cntr->mut);
return 0;
}
static int sock_cntr_wait(struct fid_cntr *cntr, uint64_t threshold, int timeout)
{
struct sock_cntr *_cntr;
int ret = 0;
struct timeval now;
double start_ms, end_ms;
struct sock_cntr *_cntr;
_cntr = container_of(cntr, struct sock_cntr, cntr_fid);
pthread_mutex_lock(&_cntr->mut);
_cntr->threshold = threshold;
while (_cntr->value < _cntr->threshold && !ret)
fastlock_acquire(&_cntr->mut);
atomic_set(&_cntr->threshold, threshold);
while (atomic_get(&_cntr->value) < atomic_get(&_cntr->threshold) && !ret) {
if (_cntr->domain->progress_mode == FI_PROGRESS_MANUAL) {
if (timeout > 0) {
gettimeofday(&now, NULL);
start_ms = (double)now.tv_sec * 1000.0 +
(double)now.tv_usec / 1000.0;
}
sock_cntr_progress(_cntr);
if (timeout > 0) {
gettimeofday(&now, NULL);
end_ms = (double)now.tv_sec * 1000.0 +
(double)now.tv_usec / 1000.0;
timeout -= (end_ms - start_ms);
timeout = timeout < 0 ? 0 : timeout;
}
}
ret = fi_wait_cond(&_cntr->cond, &_cntr->mut, timeout);
_cntr->threshold = ~0;
pthread_mutex_unlock(&_cntr->mut);
}
atomic_set(&_cntr->threshold, ~0);
fastlock_release(&_cntr->mut);
return -ret;
}
/*
 * fid control handler for counters: expose the wait object and get/set
 * the operation flags.
 *
 * NOTE(review): for FI_GETWAIT the mutex and condvar are packed
 * back-to-back into the caller's buffer; the caller must supply at
 * least sizeof(mut) + sizeof(cond) bytes -- confirm against the fi_cntr
 * man page's expected layout.
 */
int sock_cntr_control(struct fid *fid, int command, void *arg)
{
int ret = 0;
struct sock_cntr *cntr;
cntr = container_of(fid, struct sock_cntr, cntr_fid);
switch (command) {
case FI_GETWAIT:
switch (cntr->attr.wait_obj) {
case FI_WAIT_NONE:
case FI_WAIT_UNSPEC:
case FI_WAIT_MUTEX_COND:
/* copy mutex then condvar, contiguously, into arg */
memcpy(arg, &cntr->mut, sizeof(cntr->mut));
memcpy((char*)arg + sizeof(cntr->mut), &cntr->cond,
sizeof(cntr->cond));
break;
case FI_WAIT_SET:
case FI_WAIT_FD:
/* wait set / fd objects know how to export themselves */
sock_wait_get_obj(cntr->waitset, arg);
break;
default:
ret = -FI_EINVAL;
break;
}
break;
case FI_GETOPSFLAG:
memcpy(arg, &cntr->attr.flags, sizeof(uint64_t));
break;
case FI_SETOPSFLAG:
memcpy(&cntr->attr.flags, arg, sizeof(uint64_t));
break;
default:
ret = -FI_EINVAL;
break;
}
return ret;
}
@ -116,9 +207,12 @@ static int sock_cntr_close(struct fid *fid)
if (atomic_get(&cntr->ref))
return -FI_EBUSY;
pthread_mutex_destroy(&cntr->mut);
if (cntr->signal && cntr->attr.wait_obj == FI_WAIT_FD)
sock_wait_close(&cntr->waitset->fid);
fastlock_destroy(&cntr->mut);
pthread_cond_destroy(&cntr->cond);
atomic_dec(&cntr->dom->ref);
atomic_dec(&cntr->domain->ref);
free(cntr);
return 0;
}
@ -127,6 +221,8 @@ uint64_t sock_cntr_readerr(struct fid_cntr *cntr)
{
struct sock_cntr *_cntr;
_cntr = container_of(cntr, struct sock_cntr, cntr_fid);
if (_cntr->domain->progress_mode == FI_PROGRESS_MANUAL)
sock_cntr_progress(_cntr);
return atomic_get(&_cntr->err_cnt);
}
@ -141,18 +237,45 @@ static struct fi_ops_cntr sock_cntr_ops = {
static struct fi_ops sock_cntr_fi_ops = {
.size = sizeof(struct fi_ops),
.control = sock_cntr_control,
.close = sock_cntr_close,
};
/*
 * Validate user-supplied counter attributes: only completion-event
 * counters, a recognized wait object, and zero flags are accepted.
 * Returns 0 on success, -FI_ENOSYS / -FI_EINVAL otherwise.
 */
static int sock_cntr_verify_attr(struct fi_cntr_attr *attr)
{
	if (attr->events != FI_CNTR_EVENTS_COMP)
		return -FI_ENOSYS;

	switch (attr->wait_obj) {
	case FI_WAIT_NONE:
	case FI_WAIT_UNSPEC:
	case FI_WAIT_MUTEX_COND:
	case FI_WAIT_SET:
	case FI_WAIT_FD:
		break;
	default:
		return -FI_ENOSYS;
	}

	return attr->flags ? -FI_EINVAL : 0;
}
int sock_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
struct fid_cntr **cntr, void *context)
{
int ret;
struct sock_domain *dom;
struct sock_cntr *_cntr;
int ret;
struct fi_wait_attr wait_attr;
struct sock_fid_list *list_entry;
struct sock_wait *wait;
if ((attr->events != FI_CNTR_EVENTS_COMP) ||
(attr->wait_obj != FI_WAIT_MUT_COND) || attr->flags)
if (attr && sock_cntr_verify_attr(attr))
return -FI_ENOSYS;
_cntr = calloc(1, sizeof(*_cntr));
@ -163,27 +286,64 @@ int sock_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
if (ret)
goto err1;
ret = pthread_mutex_init(&_cntr->mut, NULL);
if (ret)
goto err2;
if(attr == NULL)
memcpy(&_cntr->attr, &sock_cntr_add, sizeof(sock_cntr_attr));
else
memcpy(&_cntr->attr, attr, sizeof(sock_cntr_attr));
switch (_cntr->attr.wait_obj) {
case FI_WAIT_NONE:
case FI_WAIT_UNSPEC:
case FI_WAIT_MUTEX_COND:
_cntr->signal = 0;
break;
case FI_WAIT_FD:
wait_attr.flags = 0;
wait_attr.wait_obj = FI_WAIT_FD;
ret = sock_wait_open(domain, &wait_attr, &_cntr->waitset);
if (ret)
goto err1;
_cntr->signal = 1;
break;
case FI_WAIT_SET:
_cntr->waitset = attr->wait_set;
_cntr->signal = 1;
wait = container_of(attr->wait_set, struct sock_wait, wait_fid);
list_entry = calloc(1, sizeof(*list_entry));
dlist_init(&list_entry->entry);
list_entry->fid = &_cntr->cntr_fid.fid;
dlist_insert_after(&list_entry->entry, &wait->fid_list);
break;
default:
break;
}
fastlock_init(&_cntr->mut);
atomic_init(&_cntr->ref, 0);
atomic_init(&_cntr->err_cnt, 0);
atomic_init(&_cntr->value, 0);
atomic_init(&_cntr->threshold, ~0);
dlist_init(&_cntr->tx_list);
dlist_init(&_cntr->rx_list);
_cntr->cntr_fid.fid.fclass = FI_CLASS_CNTR;
_cntr->cntr_fid.fid.context = context;
_cntr->cntr_fid.fid.ops = &sock_cntr_fi_ops;
_cntr->cntr_fid.ops = &sock_cntr_ops;
_cntr->threshold = ~0;
dom = container_of(domain, struct sock_domain, dom_fid);
atomic_inc(&dom->ref);
_cntr->dom = dom;
_cntr->domain = dom;
*cntr = &_cntr->cntr_fid;
return 0;
err2:
pthread_cond_destroy(&_cntr->cond);
err1:
free(_cntr);
return -ret;

Просмотреть файл

@ -0,0 +1,222 @@
/*
* Copyright (c) 2014 Intel Corporation, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <poll.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
#include <pthread.h>
#include "sock.h"
#include "sock_util.h"
/*
 * Push 'len' bytes from buf to the connection's socket in chunks of at
 * most SOCK_COMM_BUF_SZ, stopping at the first short/failed send.
 * Returns the number of bytes actually written (possibly 0).
 *
 * Fix: iterate with a char pointer instead of doing arithmetic on a
 * const void * (a GNU extension, not standard C).
 */
static ssize_t sock_comm_send_socket(struct sock_conn *conn, const void *buf, size_t len)
{
	const char *ptr = buf;
	size_t rem = len;
	size_t chunk, done_len = 0;
	ssize_t ret;

	while (rem > 0) {
		chunk = MIN(rem, SOCK_COMM_BUF_SZ);
		ret = send(conn->sock_fd, ptr, chunk, 0);
		if (ret <= 0)
			break;
		done_len += ret;
		rem -= ret;
		ptr += ret;
	}
	SOCK_LOG_INFO("WROTE %lu on wire\n", done_len);
	return done_len;
}
/*
 * Flush buffered outbound data to the socket.  The ring buffer may wrap,
 * so up to two contiguous segments are sent: [rcnt .. end of buffer],
 * then [start of buffer .. remaining].  The read counter advances by
 * whatever was actually written.  Returns total bytes flushed, 0 if the
 * first send made no progress.
 *
 * NOTE(review): ret1 (ssize_t) is compared with xfer_len (size_t);
 * ret1 > 0 is already established at that point so the comparison is
 * safe, but an explicit cast would silence sign-compare warnings.
 */
ssize_t sock_comm_flush(struct sock_conn *conn)
{
ssize_t ret1, ret2 = 0;
size_t endlen, len, xfer_len;
len = rbused(&conn->outbuf);
/* bytes until the physical end of the ring storage */
endlen = conn->outbuf.size - (conn->outbuf.rcnt & conn->outbuf.size_mask);
xfer_len = MIN(len, endlen);
ret1 = sock_comm_send_socket(conn, conn->outbuf.buf +
(conn->outbuf.rcnt & conn->outbuf.size_mask),
xfer_len);
if (ret1 > 0)
conn->outbuf.rcnt += ret1;
/* first segment fully sent and data wrapped: send the second segment */
if (ret1 == xfer_len && xfer_len < len) {
ret2 = sock_comm_send_socket(conn, conn->outbuf.buf +
(conn->outbuf.rcnt & conn->outbuf.size_mask),
len - xfer_len);
if (ret2 > 0)
conn->outbuf.rcnt += ret2;
else
ret2 = 0;
}
return (ret1 > 0) ? ret1 + ret2 : 0;
}
/*
 * Send data on a connection, buffering small payloads in the outbound
 * ring and bypassing the ring for payloads of SOCK_COMM_THRESHOLD bytes
 * or more (after draining anything already buffered, to preserve
 * ordering).  Returns the number of bytes accepted (possibly 0 on
 * backpressure).
 */
ssize_t sock_comm_send(struct sock_conn *conn, const void *buf, size_t len)
{
ssize_t ret, used;
if (len >= SOCK_COMM_THRESHOLD) {
used = rbused(&conn->outbuf);
/* only bypass the ring if the flush drained it completely */
if (used == sock_comm_flush(conn)) {
return sock_comm_send_socket(conn, buf, len);
} else
return 0;
}
/* make room for a small payload by flushing first */
if (rbavail(&conn->outbuf) < len) {
ret = sock_comm_flush(conn);
if (ret <= 0)
return 0;
}
/* buffer as much as fits; caller must handle a short count */
ret = MIN(rbavail(&conn->outbuf), len);
rbwrite(&conn->outbuf, buf, ret);
rbcommit(&conn->outbuf);
SOCK_LOG_INFO("Buffered %lu\n", ret);
return ret;
}
/*
 * Read up to 'len' bytes straight from the connection's socket.
 * Returns the byte count read, or 0 on error/EOF/no data.
 */
ssize_t sock_comm_recv_socket(struct sock_conn *conn, void *buf, size_t len)
{
	ssize_t nread;

	nread = recv(conn->sock_fd, buf, len, 0);
	if (nread <= 0)
		return 0;

	SOCK_LOG_INFO("READ from wire: %lu\n", nread);
	return nread;
}
/*
 * Opportunistically drain the socket into the inbound ring buffer,
 * filling up to two contiguous segments (tail of the ring storage, then
 * the wrapped head).
 *
 * NOTE(review): the return value is 'ret' only when the first segment
 * was partially filled; both full-fill and early-exit paths return 0 --
 * confirm callers ignore the value (both call sites in this file do).
 * Also, ret (int) vs endlen (size_t) is a sign-compare; safe here since
 * ret > 0 at that point.
 */
ssize_t sock_comm_recv_buffer(struct sock_conn *conn)
{
int ret;
size_t endlen;
/* contiguous space until the physical end of the ring storage */
endlen = conn->inbuf.size -
(conn->inbuf.wpos & conn->inbuf.size_mask);
if ((ret = sock_comm_recv_socket(conn, (char*) conn->inbuf.buf +
(conn->inbuf.wpos & conn->inbuf.size_mask),
endlen)) <= 0)
return 0;
conn->inbuf.wpos += ret;
rbcommit(&conn->inbuf);
/* short read: socket drained, nothing more to pull */
if (ret != endlen)
return ret;
/* first segment filled completely; continue at the wrapped start */
if ((ret = sock_comm_recv_socket(conn, conn->inbuf.buf,
rbavail(&conn->inbuf))) <= 0)
return 0;
conn->inbuf.wpos += ret;
rbcommit(&conn->inbuf);
return 0;
}
/*
 * Receive up to 'len' bytes: serve from the inbound ring buffer first,
 * top up from the socket for any remainder, and refill the ring for
 * future reads.  Returns the total bytes delivered to buf.
 */
ssize_t sock_comm_recv(struct sock_conn *conn, void *buf, size_t len)
{
int ret = 0;
ssize_t used, read_len;
used = rbused(&conn->inbuf);
if (used == 0) {
/* ring empty: read directly, then prefetch into the ring */
ret = sock_comm_recv_socket(conn, buf, len);
sock_comm_recv_buffer(conn);
return ret;
}
/* consume buffered bytes first */
read_len = MIN(len, used);
rbread(&conn->inbuf, buf, read_len);
if (len > used) {
/* caller wants more than was buffered: hit the socket */
ret = sock_comm_recv_socket(conn, (char*)buf + used, len - used);
if (ret <= 0)
ret = 0;
sock_comm_recv_buffer(conn);
}
SOCK_LOG_INFO("Read %lu from buffer\n", ret + read_len);
return ret + read_len;
}
/*
 * Prepare a connection for buffered communication: switch the socket to
 * non-blocking mode, allocate the in/out ring buffers, and size the
 * kernel socket buffers (logging what the kernel actually granted).
 * Always returns 0.
 */
int sock_comm_buffer_init(struct sock_conn *conn)
{
	uint64_t fl;
	socklen_t bufsz = SOCK_COMM_BUF_SZ;
	socklen_t optlen = sizeof(socklen_t);

	fl = fcntl(conn->sock_fd, F_GETFL, 0);
	fcntl(conn->sock_fd, F_SETFL, fl | O_NONBLOCK);

	rbinit(&conn->inbuf, SOCK_COMM_BUF_SZ);
	rbinit(&conn->outbuf, SOCK_COMM_BUF_SZ);

	setsockopt(conn->sock_fd, SOL_SOCKET, SO_RCVBUF, &bufsz, optlen);
	setsockopt(conn->sock_fd, SOL_SOCKET, SO_SNDBUF, &bufsz, optlen);

	getsockopt(conn->sock_fd, SOL_SOCKET, SO_RCVBUF, &bufsz, &optlen);
	SOCK_LOG_INFO("SO_RCVBUF: %d\n", bufsz);

	optlen = sizeof(socklen_t);
	getsockopt(conn->sock_fd, SOL_SOCKET, SO_SNDBUF, &bufsz, &optlen);
	SOCK_LOG_INFO("SO_SNDBUF: %d\n", bufsz);

	return 0;
}
/* Release both ring buffers owned by the connection. */
void sock_comm_buffer_finalize(struct sock_conn *conn)
{
rbfree(&conn->inbuf);
rbfree(&conn->outbuf);
}

Просмотреть файл

@ -0,0 +1,263 @@
/*
* Copyright (c) 2014 Intel Corporation, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <ifaddrs.h>
#include "sock.h"
#include "sock_util.h"
/*
 * Allocate the connection table with room for init_size zeroed entries.
 * Returns 0 on success or -FI_ENOMEM.
 */
static int _init_map(struct sock_conn_map *map, int init_size)
{
	map->table = calloc(init_size, sizeof(*map->table));
	if (!map->table)
		return -FI_ENOMEM;

	map->used = 0;
	map->size = init_size;
	return 0;
}
/*
 * Ensure the connection table can hold 'new_size' more entries, growing
 * it geometrically when needed.
 *
 * Fix: the original passed the OLD map->size to realloc (so the buffer
 * never actually grew) while still doubling the size field, letting
 * later appends write past the allocation.  Compute the grown element
 * count first, realloc to it, and only then update the bookkeeping.
 * Returns 0 on success or -FI_ENOMEM (map left untouched on failure).
 */
static int _increase_map(struct sock_conn_map *map, int new_size)
{
	int grown;
	void *_table;

	if (map->used + new_size > map->size) {
		grown = MAX(map->size, new_size) * 2;
		_table = realloc(map->table, grown * sizeof(struct sock_conn));
		if (!_table)
			return -FI_ENOMEM;
		map->size = grown;
		map->table = (struct sock_conn*) _table;
	}
	return 0;
}
/* Release the connection table and reset the map to its empty state. */
void sock_conn_map_destroy(struct sock_conn_map *cmap)
{
	free(cmap->table);
	cmap->table = NULL;
	cmap->size = 0;
	cmap->used = 0;
}
/*
 * Translate a 1-based connection key into its sock_conn entry.
 *
 * Fix: keys are 1-based (0 means "no connection"), so key == 0 used to
 * index table[-1]; reject out-of-range keys on both ends.
 */
struct sock_conn *sock_conn_map_lookup_key(struct sock_conn_map *conn_map,
	uint16_t key)
{
	if (key < 1 || key > conn_map->used) {
		SOCK_LOG_ERROR("requested key is larger than conn_map size\n");
		errno = EINVAL;
		return NULL;
	}
	return &conn_map->table[key - 1];
}
/*
 * Find an existing connection to 'addr' in the map, or (unless
 * match_only) open a new non-blocking TCP connection with a 5-second
 * timeout and append it.  Returns the 1-based connection key, or 0 on
 * failure / no match.
 *
 * NOTE(review): matching compares only the IP string, not the port --
 * presumably one connection per peer host; confirm.
 * NOTE(review): the select()-timeout error path logs 'optval' before it
 * has been set by getsockopt -- an uninitialized read; should report
 * errno or a fixed message instead.
 * NOTE(review): the append at the end writes map->table[map->used]
 * without calling _increase_map() first -- capacity must be guaranteed
 * elsewhere; verify.
 */
uint16_t sock_conn_map_match_or_connect(struct sock_conn_map *map, struct
sockaddr_in *addr, int match_only)
{
int i, conn_fd, arg, optval;
socklen_t optlen;
char entry_ip[INET_ADDRSTRLEN];
char sa_ip[INET_ADDRSTRLEN];
struct sockaddr_in *entry;
struct timeval tv;
fd_set fds;
struct sock_conn *conn;
memcpy(sa_ip, inet_ntoa(addr->sin_addr), INET_ADDRSTRLEN);
/* match */
for (i=0; i < map->used; i++) {
entry = (struct sockaddr_in *)&map->table[i].addr;
memcpy(entry_ip, inet_ntoa(entry->sin_addr), INET_ADDRSTRLEN);
if(!strcmp(entry_ip, sa_ip)) {
return i+1;
}
}
if (match_only)
return 0;
/* no matching entry, connect */
conn_fd = socket(AF_INET, SOCK_STREAM, 0);
if (conn_fd < 0) {
SOCK_LOG_ERROR("failed to create conn_fd, errno: %d\n", errno);
return 0;
}
/* non-blocking connect so we can bound the wait with select() */
fcntl(conn_fd, F_SETFL, O_NONBLOCK);
if (connect(conn_fd, addr, sizeof *addr) < 0) {
if (errno == EINPROGRESS) {
/* timeout after 5 secs */
tv.tv_sec = 5;
tv.tv_usec = 0;
FD_ZERO(&fds);
FD_SET(conn_fd, &fds);
if (select(conn_fd+1, NULL, &fds, NULL, &tv) > 0) {
/* writable: check the deferred connect() result */
optlen = sizeof(int);
getsockopt(conn_fd, SOL_SOCKET, SO_ERROR, &optval, &optlen);
if (optval) {
SOCK_LOG_ERROR("failed to connect %d - %s\n", optval,
strerror(optval));
close(conn_fd);
return 0;
}
} else {
SOCK_LOG_ERROR("Timeout or error to connect %d - %s\n", optval,
strerror(optval));
close(conn_fd);
return 0;
}
} else {
SOCK_LOG_ERROR("Error connecting %d - %s\n", errno,
strerror(errno));
close(conn_fd);
return 0;
}
}
/* connected: restore blocking mode for normal I/O */
arg = fcntl(conn_fd, F_GETFL, NULL);
arg &= (~O_NONBLOCK);
fcntl(conn_fd, F_SETFL, arg);
/* append the new connection; key is index + 1 */
memcpy(&map->table[map->used].addr, addr, sizeof *addr);
map->table[map->used].sock_fd = conn_fd;
conn = &map->table[map->used];
sock_comm_buffer_init(conn);
map->used++;
return map->used;
}
/*
 * Domain listener thread.  Binds a TCP socket to the domain's service
 * port, then accepts incoming connections and appends each one to the
 * domain's receive connection map until domain->listening is cleared.
 * Always returns NULL (pthread start routine).
 */
static void *_sock_conn_listen(void *arg)
{
	struct sock_domain *domain = (struct sock_domain *) arg;
	struct sock_conn_map *map = &domain->r_cmap;
	struct addrinfo *s_res = NULL, *p;
	struct addrinfo hints;
	int optval;
	/* BUG FIX: initialize to -1, not 0; an empty getaddrinfo result
	 * previously left listen_fd == 0, which passed the < 0 check. */
	int listen_fd = -1, conn_fd;
	struct sockaddr_in remote;
	socklen_t addr_size;
	struct sock_conn *conn;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_INET;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_PASSIVE;

	if (getaddrinfo(NULL, domain->service, &hints, &s_res)) {
		SOCK_LOG_ERROR("no available AF_INET address\n");
		perror("no available AF_INET address");
		return NULL;
	}

	for (p = s_res; p; p = p->ai_next) {
		listen_fd = socket(p->ai_family, p->ai_socktype, p->ai_protocol);
		if (listen_fd >= 0) {
			optval = 1;
			setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &optval,
				   sizeof optval);
			/* BUG FIX: bind the address of the candidate being
			 * iterated (p), not always the first one (s_res). */
			if (!bind(listen_fd, p->ai_addr, p->ai_addrlen))
				break;
			close(listen_fd);
			listen_fd = -1;
		}
	}
	freeaddrinfo(s_res);

	if (listen_fd < 0) {
		SOCK_LOG_ERROR("failed to listen to port: %s\n", domain->service);
		goto err;
	}

	if (listen(listen_fd, 128)) {
		SOCK_LOG_ERROR("failed to listen socket: %d\n", errno);
		goto err;
	}

	while (domain->listening) {
		addr_size = sizeof(struct sockaddr_in);
		conn_fd = accept(listen_fd, (struct sockaddr *) &remote, &addr_size);
		SOCK_LOG_INFO("CONN: accepted conn-req: %d\n", conn_fd);
		if (conn_fd < 0) {
			SOCK_LOG_ERROR("failed to accept: %d\n", errno);
			goto err;
		}

		/* TODO: lock for multi-threads */
		if ((map->size - map->used) == 0)
			_increase_map(map, map->size * 2);

		memcpy(&map->table[map->used].addr, &remote, addr_size);
		map->table[map->used].sock_fd = conn_fd;
		conn = &map->table[map->used];
		sock_comm_buffer_init(conn);
		map->used++;
	}

	close(listen_fd);
	return NULL;

err:
	/* BUG FIX: only close a descriptor we actually own. */
	if (listen_fd >= 0)
		close(listen_fd);
	perror("listening thread failed");
	return NULL;
}
int sock_conn_listen(struct sock_domain *domain)
{
_init_map(&domain->r_cmap, 128); /* TODO: init cmap size */
domain->listening = 1;
pthread_create(&domain->listen_thread, 0, _sock_conn_listen, domain);
return 0;
}

Просмотреть файл

@ -49,6 +49,26 @@
#include "sock_util.h"
/*
 * Drive manual progress for this CQ: hand every TX and RX context bound
 * to the CQ to the domain's progress engine.  Always returns 0.
 */
int sock_cq_progress(struct sock_cq *cq)
{
	struct sock_tx_ctx *tx_ctx;
	struct sock_rx_ctx *rx_ctx;
	struct dlist_entry *entry;

	/* Walk the TX contexts attached to this CQ. */
	for (entry = cq->tx_list.next; entry != &cq->tx_list;
	     entry = entry->next) {
		tx_ctx = container_of(entry, struct sock_tx_ctx, cq_entry);
		sock_pe_progress_tx_ctx(cq->domain->pe, tx_ctx);
	}

	/* Likewise for the RX contexts. */
	for (entry = cq->rx_list.next; entry != &cq->rx_list;
	     entry = entry->next) {
		rx_ctx = container_of(entry, struct sock_rx_ctx, cq_entry);
		sock_pe_progress_rx_ctx(cq->domain->pe, rx_ctx);
	}
	return 0;
}
static ssize_t sock_cq_entry_size(struct sock_cq *sock_cq)
{
ssize_t size;
@ -73,7 +93,7 @@ static ssize_t sock_cq_entry_size(struct sock_cq *sock_cq)
case FI_CQ_FORMAT_UNSPEC:
default:
size = -1;
SOCK_LOG_ERROR("CQ: Invalid CQ format\n");
SOCK_LOG_ERROR("Invalid CQ format\n");
break;
}
return size;
@ -85,9 +105,9 @@ static ssize_t _sock_cq_write(struct sock_cq *cq, fi_addr_t addr,
ssize_t ret;
fastlock_acquire(&cq->lock);
if(rbfdavail(&cq->cq_rbfd) < len) {
if (rbfdavail(&cq->cq_rbfd) < len) {
ret = -FI_ENOSPC;
SOCK_LOG_ERROR("Not enough space in CQ\n");
goto out;
}
@ -98,6 +118,8 @@ static ssize_t _sock_cq_write(struct sock_cq *cq, fi_addr_t addr,
rbwrite(&cq->addr_rb, &addr, sizeof(fi_addr_t));
rbcommit(&cq->addr_rb);
if (cq->signal)
sock_wait_signal(cq->waitset);
out:
fastlock_release(&cq->lock);
return ret;
@ -109,8 +131,9 @@ static ssize_t _sock_cq_writeerr(struct sock_cq *cq,
ssize_t ret;
fastlock_acquire(&cq->lock);
if(rbavail(&cq->cqerr_rb) < len) {
if (rbavail(&cq->cqerr_rb) < len) {
ret = -FI_ENOSPC;
SOCK_LOG_ERROR("Not enough space in CQ\n");
goto out;
}
@ -118,6 +141,8 @@ static ssize_t _sock_cq_writeerr(struct sock_cq *cq,
rbcommit(&cq->cqerr_rb);
ret = len;
if (cq->signal)
sock_wait_signal(cq->waitset);
out:
fastlock_release(&cq->lock);
return ret;
@ -138,7 +163,7 @@ static int sock_cq_report_msg(struct sock_cq *cq, fi_addr_t addr,
struct fi_cq_msg_entry cq_entry;
cq_entry.op_context = (void*)pe_entry->context;
cq_entry.flags = pe_entry->flags;
cq_entry.len = pe_entry->done_len;
cq_entry.len = pe_entry->data_len;
return _sock_cq_write(cq, addr, &cq_entry, sizeof(cq_entry));
}
@ -148,8 +173,8 @@ static int sock_cq_report_data(struct sock_cq *cq, fi_addr_t addr,
struct fi_cq_data_entry cq_entry;
cq_entry.op_context = (void*)pe_entry->context;
cq_entry.flags = pe_entry->flags;
cq_entry.len = pe_entry->done_len;
cq_entry.buf = (void*)pe_entry->rx.rx_iov[0].iov.addr;
cq_entry.len = pe_entry->data_len;
cq_entry.buf = (void*)pe_entry->buf;
cq_entry.data = pe_entry->data;
return _sock_cq_write(cq, addr, &cq_entry, sizeof(cq_entry));
}
@ -160,8 +185,8 @@ static int sock_cq_report_tagged(struct sock_cq *cq, fi_addr_t addr,
struct fi_cq_tagged_entry cq_entry;
cq_entry.op_context = (void*)pe_entry->context;
cq_entry.flags = pe_entry->flags;
cq_entry.len = pe_entry->done_len;
cq_entry.buf = (void*)pe_entry->rx.rx_iov[0].iov.addr;
cq_entry.len = pe_entry->data_len;
cq_entry.buf = (void*)pe_entry->buf;
cq_entry.data = pe_entry->data;
cq_entry.tag = pe_entry->tag;
return _sock_cq_write(cq, addr, &cq_entry, sizeof(cq_entry));
@ -188,7 +213,7 @@ static void sock_cq_set_report_fn(struct sock_cq *sock_cq)
case FI_CQ_FORMAT_UNSPEC:
default:
SOCK_LOG_ERROR("CQ: Invalid CQ format\n");
SOCK_LOG_ERROR("Invalid CQ format\n");
break;
}
}
@ -199,12 +224,30 @@ ssize_t sock_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count,
int ret;
fi_addr_t addr;
int64_t threshold;
ssize_t i, bytes_read, num_read, cq_entry_len;
struct timeval now;
struct sock_cq *sock_cq;
double start_ms, end_ms;
ssize_t i, bytes_read, num_read, cq_entry_len;
sock_cq = container_of(cq, struct sock_cq, cq_fid);
cq_entry_len = sock_cq->cq_entry_size;
if (sock_cq->domain->progress_mode == FI_PROGRESS_MANUAL) {
if (timeout > 0) {
gettimeofday(&now, NULL);
start_ms = (double)now.tv_sec * 1000.0 +
(double)now.tv_usec / 1000.0;
}
sock_cq_progress(sock_cq);
if (timeout > 0) {
gettimeofday(&now, NULL);
end_ms = (double)now.tv_sec * 1000.0 +
(double)now.tv_usec / 1000.0;
timeout -= (end_ms - start_ms);
timeout = timeout < 0 ? 0 : timeout;
}
}
if (sock_cq->attr.wait_cond == FI_CQ_COND_THRESHOLD) {
threshold = MIN((int64_t)cond, count);
}else{
@ -215,7 +258,7 @@ ssize_t sock_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count,
bytes_read = rbfdsread(&sock_cq->cq_rbfd, buf,
cq_entry_len*threshold, timeout);
if(bytes_read == 0) {
if (bytes_read == 0) {
ret = -FI_ETIMEDOUT;
goto out;
}
@ -223,11 +266,10 @@ ssize_t sock_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count,
num_read = bytes_read/cq_entry_len;
for(i=0; i < num_read; i++) {
rbread(&sock_cq->addr_rb, &addr, sizeof(fi_addr_t));
if(src_addr)
if (src_addr)
src_addr[i] = addr;
}
ret = num_read;
out:
fastlock_release(&sock_cq->lock);
return ret;
@ -261,9 +303,12 @@ ssize_t sock_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf,
sock_cq = container_of(cq, struct sock_cq, cq_fid);
num_read = 0;
fastlock_acquire(&sock_cq->lock);
while(rbused(&sock_cq->cqerr_rb) >= sizeof(struct fi_cq_err_entry)) {
if (sock_cq->domain->progress_mode == FI_PROGRESS_MANUAL)
sock_cq_progress(sock_cq);
fastlock_acquire(&sock_cq->lock);
while (rbused(&sock_cq->cqerr_rb) >= sizeof(struct fi_cq_err_entry)) {
rbread(&sock_cq->cqerr_rb,
(char*)buf +sizeof(struct fi_cq_err_entry) * num_read,
sizeof(struct fi_cq_err_entry));
@ -279,7 +324,7 @@ ssize_t sock_cq_write(struct fid_cq *cq, const void *buf, size_t len)
struct sock_cq *sock_cq;
sock_cq = container_of(cq, struct sock_cq, cq_fid);
if(!(sock_cq->attr.flags & FI_WRITE))
if (!(sock_cq->attr.flags & FI_WRITE))
return -FI_EINVAL;
return _sock_cq_write(sock_cq, FI_ADDR_NOTAVAIL, buf, len);
@ -291,7 +336,7 @@ ssize_t sock_cq_writeerr(struct fid_cq *cq, struct fi_cq_err_entry *buf,
struct sock_cq *sock_cq;
sock_cq = container_of(cq, struct sock_cq, cq_fid);
if(!(sock_cq->attr.flags & FI_WRITE))
if (!(sock_cq->attr.flags & FI_WRITE))
return -FI_EINVAL;
return _sock_cq_writeerr(sock_cq, buf, len);
@ -313,6 +358,9 @@ int sock_cq_close(struct fid *fid)
if (atomic_get(&cq->ref))
return -FI_EBUSY;
if (cq->signal && cq->attr.wait_obj == FI_WAIT_MUTEX_COND)
sock_wait_close(&cq->waitset->fid);
rbfree(&cq->addr_rb);
rbfree(&cq->cqerr_rb);
rbfdfree(&cq->cq_rbfd);
@ -335,14 +383,49 @@ struct fi_ops_cq sock_cq_ops = {
.strerror = sock_cq_strerror,
};
/*
 * fi_control handler for a CQ.  Only FI_GETWAIT is supported: it stores
 * the CQ's native wait object in *arg — the read side of the ring-buffer
 * fd for fd-style waits, or the wait-set / mutex-cond object otherwise.
 * Returns 0 on success or -FI_EINVAL for unsupported commands/objects.
 */
static int sock_cq_control(struct fid *fid, int command, void *arg)
{
	struct sock_cq *cq;
	int ret = 0;

	cq = container_of(fid, struct sock_cq, cq_fid);
	switch (command) {
	case FI_GETWAIT:
		switch (cq->attr.wait_obj) {
		case FI_WAIT_NONE:
		case FI_WAIT_FD:
		case FI_WAIT_UNSPEC:
			/* Hand back the readable end of the CQ's rbfd. */
			memcpy(arg, &cq->cq_rbfd.fd[RB_READ_FD], sizeof(int));
			break;
		case FI_WAIT_SET:
		case FI_WAIT_MUTEX_COND:
			sock_wait_get_obj(cq->waitset, arg);
			break;
		default:
			ret = -FI_EINVAL;
			break;
		}
		break;
	default:
		ret = -FI_EINVAL;
		break;
	}
	return ret;
}
struct fi_ops sock_cq_fi_ops = {
.size = sizeof(struct fi_ops),
.control = sock_cq_control,
.close = sock_cq_close,
};
static int sock_cq_verify_attr(struct fi_cq_attr *attr)
{
if(!attr)
if (!attr)
return 0;
switch (attr->format) {
@ -358,6 +441,8 @@ static int sock_cq_verify_attr(struct fi_cq_attr *attr)
switch (attr->wait_obj) {
case FI_WAIT_NONE:
case FI_WAIT_FD:
case FI_WAIT_SET:
case FI_WAIT_MUTEX_COND:
break;
case FI_WAIT_UNSPEC:
attr->wait_obj = FI_WAIT_FD;
@ -384,6 +469,9 @@ int sock_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
{
struct sock_domain *sock_dom;
struct sock_cq *sock_cq;
struct fi_wait_attr wait_attr;
struct sock_fid_list *list_entry;
struct sock_wait *wait;
int ret;
sock_dom = container_of(domain, struct sock_domain, dom_fid);
@ -402,11 +490,13 @@ int sock_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
sock_cq->cq_fid.ops = &sock_cq_ops;
atomic_inc(&sock_dom->ref);
if(attr == NULL)
memcpy(&sock_cq->attr, &_sock_cq_def_attr,
sizeof(struct fi_cq_attr));
else
memcpy(&sock_cq->attr, attr, sizeof(struct fi_cq_attr));
if (attr == NULL)
sock_cq->attr = _sock_cq_def_attr;
else {
sock_cq->attr = *attr;
if (attr->size == 0)
sock_cq->attr.size = _sock_cq_def_attr.size;
}
sock_cq->domain = sock_dom;
sock_cq->cq_entry_size = sock_cq_entry_size(sock_cq);
@ -416,18 +506,49 @@ int sock_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
dlist_init(&sock_cq->rx_list);
dlist_init(&sock_cq->ep_list);
if((ret = rbfdinit(&sock_cq->cq_rbfd, sock_cq->attr.size)))
if ((ret = rbfdinit(&sock_cq->cq_rbfd, sock_cq->attr.size *
sock_cq->cq_entry_size)))
goto err1;
if((ret = rbinit(&sock_cq->addr_rb,
(sock_cq->attr.size/sock_cq->cq_entry_size) * sizeof(fi_addr_t))))
if ((ret = rbinit(&sock_cq->addr_rb,
sock_cq->attr.size * sizeof(fi_addr_t))))
goto err2;
if((ret = rbinit(&sock_cq->cqerr_rb, sock_cq->attr.size)))
if ((ret = rbinit(&sock_cq->cqerr_rb, sock_cq->attr.size *
sizeof(struct fi_cq_err_entry))))
goto err3;
fastlock_init(&sock_cq->lock);
switch (sock_cq->attr.wait_obj) {
case FI_WAIT_NONE:
case FI_WAIT_UNSPEC:
case FI_WAIT_FD:
break;
case FI_WAIT_MUTEX_COND:
wait_attr.flags = 0;
wait_attr.wait_obj = FI_WAIT_MUTEX_COND;
ret = sock_wait_open(&sock_dom->dom_fid, &wait_attr,
&sock_cq->waitset);
if (ret)
goto err3;
sock_cq->signal = 1;
break;
case FI_WAIT_SET:
sock_cq->waitset = attr->wait_set;
sock_cq->signal = 1;
wait = container_of(attr->wait_set, struct sock_wait, wait_fid);
list_entry = calloc(1, sizeof(*list_entry));
dlist_init(&list_entry->entry);
list_entry->fid = &sock_cq->cq_fid.fid;
dlist_insert_after(&list_entry->entry, &wait->fid_list);
break;
default:
break;
}
*cq = &sock_cq->cq_fid;
atomic_inc(&sock_dom->ref);
return 0;
@ -448,8 +569,7 @@ int sock_cq_report_error(struct sock_cq *cq, struct sock_pe_entry *entry,
struct fi_cq_err_entry err_entry;
fastlock_acquire(&cq->lock);
if(rbavail(&cq->cqerr_rb) < sizeof(struct fi_cq_err_entry)) {
if (rbavail(&cq->cqerr_rb) < sizeof(struct fi_cq_err_entry)) {
ret = -FI_ENOSPC;
goto out;
}
@ -457,14 +577,14 @@ int sock_cq_report_error(struct sock_cq *cq, struct sock_pe_entry *entry,
err_entry.err = err;
err_entry.olen = olen;
err_entry.err_data = err_data;
err_entry.len = entry->done_len;
err_entry.len = entry->data_len;
err_entry.prov_errno = prov_errno;
err_entry.flags = entry->flags;
err_entry.data = entry->data;
err_entry.tag = entry->tag;
err_entry.op_context = (void*)entry->context;
if(entry->type == SOCK_PE_RX) {
if (entry->type == SOCK_PE_RX) {
err_entry.buf = (void*)entry->rx.rx_iov[0].iov.addr;
}else {
err_entry.buf = (void*)entry->tx.tx_iov[0].src.iov.addr;

Просмотреть файл

@ -49,10 +49,12 @@ struct sock_rx_ctx *sock_rx_ctx_alloc(struct fi_rx_attr *attr, void *context)
return NULL;
dlist_init(&rx_ctx->cq_entry);
dlist_init(&rx_ctx->cntr_entry);
dlist_init(&rx_ctx->pe_entry);
dlist_init(&rx_ctx->pe_entry_list);
dlist_init(&rx_ctx->rx_entry_list);
dlist_init(&rx_ctx->rx_buffered_list);
dlist_init(&rx_ctx->ep_list);
fastlock_init(&rx_ctx->lock);
@ -63,21 +65,14 @@ struct sock_rx_ctx *sock_rx_ctx_alloc(struct fi_rx_attr *attr, void *context)
return rx_ctx;
}
void sock_rx_ctx_add_ep(struct sock_rx_ctx *rx_ctx, struct sock_ep *ep)
{
fastlock_acquire(&rx_ctx->lock);
dlist_insert_tail(&ep->rx_ctx_entry, &rx_ctx->ep_list);
atomic_inc(&ep->num_rx_ctx);
fastlock_release(&rx_ctx->lock);
}
void sock_rx_ctx_free(struct sock_rx_ctx *rx_ctx)
{
fastlock_destroy(&rx_ctx->lock);
free(rx_ctx);
}
struct sock_tx_ctx *sock_tx_ctx_alloc(struct fi_tx_attr *attr, void *context)
static struct sock_tx_ctx *sock_tx_context_alloc(struct fi_tx_attr *attr,
void *context, size_t fclass)
{
struct sock_tx_ctx *tx_ctx;
@ -89,6 +84,7 @@ struct sock_tx_ctx *sock_tx_ctx_alloc(struct fi_tx_attr *attr, void *context)
goto err;
dlist_init(&tx_ctx->cq_entry);
dlist_init(&tx_ctx->cntr_entry);
dlist_init(&tx_ctx->pe_entry);
dlist_init(&tx_ctx->pe_entry_list);
@ -97,22 +93,35 @@ struct sock_tx_ctx *sock_tx_ctx_alloc(struct fi_tx_attr *attr, void *context)
fastlock_init(&tx_ctx->rlock);
fastlock_init(&tx_ctx->wlock);
tx_ctx->ctx.fid.fclass = FI_CLASS_TX_CTX;
tx_ctx->ctx.fid.context = context;
switch (fclass) {
case FI_CLASS_TX_CTX:
tx_ctx->ctx.fid.fclass = FI_CLASS_TX_CTX;
tx_ctx->ctx.fid.context = context;
break;
case FI_CLASS_STX_CTX:
tx_ctx->stx.fid.fclass = FI_CLASS_TX_CTX;
tx_ctx->stx.fid.context = context;
break;
default:
goto err;
}
tx_ctx->attr = *attr;
return tx_ctx;
err:
free(tx_ctx);
return NULL;
}
void sock_tx_ctx_add_ep(struct sock_tx_ctx *tx_ctx, struct sock_ep *ep)
struct sock_tx_ctx *sock_tx_ctx_alloc(struct fi_tx_attr *attr, void *context)
{
fastlock_acquire(&tx_ctx->lock);
dlist_insert_tail(&ep->tx_ctx_entry, &tx_ctx->ep_list);
atomic_inc(&ep->num_tx_ctx);
fastlock_release(&tx_ctx->lock);
return sock_tx_context_alloc(attr, context, FI_CLASS_TX_CTX);
}
struct sock_tx_ctx *sock_stx_ctx_alloc(struct fi_tx_attr *attr, void *context)
{
return sock_tx_context_alloc(attr, context, FI_CLASS_STX_CTX);
}
void sock_tx_ctx_free(struct sock_tx_ctx *tx_ctx)
@ -136,7 +145,7 @@ void sock_tx_ctx_write(struct sock_tx_ctx *tx_ctx, const void *buf, size_t len)
void sock_tx_ctx_commit(struct sock_tx_ctx *tx_ctx)
{
rbfdcommit(&tx_ctx->rbfd);
fastlock_release(&tx_ctx->rlock);
fastlock_release(&tx_ctx->wlock);
}
void sock_tx_ctx_abort(struct sock_tx_ctx *tx_ctx)
@ -145,19 +154,3 @@ void sock_tx_ctx_abort(struct sock_tx_ctx *tx_ctx)
fastlock_release(&tx_ctx->rlock);
}
int sock_tx_ctx_read(struct sock_tx_ctx *tx_ctx, void *buf, size_t len)
{
int ret;
fastlock_acquire(&tx_ctx->rlock);
if (rbfdused(&tx_ctx->rbfd) >= len) {
rbfdread(&tx_ctx->rbfd, buf, len);
ret = 0;
} else {
ret = -FI_EAGAIN;
}
fastlock_release(&tx_ctx->rlock);
return ret;
}

Просмотреть файл

@ -1,802 +0,0 @@
/*
* Copyright (c) 2014 Intel Corporation, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <poll.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
#include <stdlib.h>
#include "sock_util.h"
#include "sock.h"
/* FIXME: figure out the sockd caps */
#if 0
#define SOCKD_EP_CAP (FI_TAGGED | FI_MSG | FI_ATOMICS | FI_INJECT | \
FI_RMA | FI_BUFFERED_RECV | FI_MULTI_RECV | \
FI_READ | FI_WRITE | FI_SEND | FI_RECV | \
FI_REMOTE_READ | FI_REMOTE_WRITE | \
FI_REMOTE_COMPLETE | FI_REMOTE_SIGNAL | \
FI_CANCEL | FI_TRIGGER)
#endif
#define SOCKD_OP_FLAGS (FI_INJECT | FI_EVENT | \
FI_TRIGGER | FI_REMOTE_SIGNAL | FI_CANCEL)
#define SOCKD_DOMAIN_CAP (FI_WRITE_COHERENT | FI_CONTEXT | \
FI_USER_MR_KEY | FI_DYNAMIC_MR)
#define SOCKD_MTU (512)
static int so_rcvbuf;
/*
 * Validate fi_getinfo() hints against what the sockd (datagram) path
 * supports.  Returns 0 when every requested attribute can be satisfied,
 * or -FI_ENODATA at the first unsupported setting.  "hints" must be
 * non-NULL (callers check before invoking).
 */
int sockd_check_hints(struct fi_info *hints)
{
	/* Only datagram endpoints are handled by this provider path. */
	switch (hints->ep_type) {
	case FI_EP_DGRAM:
		break;
	default:
		SOCK_LOG_ERROR("[sockd] %s: hints->type = %d, only FI_EP_DGRAM = %d is supported\n",
			__func__, hints->ep_type, FI_EP_DGRAM);
		return -FI_ENODATA;
	}

	switch (hints->addr_format) {
	case FI_SOCKADDR:
	case FI_SOCKADDR_IN:
	case FI_SOCKADDR_IN6:
		break;
	default:
		SOCK_LOG_ERROR("[sockd] %s: hints->addr_format = %d, supported = FI_SOCKADDR or FI_SOCKADDR_IN or FI_SOCKADDR_IN6\n",
			__func__, hints->addr_format);
		return -FI_ENODATA;
	}

	if (hints->ep_attr) {
		switch (hints->ep_attr->protocol) {
		case FI_PROTO_UNSPEC:
			break;
		default:
			/*
			SOCK_LOG_ERROR("[sockd] %s: hints->ep_attr->protocol=%lu, supported=%d\n",
				__func__, hints->ep_attr->protocol, FI_PROTO_UNSPEC);
			*/
			return -FI_ENODATA;
		}
		/* Message and inject sizes are capped by the UDP MTU. */
		if (hints->ep_attr->max_msg_size > SOCKD_MTU) {
			/*
			SOCK_LOG_ERROR("[sockd] %s: hints->ep_attr->max_msg_size=%d, supported=%d\n",
				__func__, hints->ep_attr->max_msg_size, SOCKD_MTU);
			*/
			return -FI_ENODATA;
		}
		if (hints->ep_attr->inject_size > SOCKD_MTU) {
			/*
			SOCK_LOG_ERROR("[sockd] %s: hints->ep_attr->inject_size=%d, supported=%d\n",
				__func__, hints->ep_attr->inject_size, SOCKD_MTU);
			*/
			return -FI_ENODATA;
		}
		/* so_rcvbuf is the kernel's default SO_RCVBUF, probed in
		 * sock_dgram_getinfo(). */
		if (hints->ep_attr->total_buffered_recv > so_rcvbuf) {
			/*
			SOCK_LOG_ERROR("[sockd] %s: hints->ep_attr->total_buffered_recv=%d, supported=%d\n",
				__func__, hints->ep_attr->total_buffered_recv, so_rcvbuf);
			*/
			return -FI_ENODATA;
		}
		/* FIXME: check
		 * max_order_raw_size,
		 * max_order_war_size,
		 * max_order_waw_size,
		 * mem_tag_format,
		 * msg_order */
	}

	/* Every requested capability must be a subset of what we offer. */
	if ((hints->caps & SOCK_EP_DGRAM_CAP) != hints->caps) {
		/*
		SOCK_LOG_ERROR("[sockd] %s: hints->ep_cap=0x%llx, supported=0x%llx\n",
			__func__, hints->caps, SOCK_EP_DGRAM_CAP);
		*/
		return -FI_ENODATA;
	}

	if (hints->tx_attr && ((hints->tx_attr->op_flags & SOCKD_OP_FLAGS) != hints->tx_attr->op_flags)) {
		/*
		SOCK_LOG_ERROR("[sockd] %s: hints->tx_attr->op_flags=0x%llx, supported=0x%llx\n",
			__func__, hints->tx_attr->op_flags, SOCKD_OP_FLAGS);
		*/
		return -FI_ENODATA;
	}

#if 0 /* TODO */
	if ((hints->domain_cap & SOCKD_DOMAIN_CAP) != hints->domain_cap) {
		SOCK_LOG_ERROR("[sockd] %s: hints->domain_cap=0x%llx, supported=0x%llx\n",
			__func__, hints->domain_cap, SOCKD_DOMAIN_CAP);
		return -FI_ENODATA;
		/* FIXME: check
		 * threading, control_progress, mr_key_size, eq_data_size */
	}
#endif

	if (hints->fabric_attr) {
		/* FIXME: check name */
	}

	/* A source address with a non-privileged port is mandatory. */
	struct sockaddr_in *si_src;
	if (!hints->src_addr || !hints->src_addrlen) {
		SOCK_LOG_ERROR("[sockd] src_addr and src_addrlen are required from hints\n");
		return -FI_ENODATA;
	} else {
		si_src = (struct sockaddr_in *)(hints->src_addr);
		if (ntohs(si_src->sin_port)<1024) {
			SOCK_LOG_ERROR("[sockd] port number should be above 1023\n");
			return -FI_ENODATA;
		}
		SOCK_LOG_ERROR("[sockd] port is set to %d\n", ntohs(si_src->sin_port));
	}

	return 0;
}
/* TODO */
/* Allocate a zero-initialized fi_info; the caller owns and frees it. */
struct fi_info *__fi_allocinfo()
{
	struct fi_info *info = calloc(1, sizeof(*info));
	return info;
}
/*
 * Build a standalone fi_info describing the sockd datagram provider,
 * seeded from "hints" when given.  hints (if non-NULL) must carry a
 * src_addr.  Returns NULL on failure with all partial allocations
 * released; on success the caller owns the returned structure.
 */
static struct fi_info* sockd_dupinfo(struct fi_info *hints)
{
	struct fi_info *fi;
	if (!(fi = __fi_allocinfo())) {
		goto err1;
	}

	fi->next = NULL;
	fi->ep_type = FI_EP_DGRAM;

	if (hints) {
		fi->caps = hints->caps;
		fi->addr_format = hints->addr_format;
	} else {
		fi->caps = SOCK_EP_DGRAM_CAP;
		fi->addr_format = FI_SOCKADDR;
	}

	fi->ep_attr = calloc(1, sizeof (struct fi_ep_attr));
	if (!fi->ep_attr) {
		goto err2;
	}
	fi->ep_attr->protocol = FI_PROTO_UNSPEC;
	if (hints && hints->ep_attr) {
		fi->ep_attr->max_msg_size = hints->ep_attr->max_msg_size;
		fi->ep_attr->inject_size = hints->ep_attr->inject_size;
		fi->ep_attr->total_buffered_recv = hints->ep_attr->total_buffered_recv;
	} else {
		fi->ep_attr->max_msg_size = SOCKD_MTU;
		fi->ep_attr->inject_size = SOCKD_MTU;
		fi->ep_attr->total_buffered_recv = so_rcvbuf;
	}
	/* fi->ep_attr->mem_tag_format = fi_tag_format(max_tag_value); */
	/* fi->ep_attr->msg_order = FI_ORDER_SAS; */

	fi->domain_attr = calloc(1, sizeof (struct fi_domain_attr));
	if (!fi->domain_attr) {
		goto err3;
	}
	fi->domain_attr->name = strdup("socket");
	fi->domain_attr->threading = FI_THREAD_PROGRESS;
	fi->domain_attr->control_progress = FI_PROGRESS_MANUAL;
	fi->domain_attr->data_progress = FI_PROGRESS_MANUAL; /* FIXME: FI_PROGRESS_AUTO? */
	/* TODO fi->domain_cap = SOCKD_DOMAIN_CAP; */

	fi->fabric_attr = calloc(1, sizeof (struct fi_fabric_attr));
	if (!fi->fabric_attr) {
		goto err4;
	}
	fi->fabric_attr->name = strdup("IP"); /* FIXME: fabric name for socket */
	fi->fabric_attr->prov_name = strdup("socket"); /* FIXME: fabric prov_name for socket */
	/* fi->fabric_attr->prov_version = PROVIDER_VERSION; */

	if (hints && hints->src_addr) {
		fi->src_addr = malloc(hints->src_addrlen);
		if (!fi->src_addr) {
			goto err5;
		}
		memcpy(fi->src_addr, hints->src_addr, hints->src_addrlen);
		fi->src_addrlen = hints->src_addrlen;
	} else {
		SOCK_LOG_ERROR("[sockd] hints must have src_addr\n");
		goto err6;
	}

	/* hints is non-NULL here: the src_addr branch above bailed out
	 * otherwise. */
	if (hints->dest_addr) {
		fi->dest_addr = malloc(hints->dest_addrlen);
		if (!fi->dest_addr) {
			goto err6;
		}
		memcpy(fi->dest_addr, hints->dest_addr, hints->dest_addrlen);
		fi->dest_addrlen = hints->dest_addrlen;
	} else {
		fi->dest_addr = NULL;
		fi->dest_addrlen = 0;
	}

	fi->tx_attr = calloc(1, sizeof (struct fi_tx_attr));
	if (!fi->tx_attr) {
		goto err7;
	}
	if (hints->tx_attr)
		fi->tx_attr->op_flags = hints->tx_attr->op_flags;
	else
		fi->tx_attr->op_flags = SOCKD_OP_FLAGS;

	return fi;

err7:
	free(fi->dest_addr);
err6:
	free(fi->src_addr);
err5:
	/* BUG FIX: the strdup'ed strings inside the attr structs were
	 * leaked by the original unwind path. */
	free(fi->fabric_attr->prov_name);
	free(fi->fabric_attr->name);
	free(fi->fabric_attr);
err4:
	free(fi->domain_attr->name);
	free(fi->domain_attr);
err3:
	free(fi->ep_attr);
err2:
	free(fi);
err1:
	return NULL;
}
/*
 * fi_getinfo handler for the datagram (sockd) endpoint type.  Resolves
 * the optional node/service pair, probes the kernel's default SO_RCVBUF
 * (which caps total_buffered_recv), validates "hints", and returns a
 * freshly allocated fi_info in *info.  Returns 0 or a negative errno.
 */
int sock_dgram_getinfo(uint32_t version, const char *node, const char *service,
		uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
	int ret = 0;
	struct fi_info *sockd_info;
	int sockfd = -1;
	int optval;
	socklen_t optlen;

	*info = NULL;

	/* resolve user specified name or address */
	if (node || service) {
		struct addrinfo *res;
		struct addrinfo sock_hints = {
			.ai_family = AF_INET,
			.ai_socktype = SOCK_DGRAM,
			.ai_protocol = IPPROTO_UDP
		};
		ret = getaddrinfo(node, service, &sock_hints, &res);
		if (ret) {
			SOCK_LOG_ERROR("%s: couldn't getaddrinfo for (%s:%s):%s\n", __func__, node, service, gai_strerror(ret));
			return -FI_ENODATA;
		}
		freeaddrinfo(res);
	}

	/* Probe the default receive-buffer size with a throwaway socket. */
	sockfd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
	if (sockfd < 0) {
		SOCK_LOG_ERROR("%s: couldn't open DGRAM socket\n", __func__);
		return -FI_ENODATA;
	}
	optlen = sizeof(int);
	getsockopt(sockfd, SOL_SOCKET, SO_RCVBUF, (int *)&optval, &optlen);
	so_rcvbuf = optval;

	if (hints) {
		ret = sockd_check_hints(hints);
		if (ret) {
			/* BUG FIX: the probe socket was leaked here. */
			close(sockfd);
			return ret;
		}
	}

	/* dup prov info */
	if (!(sockd_info = sockd_dupinfo(hints))) {
		/* BUG FIX: the probe socket was leaked here too. */
		close(sockfd);
		return -ENOMEM;
	}

	*info = sockd_info;
	close(sockfd);
	return ret;
}
/* sockd_fi_ops */
/* Close a sockd endpoint: shut its socket (if open) and free the object. */
static int sockd_ep_close(fid_t fid)
{
	struct sock_ep *ep = container_of(fid, struct sock_ep, ep.fid);

	if (ep->sock_fd && close(ep->sock_fd)) {
		SOCK_LOG_ERROR("[sockd] cannot close sock_fd\n");
		return -FI_ENODATA;
	}
	free(ep);
	return 0;
}
/*
 * Bind a counter, CQ, EQ, or AV to a sockd endpoint according to
 * "flags".  Each slot may be bound only once; rebinding returns -EINVAL.
 * Returns 0 on success, a negative errno on invalid flags, or
 * -FI_ENOSYS for unsupported fid classes.
 */
static int sockd_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
{
	struct sock_ep *ep;
	struct sock_cntr *cntr;
	struct sock_eq *eq;
	struct sock_cq *cq;
	struct sock_av *av;

	ep = container_of(fid, struct sock_ep, ep.fid);

	switch (bfid->fclass) {
	case FI_CLASS_CNTR:
		SOCK_LOG_ERROR("[sockd] bind counter to ep\n");
		cntr = container_of(bfid, struct sock_cntr, cntr_fid.fid);
		/* A counter bind must request at least one direction. */
		if (!(flags &
			(FI_WRITE | FI_READ | FI_SEND | FI_RECV))) {
			SOCK_LOG_ERROR("[sockd] Counter only support FI_WRITE | FI_READ | FI_SEND | FI_RECV\n");
			errno = FI_EINVAL;
			return -errno;
		}
		/* Each direction slot may be set only once. */
		if (flags & FI_WRITE) {
			if (ep->write_cntr)
				return -EINVAL;
			ep->write_cntr = cntr;
		}
		if (flags & FI_READ) {
			if (ep->read_cntr)
				return -EINVAL;
			ep->read_cntr = cntr;
		}
		if (flags & FI_SEND) {
			if (ep->send_cntr)
				return -EINVAL;
			ep->send_cntr = cntr;
		}
		if (flags & FI_RECV) {
			if (ep->recv_cntr)
				return -EINVAL;
			ep->recv_cntr = cntr;
		}
		break;

	case FI_CLASS_CQ:
		SOCK_LOG_ERROR("[sockd] bind CQ to ep\n");
		cq = container_of(bfid, struct sock_cq, cq_fid.fid);
		if (!(flags &
			(FI_SEND | FI_RECV))) {
			SOCK_LOG_ERROR("[sockd] CQ only support FI_SEND | FI_RECV\n");
			errno = FI_EINVAL;
			return -errno;
		}
		if (flags & FI_SEND) {
			if (ep->send_cq)
				return -EINVAL;
			ep->send_cq = cq;
		}
		if (flags & FI_RECV) {
			if (ep->recv_cq)
				return -EINVAL;
			ep->recv_cq = cq;
		}
		/*
		if(enqueue_item(cq->ep_list, ep)) {
			return -ENOMEM;
		}
		*/
		break;

	case FI_CLASS_EQ:
		SOCK_LOG_ERROR("[sockd] bind EQ to ep\n");
		/* FIXME: bind EQ to sockd EP */
		eq = container_of(bfid, struct sock_eq, eq.fid);
		if (ep->eq) {
			return -EINVAL;
		}
		ep->eq = eq;
		break;

	case FI_CLASS_AV:
		SOCK_LOG_ERROR("[sockd] bind AV to ep\n");
		av = container_of(bfid,
				struct sock_av, av_fid.fid);
		/* The AV must belong to the same domain as the endpoint. */
		if (ep->domain != av->dom)
			return -EINVAL;
		ep->av = av;
		break;

	default:
		return -FI_ENOSYS;
	}

	return 0;
}
/* fi_control on a sockd endpoint is not implemented. */
static int sockd_ep_control(fid_t fid, int command, void *arg)
{
	errno = FI_ENOSYS;
	return -errno;
}

/* Opening provider-specific ops on a sockd endpoint is not implemented. */
static int sockd_ep_ops_open(struct fid *fid, const char *name,
			uint64_t flags, void **ops, void *context)
{
	errno = FI_ENOSYS;
	return -errno;
}
/* sockd_ops_ep */
/* Mark the endpoint as enabled; no other activation work is required. */
static int sockd_ep_enable(struct fid_ep *ep)
{
	struct sock_ep *sock_ep = container_of(ep, struct sock_ep, ep);

	if (!sock_ep)
		return -FI_EINVAL;

	sock_ep->enabled = 1;
	return 0;
}
/* Cancelling an outstanding operation is not supported. */
static ssize_t sockd_ep_cancel(fid_t fid, void *context)
{
	errno = FI_ENOSYS;
	return -errno;
}

/* Endpoint option get/set are not supported. */
static int sockd_ep_getopt(fid_t fid, int level, int optname,
			void *optval, size_t *optlen)
{
	errno = FI_ENOSYS;
	return -errno;
}

static int sockd_ep_setopt(fid_t fid, int level, int optname,
			const void *optval, size_t optlen)
{
	errno = FI_ENOSYS;
	return -errno;
}

/* Scalable-endpoint TX/RX context creation is not supported. */
static int sockd_ep_tx_ctx(struct fid_sep *sep, int index,
			struct fi_tx_attr *attr, struct fid_ep **tx_ep,
			void *context)
{
	errno = FI_ENOSYS;
	return -errno;
}

static int sockd_ep_rx_ctx(struct fid_sep *sep, int index,
			struct fi_rx_attr *attr, struct fid_ep **rx_ep,
			void *context)
{
	errno = FI_ENOSYS;
	return -errno;
}
/* sockd_ops_cm */
/* Querying the endpoint's local name is not implemented. */
static int sockd_cm_getname(fid_t fid, void *addr, size_t *addrlen)
{
	errno = FI_ENOSYS;
	return -errno;
}

/* Multicast join/leave are not implemented for sockd endpoints. */
static int sockd_cm_join(struct fid_ep *ep, void *addr, fi_addr_t *fi_addr,
			uint64_t flags, void *context)
{
	errno = FI_ENOSYS;
	return -errno;
}

static int sockd_cm_leave(struct fid_ep *ep, void *addr, fi_addr_t fi_addr,
			uint64_t flags)
{
	errno = FI_ENOSYS;
	return -errno;
}
/* sockd_ops_msg */
/*
 * Queue a datagram receive of "len" bytes into "buf" from "src_addr"
 * (an AV-map sockaddr pointer or an AV-table index, depending on the
 * AV type).  The progress thread completes the request later.
 * Returns 0 on success or a negative errno value.
 */
static ssize_t sockd_msg_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
		fi_addr_t src_addr, void *context)
{
	struct sock_ep *sock_ep;
	struct sock_req_item *recv_req;

	sock_ep = container_of(ep, struct sock_ep, ep);
	if (!sock_ep)
		return -FI_EINVAL;

	recv_req = calloc(1, sizeof(struct sock_req_item));
	if (!recv_req)
		return -FI_ENOMEM;

	recv_req->item.buf = (void *) buf;
	recv_req->req_type = SOCK_REQ_TYPE_RECV;
	recv_req->comm_type = SOCK_COMM_TYPE_SENDTO;
	recv_req->context = context;
	recv_req->total_len = len;
	recv_req->done_len = 0;

	if (sock_ep->av->attr.type == FI_AV_MAP) {
		memcpy(&recv_req->addr, (void *) src_addr,
		       sizeof(struct sockaddr_in));
	} else {
		size_t idx = (size_t) src_addr;
		/* idx is unsigned, so only the upper bound can be violated
		 * (the original "idx < 0" test was always false, and its
		 * "count - 1" comparison underflowed for an empty AV). */
		if (idx >= sock_ep->av->count) {
			/* BUG FIX: recv_req was leaked on this path. */
			free(recv_req);
			return -EINVAL;
		}
		memcpy(&recv_req->addr, &sock_ep->av->table[idx],
		       sizeof(struct sockaddr_in));
	}

	if (0 != enqueue_item(sock_ep->recv_list, recv_req)) {
		free(recv_req);
		return -FI_ENOMEM;
	}
	return 0;
}
/* Vectored and fi_msg-based receives are not implemented yet. */
static ssize_t sockd_msg_recvv(struct fid_ep *ep, const struct iovec *iov, void **desc,
			size_t count, fi_addr_t src_addr, void *context)
{
	errno = FI_ENOSYS;
	return -errno;
}

static ssize_t sockd_msg_recvmsg(struct fid_ep *ep, const struct fi_msg *msg,
			uint64_t flags)
{
	errno = FI_ENOSYS;
	return -errno;
}
/*
 * Queue a datagram send of "len" bytes from "buf" to "dest_addr" (an
 * AV-map sockaddr pointer or an AV-table index, depending on the AV
 * type).  The progress thread performs the actual sendto() later.
 * Returns 0 on success or a negative errno value.
 */
static ssize_t sockd_msg_send(struct fid_ep *ep, const void *buf, size_t len, void *desc,
		fi_addr_t dest_addr, void *context)
{
	struct sock_ep *sock_ep;
	struct sock_req_item *send_req;

	sock_ep = container_of(ep, struct sock_ep, ep);
	if (!sock_ep)
		return -FI_EINVAL;

	send_req = calloc(1, sizeof(struct sock_req_item));
	if (!send_req)
		return -FI_ENOMEM;

	send_req->item.buf = (void *) buf;
	send_req->req_type = SOCK_REQ_TYPE_SEND;
	send_req->comm_type = SOCK_COMM_TYPE_SENDTO;
	send_req->context = context;
	send_req->total_len = len;
	send_req->done_len = 0;

	if (sock_ep->av->attr.type == FI_AV_MAP) {
		memcpy(&send_req->addr, (void *) dest_addr,
		       sizeof(struct sockaddr_in));
	} else {
		size_t idx = (size_t) dest_addr;
		/* idx is unsigned, so only the upper bound can be violated
		 * (the original "idx < 0" test was always false, and its
		 * "count - 1" comparison underflowed for an empty AV). */
		if (idx >= sock_ep->av->count) {
			/* BUG FIX: send_req was leaked on this path. */
			free(send_req);
			return -EINVAL;
		}
		memcpy(&send_req->addr, &sock_ep->av->table[idx],
		       sizeof(struct sockaddr_in));
	}

	if (0 != enqueue_item(sock_ep->send_list, send_req)) {
		free(send_req);
		return -FI_ENOMEM;
	}
	return 0;
}
/* Vectored send: not implemented. */
static ssize_t sockd_msg_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
		size_t count, fi_addr_t dest_addr, void *context)
{
	errno = FI_ENOSYS;
	return -FI_ENOSYS;
}

/* fi_msg-described send: not implemented. */
static ssize_t sockd_msg_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
		uint64_t flags)
{
	errno = FI_ENOSYS;
	return -FI_ENOSYS;
}

/* Inject (buffered fire-and-forget) send: not implemented. */
static ssize_t sockd_msg_inject(struct fid_ep *ep, const void *buf, size_t len,
		fi_addr_t dest_addr)
{
	errno = FI_ENOSYS;
	return -FI_ENOSYS;
}

/* Send with remote CQ data: not implemented. */
static ssize_t sockd_msg_senddata(struct fid_ep *ep, const void *buf, size_t len, void *desc,
		uint64_t data, fi_addr_t dest_addr, void *context)
{
	errno = FI_ENOSYS;
	return -FI_ENOSYS;
}
/* fid-level operations (lifecycle/bind/control) for the sockd dgram endpoint. */
static struct fi_ops sockd_ep_fi_ops = {
	.size = sizeof(struct fi_ops),
	.close = sockd_ep_close,
	.bind = sockd_ep_bind,
	.control = sockd_ep_control,
	.ops_open = sockd_ep_ops_open
};

/* Endpoint-level operations (options, contexts) for the sockd dgram endpoint. */
static struct fi_ops_ep sockd_ops_ep = {
	.size = sizeof(struct fi_ops_ep),
	.cancel = sockd_ep_cancel,
	.getopt = sockd_ep_getopt,
	.setopt = sockd_ep_setopt,
	.enable = sockd_ep_enable,
	.tx_ctx = sockd_ep_tx_ctx,
	.rx_ctx = sockd_ep_rx_ctx,
};

/* Connection management: only getname/join/leave are provided; the
 * connection-oriented entry points are the fi_no_* rejection stubs. */
static struct fi_ops_cm sockd_ops_cm = {
	.size = sizeof(struct fi_ops_cm),
	.getname = sockd_cm_getname,
	.getpeer = fi_no_getpeer,
	.connect = fi_no_connect,
	.listen = fi_no_listen,
	.accept = fi_no_accept,
	.reject = fi_no_reject,
	.shutdown = fi_no_shutdown,
	.join = sockd_cm_join,
	.leave = sockd_cm_leave
};

/* Message operations; most entries are FI_ENOSYS stubs defined above. */
static struct fi_ops_msg sockd_ops_msg = {
	.size = sizeof(struct fi_ops_msg),
	.recv = sockd_msg_recv,
	.recvv = sockd_msg_recvv,
	.recvmsg = sockd_msg_recvmsg,
	.send = sockd_msg_send,
	.sendv = sockd_msg_sendv,
	.sendmsg = sockd_msg_sendmsg,
	.inject = sockd_msg_inject,
	.senddata = sockd_msg_senddata,
	.injectdata = fi_no_msg_injectdata,
};
/*
 * Placeholder progress function for the dgram endpoint.
 * NOTE(review): items dequeued here are neither processed, requeued nor
 * freed, so any queued request is silently dropped and leaked, and the
 * ERROR log level is used for what look like debug traces. This appears
 * to be unfinished scaffolding (always returns -FI_ENOSYS) — confirm
 * before wiring it into the progress path.
 */
static inline int _sock_ep_dgram_progress(struct sock_ep *ep, struct sock_cq *cq)
{
	struct sock_req_item *item;
	if((item = dequeue_item(ep->send_list))) {
		SOCK_LOG_ERROR("[ep_dgram_progress] found a send req\n");
	}
	if((item = dequeue_item(ep->recv_list))) {
		SOCK_LOG_ERROR("[ep_dgram_progress] found a recv req\n");
	}
	return -FI_ENOSYS;
}
/*
 * Create a UDP-backed DGRAM endpoint on the given domain.
 * Allocates the endpoint, opens and binds a UDP socket to the port taken
 * from info->src_addr (any local address), and creates the send/recv
 * request queues. On success *ep receives the new fid_ep.
 * Returns 0 on success or a negative fabric/errno code.
 */
int sock_dgram_ep(struct fid_domain *domain, struct fi_info *info,
		struct fid_ep **ep, void *context)
{
	struct sock_ep *_ep;
	struct sock_domain *_dom;
	struct sockaddr_in si_me;

	SOCK_LOG_ERROR("[sockd] enter sock_dgram_ep\n");

	_dom = container_of(domain, struct sock_domain, dom_fid);
	if (!_dom)
		return -FI_EINVAL;

	/* info->src_addr is dereferenced below for the bind port; the
	 * original crashed on a NULL info or src_addr. */
	if (!info || !info->src_addr)
		return -FI_EINVAL;

	_ep = calloc(1, sizeof(*_ep));
	if (!_ep)
		return -FI_ENOMEM;

	_ep->ep.fid.fclass = FI_CLASS_EP;
	_ep->ep.fid.context = context;
	_ep->ep.fid.ops = &sockd_ep_fi_ops;
	_ep->ep.ops = &sockd_ops_ep;
	_ep->ep.cm = &sockd_ops_cm;
	_ep->ep.msg = &sockd_ops_msg;
	_ep->ep.rma = NULL;
	_ep->ep.tagged = NULL;
	_ep->ep.atomic = NULL;
	_ep->domain = _dom;

	_ep->sock_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
	if (_ep->sock_fd < 0) {
		SOCK_LOG_ERROR("%s: couldn't open DGRAM socket\n", __func__);
		errno = FI_ENODATA;
		goto err1;
	}

	/* Zero the whole sockaddr (padding included) before bind();
	 * the original left si_me partially uninitialized. */
	memset(&si_me, 0, sizeof(si_me));
	si_me.sin_family = AF_INET;
	si_me.sin_port = ((struct sockaddr_in *)(info->src_addr))->sin_port;
	si_me.sin_addr.s_addr = htonl(INADDR_ANY);
	/* bind() takes struct sockaddr *; add the missing cast. */
	if (bind(_ep->sock_fd, (struct sockaddr *)&si_me, sizeof(si_me)) == -1) {
		SOCK_LOG_ERROR("[sockd] %s: failed to bind sock_fd to port %d\n", __func__, ntohs(si_me.sin_port));
		goto err2;
	}
	_ep->port_num = ntohs(si_me.sin_port);

	if (!(_ep->send_list = new_list(SOCK_CQ_DEF_SZ)))
		goto err2;
	if (!(_ep->recv_list = new_list(SOCK_CQ_DEF_SZ)))
		goto err3;

	*ep = &_ep->ep;
	return 0;

err3:
	free_list(_ep->send_list);
err2:
	close(_ep->sock_fd);
err1:
	free(_ep);
	return -errno;
}

Просмотреть файл

@ -40,6 +40,8 @@
#include "sock.h"
#include "sock_util.h"
extern const char const sock_dom_name[];
const struct fi_domain_attr sock_domain_attr = {
.name = NULL,
.threading = FI_THREAD_SAFE,
@ -48,8 +50,8 @@ const struct fi_domain_attr sock_domain_attr = {
.mr_key_size = 0,
.cq_data_size = sizeof(uint64_t),
.ep_cnt = SOCK_EP_MAX_EP_CNT,
.tx_ctx_cnt = 0,
.rx_ctx_cnt = 0,
.tx_ctx_cnt = SOCK_EP_MAX_TX_CNT,
.rx_ctx_cnt = SOCK_EP_MAX_RX_CNT,
.max_ep_tx_ctx = SOCK_EP_MAX_TX_CNT,
.max_ep_rx_ctx = SOCK_EP_MAX_RX_CNT,
};
@ -77,9 +79,9 @@ int sock_verify_domain_attr(struct fi_domain_attr *attr)
switch (attr->control_progress){
case FI_PROGRESS_UNSPEC:
case FI_PROGRESS_AUTO:
case FI_PROGRESS_MANUAL:
break;
case FI_PROGRESS_MANUAL:
default:
SOCK_LOG_INFO("Control progress mode not supported!\n");
return -FI_ENODATA;
@ -88,9 +90,9 @@ int sock_verify_domain_attr(struct fi_domain_attr *attr)
switch (attr->data_progress){
case FI_PROGRESS_UNSPEC:
case FI_PROGRESS_AUTO:
case FI_PROGRESS_MANUAL:
break;
case FI_PROGRESS_MANUAL:
default:
SOCK_LOG_INFO("Data progress mode not supported!\n");
return -FI_ENODATA;
@ -114,11 +116,25 @@ int sock_verify_domain_attr(struct fi_domain_attr *attr)
static int sock_dom_close(struct fid *fid)
{
struct sock_domain *dom;
void *res;
dom = container_of(fid, struct sock_domain, dom_fid.fid);
if (atomic_get(&dom->ref))
if (atomic_get(&dom->ref)) {
return -FI_EBUSY;
}
dom->listening = 0;
if (pthread_join(dom->listen_thread, &res)) {
SOCK_LOG_ERROR("could not join listener thread, errno = %d\n", errno);
return -FI_EBUSY;
}
if (dom->u_cmap.size)
sock_conn_map_destroy(&dom->u_cmap);
if (dom->r_cmap.size)
sock_conn_map_destroy(&dom->r_cmap);
sock_pe_finalize(dom->pe);
fastlock_destroy(&dom->lock);
free(dom);
return 0;
@ -141,7 +157,7 @@ static int sock_mr_close(struct fid *fid)
struct sock_mr *mr;
mr = container_of(fid, struct sock_mr, mr_fid.fid);
dom = mr->dom;
dom = mr->domain;
fastlock_acquire(&dom->lock);
idm_clear(&dom->mr_idm , (int) mr->mr_fid.key);
fastlock_release(&dom->lock);
@ -150,37 +166,71 @@ static int sock_mr_close(struct fid *fid)
return 0;
}
/*
 * Bind a completion object to a memory region.
 * Accepts a CQ or a counter; the bound object is asserted to belong to the
 * same domain as the MR. Any other fid class is rejected with -FI_EINVAL.
 * The 'flags' argument is currently unused.
 */
static int sock_mr_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
{
	struct sock_cntr *cntr;
	struct sock_cq *cq;
	struct sock_mr *mr;

	mr = container_of(fid, struct sock_mr, mr_fid.fid);
	switch (bfid->fclass) {
	case FI_CLASS_CQ:
		cq = container_of(bfid, struct sock_cq, cq_fid.fid);
		assert(mr->domain == cq->domain);
		mr->cq = cq;
		break;

	case FI_CLASS_CNTR:
		cntr = container_of(bfid, struct sock_cntr, cntr_fid.fid);
		assert(mr->domain == cntr->domain);
		mr->cntr = cntr;
		break;

	default:
		return -FI_EINVAL;
	}
	return 0;
}
static struct fi_ops sock_mr_fi_ops = {
.size = sizeof(struct fi_ops),
.close = sock_mr_close,
.bind = fi_no_bind,
.bind = sock_mr_bind,
.control = fi_no_control,
.ops_open = fi_no_ops_open,
};
int sock_mr_verify_key(struct sock_domain *domain, uint16_t key,
void *buf, size_t len, uint64_t access)
/* Look up the MR registered under 'key' in the domain's index map;
 * returns NULL when no such registration exists. */
struct sock_mr * sock_mr_get_entry(struct sock_domain *domain, uint16_t key)
{
	void *entry = idm_lookup(&domain->mr_idm, key);
	return (struct sock_mr *)entry;
}
struct sock_mr *sock_mr_verify_key(struct sock_domain *domain, uint16_t key,
void *buf, size_t len, uint64_t access)
{
int i;
struct sock_mr *mr;
mr = idm_lookup(&domain->mr_idm, key);
if (!mr)
return -FI_EINVAL;
return NULL;
if (mr->flags & FI_MR_OFFSET)
buf = (char*)buf + mr->offset;
for (i = 0; i < mr->iov_count; i++) {
if ((uintptr_t)buf >= (uintptr_t)mr->mr_iov[i].iov_base &&
((uintptr_t)buf + len <= (uintptr_t) mr->mr_iov[i].iov_base +
mr->mr_iov[i].iov_len)) {
if ((access & mr->access) == access)
return 0;
return mr;
}
}
SOCK_LOG_ERROR("MR check failed\n");
return -FI_EINVAL;
return NULL;
}
int sock_mr_verify_desc(struct sock_domain *domain, void *desc,
struct sock_mr *sock_mr_verify_desc(struct sock_domain *domain, void *desc,
void *buf, size_t len, uint64_t access)
{
uint64_t key = (uint64_t)desc;
@ -209,9 +259,9 @@ static int sock_regattr(struct fid_domain *domain, const struct fi_mr_attr *attr
_mr->mr_fid.fid.context = attr->context;
_mr->mr_fid.fid.ops = &sock_mr_fi_ops;
atomic_inc(&dom->ref);
_mr->dom = dom;
_mr->domain = dom;
_mr->access = attr->access;
_mr->flags = flags;
_mr->offset = (flags & FI_MR_OFFSET) ?
attr->offset : (uintptr_t) attr->mr_iov[0].iov_base;
@ -228,6 +278,7 @@ static int sock_regattr(struct fid_domain *domain, const struct fi_mr_attr *attr
memcpy(&_mr->mr_iov, attr->mr_iov, sizeof(_mr->mr_iov) * attr->iov_count);
*mr = &_mr->mr_fid;
atomic_inc(&dom->ref);
if (dom->mr_eq) {
eq_entry.fid = &domain->fid;
@ -240,7 +291,6 @@ static int sock_regattr(struct fid_domain *domain, const struct fi_mr_attr *attr
err:
fastlock_release(&dom->lock);
atomic_dec(&dom->ref);
free(_mr);
return -errno;
}
@ -299,6 +349,23 @@ int sock_endpoint(struct fid_domain *domain, struct fi_info *info,
return sock_rdm_ep(domain, info, ep, context);
case FI_EP_DGRAM:
return sock_dgram_ep(domain, info, ep, context);
case FI_EP_MSG:
return sock_msg_ep(domain, info, ep, context);
default:
return -FI_ENOPROTOOPT;
}
}
int sock_scalable_ep(struct fid_domain *domain, struct fi_info *info,
struct fid_sep **sep, void *context)
{
switch (info->ep_type) {
case FI_EP_RDM:
return sock_rdm_sep(domain, info, sep, context);
case FI_EP_DGRAM:
return sock_dgram_sep(domain, info, sep, context);
case FI_EP_MSG:
return sock_msg_sep(domain, info, sep, context);
default:
return -FI_ENOPROTOOPT;
}
@ -317,9 +384,12 @@ static struct fi_ops_domain sock_dom_ops = {
.av_open = sock_av_open,
.cq_open = sock_cq_open,
.endpoint = sock_endpoint,
.scalable_ep = sock_scalable_ep,
.cntr_open = sock_cntr_open,
.wait_open = sock_wait_open,
.poll_open = sock_poll_open,
.stx_ctx = sock_stx_ctx,
.srx_ctx = sock_srx_ctx,
};
static struct fi_ops_mr sock_dom_mr_ops = {
@ -329,54 +399,6 @@ static struct fi_ops_mr sock_dom_mr_ops = {
.regattr = sock_regattr,
};
/*
 * Validate caller-supplied domain attributes against provider limits.
 * Returns 0 when acceptable, -FI_ENODATA otherwise.
 * NOTE(review): this appears to duplicate sock_verify_domain_attr()
 * elsewhere in the provider — confirm which one is canonical.
 */
int _sock_verify_domain_attr(struct fi_domain_attr *attr)
{
	/* Domain name, if given, must match this provider's name. */
	if(attr->name){
		if (strcmp(attr->name, sock_dom_name))
			return -FI_ENODATA;
	}

	switch(attr->threading){
	case FI_THREAD_UNSPEC:
	case FI_THREAD_SAFE:
	case FI_THREAD_PROGRESS:
		break;
	default:
		SOCK_LOG_INFO("Invalid threading model!\n");
		return -FI_ENODATA;
	}

	/* Only automatic progress is supported; manual is rejected. */
	switch (attr->control_progress){
	case FI_PROGRESS_UNSPEC:
	case FI_PROGRESS_AUTO:
		break;

	case FI_PROGRESS_MANUAL:
	default:
		SOCK_LOG_INFO("Control progress mode not supported!\n");
		return -FI_ENODATA;
	}

	switch (attr->data_progress){
	case FI_PROGRESS_UNSPEC:
	case FI_PROGRESS_AUTO:
		break;

	case FI_PROGRESS_MANUAL:
	default:
		SOCK_LOG_INFO("Data progress mode not supported!\n");
		return -FI_ENODATA;
	}

	if(attr->max_ep_tx_ctx > SOCK_EP_MAX_TX_CNT)
		return -FI_ENODATA;

	if(attr->max_ep_rx_ctx > SOCK_EP_MAX_RX_CNT)
		return -FI_ENODATA;

	return 0;
}
int sock_domain(struct fid_fabric *fabric, struct fi_info *info,
struct fid_domain **dom, void *context)
{
@ -384,7 +406,7 @@ int sock_domain(struct fid_fabric *fabric, struct fi_info *info,
struct sock_domain *sock_domain;
if(info && info->domain_attr){
ret = _sock_verify_domain_attr(info->domain_attr);
ret = sock_verify_domain_attr(info->domain_attr);
if(ret)
return ret;
}
@ -396,12 +418,47 @@ int sock_domain(struct fid_fabric *fabric, struct fi_info *info,
fastlock_init(&sock_domain->lock);
atomic_init(&sock_domain->ref, 0);
if(info && info->src_addr) {
if (getnameinfo(info->src_addr, info->src_addrlen, NULL, 0,
sock_domain->service,
sizeof(sock_domain->service),
NI_NUMERICSERV)) {
SOCK_LOG_ERROR("could not resolve src_addr\n");
goto err;
}
sock_domain->info = *info;
} else {
SOCK_LOG_ERROR("invalid fi_info\n");
goto err;
}
sock_domain->dom_fid.fid.fclass = FI_CLASS_DOMAIN;
sock_domain->dom_fid.fid.context = context;
sock_domain->dom_fid.fid.ops = &sock_dom_fi_ops;
sock_domain->dom_fid.ops = &sock_dom_ops;
sock_domain->dom_fid.mr = &sock_dom_mr_ops;
if (!info || !info->domain_attr ||
info->domain_attr->data_progress == FI_PROGRESS_UNSPEC)
sock_domain->progress_mode = FI_PROGRESS_AUTO;
else
sock_domain->progress_mode = info->domain_attr->data_progress;
sock_domain->pe = sock_pe_init(sock_domain);
if(!sock_domain->pe){
SOCK_LOG_ERROR("Failed to init PE\n");
goto err;
}
sock_domain->r_cmap.domain = sock_domain;
sock_domain->u_cmap.domain = sock_domain;
sock_conn_listen(sock_domain);
*dom = &sock_domain->dom_fid;
return 0;
err:
free(sock_domain);
return -FI_EINVAL;
}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,443 @@
/*
* Copyright (c) 2014 Intel Corporation, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <poll.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <limits.h>
#include "sock_util.h"
#include "sock.h"
/* Default endpoint attributes advertised for FI_EP_DGRAM.
 * NOTE(review): .protocol is FI_PROTO_SOCK_TCP even for the DGRAM
 * endpoint — confirm this is intentional. */
const struct fi_ep_attr sock_dgram_ep_attr = {
	.protocol = FI_PROTO_SOCK_TCP,
	.max_msg_size = SOCK_EP_MAX_MSG_SZ,
	.inject_size = SOCK_EP_MAX_INJECT_SZ,
	.total_buffered_recv = SOCK_EP_MAX_BUFF_RECV,
	.max_order_raw_size = SOCK_EP_MAX_ORDER_RAW_SZ,
	.max_order_war_size = SOCK_EP_MAX_ORDER_WAR_SZ,
	.max_order_waw_size = SOCK_EP_MAX_ORDER_WAW_SZ,
	.mem_tag_format = SOCK_EP_MEM_TAG_FMT,
	.msg_order = SOCK_EP_MSG_ORDER,
	.tx_ctx_cnt = 0,
	.rx_ctx_cnt = 0,
};

/* Default transmit-context attributes for FI_EP_DGRAM. */
const struct fi_tx_attr sock_dgram_tx_attr = {
	.caps = SOCK_EP_DGRAM_CAP,
	.op_flags = SOCK_DEF_OPS,
	.msg_order = SOCK_EP_MSG_ORDER,
	.inject_size = SOCK_EP_MAX_INJECT_SZ,
	.size = SOCK_EP_MAX_TX_CTX_SZ,
	.iov_limit = SOCK_EP_MAX_IOV_LIMIT,
};

/* Default receive-context attributes for FI_EP_DGRAM. */
const struct fi_rx_attr sock_dgram_rx_attr = {
	.caps = SOCK_EP_DGRAM_CAP,
	.op_flags = SOCK_DEF_OPS,
	.msg_order = SOCK_EP_MSG_ORDER,
	.total_buffered_recv = SOCK_EP_MAX_BUFF_RECV,
	.size = SOCK_EP_MAX_MSG_SZ,
	.iov_limit = SOCK_EP_MAX_IOV_LIMIT,
};
/* Check requested RX attributes against the dgram defaults.
 * NULL means "no constraints" and is trivially acceptable. */
static int sock_dgram_verify_rx_attr(const struct fi_rx_attr *attr)
{
	if (!attr)
		return 0;

	/* Each requested bitmask must be a subset of what we support. */
	if (attr->caps & ~SOCK_EP_DGRAM_CAP)
		return -FI_ENODATA;

	if (attr->op_flags & ~SOCK_EP_DGRAM_CAP)
		return -FI_ENODATA;

	if (attr->msg_order & ~SOCK_EP_MSG_ORDER)
		return -FI_ENODATA;

	/* Requested sizes may not exceed the provider defaults. */
	if (attr->total_buffered_recv > sock_dgram_rx_attr.total_buffered_recv)
		return -FI_ENODATA;

	if (attr->size > sock_dgram_rx_attr.size)
		return -FI_ENODATA;

	if (attr->iov_limit > sock_dgram_rx_attr.iov_limit)
		return -FI_ENODATA;

	return 0;
}
/* Check requested TX attributes against the dgram defaults.
 * NULL means "no constraints" and is trivially acceptable. */
static int sock_dgram_verify_tx_attr(const struct fi_tx_attr *attr)
{
	if (!attr)
		return 0;

	/* Each requested bitmask must be a subset of what we support. */
	if (attr->caps & ~SOCK_EP_DGRAM_CAP)
		return -FI_ENODATA;

	if (attr->op_flags & ~SOCK_EP_DGRAM_CAP)
		return -FI_ENODATA;

	if (attr->msg_order & ~SOCK_EP_MSG_ORDER)
		return -FI_ENODATA;

	/* Requested sizes may not exceed the provider defaults. */
	if (attr->inject_size > sock_dgram_tx_attr.inject_size)
		return -FI_ENODATA;

	if (attr->size > sock_dgram_tx_attr.size)
		return -FI_ENODATA;

	if (attr->iov_limit > sock_dgram_tx_attr.iov_limit)
		return -FI_ENODATA;

	return 0;
}
/*
 * Validate endpoint, TX and RX attributes for a dgram endpoint.
 * Any of the three attribute pointers may be NULL (unconstrained).
 * Returns 0 if all requested values fit within the provider defaults,
 * -FI_ENODATA otherwise.
 */
int sock_dgram_verify_ep_attr(struct fi_ep_attr *ep_attr,
			      struct fi_tx_attr *tx_attr,
			      struct fi_rx_attr *rx_attr)
{
	if (ep_attr) {
		switch (ep_attr->protocol) {
		case FI_PROTO_UNSPEC:
		case FI_PROTO_SOCK_TCP:
			break;
		default:
			return -FI_ENODATA;
		}

		if (ep_attr->max_msg_size > sock_dgram_ep_attr.max_msg_size)
			return -FI_ENODATA;

		if (ep_attr->inject_size > sock_dgram_ep_attr.inject_size)
			return -FI_ENODATA;

		if (ep_attr->total_buffered_recv >
		   sock_dgram_ep_attr.total_buffered_recv)
			return -FI_ENODATA;

		if (ep_attr->max_order_raw_size >
		   sock_dgram_ep_attr.max_order_raw_size)
			return -FI_ENODATA;

		if (ep_attr->max_order_war_size >
		   sock_dgram_ep_attr.max_order_war_size)
			return -FI_ENODATA;

		if (ep_attr->max_order_waw_size >
		   sock_dgram_ep_attr.max_order_waw_size)
			return -FI_ENODATA;

		/* Requested ordering must be a subset of what we support. */
		if ((ep_attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER)
			return -FI_ENODATA;

		/* Context counts are bounded unless shared contexts are used. */
		if ((ep_attr->tx_ctx_cnt > SOCK_EP_MAX_TX_CNT) &&
		    ep_attr->tx_ctx_cnt != FI_SHARED_CONTEXT)
			return -FI_ENODATA;

		if ((ep_attr->rx_ctx_cnt > SOCK_EP_MAX_RX_CNT) &&
		    ep_attr->rx_ctx_cnt != FI_SHARED_CONTEXT)
			return -FI_ENODATA;
	}

	if (sock_dgram_verify_tx_attr(tx_attr) || sock_dgram_verify_rx_attr(rx_attr))
		return -FI_ENODATA;

	return 0;
}
/*
 * Build an fi_info for a DGRAM endpoint, filling any attribute the caller
 * left unspecified with the provider defaults declared above.
 * Returns NULL if the base fi_info cannot be allocated.
 */
static struct fi_info *sock_dgram_fi_info(struct fi_info *hints,
					  void *src_addr, void *dest_addr)
{
	struct fi_info *_info = sock_fi_info(FI_EP_DGRAM, hints,
					     src_addr, dest_addr);
	if (!_info)
		return NULL;

	/* hints may legally be NULL (caller supplied only node/service);
	 * the original dereferenced it unconditionally and crashed. */
	if (!hints || !hints->caps)
		_info->caps = SOCK_EP_DGRAM_CAP;

	if (!hints || !hints->tx_attr)
		*(_info->tx_attr) = sock_dgram_tx_attr;

	if (!hints || !hints->rx_attr)
		*(_info->rx_attr) = sock_dgram_rx_attr;

	if (!hints || !hints->ep_attr)
		*(_info->ep_attr) = sock_dgram_ep_attr;

	return _info;
}
/*
 * fi_getinfo() handler for FI_EP_DGRAM.
 * Resolves node/service into source/destination sockaddrs (using a
 * connected UDP probe socket to discover the local address for the
 * destination case) and returns a matching fi_info.
 *
 * Fixes over the original:
 *  - freeaddrinfo() is now called on the list head (the advanced walk
 *    pointer was passed before, leaking the walked entries, and nothing
 *    was freed when no AF_INET entry matched),
 *  - socket() and calloc() results are checked, and the UDP probe socket
 *    is closed on every error path,
 *  - all failure returns are negative fabric error codes (some paths
 *    returned positive values).
 */
int sock_dgram_getinfo(uint32_t version, const char *node, const char *service,
		uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
	int ret;
	int udp_sock;
	socklen_t len;
	struct fi_info *_info;
	struct addrinfo sock_hints;
	struct addrinfo *result = NULL;
	struct addrinfo *ai;
	struct sockaddr_in *src_addr = NULL, *dest_addr = NULL;
	char sa_ip[INET_ADDRSTRLEN];
	char hostname[HOST_NAME_MAX];

	if (!info)
		return -FI_EBADFLAGS;

	*info = NULL;

	if (!node && !service && !hints)
		return -FI_EBADFLAGS;

	if (version != FI_VERSION(SOCK_MAJOR_VERSION, SOCK_MINOR_VERSION))
		return -FI_ENODATA;

	if (hints) {
		/* Requested capabilities must be a subset of what we offer. */
		if ((SOCK_EP_DGRAM_CAP | hints->caps) != SOCK_EP_DGRAM_CAP) {
			SOCK_LOG_INFO(
				   "Cannot support requested options!\n");
			return -FI_ENODATA;
		}

		ret = sock_dgram_verify_rx_attr(hints->rx_attr);
		if (ret)
			return ret;

		ret = sock_dgram_verify_tx_attr(hints->tx_attr);
		if (ret)
			return ret;
	}

	src_addr = calloc(1, sizeof(struct sockaddr_in));
	dest_addr = calloc(1, sizeof(struct sockaddr_in));
	if (!src_addr || !dest_addr) {
		ret = -FI_ENOMEM;
		goto err;
	}

	memset(&sock_hints, 0, sizeof(struct addrinfo));
	sock_hints.ai_family = AF_INET;
	sock_hints.ai_socktype = SOCK_STREAM;

	if (flags & FI_NUMERICHOST)
		sock_hints.ai_flags |= AI_NUMERICHOST;

	if ((flags & FI_SOURCE) || !node) {
		/* Resolve the local (source) address. */
		if (!node)
			gethostname(hostname, HOST_NAME_MAX);

		ret = getaddrinfo(node ? node : hostname, service,
				  &sock_hints, &result);
		if (ret != 0) {
			ret = -FI_ENODATA;
			SOCK_LOG_INFO("getaddrinfo failed!\n");
			goto err;
		}

		/* Walk with a separate pointer so the head can be freed. */
		for (ai = result; ai; ai = ai->ai_next) {
			if (ai->ai_family == AF_INET &&
			    ai->ai_addrlen == sizeof(struct sockaddr_in))
				break;
		}
		if (!ai) {
			SOCK_LOG_ERROR("getaddrinfo failed\n");
			freeaddrinfo(result);
			ret = -FI_EINVAL;
			goto err;
		}
		memcpy(src_addr, ai->ai_addr, ai->ai_addrlen);
		freeaddrinfo(result);
	} else if (node || service) {
		/* Resolve the remote address, then learn the local address
		 * the kernel would route from via a connected UDP socket. */
		ret = getaddrinfo(node, service, &sock_hints, &result);
		if (ret != 0) {
			ret = -FI_ENODATA;
			SOCK_LOG_INFO("getaddrinfo failed!\n");
			goto err;
		}

		for (ai = result; ai; ai = ai->ai_next) {
			if (ai->ai_family == AF_INET &&
			    ai->ai_addrlen == sizeof(struct sockaddr_in))
				break;
		}
		if (!ai) {
			SOCK_LOG_ERROR("getaddrinfo failed\n");
			freeaddrinfo(result);
			ret = -FI_EINVAL;
			goto err;
		}
		memcpy(dest_addr, ai->ai_addr, ai->ai_addrlen);

		udp_sock = socket(AF_INET, SOCK_DGRAM, 0);
		if (udp_sock < 0) {
			SOCK_LOG_ERROR("Failed to create udp socket\n");
			freeaddrinfo(result);
			ret = -FI_ENODATA;
			goto err;
		}

		if (connect(udp_sock, ai->ai_addr, ai->ai_addrlen) != 0) {
			SOCK_LOG_ERROR("Failed to connect udp socket\n");
			close(udp_sock);
			freeaddrinfo(result);
			ret = -FI_ENODATA;
			goto err;
		}

		len = sizeof(struct sockaddr_in);
		if (getsockname(udp_sock, (struct sockaddr*)src_addr, &len) != 0) {
			SOCK_LOG_ERROR("getsockname failed\n");
			close(udp_sock);
			freeaddrinfo(result);
			ret = -FI_ENODATA;
			goto err;
		}
		close(udp_sock);
		freeaddrinfo(result);
	}

	if (dest_addr) {
		memcpy(sa_ip, inet_ntoa(dest_addr->sin_addr), INET_ADDRSTRLEN);
		SOCK_LOG_INFO("dest_addr: family: %d, IP is %s\n",
			      ((struct sockaddr_in*)dest_addr)->sin_family, sa_ip);
	}

	if (src_addr) {
		memcpy(sa_ip, inet_ntoa(src_addr->sin_addr), INET_ADDRSTRLEN);
		SOCK_LOG_INFO("src_addr: family: %d, IP is %s\n",
			      ((struct sockaddr_in*)src_addr)->sin_family, sa_ip);
	}

	_info = sock_dgram_fi_info(hints, src_addr, dest_addr);
	if (!_info) {
		ret = -FI_ENOMEM;
		goto err;
	}

	*info = _info;
	free(src_addr);
	free(dest_addr);
	return 0;

err:
	free(src_addr);
	free(dest_addr);
	SOCK_LOG_ERROR("fi_getinfo failed\n");
	return ret;
}
/*
 * Common allocator for dgram endpoints (regular and scalable).
 * Validates any caller-supplied attributes, allocates the endpoint via
 * sock_alloc_endpoint(), and fills in provider defaults for attributes
 * the caller omitted. 'fclass' selects FI_CLASS_EP vs FI_CLASS_SEP.
 */
int sock_dgram_endpoint(struct fid_domain *domain, struct fi_info *info,
		struct sock_ep **ep, void *context, size_t fclass)
{
	int ret;

	if (info) {
		/* ep-attr check also cross-checks the tx/rx attrs. */
		if (info->ep_attr) {
			ret = sock_dgram_verify_ep_attr(info->ep_attr,
							info->tx_attr,
							info->rx_attr);
			if (ret)
				return ret;
		}

		if (info->tx_attr) {
			ret = sock_dgram_verify_tx_attr(info->tx_attr);
			if (ret)
				return ret;
		}

		if (info->rx_attr) {
			ret = sock_dgram_verify_rx_attr(info->rx_attr);
			if (ret)
				return ret;
		}
	}

	ret = sock_alloc_endpoint(domain, info, ep, context, fclass);
	if (ret)
		return ret;

	/* Fall back to provider defaults for unspecified attributes. */
	if (!info || !info->ep_attr)
		(*ep)->ep_attr = sock_dgram_ep_attr;

	if (!info || !info->tx_attr)
		(*ep)->tx_attr = sock_dgram_tx_attr;

	if (!info || !info->rx_attr)
		(*ep)->rx_attr = sock_dgram_rx_attr;

	return 0;
}
/* Allocate a standard (FI_CLASS_EP) dgram endpoint and return its fid_ep. */
int sock_dgram_ep(struct fid_domain *domain, struct fi_info *info,
		struct fid_ep **ep, void *context)
{
	struct sock_ep *new_ep;
	int ret;

	ret = sock_dgram_endpoint(domain, info, &new_ep, context, FI_CLASS_EP);
	if (ret == 0)
		*ep = &new_ep->ep;
	return ret;
}
/* Allocate a scalable (FI_CLASS_SEP) dgram endpoint and return its fid_sep. */
int sock_dgram_sep(struct fid_domain *domain, struct fi_info *info,
		struct fid_sep **sep, void *context)
{
	struct sock_ep *new_ep;
	int ret;

	ret = sock_dgram_endpoint(domain, info, &new_ep, context, FI_CLASS_SEP);
	if (ret == 0)
		*sep = &new_ep->sep;
	return ret;
}

Просмотреть файл

@ -0,0 +1,527 @@
/*
* Copyright (c) 2014 Intel Corporation, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <poll.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <limits.h>
#include "sock.h"
#include "sock_util.h"
/* Default endpoint attributes advertised for FI_EP_MSG. */
const struct fi_ep_attr sock_msg_ep_attr = {
	.protocol = FI_PROTO_SOCK_TCP,
	.max_msg_size = SOCK_EP_MAX_MSG_SZ,
	.inject_size = SOCK_EP_MAX_INJECT_SZ,
	.total_buffered_recv = SOCK_EP_MAX_BUFF_RECV,
	.max_order_raw_size = SOCK_EP_MAX_ORDER_RAW_SZ,
	.max_order_war_size = SOCK_EP_MAX_ORDER_WAR_SZ,
	.max_order_waw_size = SOCK_EP_MAX_ORDER_WAW_SZ,
	.mem_tag_format = SOCK_EP_MEM_TAG_FMT,
	.msg_order = SOCK_EP_MSG_ORDER,
	.tx_ctx_cnt = 0,
	.rx_ctx_cnt = 0,
};

/* Default transmit-context attributes for FI_EP_MSG. */
const struct fi_tx_attr sock_msg_tx_attr = {
	.caps = SOCK_EP_MSG_CAP,
	.op_flags = SOCK_DEF_OPS,
	.msg_order = SOCK_EP_MSG_ORDER,
	.inject_size = SOCK_EP_MAX_INJECT_SZ,
	.size = SOCK_EP_MAX_TX_CTX_SZ,
	.iov_limit = SOCK_EP_MAX_IOV_LIMIT,
};

/* Default receive-context attributes for FI_EP_MSG. */
const struct fi_rx_attr sock_msg_rx_attr = {
	.caps = SOCK_EP_MSG_CAP,
	.op_flags = SOCK_DEF_OPS,
	.msg_order = SOCK_EP_MSG_ORDER,
	.total_buffered_recv = SOCK_EP_MAX_BUFF_RECV,
	.size = SOCK_EP_MAX_MSG_SZ,
	.iov_limit = SOCK_EP_MAX_IOV_LIMIT,
};
/* Check requested RX attributes against the MSG defaults.
 * NULL means "no constraints" and is trivially acceptable. */
static int sock_msg_verify_rx_attr(const struct fi_rx_attr *attr)
{
	if (!attr)
		return 0;

	/* Each requested bitmask must be a subset of what we support. */
	if (attr->caps & ~SOCK_EP_MSG_CAP)
		return -FI_ENODATA;

	if (attr->op_flags & ~SOCK_EP_MSG_CAP)
		return -FI_ENODATA;

	if (attr->msg_order & ~SOCK_EP_MSG_ORDER)
		return -FI_ENODATA;

	/* Requested sizes may not exceed the provider defaults. */
	if (attr->total_buffered_recv > sock_msg_rx_attr.total_buffered_recv)
		return -FI_ENODATA;

	if (attr->size > sock_msg_rx_attr.size)
		return -FI_ENODATA;

	if (attr->iov_limit > sock_msg_rx_attr.iov_limit)
		return -FI_ENODATA;

	return 0;
}
/* Check requested TX attributes against the MSG defaults.
 * NULL means "no constraints" and is trivially acceptable. */
static int sock_msg_verify_tx_attr(const struct fi_tx_attr *attr)
{
	if (!attr)
		return 0;

	/* Each requested bitmask must be a subset of what we support. */
	if (attr->caps & ~SOCK_EP_MSG_CAP)
		return -FI_ENODATA;

	if (attr->op_flags & ~SOCK_EP_MSG_CAP)
		return -FI_ENODATA;

	if (attr->msg_order & ~SOCK_EP_MSG_ORDER)
		return -FI_ENODATA;

	/* Requested sizes may not exceed the provider defaults. */
	if (attr->inject_size > sock_msg_tx_attr.inject_size)
		return -FI_ENODATA;

	if (attr->size > sock_msg_tx_attr.size)
		return -FI_ENODATA;

	if (attr->iov_limit > sock_msg_tx_attr.iov_limit)
		return -FI_ENODATA;

	return 0;
}
/*
 * Validate endpoint, TX and RX attributes for a MSG endpoint.
 * Any of the three attribute pointers may be NULL (unconstrained).
 * Returns 0 if all requested values fit within the provider defaults,
 * -FI_ENODATA otherwise.
 */
int sock_msg_verify_ep_attr(struct fi_ep_attr *ep_attr,
			    struct fi_tx_attr *tx_attr,
			    struct fi_rx_attr *rx_attr)
{
	if (ep_attr) {
		switch (ep_attr->protocol) {
		case FI_PROTO_UNSPEC:
		case FI_PROTO_SOCK_TCP:
			break;
		default:
			return -FI_ENODATA;
		}

		if (ep_attr->max_msg_size > sock_msg_ep_attr.max_msg_size)
			return -FI_ENODATA;

		if (ep_attr->inject_size > sock_msg_ep_attr.inject_size)
			return -FI_ENODATA;

		if (ep_attr->total_buffered_recv >
		   sock_msg_ep_attr.total_buffered_recv)
			return -FI_ENODATA;

		if (ep_attr->max_order_raw_size >
		   sock_msg_ep_attr.max_order_raw_size)
			return -FI_ENODATA;

		if (ep_attr->max_order_war_size >
		   sock_msg_ep_attr.max_order_war_size)
			return -FI_ENODATA;

		if (ep_attr->max_order_waw_size >
		   sock_msg_ep_attr.max_order_waw_size)
			return -FI_ENODATA;

		/* Requested ordering must be a subset of what we support. */
		if ((ep_attr->msg_order | SOCK_EP_MSG_ORDER) != SOCK_EP_MSG_ORDER)
			return -FI_ENODATA;

		/* Context counts are bounded unless shared contexts are used. */
		if ((ep_attr->tx_ctx_cnt > SOCK_EP_MAX_TX_CNT) &&
		    ep_attr->tx_ctx_cnt != FI_SHARED_CONTEXT)
			return -FI_ENODATA;

		if ((ep_attr->rx_ctx_cnt > SOCK_EP_MAX_RX_CNT) &&
		    ep_attr->rx_ctx_cnt != FI_SHARED_CONTEXT)
			return -FI_ENODATA;
	}

	if (sock_msg_verify_tx_attr(tx_attr) || sock_msg_verify_rx_attr(rx_attr))
		return -FI_ENODATA;

	return 0;
}
/*
 * Build an fi_info for a MSG endpoint, filling any attribute the caller
 * left unspecified with the provider defaults declared above.
 * Returns NULL if the base fi_info cannot be allocated.
 */
static struct fi_info *sock_msg_fi_info(struct fi_info *hints,
					void *src_addr, void *dest_addr)
{
	struct fi_info *_info = sock_fi_info(FI_EP_MSG, hints,
					     src_addr, dest_addr);
	if (!_info)
		return NULL;

	/* hints may legally be NULL (caller supplied only node/service);
	 * the original dereferenced it unconditionally and crashed. */
	if (!hints || !hints->caps)
		_info->caps = SOCK_EP_MSG_CAP;

	if (!hints || !hints->tx_attr)
		*(_info->tx_attr) = sock_msg_tx_attr;

	if (!hints || !hints->rx_attr)
		*(_info->rx_attr) = sock_msg_rx_attr;

	if (!hints || !hints->ep_attr)
		*(_info->ep_attr) = sock_msg_ep_attr;

	return _info;
}
/*
 * fi_getinfo() handler for FI_EP_MSG.
 * Resolves node/service into source/destination sockaddrs (using a
 * connected UDP probe socket to discover the local address for the
 * destination case) and returns a matching fi_info.
 *
 * Fixes over the original:
 *  - freeaddrinfo() is now called on the list head (the advanced walk
 *    pointer was passed before, leaking the walked entries, and nothing
 *    was freed when no AF_INET entry matched),
 *  - socket() and calloc() results are checked, and the UDP probe socket
 *    is closed on every error path,
 *  - all failure returns are negative fabric error codes (some paths
 *    returned positive values).
 */
int sock_msg_getinfo(uint32_t version, const char *node, const char *service,
		uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
	int ret;
	int udp_sock;
	socklen_t len;
	struct fi_info *_info;
	struct addrinfo sock_hints;
	struct addrinfo *result = NULL;
	struct addrinfo *ai;
	struct sockaddr_in *src_addr = NULL, *dest_addr = NULL;
	char sa_ip[INET_ADDRSTRLEN];
	char hostname[HOST_NAME_MAX];

	if (!info)
		return -FI_EBADFLAGS;

	*info = NULL;

	if (!node && !service && !hints)
		return -FI_EBADFLAGS;

	if (version != FI_VERSION(SOCK_MAJOR_VERSION, SOCK_MINOR_VERSION))
		return -FI_ENODATA;

	if (hints) {
		/* Requested capabilities must be a subset of what we offer. */
		if ((SOCK_EP_MSG_CAP | hints->caps) != SOCK_EP_MSG_CAP) {
			SOCK_LOG_INFO(
				   "Cannot support requested options!\n");
			return -FI_ENODATA;
		}

		ret = sock_msg_verify_rx_attr(hints->rx_attr);
		if (ret)
			return ret;

		ret = sock_msg_verify_tx_attr(hints->tx_attr);
		if (ret)
			return ret;
	}

	src_addr = calloc(1, sizeof(struct sockaddr_in));
	dest_addr = calloc(1, sizeof(struct sockaddr_in));
	if (!src_addr || !dest_addr) {
		ret = -FI_ENOMEM;
		goto err;
	}

	memset(&sock_hints, 0, sizeof(struct addrinfo));
	sock_hints.ai_family = AF_INET;
	sock_hints.ai_socktype = SOCK_STREAM;

	if (flags & FI_NUMERICHOST)
		sock_hints.ai_flags |= AI_NUMERICHOST;

	if ((flags & FI_SOURCE) || !node) {
		/* Resolve the local (source) address. */
		if (!node)
			gethostname(hostname, HOST_NAME_MAX);

		ret = getaddrinfo(node ? node : hostname, service,
				  &sock_hints, &result);
		if (ret != 0) {
			ret = -FI_ENODATA;
			SOCK_LOG_INFO("getaddrinfo failed!\n");
			goto err;
		}

		/* Walk with a separate pointer so the head can be freed. */
		for (ai = result; ai; ai = ai->ai_next) {
			if (ai->ai_family == AF_INET &&
			    ai->ai_addrlen == sizeof(struct sockaddr_in))
				break;
		}
		if (!ai) {
			SOCK_LOG_ERROR("getaddrinfo failed\n");
			freeaddrinfo(result);
			ret = -FI_EINVAL;
			goto err;
		}
		memcpy(src_addr, ai->ai_addr, ai->ai_addrlen);
		freeaddrinfo(result);
	} else if (node || service) {
		/* Resolve the remote address, then learn the local address
		 * the kernel would route from via a connected UDP socket. */
		ret = getaddrinfo(node, service, &sock_hints, &result);
		if (ret != 0) {
			ret = -FI_ENODATA;
			SOCK_LOG_INFO("getaddrinfo failed!\n");
			goto err;
		}

		for (ai = result; ai; ai = ai->ai_next) {
			if (ai->ai_family == AF_INET &&
			    ai->ai_addrlen == sizeof(struct sockaddr_in))
				break;
		}
		if (!ai) {
			SOCK_LOG_ERROR("getaddrinfo failed\n");
			freeaddrinfo(result);
			ret = -FI_EINVAL;
			goto err;
		}
		memcpy(dest_addr, ai->ai_addr, ai->ai_addrlen);

		udp_sock = socket(AF_INET, SOCK_DGRAM, 0);
		if (udp_sock < 0) {
			SOCK_LOG_ERROR("Failed to create udp socket\n");
			freeaddrinfo(result);
			ret = -FI_ENODATA;
			goto err;
		}

		if (connect(udp_sock, ai->ai_addr, ai->ai_addrlen) != 0) {
			SOCK_LOG_ERROR("Failed to connect udp socket\n");
			close(udp_sock);
			freeaddrinfo(result);
			ret = -FI_ENODATA;
			goto err;
		}

		len = sizeof(struct sockaddr_in);
		if (getsockname(udp_sock, (struct sockaddr*)src_addr, &len) != 0) {
			SOCK_LOG_ERROR("getsockname failed\n");
			close(udp_sock);
			freeaddrinfo(result);
			ret = -FI_ENODATA;
			goto err;
		}
		close(udp_sock);
		freeaddrinfo(result);
	}

	if (dest_addr) {
		memcpy(sa_ip, inet_ntoa(dest_addr->sin_addr), INET_ADDRSTRLEN);
		SOCK_LOG_INFO("dest_addr: family: %d, IP is %s\n",
			      ((struct sockaddr_in*)dest_addr)->sin_family, sa_ip);
	}

	if (src_addr) {
		memcpy(sa_ip, inet_ntoa(src_addr->sin_addr), INET_ADDRSTRLEN);
		SOCK_LOG_INFO("src_addr: family: %d, IP is %s\n",
			      ((struct sockaddr_in*)src_addr)->sin_family, sa_ip);
	}

	_info = sock_msg_fi_info(hints, src_addr, dest_addr);
	if (!_info) {
		ret = -FI_ENOMEM;
		goto err;
	}

	*info = _info;
	free(src_addr);
	free(dest_addr);
	return 0;

err:
	free(src_addr);
	free(dest_addr);
	SOCK_LOG_ERROR("fi_getinfo failed\n");
	return ret;
}
/*
 * Report the endpoint's local (source) address.
 * If *addrlen is 0 the required size is stored and -FI_ETOOSMALL returned.
 * NOTE(review): assumes addr/addrlen are valid pointers and that
 * sock_ep->src_addr is non-NULL — confirm against callers.
 */
static int sock_msg_ep_cm_getname(fid_t fid, void *addr, size_t *addrlen)
{
	struct sock_ep *sock_ep;

	if (*addrlen == 0) {
		*addrlen = sizeof(struct sockaddr_in);
		return -FI_ETOOSMALL;
	}

	sock_ep = container_of(fid, struct sock_ep, ep.fid);
	/* Copy at most a sockaddr_in; report how much was written. */
	*addrlen = MIN(*addrlen, sizeof(struct sockaddr_in));
	memcpy(addr, sock_ep->src_addr, *addrlen);
	return 0;
}
/*
 * Report the peer (destination) address of a connected endpoint.
 * If *addrlen is 0 the required size is stored and -FI_ETOOSMALL returned.
 * NOTE(review): assumes addr/addrlen are valid pointers and that
 * sock_ep->dest_addr is non-NULL — confirm against callers.
 */
static int sock_msg_ep_cm_getpeer(struct fid_ep *ep, void *addr, size_t *addrlen)
{
	struct sock_ep *sock_ep;

	if (*addrlen == 0) {
		*addrlen = sizeof(struct sockaddr_in);
		return -FI_ETOOSMALL;
	}

	sock_ep = container_of(ep, struct sock_ep, ep);
	/* Copy at most a sockaddr_in; report how much was written. */
	*addrlen = MIN(*addrlen, sizeof(struct sockaddr_in));
	memcpy(addr, sock_ep->dest_addr, *addrlen);
	return 0;
}
/* Connection management for MSG endpoints is not implemented yet;
 * each entry point below reports FI_ENOSYS. */

static int sock_msg_ep_cm_connect(struct fid_ep *ep, const void *addr,
				  const void *param, size_t paramlen)
{
	return -FI_ENOSYS;
}

static int sock_msg_ep_cm_listen(struct fid_pep *pep)
{
	return -FI_ENOSYS;
}

static int sock_msg_ep_cm_accept(struct fid_ep *ep, const void *param, size_t paramlen)
{
	return -FI_ENOSYS;
}

static int sock_msg_ep_cm_reject(struct fid_pep *pep, fi_connreq_t connreq,
				 const void *param, size_t paramlen)
{
	return -FI_ENOSYS;
}

static int sock_msg_ep_cm_shutdown(struct fid_ep *ep, uint64_t flags)
{
	return -FI_ENOSYS;
}
/* Connection-management dispatch table for MSG endpoints.
 * Only the address queries (getname/getpeer) are functional; the
 * connection operations above all return -FI_ENOSYS, and join/leave
 * use the generic no-op handlers. */
struct fi_ops_cm sock_msg_ep_cm_ops = {
	.size = sizeof(struct fi_ops_cm),
	.getname = sock_msg_ep_cm_getname,
	.getpeer = sock_msg_ep_cm_getpeer,
	.connect = sock_msg_ep_cm_connect,
	.listen = sock_msg_ep_cm_listen,
	.accept = sock_msg_ep_cm_accept,
	.reject = sock_msg_ep_cm_reject,
	.shutdown = sock_msg_ep_cm_shutdown,
	.join = fi_no_join,
	.leave = fi_no_leave,
};
/* Create a MSG endpoint of the given fclass (EP/SEP/PEP): validate
 * any caller-supplied attributes, allocate the endpoint, then backfill
 * provider defaults for attribute sets the caller omitted. */
int sock_msg_endpoint(struct fid_domain *domain, struct fi_info *info,
		struct sock_ep **ep, void *context, size_t fclass)
{
	int ret;

	if (info && info->ep_attr) {
		ret = sock_msg_verify_ep_attr(info->ep_attr, info->tx_attr,
					      info->rx_attr);
		if (ret)
			return ret;
	}
	if (info && info->tx_attr) {
		ret = sock_msg_verify_tx_attr(info->tx_attr);
		if (ret)
			return ret;
	}
	if (info && info->rx_attr) {
		ret = sock_msg_verify_rx_attr(info->rx_attr);
		if (ret)
			return ret;
	}

	ret = sock_alloc_endpoint(domain, info, ep, context, fclass);
	if (ret)
		return ret;

	/* Fill provider defaults for anything left unspecified. */
	if (!info || !info->ep_attr)
		(*ep)->ep_attr = sock_msg_ep_attr;
	if (!info || !info->tx_attr)
		(*ep)->tx_attr = sock_msg_tx_attr;
	if (!info || !info->rx_attr)
		(*ep)->rx_attr = sock_msg_rx_attr;
	return 0;
}
/* fi_endpoint() entry for MSG endpoints: thin wrapper that creates a
 * FI_CLASS_EP object and hands back its fid_ep. */
int sock_msg_ep(struct fid_domain *domain, struct fi_info *info,
		struct fid_ep **ep, void *context)
{
	struct sock_ep *endpoint;
	int ret = sock_msg_endpoint(domain, info, &endpoint, context,
				    FI_CLASS_EP);
	if (!ret)
		*ep = &endpoint->ep;
	return ret;
}
/* fi_scalable_ep() entry for MSG endpoints: create a FI_CLASS_SEP
 * object and hand back its fid_sep. */
int sock_msg_sep(struct fid_domain *domain, struct fi_info *info,
		 struct fid_sep **sep, void *context)
{
	struct sock_ep *endpoint;
	int ret = sock_msg_endpoint(domain, info, &endpoint, context,
				    FI_CLASS_SEP);
	if (!ret)
		*sep = &endpoint->sep;
	return ret;
}
/* fi_passive_ep() entry for MSG endpoints: create a FI_CLASS_PEP
 * object (no domain exists yet for a passive endpoint, hence NULL)
 * and hand back its fid_pep. */
int sock_msg_passive_ep(struct fid_fabric *fabric, struct fi_info *info,
			struct fid_pep **pep, void *context)
{
	struct sock_ep *endpoint;
	int ret = sock_msg_endpoint(NULL, info, &endpoint, context,
				    FI_CLASS_PEP);
	if (!ret)
		*pep = &endpoint->pep;
	return ret;
}

Просмотреть файл

@ -0,0 +1,442 @@
/*
* Copyright (c) 2014 Intel Corporation, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <poll.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <limits.h>
#include "sock.h"
#include "sock_util.h"
/* Provider default endpoint attributes for RDM endpoints; also serves
 * as the upper bound when validating caller-requested attributes. */
const struct fi_ep_attr sock_rdm_ep_attr = {
	.protocol = FI_PROTO_SOCK_TCP,
	.max_msg_size = SOCK_EP_MAX_MSG_SZ,
	.inject_size = SOCK_EP_MAX_INJECT_SZ,
	.total_buffered_recv = SOCK_EP_MAX_BUFF_RECV,
	.max_order_raw_size = SOCK_EP_MAX_ORDER_RAW_SZ,
	.max_order_war_size = SOCK_EP_MAX_ORDER_WAR_SZ,
	.max_order_waw_size = SOCK_EP_MAX_ORDER_WAW_SZ,
	.mem_tag_format = SOCK_EP_MEM_TAG_FMT,
	.msg_order = SOCK_EP_MSG_ORDER,
	.tx_ctx_cnt = 0,
	.rx_ctx_cnt = 0,
};
/* Provider default TX attributes for RDM endpoints (and validation bound). */
const struct fi_tx_attr sock_rdm_tx_attr = {
	.caps = SOCK_EP_RDM_CAP,
	.op_flags = SOCK_DEF_OPS,
	.msg_order = SOCK_EP_MSG_ORDER,
	.inject_size = SOCK_EP_MAX_INJECT_SZ,
	.size = SOCK_EP_MAX_TX_CTX_SZ,
	.iov_limit = SOCK_EP_MAX_IOV_LIMIT,
};
/* Provider default RX attributes for RDM endpoints (and validation bound). */
const struct fi_rx_attr sock_rdm_rx_attr = {
	.caps = SOCK_EP_RDM_CAP,
	.op_flags = SOCK_DEF_OPS,
	.msg_order = SOCK_EP_MSG_ORDER,
	.total_buffered_recv = SOCK_EP_MAX_BUFF_RECV,
	.size = SOCK_EP_MAX_MSG_SZ,
	.iov_limit = SOCK_EP_MAX_IOV_LIMIT,
};
/* Check a caller-requested RX attribute block against the provider's
 * RDM limits. NULL means "no constraints" and always passes.
 * Returns 0 when supportable, -FI_ENODATA otherwise. */
static int sock_rdm_verify_rx_attr(const struct fi_rx_attr *attr)
{
	if (!attr)
		return 0;

	/* Requested bit-sets must be subsets of what we advertise. */
	if (attr->caps & ~SOCK_EP_RDM_CAP)
		return -FI_ENODATA;
	if (attr->op_flags & ~SOCK_EP_RDM_CAP)
		return -FI_ENODATA;
	if (attr->msg_order & ~SOCK_EP_MSG_ORDER)
		return -FI_ENODATA;

	/* Requested sizes must not exceed the provider maximums. */
	if (attr->total_buffered_recv > sock_rdm_rx_attr.total_buffered_recv ||
	    attr->size > sock_rdm_rx_attr.size ||
	    attr->iov_limit > sock_rdm_rx_attr.iov_limit)
		return -FI_ENODATA;

	return 0;
}
/* Check a caller-requested TX attribute block against the provider's
 * RDM limits. NULL means "no constraints" and always passes.
 * Returns 0 when supportable, -FI_ENODATA otherwise. */
static int sock_rdm_verify_tx_attr(const struct fi_tx_attr *attr)
{
	if (!attr)
		return 0;

	/* Requested bit-sets must be subsets of what we advertise. */
	if (attr->caps & ~SOCK_EP_RDM_CAP)
		return -FI_ENODATA;
	if (attr->op_flags & ~SOCK_EP_RDM_CAP)
		return -FI_ENODATA;
	if (attr->msg_order & ~SOCK_EP_MSG_ORDER)
		return -FI_ENODATA;

	/* Requested sizes must not exceed the provider maximums. */
	if (attr->inject_size > sock_rdm_tx_attr.inject_size ||
	    attr->size > sock_rdm_tx_attr.size ||
	    attr->iov_limit > sock_rdm_tx_attr.iov_limit)
		return -FI_ENODATA;

	return 0;
}
/* Validate a full set of endpoint/TX/RX attributes for an RDM endpoint.
 * Any of the three pointers may be NULL (unconstrained).
 * Returns 0 when every supplied attribute is supportable, -FI_ENODATA
 * otherwise. */
int sock_rdm_verify_ep_attr(struct fi_ep_attr *ep_attr,
			    struct fi_tx_attr *tx_attr,
			    struct fi_rx_attr *rx_attr)
{
	if (ep_attr) {
		switch (ep_attr->protocol) {
		case FI_PROTO_UNSPEC:
		case FI_PROTO_SOCK_TCP:
			break;
		default:
			return -FI_ENODATA;
		}

		/* Every requested size must fit within the provider limit. */
		if (ep_attr->max_msg_size > sock_rdm_ep_attr.max_msg_size ||
		    ep_attr->inject_size > sock_rdm_ep_attr.inject_size ||
		    ep_attr->total_buffered_recv >
			    sock_rdm_ep_attr.total_buffered_recv ||
		    ep_attr->max_order_raw_size >
			    sock_rdm_ep_attr.max_order_raw_size ||
		    ep_attr->max_order_war_size >
			    sock_rdm_ep_attr.max_order_war_size ||
		    ep_attr->max_order_waw_size >
			    sock_rdm_ep_attr.max_order_waw_size)
			return -FI_ENODATA;

		/* Requested ordering must be a subset of what we offer. */
		if (ep_attr->msg_order & ~SOCK_EP_MSG_ORDER)
			return -FI_ENODATA;

		/* Context counts: in range, or explicitly shared. */
		if (ep_attr->tx_ctx_cnt > SOCK_EP_MAX_TX_CNT &&
		    ep_attr->tx_ctx_cnt != FI_SHARED_CONTEXT)
			return -FI_ENODATA;
		if (ep_attr->rx_ctx_cnt > SOCK_EP_MAX_RX_CNT &&
		    ep_attr->rx_ctx_cnt != FI_SHARED_CONTEXT)
			return -FI_ENODATA;
	}

	if (sock_rdm_verify_tx_attr(tx_attr) ||
	    sock_rdm_verify_rx_attr(rx_attr))
		return -FI_ENODATA;

	return 0;
}
/* Build an fi_info describing an RDM endpoint, applying provider
 * defaults for every attribute block the caller did not constrain.
 *
 * Fix: sock_rdm_getinfo() may legitimately be called with hints == NULL
 * (it only dereferences hints inside an "if (hints)" guard), yet this
 * helper dereferenced hints unconditionally — a NULL-pointer crash on
 * the hint-less path. NULL hints now means "use all defaults". */
static struct fi_info *sock_rdm_fi_info(struct fi_info *hints,
					void *src_addr, void *dest_addr)
{
	struct fi_info *_info = sock_fi_info(FI_EP_RDM, hints,
					     src_addr, dest_addr);
	if (!_info)
		return NULL;

	if (!hints || !hints->caps)
		_info->caps = SOCK_EP_RDM_CAP;
	if (!hints || !hints->tx_attr)
		*(_info->tx_attr) = sock_rdm_tx_attr;
	if (!hints || !hints->rx_attr)
		*(_info->rx_attr) = sock_rdm_rx_attr;
	if (!hints || !hints->ep_attr)
		*(_info->ep_attr) = sock_rdm_ep_attr;
	return _info;
}
/* fi_getinfo() handler for RDM endpoints.
 *
 * Resolves node/service into a source address (FI_SOURCE or no node) or
 * a destination address (active side); in the latter case a throw-away
 * connected UDP socket discovers the matching local source address.
 *
 * Fixes over the original:
 *  - error codes are returned negative (-FI_ENODATA/-FI_ENOMEM),
 *    matching every other return in this function and the libfabric
 *    convention;
 *  - calloc() results are checked;
 *  - socket() result is checked before use;
 *  - the udp socket and the getaddrinfo() result list are released on
 *    every error path (the original leaked both, and called
 *    freeaddrinfo() on an advanced cursor, leaking the list head);
 *  - inet_ntoa() output is copied with snprintf() instead of a
 *    fixed-size memcpy that could read past the static string. */
int sock_rdm_getinfo(uint32_t version, const char *node, const char *service,
		uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
	int ret;
	int udp_sock = -1;
	socklen_t len;
	struct fi_info *_info;
	struct addrinfo sock_hints;
	struct addrinfo *ai, *result = NULL;
	struct sockaddr_in *src_addr = NULL, *dest_addr = NULL;
	char sa_ip[INET_ADDRSTRLEN];
	char hostname[HOST_NAME_MAX];

	if (!info)
		return -FI_EBADFLAGS;

	*info = NULL;

	if (version != FI_VERSION(SOCK_MAJOR_VERSION, SOCK_MINOR_VERSION))
		return -FI_ENODATA;

	if (hints) {
		/* Requested capabilities must be a subset of ours. */
		if ((SOCK_EP_RDM_CAP | hints->caps) != SOCK_EP_RDM_CAP) {
			SOCK_LOG_INFO("Cannot support requested options!\n");
			return -FI_ENODATA;
		}

		ret = sock_rdm_verify_rx_attr(hints->rx_attr);
		if (ret)
			return ret;

		ret = sock_rdm_verify_tx_attr(hints->tx_attr);
		if (ret)
			return ret;
	}

	src_addr = calloc(1, sizeof(struct sockaddr_in));
	dest_addr = calloc(1, sizeof(struct sockaddr_in));
	if (!src_addr || !dest_addr) {
		ret = -FI_ENOMEM;
		goto err;
	}

	memset(&sock_hints, 0, sizeof(struct addrinfo));
	sock_hints.ai_family = AF_INET;
	sock_hints.ai_socktype = SOCK_STREAM;
	if (flags & FI_NUMERICHOST)
		sock_hints.ai_flags |= AI_NUMERICHOST;

	if ((flags & FI_SOURCE) || !node) {
		/* Resolve the local/source address; default to our own
		 * hostname when no node was given. */
		if (!node)
			gethostname(hostname, HOST_NAME_MAX);

		ret = getaddrinfo(node ? node : hostname, service,
				  &sock_hints, &result);
		if (ret != 0) {
			ret = -FI_ENODATA;
			SOCK_LOG_INFO("getaddrinfo failed!\n");
			goto err;
		}

		/* Pick the first IPv4 entry; iterate with a cursor so the
		 * list head stays valid for freeaddrinfo(). */
		for (ai = result; ai; ai = ai->ai_next) {
			if (ai->ai_family == AF_INET &&
			    ai->ai_addrlen == sizeof(struct sockaddr_in))
				break;
		}
		if (!ai) {
			SOCK_LOG_ERROR("getaddrinfo failed\n");
			ret = -FI_EINVAL;
			goto err;
		}

		memcpy(src_addr, ai->ai_addr, ai->ai_addrlen);
		freeaddrinfo(result);
		result = NULL;
	} else if (node || service) {
		/* Resolve the destination address. */
		ret = getaddrinfo(node, service, &sock_hints, &result);
		if (ret != 0) {
			ret = -FI_ENODATA;
			SOCK_LOG_INFO("getaddrinfo failed!\n");
			goto err;
		}

		for (ai = result; ai; ai = ai->ai_next) {
			if (ai->ai_family == AF_INET &&
			    ai->ai_addrlen == sizeof(struct sockaddr_in))
				break;
		}
		if (!ai) {
			SOCK_LOG_ERROR("getaddrinfo failed\n");
			ret = -FI_EINVAL;
			goto err;
		}

		memcpy(dest_addr, ai->ai_addr, ai->ai_addrlen);

		/* Connect a throw-away UDP socket to learn which local
		 * address the kernel would use to reach the destination. */
		udp_sock = socket(AF_INET, SOCK_DGRAM, 0);
		if (udp_sock < 0) {
			SOCK_LOG_ERROR("Failed to create udp socket\n");
			ret = -FI_ENODATA;
			goto err;
		}

		ret = connect(udp_sock, ai->ai_addr, ai->ai_addrlen);
		if (ret != 0) {
			SOCK_LOG_ERROR("Failed to connect udp socket\n");
			ret = -FI_ENODATA;
			goto err;
		}

		len = sizeof(struct sockaddr_in);
		ret = getsockname(udp_sock, (struct sockaddr *)src_addr, &len);
		if (ret != 0) {
			SOCK_LOG_ERROR("getsockname failed\n");
			ret = -FI_ENODATA;
			goto err;
		}

		close(udp_sock);
		udp_sock = -1;
		freeaddrinfo(result);
		result = NULL;
	}

	if (dest_addr) {
		snprintf(sa_ip, sizeof(sa_ip), "%s", inet_ntoa(dest_addr->sin_addr));
		SOCK_LOG_INFO("dest_addr: family: %d, IP is %s\n",
			      ((struct sockaddr_in *)dest_addr)->sin_family, sa_ip);
	}

	if (src_addr) {
		snprintf(sa_ip, sizeof(sa_ip), "%s", inet_ntoa(src_addr->sin_addr));
		SOCK_LOG_INFO("src_addr: family: %d, IP is %s\n",
			      ((struct sockaddr_in *)src_addr)->sin_family, sa_ip);
	}

	_info = sock_rdm_fi_info(hints, src_addr, dest_addr);
	if (!_info) {
		ret = -FI_ENOMEM;
		goto err;
	}

	*info = _info;
	free(src_addr);
	free(dest_addr);
	return 0;

err:
	if (udp_sock >= 0)
		close(udp_sock);
	if (result)
		freeaddrinfo(result);
	free(src_addr);
	free(dest_addr);
	SOCK_LOG_ERROR("fi_getinfo failed\n");
	return ret;
}
/* Create an RDM endpoint of the given fclass: validate any
 * caller-supplied attributes, allocate the endpoint, then backfill
 * provider defaults for attribute sets the caller omitted. */
int sock_rdm_endpoint(struct fid_domain *domain, struct fi_info *info,
		struct sock_ep **ep, void *context, size_t fclass)
{
	int ret;

	if (info && info->ep_attr) {
		ret = sock_rdm_verify_ep_attr(info->ep_attr, info->tx_attr,
					      info->rx_attr);
		if (ret)
			return ret;
	}
	if (info && info->tx_attr) {
		ret = sock_rdm_verify_tx_attr(info->tx_attr);
		if (ret)
			return ret;
	}
	if (info && info->rx_attr) {
		ret = sock_rdm_verify_rx_attr(info->rx_attr);
		if (ret)
			return ret;
	}

	ret = sock_alloc_endpoint(domain, info, ep, context, fclass);
	if (ret)
		return ret;

	/* Fill provider defaults for anything left unspecified. */
	if (!info || !info->ep_attr)
		(*ep)->ep_attr = sock_rdm_ep_attr;
	if (!info || !info->tx_attr)
		(*ep)->tx_attr = sock_rdm_tx_attr;
	if (!info || !info->rx_attr)
		(*ep)->rx_attr = sock_rdm_rx_attr;
	return 0;
}
/* fi_endpoint() entry for RDM endpoints: thin wrapper that creates a
 * FI_CLASS_EP object and hands back its fid_ep. */
int sock_rdm_ep(struct fid_domain *domain, struct fi_info *info,
		struct fid_ep **ep, void *context)
{
	struct sock_ep *endpoint;
	int ret = sock_rdm_endpoint(domain, info, &endpoint, context,
				    FI_CLASS_EP);
	if (!ret)
		*ep = &endpoint->ep;
	return ret;
}
/* fi_scalable_ep() entry for RDM endpoints: create a FI_CLASS_SEP
 * object and hand back its fid_sep. */
int sock_rdm_sep(struct fid_domain *domain, struct fi_info *info,
		 struct fid_sep **sep, void *context)
{
	struct sock_ep *endpoint;
	int ret = sock_rdm_endpoint(domain, info, &endpoint, context,
				    FI_CLASS_SEP);
	if (!ret)
		*sep = &endpoint->sep;
	return ret;
}

Просмотреть файл

@ -143,8 +143,11 @@ ssize_t sock_eq_report_event(struct sock_eq *sock_eq, uint32_t event,
entry->len = len;
entry->flags = flags;
memcpy(entry->event, buf, len);
dlistfd_insert_tail(&entry->entry, &sock_eq->list);
if (sock_eq->signal)
sock_wait_signal(sock_eq->waitset);
fastlock_release(&sock_eq->lock);
return 0;
}
@ -167,8 +170,11 @@ ssize_t sock_eq_report_error(struct sock_eq *sock_eq, fid_t fid, void *context,
err_entry->prov_errno = prov_errno;
err_entry->err_data = err_data;
entry->len = sizeof(struct fi_eq_err_entry);
dlistfd_insert_tail(&entry->entry, &sock_eq->err_list);
if (sock_eq->signal)
sock_wait_signal(sock_eq->waitset);
fastlock_release(&sock_eq->lock);
return 0;
}
@ -212,26 +218,43 @@ int sock_eq_fi_close(struct fid *fid)
fastlock_destroy(&sock_eq->lock);
atomic_dec(&sock_eq->sock_fab->ref);
if (sock_eq->signal && sock_eq->attr.wait_obj == FI_WAIT_MUTEX_COND)
sock_wait_close(&sock_eq->waitset->fid);
free(sock_eq);
return 0;
}
int sock_eq_fi_control(struct fid *fid, int command, void *arg)
int sock_eq_control(struct fid *fid, int command, void *arg)
{
struct sock_eq *eq;
int ret = 0;
struct sock_eq *eq;
eq = container_of(fid, struct sock_eq, eq.fid);
switch (command) {
case FI_GETWAIT:
*(void **) arg = &eq->list.fd[LIST_READ_FD];
switch (eq->attr.wait_obj) {
case FI_WAIT_NONE:
case FI_WAIT_UNSPEC:
case FI_WAIT_FD:
memcpy(arg, &eq->list.fd[LIST_READ_FD], sizeof(int));
break;
case FI_WAIT_SET:
case FI_WAIT_MUTEX_COND:
sock_wait_get_obj(eq->waitset, arg);
break;
default:
ret = -FI_EINVAL;
break;
}
break;
default:
ret = -FI_ENOSYS;
ret = -FI_EINVAL;
break;
}
return ret;
}
@ -239,7 +262,7 @@ static struct fi_ops sock_eq_fi_ops = {
.size = sizeof(struct fi_ops),
.close = sock_eq_fi_close,
.bind = fi_no_bind,
.control = sock_eq_fi_control,
.control = sock_eq_control,
.ops_open = fi_no_ops_open,
};
@ -251,6 +274,8 @@ static int _sock_eq_verify_attr(struct fi_eq_attr *attr)
switch (attr->wait_obj) {
case FI_WAIT_NONE:
case FI_WAIT_FD:
case FI_WAIT_SET:
case FI_WAIT_MUTEX_COND:
break;
case FI_WAIT_UNSPEC:
attr->wait_obj = FI_WAIT_FD;
@ -275,6 +300,7 @@ int sock_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
{
int ret;
struct sock_eq *sock_eq;
struct fi_wait_attr wait_attr;
ret = _sock_eq_verify_attr(attr);
if (ret)
@ -293,9 +319,9 @@ int sock_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
if(attr == NULL)
memcpy(&sock_eq->attr, &_sock_eq_def_attr,
sizeof(struct fi_cq_attr));
sizeof(struct fi_eq_attr));
else
memcpy(&sock_eq->attr, attr, sizeof(struct fi_cq_attr));
memcpy(&sock_eq->attr, attr, sizeof(struct fi_eq_attr));
ret = dlistfd_head_init(&sock_eq->list);
if(ret)
@ -307,6 +333,34 @@ int sock_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
fastlock_init(&sock_eq->lock);
atomic_inc(&sock_eq->sock_fab->ref);
switch (sock_eq->attr.wait_obj) {
case FI_WAIT_NONE:
case FI_WAIT_UNSPEC:
case FI_WAIT_FD:
sock_eq->signal = 0;
break;
case FI_WAIT_MUTEX_COND:
wait_attr.flags = 0;
wait_attr.wait_obj = FI_WAIT_MUTEX_COND;
/* FIXME: waitset is a domain object, but not EQ. This needs to be
updated based on #394 */
ret = sock_wait_open(NULL, &wait_attr, &sock_eq->waitset);
if (ret)
goto err2;
sock_eq->signal = 1;
break;
case FI_WAIT_SET:
sock_eq->waitset = attr->wait_set;
sock_eq->signal = 1;
break;
default:
break;
}
return 0;
err2:

Просмотреть файл

@ -37,6 +37,8 @@
#include <stdlib.h>
#include <string.h>
#include "prov.h"
#include "sock.h"
#include "sock_util.h"
@ -93,8 +95,9 @@ int sock_verify_info(struct fi_info *hints)
return -FI_ENODATA;
}
if (!sock_rdm_verify_ep_attr(hints->ep_attr,
hints->tx_attr, hints->rx_attr))
if (!sock_rdm_verify_ep_attr(hints->ep_attr, hints->tx_attr, hints->rx_attr) ||
!sock_dgram_verify_ep_attr(hints->ep_attr, hints->tx_attr, hints->rx_attr) ||
!sock_msg_verify_ep_attr(hints->ep_attr, hints->tx_attr, hints->rx_attr))
return 0;
ret = sock_verify_domain_attr(hints->domain_attr);
@ -111,7 +114,7 @@ int sock_verify_info(struct fi_info *hints)
static struct fi_ops_fabric sock_fab_ops = {
.size = sizeof(struct fi_ops_fabric),
.domain = sock_domain,
.passive_ep = sock_passive_ep,
.passive_ep = sock_msg_passive_ep,
.eq_open = sock_eq_open,
};
@ -128,28 +131,12 @@ static int sock_fabric_close(fid_t fid)
return 0;
}
int sock_fabric_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
{
return -FI_ENOSYS;
}
int sock_fabric_control(struct fid *fid, int command, void *arg)
{
return -FI_ENOSYS;
}
int sock_fabric_ops_open(struct fid *fid, const char *name,
uint64_t flags, void **ops, void *context)
{
return -FI_ENOSYS;
}
static struct fi_ops sock_fab_fi_ops = {
.size = sizeof(struct fi_ops),
.close = sock_fabric_close,
.bind = sock_fabric_bind,
.control = sock_fabric_control,
.ops_open = sock_fabric_ops_open,
.bind = fi_no_bind,
.control = fi_no_control,
.ops_open = fi_no_ops_open,
};
static int sock_fabric(struct fi_fabric_attr *attr,
@ -179,8 +166,6 @@ static int sock_getinfo(uint32_t version, const char *node, const char *service,
int ret;
struct fi_info *_info, *tmp;
return -FI_ENODATA;
ret = sock_verify_info(hints);
if (ret)
return ret;
@ -193,6 +178,10 @@ static int sock_getinfo(uint32_t version, const char *node, const char *service,
case FI_EP_DGRAM:
return sock_dgram_getinfo(version, node, service, flags,
hints, info);
case FI_EP_MSG:
return sock_msg_getinfo(version, node, service, flags,
hints, info);
default:
break;
}
@ -213,6 +202,18 @@ static int sock_getinfo(uint32_t version, const char *node, const char *service,
ret = sock_dgram_getinfo(version, node, service, flags,
hints, &_info);
if (ret == 0) {
*info = tmp = _info;
while(tmp->next != NULL)
tmp=tmp->next;
} else if (ret == -FI_ENODATA) {
tmp = NULL;
} else
return ret;
ret = sock_msg_getinfo(version, node, service, flags,
hints, &_info);
if (NULL != tmp) {
tmp->next = _info;
return ret;
@ -222,25 +223,28 @@ static int sock_getinfo(uint32_t version, const char *node, const char *service,
return ret;
}
static void fi_sockets_fini(void)
{
}
struct fi_provider sock_prov = {
.name = "IP",
.version = FI_VERSION(SOCK_MAJOR_VERSION, SOCK_MINOR_VERSION),
.fi_version = FI_VERSION(FI_MAJOR_VERSION, FI_MINOR_VERSION),
.getinfo = sock_getinfo,
.fabric = sock_fabric,
.cleanup = fi_sockets_fini
};
static void __attribute__((constructor)) sock_ini(void)
SOCKETS_INI
{
char *tmp = getenv("SFI_SOCK_DEBUG_LEVEL");
char *tmp = getenv("OFI_SOCK_LOG_LEVEL");
if (tmp) {
sock_log_level = atoi(tmp);
} else {
sock_log_level = SOCK_ERROR;
}
(void) fi_register(&sock_prov);
}
static void __attribute__((destructor)) sock_fini(void)
{
return (&sock_prov);
}

Просмотреть файл

@ -0,0 +1,634 @@
/*
* Copyright (c) 2014 Intel Corporation, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <poll.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <limits.h>
#include "sock.h"
#include "sock_util.h"
/* Post an untagged receive described by msg onto the endpoint's RX
 * context. Builds a sock_rx_entry from the iov list and queues it on
 * rx_entry_list under the context lock; matching/progress happens
 * elsewhere. Returns 0, -FI_EINVAL for an unknown fid class, or
 * -FI_ENOMEM if no rx entry could be allocated. */
static ssize_t sock_ep_recvmsg(struct fid_ep *ep, const struct fi_msg *msg,
				uint64_t flags)
{
	int i;
	struct sock_rx_ctx *rx_ctx;
	struct sock_rx_entry *rx_entry;
	struct sock_ep *sock_ep;

	/* Resolve the RX context from whichever fid class was passed in:
	 * a full endpoint, or a (shared) receive context. */
	switch (ep->fid.fclass) {
	case FI_CLASS_EP:
		sock_ep = container_of(ep, struct sock_ep, ep);
		rx_ctx = sock_ep->rx_ctx;
		break;
	case FI_CLASS_RX_CTX:
	case FI_CLASS_SRX_CTX:
		rx_ctx = container_of(ep, struct sock_rx_ctx, ctx);
		break;
	default:
		SOCK_LOG_ERROR("Invalid ep type\n");
		return -FI_EINVAL;
	}

	assert(rx_ctx->enabled && msg->iov_count <= SOCK_EP_MAX_IOV_LIMIT);

	rx_entry = sock_rx_new_entry(rx_ctx);
	if (!rx_entry)
		return -FI_ENOMEM;

	/* Fold the context's default op flags into the per-call flags. */
	flags |= rx_ctx->attr.op_flags;
	rx_entry->rx_op.op = SOCK_OP_RECV;
	rx_entry->rx_op.dest_iov_len = msg->iov_count;

	rx_entry->flags = flags;
	rx_entry->context = (uint64_t)msg->context;
	rx_entry->addr = msg->addr;
	rx_entry->data = msg->data;
	/* Untagged receive: ignore mask set so any tag matches. */
	rx_entry->ignore = 0xFFFFFFFF;

	for (i=0; i< msg->iov_count; i++) {
		rx_entry->iov[i].iov.addr = (uint64_t)msg->msg_iov[i].iov_base;
		rx_entry->iov[i].iov.len = (uint64_t)msg->msg_iov[i].iov_len;
		rx_entry->total_len += rx_entry->iov[i].iov.len;
	}

	fastlock_acquire(&rx_ctx->lock);
	SOCK_LOG_INFO("New rx_entry: %p (ctx: %p)\n", rx_entry, rx_ctx);
	dlist_insert_tail(&rx_entry->entry, &rx_ctx->rx_entry_list);
	fastlock_release(&rx_ctx->lock);
	return 0;
}
/* Single-buffer receive: wrap (buf, len) into an fi_msg and forward
 * to sock_ep_recvmsg().
 * Fix: zero-initialize the fi_msg — sock_ep_recvmsg() copies msg->data
 * into the rx entry, and the original left that field (and .data on
 * this path generally) as uninitialized stack garbage. */
static ssize_t sock_ep_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
			    fi_addr_t src_addr, void *context)
{
	struct fi_msg msg;
	struct iovec msg_iov;

	memset(&msg, 0, sizeof(msg));
	msg_iov.iov_base = buf;
	msg_iov.iov_len = len;

	msg.msg_iov = &msg_iov;
	msg.desc = &desc;
	msg.iov_count = 1;
	msg.addr = src_addr;
	msg.context = context;
	return sock_ep_recvmsg(ep, &msg, 0);
}
/* Vectored receive: wrap the caller's iov list into an fi_msg and
 * forward to sock_ep_recvmsg().
 * Fix: zero-initialize the fi_msg — sock_ep_recvmsg() reads msg->data,
 * which the original left uninitialized. */
static ssize_t sock_ep_recvv(struct fid_ep *ep, const struct iovec *iov,
			     void **desc, size_t count, fi_addr_t src_addr,
			     void *context)
{
	struct fi_msg msg;

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = iov;
	msg.desc = desc;
	msg.iov_count = count;
	msg.addr = src_addr;
	msg.context = context;
	return sock_ep_recvmsg(ep, &msg, 0);
}
/* Queue an untagged send on the endpoint's TX context ring buffer.
 * The message is serialized as: sock_op header, flags, context, addr,
 * connection pointer, first iov base, endpoint pointer, optional CQ
 * data, then either inlined payload (FI_INJECT) or the iov descriptors.
 * The write order here must match the reader in the progress engine.
 * Returns 0, -FI_EINVAL for an unknown fid class, or -FI_EAGAIN when
 * the ring lacks space for the whole message. */
static ssize_t sock_ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
				uint64_t flags)
{
	int ret, i;
	uint64_t total_len;
	struct sock_op tx_op;
	union sock_iov tx_iov;
	struct sock_conn *conn;
	struct sock_tx_ctx *tx_ctx;
	struct sock_ep *sock_ep;

	/* Resolve the TX context and owning endpoint from the fid class. */
	switch (ep->fid.fclass) {
	case FI_CLASS_EP:
		sock_ep = container_of(ep, struct sock_ep, ep);
		tx_ctx = sock_ep->tx_ctx;
		break;
	case FI_CLASS_TX_CTX:
		tx_ctx = container_of(ep, struct sock_tx_ctx, ctx);
		sock_ep = tx_ctx->ep;
		break;
	default:
		SOCK_LOG_ERROR("Invalid EP type\n");
		return -FI_EINVAL;
	}

	assert(tx_ctx->enabled && msg->iov_count <= SOCK_EP_MAX_IOV_LIMIT);

	conn = sock_av_lookup_addr(tx_ctx->av, msg->addr);
	assert(conn);

	SOCK_LOG_INFO("New sendmsg on TX: %p using conn: %p\n",
		      tx_ctx, conn);

	/* Fold the context's default op flags into the per-call flags. */
	flags |= tx_ctx->attr.op_flags;
	memset(&tx_op, 0, sizeof(struct sock_op));
	tx_op.op = SOCK_OP_SEND;

	/* Compute the total ring-buffer footprint of this message. */
	total_len = 0;
	if (flags & FI_INJECT) {
		/* Inlined payload: src_iov_len carries the byte count. */
		for (i=0; i< msg->iov_count; i++) {
			total_len += msg->msg_iov[i].iov_len;
		}
		assert(total_len <= SOCK_EP_MAX_INJECT_SZ);
		tx_op.src_iov_len = total_len;
	} else {
		/* Descriptor form: src_iov_len carries the iov count. */
		tx_op.src_iov_len = msg->iov_count;
		total_len = msg->iov_count * sizeof(union sock_iov);
	}

	total_len += sizeof(struct sock_op_send);

	if (flags & FI_REMOTE_CQ_DATA)
		total_len += sizeof(uint64_t);

	sock_tx_ctx_start(tx_ctx);
	if (rbfdavail(&tx_ctx->rbfd) < total_len) {
		ret = -FI_EAGAIN;
		goto err;
	}

	/* Serialize the header; order must match the progress engine. */
	sock_tx_ctx_write(tx_ctx, &tx_op, sizeof(struct sock_op));
	sock_tx_ctx_write(tx_ctx, &flags, sizeof(uint64_t));
	sock_tx_ctx_write(tx_ctx, &msg->context, sizeof(uint64_t));
	sock_tx_ctx_write(tx_ctx, &msg->addr, sizeof(uint64_t));
	sock_tx_ctx_write(tx_ctx, &conn, sizeof(uint64_t));
	sock_tx_ctx_write(tx_ctx, &msg->msg_iov[0].iov_base, sizeof(uint64_t));
	sock_tx_ctx_write(tx_ctx, &sock_ep, sizeof(uint64_t));

	if (flags & FI_REMOTE_CQ_DATA) {
		sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(uint64_t));
	}

	if (flags & FI_INJECT) {
		for (i=0; i< msg->iov_count; i++) {
			sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].iov_base,
					  msg->msg_iov[i].iov_len);
		}
	} else {
		for (i=0; i< msg->iov_count; i++) {
			tx_iov.iov.addr = (uint64_t)msg->msg_iov[i].iov_base;
			tx_iov.iov.len = msg->msg_iov[i].iov_len;
			sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
		}
	}

	sock_tx_ctx_commit(tx_ctx);
	return 0;

err:
	sock_tx_ctx_abort(tx_ctx);
	return ret;
}
/* Single-buffer send: wrap (buf, len) into an fi_msg and forward to
 * sock_ep_sendmsg().
 * Fix: zero-initialize the fi_msg — sendmsg ORs in the context's
 * op_flags and reads msg->data under FI_REMOTE_CQ_DATA, so the
 * previously-uninitialized .data field could be written to the wire. */
static ssize_t sock_ep_send(struct fid_ep *ep, const void *buf, size_t len,
			    void *desc, fi_addr_t dest_addr, void *context)
{
	struct fi_msg msg;
	struct iovec msg_iov;

	memset(&msg, 0, sizeof(msg));
	msg_iov.iov_base = (void*)buf;
	msg_iov.iov_len = len;

	msg.msg_iov = &msg_iov;
	msg.desc = &desc;
	msg.iov_count = 1;
	msg.addr = dest_addr;
	msg.context = context;

	return sock_ep_sendmsg(ep, &msg, 0);
}
/* Vectored send: wrap the caller's iov list into an fi_msg and forward
 * to sock_ep_sendmsg().
 * Fix: zero-initialize the fi_msg so msg->data is defined if
 * FI_REMOTE_CQ_DATA arrives via the context's op_flags. */
static ssize_t sock_ep_sendv(struct fid_ep *ep, const struct iovec *iov,
			     void **desc, size_t count, fi_addr_t dest_addr,
			     void *context)
{
	struct fi_msg msg;

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = iov;
	msg.desc = desc;
	msg.iov_count = count;
	msg.addr = dest_addr;
	msg.context = context;
	return sock_ep_sendmsg(ep, &msg, 0);
}
/* Send with remote CQ data: wrap the buffer plus immediate data and
 * forward with FI_REMOTE_CQ_DATA.
 * Fixes: (1) pass &desc for msg.desc — every sibling wrapper
 * (sock_ep_send, sock_ep_recv, ...) passes the address of the single
 * descriptor, and msg.desc is an array-of-descriptors pointer; the
 * original passed desc itself. (2) zero-initialize the fi_msg. */
static ssize_t sock_ep_senddata(struct fid_ep *ep, const void *buf, size_t len,
				void *desc, uint64_t data, fi_addr_t dest_addr,
				void *context)
{
	struct fi_msg msg;
	struct iovec msg_iov;

	memset(&msg, 0, sizeof(msg));
	msg_iov.iov_base = (void*)buf;
	msg_iov.iov_len = len;

	msg.msg_iov = &msg_iov;
	msg.desc = &desc;
	msg.iov_count = 1;
	msg.addr = dest_addr;
	msg.context = context;
	msg.data = data;

	return sock_ep_sendmsg(ep, &msg, FI_REMOTE_CQ_DATA);
}
/* Inject (buffered, completion-less) send of a small buffer.
 * Fix: zero-initialize the fi_msg — the original left .desc, .context
 * and .data uninitialized, and sock_ep_sendmsg() unconditionally
 * writes msg->context into the TX ring, so stack garbage was queued
 * as the operation context. */
static ssize_t sock_ep_inject(struct fid_ep *ep, const void *buf, size_t len,
			      fi_addr_t dest_addr)
{
	struct fi_msg msg;
	struct iovec msg_iov;

	memset(&msg, 0, sizeof(msg));
	msg_iov.iov_base = (void*)buf;
	msg_iov.iov_len = len;

	msg.msg_iov = &msg_iov;
	msg.iov_count = 1;
	msg.addr = dest_addr;

	return sock_ep_sendmsg(ep, &msg, FI_INJECT);
}
/* Untagged message operation table shared by sockets-provider endpoints. */
struct fi_ops_msg sock_ep_msg_ops = {
	.size = sizeof(struct fi_ops_msg),
	.recv = sock_ep_recv,
	.recvv = sock_ep_recvv,
	.recvmsg = sock_ep_recvmsg,
	.send = sock_ep_send,
	.sendv = sock_ep_sendv,
	.sendmsg = sock_ep_sendmsg,
	.inject = sock_ep_inject,
	.senddata = sock_ep_senddata,
};
/* Post a tagged receive described by msg onto the endpoint's RX
 * context. Same shape as sock_ep_recvmsg(), but records the caller's
 * tag and ignore mask for tag matching. Returns 0, -FI_EINVAL for an
 * unknown fid class, or -FI_ENOMEM if no rx entry could be allocated. */
static ssize_t sock_ep_trecvmsg(struct fid_ep *ep,
				const struct fi_msg_tagged *msg, uint64_t flags)
{
	int i;
	struct sock_rx_ctx *rx_ctx;
	struct sock_rx_entry *rx_entry;
	struct sock_ep *sock_ep;

	/* Resolve the RX context from whichever fid class was passed in. */
	switch (ep->fid.fclass) {
	case FI_CLASS_EP:
		sock_ep = container_of(ep, struct sock_ep, ep);
		rx_ctx = sock_ep->rx_ctx;
		break;
	case FI_CLASS_RX_CTX:
	case FI_CLASS_SRX_CTX:
		rx_ctx = container_of(ep, struct sock_rx_ctx, ctx);
		break;
	default:
		SOCK_LOG_ERROR("Invalid ep type\n");
		return -FI_EINVAL;
	}

	assert(rx_ctx->enabled && msg->iov_count <= SOCK_EP_MAX_IOV_LIMIT);

	rx_entry = sock_rx_new_entry(rx_ctx);
	if (!rx_entry)
		return -FI_ENOMEM;

	/* Fold the context's default op flags into the per-call flags. */
	flags |= rx_ctx->attr.op_flags;
	rx_entry->rx_op.op = SOCK_OP_TRECV;
	rx_entry->rx_op.dest_iov_len = msg->iov_count;

	rx_entry->flags = flags;
	rx_entry->context = (uint64_t)msg->context;
	rx_entry->addr = msg->addr;
	rx_entry->data = msg->data;
	/* Tagged receive: keep the caller's tag/ignore pair for matching. */
	rx_entry->tag = msg->tag;
	rx_entry->ignore = msg->ignore;

	for (i=0; i< msg->iov_count; i++) {
		rx_entry->iov[i].iov.addr = (uint64_t)msg->msg_iov[i].iov_base;
		rx_entry->iov[i].iov.len = (uint64_t)msg->msg_iov[i].iov_len;
		rx_entry->total_len += rx_entry->iov[i].iov.len;
	}

	fastlock_acquire(&rx_ctx->lock);
	dlist_insert_tail(&rx_entry->entry, &rx_ctx->rx_entry_list);
	fastlock_release(&rx_ctx->lock);
	return 0;
}
/* Single-buffer tagged receive: wrap (buf, len, tag, ignore) into an
 * fi_msg_tagged and forward to sock_ep_trecvmsg().
 * Fix: zero-initialize the message — trecvmsg reads msg->data, which
 * the original left uninitialized. */
static ssize_t sock_ep_trecv(struct fid_ep *ep, void *buf, size_t len, void *desc,
			     fi_addr_t src_addr, uint64_t tag, uint64_t ignore, void *context)
{
	struct fi_msg_tagged msg;
	struct iovec msg_iov;

	memset(&msg, 0, sizeof(msg));
	msg_iov.iov_base = buf;
	msg_iov.iov_len = len;

	msg.msg_iov = &msg_iov;
	msg.desc = &desc;
	msg.iov_count = 1;
	msg.addr = src_addr;
	msg.context = context;
	msg.tag = tag;
	msg.ignore = ignore;
	return sock_ep_trecvmsg(ep, &msg, 0);
}
/* Vectored tagged receive: wrap the iov list into an fi_msg_tagged and
 * forward to sock_ep_trecvmsg().
 * Fix: zero-initialize the message so msg->data is defined. */
static ssize_t sock_ep_trecvv(struct fid_ep *ep, const struct iovec *iov,
			      void **desc, size_t count, fi_addr_t src_addr,
			      uint64_t tag, uint64_t ignore, void *context)
{
	struct fi_msg_tagged msg;

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = iov;
	msg.desc = desc;
	msg.iov_count = count;
	msg.addr = src_addr;
	msg.context = context;
	msg.tag = tag;
	msg.ignore = ignore;
	return sock_ep_trecvmsg(ep, &msg, 0);
}
/* Queue a tagged send on the endpoint's TX context ring buffer.
 * Serialization mirrors sock_ep_sendmsg() with the tag appended after
 * the optional CQ data; the write order must match the progress-engine
 * reader. Returns 0, -FI_EINVAL for an unknown fid class, or
 * -FI_EAGAIN when the ring lacks space.
 * NOTE(review): unlike sock_ep_sendmsg(), the space check here is done
 * before op_flags are folded in, and tx_op.src_iov_len is always the
 * iov count even for FI_INJECT — confirm against the progress engine. */
static ssize_t sock_ep_tsendmsg(struct fid_ep *ep,
				const struct fi_msg_tagged *msg, uint64_t flags)
{
	int ret, i;
	uint64_t total_len;
	struct sock_op tx_op;
	union sock_iov tx_iov;
	struct sock_conn *conn;
	struct sock_tx_ctx *tx_ctx;
	struct sock_ep *sock_ep;

	/* Resolve the TX context and owning endpoint from the fid class. */
	switch (ep->fid.fclass) {
	case FI_CLASS_EP:
		sock_ep = container_of(ep, struct sock_ep, ep);
		tx_ctx = sock_ep->tx_ctx;
		break;
	case FI_CLASS_TX_CTX:
		tx_ctx = container_of(ep, struct sock_tx_ctx, ctx);
		sock_ep = tx_ctx->ep;
		break;
	default:
		SOCK_LOG_ERROR("Invalid EP type\n");
		return -FI_EINVAL;
	}

	assert(tx_ctx->enabled && msg->iov_count <= SOCK_EP_MAX_IOV_LIMIT);
	conn = sock_av_lookup_addr(tx_ctx->av, msg->addr);
	assert(conn);

	/* Compute the total ring-buffer footprint of this message. */
	total_len = 0;
	if (flags & FI_INJECT) {
		for (i=0; i< msg->iov_count; i++) {
			total_len += msg->msg_iov[i].iov_len;
		}
		assert(total_len <= SOCK_EP_MAX_INJECT_SZ);
	} else {
		total_len = msg->iov_count * sizeof(union sock_iov);
	}

	total_len += sizeof(struct sock_op_tsend);
	if (flags & FI_REMOTE_CQ_DATA)
		total_len += sizeof(uint64_t);

	sock_tx_ctx_start(tx_ctx);
	if (rbfdavail(&tx_ctx->rbfd) < total_len) {
		ret = -FI_EAGAIN;
		goto err;
	}

	/* Fold the context's default op flags into the per-call flags. */
	flags |= tx_ctx->attr.op_flags;
	memset(&tx_op, 0, sizeof(struct sock_op));
	tx_op.op = SOCK_OP_TSEND;
	tx_op.src_iov_len = msg->iov_count;

	/* Serialize the header; order must match the progress engine. */
	sock_tx_ctx_write(tx_ctx, &tx_op, sizeof(struct sock_op));
	sock_tx_ctx_write(tx_ctx, &flags, sizeof(uint64_t));
	sock_tx_ctx_write(tx_ctx, &msg->context, sizeof(uint64_t));
	sock_tx_ctx_write(tx_ctx, &msg->addr, sizeof(uint64_t));
	sock_tx_ctx_write(tx_ctx, &conn, sizeof(uint64_t));
	sock_tx_ctx_write(tx_ctx, &msg->msg_iov[0].iov_base, sizeof(uint64_t));
	sock_tx_ctx_write(tx_ctx, &sock_ep, sizeof(uint64_t));

	if (flags & FI_REMOTE_CQ_DATA) {
		sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(uint64_t));
	}

	sock_tx_ctx_write(tx_ctx, &msg->tag, sizeof(uint64_t));

	if (flags & FI_INJECT) {
		for (i=0; i< msg->iov_count; i++) {
			sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].iov_base,
					  msg->msg_iov[i].iov_len);
		}
	} else {
		for (i=0; i< msg->iov_count; i++) {
			tx_iov.iov.addr = (uint64_t)msg->msg_iov[i].iov_base;
			tx_iov.iov.len = msg->msg_iov[i].iov_len;
			sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
		}
	}

	sock_tx_ctx_commit(tx_ctx);
	return 0;

err:
	sock_tx_ctx_abort(tx_ctx);
	return ret;
}
/* Single-buffer tagged send: wrap (buf, len, tag) into an
 * fi_msg_tagged and forward to sock_ep_tsendmsg().
 * Fix: zero-initialize the message — tsendmsg reads msg->data under
 * FI_REMOTE_CQ_DATA (which the context's op_flags may add), and the
 * original left .data/.ignore uninitialized. */
static ssize_t sock_ep_tsend(struct fid_ep *ep, const void *buf, size_t len,
			     void *desc, fi_addr_t dest_addr, uint64_t tag, void *context)
{
	struct fi_msg_tagged msg;
	struct iovec msg_iov;

	memset(&msg, 0, sizeof(msg));
	msg_iov.iov_base = (void*)buf;
	msg_iov.iov_len = len;

	msg.msg_iov = &msg_iov;
	msg.desc = &desc;
	msg.iov_count = 1;
	msg.addr = dest_addr;
	msg.context = context;
	msg.tag = tag;
	return sock_ep_tsendmsg(ep, &msg, 0);
}
/* Vectored tagged send: wrap the iov list into an fi_msg_tagged and
 * forward to sock_ep_tsendmsg().
 * Fix: zero-initialize the message so msg->data is defined. */
static ssize_t sock_ep_tsendv(struct fid_ep *ep, const struct iovec *iov,
			      void **desc, size_t count, fi_addr_t dest_addr,
			      uint64_t tag, void *context)
{
	struct fi_msg_tagged msg;

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = iov;
	msg.desc = desc;
	msg.iov_count = count;
	msg.addr = dest_addr;
	msg.context = context;
	msg.tag = tag;
	return sock_ep_tsendmsg(ep, &msg, 0);
}
/* Tagged send with remote CQ data: wrap buffer, tag and immediate data
 * and forward with FI_REMOTE_CQ_DATA.
 * Fixes: (1) pass &desc for msg.desc — every other single-buffer
 * wrapper passes the address of its descriptor; the original passed
 * desc itself. (2) zero-initialize the fi_msg_tagged. */
static ssize_t sock_ep_tsenddata(struct fid_ep *ep, const void *buf, size_t len,
				 void *desc, uint64_t data, fi_addr_t dest_addr, uint64_t tag,
				 void *context)
{
	struct fi_msg_tagged msg;
	struct iovec msg_iov;

	memset(&msg, 0, sizeof(msg));
	msg_iov.iov_base = (void*)buf;
	msg_iov.iov_len = len;

	msg.msg_iov = &msg_iov;
	msg.desc = &desc;
	msg.iov_count = 1;
	msg.addr = dest_addr;
	msg.context = context;
	msg.data = data;
	msg.tag = tag;

	return sock_ep_tsendmsg(ep, &msg, FI_REMOTE_CQ_DATA);
}
/* Tagged inject (buffered, completion-less) send of a small buffer.
 * Fix: zero-initialize the fi_msg_tagged — the original left .desc,
 * .context, .data and .ignore uninitialized, and tsendmsg writes
 * msg->context into the TX ring unconditionally. */
static ssize_t sock_ep_tinject(struct fid_ep *ep, const void *buf, size_t len,
			       fi_addr_t dest_addr, uint64_t tag)
{
	struct fi_msg_tagged msg;
	struct iovec msg_iov;

	memset(&msg, 0, sizeof(msg));
	msg_iov.iov_base = (void*)buf;
	msg_iov.iov_len = len;

	msg.msg_iov = &msg_iov;
	msg.iov_count = 1;
	msg.addr = dest_addr;
	msg.tag = tag;

	return sock_ep_tsendmsg(ep, &msg, FI_INJECT);
}
/*
 * Search the buffered (unexpected) receive queue for a message matching
 * *tag.  On a hit, reports the sender, actual tag and length back through
 * the out parameters and optionally claims the entry (FI_CLAIM).
 *
 * Bug fix: the loop walks rx_buffered_list, but the no-match test
 * compared `entry` against rx_entry_list — a different list head — so a
 * miss returned an UNINITIALIZED `ret`.  `ret` now starts at -FI_ENOENT
 * and is only overwritten on a successful match.
 *
 * Returns 1 on match, -FI_ENOENT if nothing matches, -FI_EINVAL for an
 * unsupported fid class.
 */
static ssize_t sock_ep_tsearch(struct fid_ep *ep, uint64_t *tag, uint64_t ignore,
		uint64_t flags, fi_addr_t *src_addr, size_t *len,
		void *context)
{
	ssize_t ret = -FI_ENOENT;
	struct dlist_entry *entry;
	struct sock_rx_ctx *rx_ctx;
	struct sock_rx_entry *rx_entry;
	struct sock_ep *sock_ep;

	/* Resolve the RX context from either a plain EP or a shared/RX ctx. */
	switch (ep->fid.fclass) {
	case FI_CLASS_EP:
		sock_ep = container_of(ep, struct sock_ep, ep);
		rx_ctx = sock_ep->rx_ctx;
		break;
	case FI_CLASS_RX_CTX:
	case FI_CLASS_SRX_CTX:
		rx_ctx = container_of(ep, struct sock_rx_ctx, ctx);
		break;
	default:
		SOCK_LOG_ERROR("Invalid ep type\n");
		return -FI_EINVAL;
	}

	fastlock_acquire(&rx_ctx->lock);
	for (entry = rx_ctx->rx_buffered_list.next;
	     entry != &rx_ctx->rx_buffered_list; entry = entry->next) {
		rx_entry = container_of(entry, struct sock_rx_entry, entry);
		if (rx_entry->is_busy || rx_entry->is_claimed)
			continue;

		/* Match the tag through the entry's ignore mask, and the
		 * source address unless the entry accepts any sender. */
		if (((rx_entry->tag & ~rx_entry->ignore) ==
		     (*tag & ~rx_entry->ignore)) &&
		    (rx_entry->addr == FI_ADDR_UNSPEC ||
		     rx_entry->addr == *src_addr)) {
			if (flags & FI_CLAIM)
				rx_entry->is_claimed = 1;
			*tag = rx_entry->tag;
			*src_addr = rx_entry->addr;
			*len = rx_entry->used;
			ret = 1;
			break;
		}
	}
	fastlock_release(&rx_ctx->lock);
	return ret;
}
/* Tagged-message operations table for the sockets provider; installed
 * on endpoints that enable FI_TAGGED.  All entries funnel through
 * sock_ep_tsendmsg()/sock_ep_trecvmsg(). */
struct fi_ops_tagged sock_ep_tagged = {
.size = sizeof(struct fi_ops_tagged),
.recv = sock_ep_trecv,
.recvv = sock_ep_trecvv,
.recvmsg = sock_ep_trecvmsg,
.send = sock_ep_tsend,
.sendv = sock_ep_tsendv,
.sendmsg = sock_ep_tsendmsg,
.inject = sock_ep_tinject,
.senddata = sock_ep_tsenddata,
.search = sock_ep_tsearch,
};

Просмотреть файл

@ -38,26 +38,169 @@
#include <string.h>
#include "sock.h"
#include "sock_util.h"
//static struct fi_ops sock_wait_fi_ops = {
// .size = sizeof(struct fi_ops),
// .close = sock_wait_close,
//};
//
//static struct fi_ops sock_poll_fi_ops = {
// .size = sizeof(struct fi_ops),
// .close = sock_poll_close,
//};
int sock_wait_open(struct fid_domain *domain, struct fi_wait_attr *attr,
struct fid_wait **waitset)
int sock_poll_add(struct fid_poll *pollset, struct fid *event_fid,
uint64_t flags)
{
return -FI_ENOSYS; /* TODO */
struct sock_poll *poll;
struct sock_fid_list *list_item;
poll = container_of(pollset, struct sock_poll, poll_fid.fid);
list_item = calloc(1, sizeof(*list_item));
if (!list_item)
return -FI_ENOMEM;
list_item->fid = event_fid;
dlist_init(&list_item->entry);
dlist_insert_after(&list_item->entry, &poll->fid_list);
return 0;
}
/*
 * Remove a fid from a poll set.  Only the first matching node is
 * unlinked and freed; unknown fids are ignored.  Always returns 0.
 */
int sock_poll_del(struct fid_poll *pollset, struct fid *event_fid,
		uint64_t flags)
{
	struct sock_poll *poll;
	struct dlist_entry *cur, *head;

	poll = container_of(pollset, struct sock_poll, poll_fid.fid);
	head = &poll->fid_list;
	for (cur = head->next; cur != head; cur = cur->next) {
		struct sock_fid_list *item =
			container_of(cur, struct sock_fid_list, entry);
		if (item->fid != event_fid)
			continue;
		dlist_remove(cur);
		free(item);
		break;
	}
	return 0;
}
/* Poll every fid attached to the set and report up to `count` ready
 * ones by storing their user contexts into `context[]`.  For manual
 * progress domains the relevant progress engine is driven first.
 * Returns the number of ready fids (may be 0). */
static int sock_poll_poll(struct fid_poll *pollset, void **context, int count)
{
struct sock_poll *poll;
struct sock_cq *cq;
struct sock_eq *eq;
struct sock_cntr *cntr;
struct sock_fid_list *list_item;
struct dlist_entry *p, *head;
int ret_count = 0;
poll = container_of(pollset, struct sock_poll, poll_fid.fid);
head = &poll->fid_list;
for (p = head->next; p != head && ret_count < count; p = p->next) {
list_item = container_of(p, struct sock_fid_list, entry);
switch (list_item->fid->fclass) {
case FI_CLASS_CQ:
cq = container_of(list_item->fid, struct sock_cq, cq_fid);
if (cq->domain->progress_mode == FI_PROGRESS_MANUAL)
sock_cq_progress(cq);
/* A CQ is ready when its ring buffer holds unread entries. */
fastlock_acquire(&cq->lock);
if (rbfdused(&cq->cq_rbfd)) {
*context++ = cq->cq_fid.fid.context;
ret_count++;
}
fastlock_release(&cq->lock);
break;
case FI_CLASS_CNTR:
cntr = container_of(list_item->fid, struct sock_cntr, cntr_fid);
if (cntr->domain->progress_mode == FI_PROGRESS_MANUAL)
sock_cntr_progress(cntr);
/* A counter is ready once its value reaches the threshold. */
fastlock_acquire(&cntr->mut);
if (atomic_get(&cntr->value) >= atomic_get(&cntr->threshold)) {
*context++ = cntr->cntr_fid.fid.context;
ret_count++;
}
fastlock_release(&cntr->mut);
break;
case FI_CLASS_EQ:
/* An EQ is ready when its event list is non-empty. */
eq = container_of(list_item->fid, struct sock_eq, eq);
fastlock_acquire(&eq->lock);
if (!dlistfd_empty(&eq->list)) {
*context++ = eq->eq.fid.context;
ret_count++;
}
fastlock_release(&eq->lock);
break;
default:
/* Other fid classes cannot be polled; skip silently. */
break;
}
}
return ret_count;
}
/*
 * Destroy a poll set: drain and free every tracking node, drop the
 * domain reference, then release the poll object itself.
 */
static int sock_poll_close(fid_t fid)
{
	struct sock_poll *poll;
	struct dlist_entry *head, *cur;

	poll = container_of(fid, struct sock_poll, poll_fid.fid);
	head = &poll->fid_list;
	while (!dlist_empty(head)) {
		struct sock_fid_list *item;

		cur = head->next;
		item = container_of(cur, struct sock_fid_list, entry);
		dlist_remove(cur);
		free(item);
	}
	atomic_dec(&poll->domain->ref);
	free(poll);
	return 0;
}
/* Base fid operations for a poll set: only close is supported. */
static struct fi_ops sock_poll_fi_ops = {
.size = sizeof(struct fi_ops),
.close = sock_poll_close,
.bind = fi_no_bind,
.control = fi_no_control,
.ops_open = fi_no_ops_open,
};
/* Poll-set operations table installed on every sock_poll instance. */
static struct fi_ops_poll sock_poll_ops = {
.size = sizeof(struct fi_ops_poll),
.poll = sock_poll_poll,
.poll_add = sock_poll_add,
.poll_del = sock_poll_del,
};
/*
 * Validate poll-set creation attributes.  No flags are defined for
 * poll sets, so any non-zero flags value is rejected with -FI_ENODATA.
 */
static int sock_poll_verify_attr(struct fi_poll_attr *attr)
{
	return attr->flags ? -FI_ENODATA : 0;
}
/*
 * Create a poll set bound to the given domain.
 *
 * Fix: removed a stray `return -FI_ENOSYS; /* TODO *\/` left over from
 * the old stub — it preceded the declarations and made the entire
 * implementation unreachable.
 *
 * Returns 0 on success, -FI_EINVAL on bad attributes, -FI_ENOMEM on
 * allocation failure.  Takes a reference on the domain that is dropped
 * in sock_poll_close().
 */
int sock_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr,
		struct fid_poll **pollset)
{
	struct sock_domain *dom;
	struct sock_poll *poll;

	if (attr && sock_poll_verify_attr(attr))
		return -FI_EINVAL;

	dom = container_of(domain, struct sock_domain, dom_fid);
	poll = calloc(1, sizeof(*poll));
	if (!poll)
		return -FI_ENOMEM;

	dlist_init(&poll->fid_list);
	poll->poll_fid.fid.fclass = FI_CLASS_POLL;
	poll->poll_fid.fid.context = NULL;
	poll->poll_fid.fid.ops = &sock_poll_fi_ops;
	poll->poll_fid.ops = &sock_poll_ops;
	poll->domain = dom;
	atomic_inc(&dom->ref);
	*pollset = &poll->poll_fid;
	return 0;
}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,456 @@
/*
* Copyright (c) 2014 Intel Corporation, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <poll.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <limits.h>
#include "sock.h"
#include "sock_util.h"
/* Queue an RMA read: serialize a SOCK_OP_READ descriptor plus the remote
 * (source) and local (destination) iov tables into the TX ring buffer.
 * The total bytes described by the remote rma_iov set must equal the
 * bytes described by the local iovec set, or -FI_EINVAL is returned.
 * Returns 0 on success, -FI_EAGAIN if the ring lacks space. */
static ssize_t sock_ep_rma_readmsg(struct fid_ep *ep,
const struct fi_msg_rma *msg,
uint64_t flags)
{
int ret, i;
struct sock_op tx_op;
union sock_iov tx_iov;
struct sock_conn *conn;
struct sock_tx_ctx *tx_ctx;
uint64_t total_len, src_len, dst_len;
struct sock_ep *sock_ep;
/* Resolve the TX context from either a plain EP or a TX-context fid. */
switch (ep->fid.fclass) {
case FI_CLASS_EP:
sock_ep = container_of(ep, struct sock_ep, ep);
tx_ctx = sock_ep->tx_ctx;
break;
case FI_CLASS_TX_CTX:
tx_ctx = container_of(ep, struct sock_tx_ctx, ctx);
sock_ep = tx_ctx->ep;
break;
default:
SOCK_LOG_ERROR("Invalid EP type\n");
return -FI_EINVAL;
}
assert(tx_ctx->enabled &&
msg->iov_count <= SOCK_EP_MAX_IOV_LIMIT &&
msg->rma_iov_count <= SOCK_EP_MAX_IOV_LIMIT);
conn = sock_av_lookup_addr(tx_ctx->av, msg->addr);
assert(conn);
/* Ring-space needed: fixed header + one sock_iov per local and remote
 * segment. */
total_len = sizeof(struct sock_op_send) +
(msg->iov_count * sizeof(union sock_iov)) +
(msg->rma_iov_count * sizeof(union sock_iov));
sock_tx_ctx_start(tx_ctx);
if (rbfdavail(&tx_ctx->rbfd) < total_len) {
ret = -FI_EAGAIN;
goto err;
}
flags |= tx_ctx->attr.op_flags;
memset(&tx_op, 0, sizeof(struct sock_op));
tx_op.op = SOCK_OP_READ;
tx_op.src_iov_len = msg->rma_iov_count;
tx_op.dest_iov_len = msg->iov_count;
/* Fixed-size header: op, flags, context, dest addr, connection,
 * first local buffer address, owning EP — each written as 8 bytes. */
sock_tx_ctx_write(tx_ctx, &tx_op, sizeof(struct sock_op));
sock_tx_ctx_write(tx_ctx, &flags, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->context, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->addr, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &conn, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->msg_iov[0].iov_base, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &sock_ep, sizeof(uint64_t));
if (flags & FI_REMOTE_CQ_DATA) {
sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(uint64_t));
}
/* Remote (read-source) segments. */
src_len = 0;
for (i = 0; i< msg->rma_iov_count; i++) {
tx_iov.iov.addr = msg->rma_iov[i].addr;
tx_iov.iov.key = msg->rma_iov[i].key;
tx_iov.iov.len = msg->rma_iov[i].len;
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
src_len += tx_iov.iov.len;
}
/* Local (read-destination) segments; key carries the local desc. */
dst_len = 0;
for (i = 0; i< msg->iov_count; i++) {
tx_iov.iov.addr = (uint64_t)msg->msg_iov[i].iov_base;
tx_iov.iov.len = msg->msg_iov[i].iov_len;
tx_iov.iov.key = (uint64_t)msg->desc[i];
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
dst_len += tx_iov.iov.len;
}
/* Source and destination must describe the same byte count. */
if (dst_len != src_len) {
SOCK_LOG_ERROR("Buffer length mismatch\n");
ret = -FI_EINVAL;
goto err;
}
sock_tx_ctx_commit(tx_ctx);
return 0;
err:
sock_tx_ctx_abort(tx_ctx);
return ret;
}
/*
 * RMA read of a single contiguous buffer: build one local iovec and one
 * matching remote descriptor of the same length, then hand off to
 * sock_ep_rma_readmsg() with no extra flags.
 */
static ssize_t sock_ep_rma_read(struct fid_ep *ep, void *buf, size_t len,
			void *desc, fi_addr_t src_addr, uint64_t addr,
			uint64_t key, void *context)
{
	struct iovec loc_iov = {
		.iov_base = (void *)buf,
		.iov_len = len,
	};
	struct fi_rma_iov rem_iov = {
		.addr = addr,
		.key = key,
		.len = len,
	};
	struct fi_msg_rma msg;

	msg.msg_iov = &loc_iov;
	msg.desc = &desc;
	msg.iov_count = 1;
	msg.rma_iov = &rem_iov;
	msg.rma_iov_count = 1;
	msg.addr = src_addr;
	msg.context = context;

	return sock_ep_rma_readmsg(ep, &msg, 0);
}
/*
 * RMA vectored read into multiple local buffers from one remote region.
 *
 * Bug fixes:
 *  - rma_iov.len was hard-coded to 1; sock_ep_rma_readmsg() rejects any
 *    request whose remote byte count differs from the local iovec total,
 *    so the remote length must be the sum of the local segment lengths.
 *  - msg.rma_iov_count was never set and was read uninitialized by
 *    sock_ep_rma_readmsg().
 */
static ssize_t sock_ep_rma_readv(struct fid_ep *ep, const struct iovec *iov,
			void **desc, size_t count, fi_addr_t src_addr,
			uint64_t addr, uint64_t key, void *context)
{
	struct fi_msg_rma msg;
	struct fi_rma_iov rma_iov;
	size_t total_len = 0;
	size_t i;

	for (i = 0; i < count; i++)
		total_len += iov[i].iov_len;

	msg.msg_iov = iov;
	msg.desc = desc;
	msg.iov_count = count;

	rma_iov.addr = addr;
	rma_iov.key = key;
	rma_iov.len = total_len;
	msg.rma_iov = &rma_iov;
	msg.rma_iov_count = 1;

	msg.addr = src_addr;
	msg.context = context;

	return sock_ep_rma_readmsg(ep, &msg, 0);
}
static ssize_t sock_ep_rma_writemsg(struct fid_ep *ep,
const struct fi_msg_rma *msg,
uint64_t flags)
{
int ret, i;
struct sock_op tx_op;
union sock_iov tx_iov;
struct sock_conn *conn;
struct sock_tx_ctx *tx_ctx;
uint64_t total_len, src_len, dst_len;
struct sock_ep *sock_ep;
switch (ep->fid.fclass) {
case FI_CLASS_EP:
sock_ep = container_of(ep, struct sock_ep, ep);
tx_ctx = sock_ep->tx_ctx;
break;
case FI_CLASS_TX_CTX:
tx_ctx = container_of(ep, struct sock_tx_ctx, ctx);
sock_ep = tx_ctx->ep;
break;
default:
SOCK_LOG_ERROR("Invalid EP type\n");
return -FI_EINVAL;
}
assert(tx_ctx->enabled &&
msg->iov_count <= SOCK_EP_MAX_IOV_LIMIT &&
msg->rma_iov_count <= SOCK_EP_MAX_IOV_LIMIT);
conn = sock_av_lookup_addr(tx_ctx->av, msg->addr);
assert(conn);
flags |= tx_ctx->attr.op_flags;
memset(&tx_op, 0, sizeof(struct sock_op));
tx_op.op = SOCK_OP_WRITE;
tx_op.dest_iov_len = msg->rma_iov_count;
total_len = 0;
if (flags & FI_INJECT) {
for (i=0; i< msg->iov_count; i++) {
total_len += msg->msg_iov[i].iov_len;
}
assert(total_len <= SOCK_EP_MAX_INJECT_SZ);
tx_op.src_iov_len = total_len;
} else {
total_len += msg->iov_count * sizeof(union sock_iov);
tx_op.src_iov_len = msg->iov_count;
}
total_len += (sizeof(struct sock_op_send) +
(msg->rma_iov_count * sizeof(union sock_iov)));
sock_tx_ctx_start(tx_ctx);
if (rbfdavail(&tx_ctx->rbfd) < total_len) {
ret = -FI_EAGAIN;
goto err;
}
sock_tx_ctx_write(tx_ctx, &tx_op, sizeof(struct sock_op));
sock_tx_ctx_write(tx_ctx, &flags, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->context, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->addr, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &conn, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &msg->msg_iov[0].iov_base, sizeof(uint64_t));
sock_tx_ctx_write(tx_ctx, &sock_ep, sizeof(uint64_t));
if (flags & FI_REMOTE_CQ_DATA) {
sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(uint64_t));
}
src_len = 0;
if (flags & FI_INJECT) {
for (i=0; i< msg->iov_count; i++) {
sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].iov_base,
msg->msg_iov[i].iov_len);
src_len += tx_iov.iov.len;
}
} else {
for (i = 0; i< msg->iov_count; i++) {
tx_iov.iov.addr = (uint64_t)msg->msg_iov[i].iov_base;
tx_iov.iov.len = msg->msg_iov[i].iov_len;
tx_iov.iov.key = (uint64_t)msg->desc[i];
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
src_len += tx_iov.iov.len;
}
}
dst_len = 0;
for (i = 0; i< msg->rma_iov_count; i++) {
tx_iov.iov.addr = msg->rma_iov[i].addr;
tx_iov.iov.key = msg->rma_iov[i].key;
tx_iov.iov.len = msg->rma_iov[i].len;
sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(union sock_iov));
dst_len += tx_iov.iov.len;
}
if (dst_len != src_len) {
SOCK_LOG_ERROR("Buffer length mismatch\n");
ret = -FI_EINVAL;
goto err;
}
sock_tx_ctx_commit(tx_ctx);
return 0;
err:
sock_tx_ctx_abort(tx_ctx);
return ret;
}
/*
 * RMA write of a single contiguous buffer: build one local iovec and one
 * matching remote descriptor of the same length, then hand off to
 * sock_ep_rma_writemsg() with no extra flags.
 */
static ssize_t sock_ep_rma_write(struct fid_ep *ep, const void *buf,
			size_t len, void *desc, fi_addr_t dest_addr,
			uint64_t addr, uint64_t key, void *context)
{
	struct iovec loc_iov = {
		.iov_base = (void *)buf,
		.iov_len = len,
	};
	struct fi_rma_iov rem_iov = {
		.addr = addr,
		.key = key,
		.len = len,
	};
	struct fi_msg_rma msg;

	msg.msg_iov = &loc_iov;
	msg.desc = &desc;
	msg.iov_count = 1;
	msg.rma_iov = &rem_iov;
	msg.rma_iov_count = 1;
	msg.addr = dest_addr;
	msg.context = context;

	return sock_ep_rma_writemsg(ep, &msg, 0);
}
/*
 * RMA vectored write from multiple local buffers to one remote region.
 *
 * Bug fixes:
 *  - rma_iov.len was hard-coded to 1; sock_ep_rma_writemsg() rejects any
 *    request whose remote byte count differs from the local iovec total,
 *    so the remote length must be the sum of the local segment lengths.
 *  - msg.rma_iov_count was never set and was read uninitialized by
 *    sock_ep_rma_writemsg().
 */
static ssize_t sock_ep_rma_writev(struct fid_ep *ep,
			const struct iovec *iov, void **desc,
			size_t count, fi_addr_t dest_addr, uint64_t addr,
			uint64_t key, void *context)
{
	struct fi_msg_rma msg;
	struct fi_rma_iov rma_iov;
	size_t total_len = 0;
	size_t i;

	for (i = 0; i < count; i++)
		total_len += iov[i].iov_len;

	msg.msg_iov = iov;
	msg.desc = desc;
	msg.iov_count = count;

	rma_iov.addr = addr;
	rma_iov.key = key;
	rma_iov.len = total_len;
	msg.rma_iov = &rma_iov;
	msg.rma_iov_count = 1;

	msg.context = context;
	msg.addr = dest_addr;

	return sock_ep_rma_writemsg(ep, &msg, 0);
}
/*
 * RMA write of a single buffer carrying remote CQ data.
 *
 * Bug fixes:
 *  - rma_iov.len was hard-coded to 1 instead of `len`, which trips the
 *    src/dst length check in sock_ep_rma_writemsg() for any len != 1.
 *  - msg.rma_iov_count was never set and was read uninitialized.
 */
static ssize_t sock_ep_rma_writedata(struct fid_ep *ep, const void *buf,
			size_t len, void *desc, uint64_t data,
			fi_addr_t dest_addr, uint64_t addr,
			uint64_t key, void *context)
{
	struct fi_msg_rma msg;
	struct iovec msg_iov;
	struct fi_rma_iov rma_iov;

	msg_iov.iov_base = (void *)buf;
	msg_iov.iov_len = len;
	msg.msg_iov = &msg_iov;
	msg.desc = &desc;
	msg.iov_count = 1;

	rma_iov.addr = addr;
	rma_iov.key = key;
	rma_iov.len = len;
	msg.rma_iov = &rma_iov;
	msg.rma_iov_count = 1;

	msg.addr = dest_addr;
	msg.context = context;
	msg.data = data;

	return sock_ep_rma_writemsg(ep, &msg, FI_REMOTE_CQ_DATA);
}
/*
 * RMA inject write: payload is copied inline by the FI_INJECT path in
 * sock_ep_rma_writemsg(); no descriptor or completion context is used.
 *
 * Bug fixes:
 *  - rma_iov.len was hard-coded to 1 instead of `len`, tripping the
 *    src/dst length check in sock_ep_rma_writemsg() for any len != 1.
 *  - msg.rma_iov_count was never set (read uninitialized downstream).
 *  - removed a duplicated msg.msg_iov assignment.
 */
static ssize_t sock_ep_rma_inject(struct fid_ep *ep, const void *buf,
			size_t len, fi_addr_t dest_addr, uint64_t addr,
			uint64_t key)
{
	struct fi_msg_rma msg;
	struct iovec msg_iov;
	struct fi_rma_iov rma_iov;

	msg_iov.iov_base = (void *)buf;
	msg_iov.iov_len = len;
	msg.msg_iov = &msg_iov;
	msg.iov_count = 1;

	rma_iov.addr = addr;
	rma_iov.key = key;
	rma_iov.len = len;
	msg.rma_iov = &rma_iov;
	msg.rma_iov_count = 1;

	msg.addr = dest_addr;

	return sock_ep_rma_writemsg(ep, &msg, FI_INJECT);
}
/*
 * RMA inject write carrying remote CQ data; payload is copied inline.
 *
 * Bug fixes:
 *  - rma_iov.len was hard-coded to 1 instead of `len`, tripping the
 *    src/dst length check in sock_ep_rma_writemsg() for any len != 1.
 *  - msg.rma_iov_count was never set (read uninitialized downstream).
 *  - removed a duplicated msg.msg_iov assignment.
 */
static ssize_t sock_ep_rma_injectdata(struct fid_ep *ep, const void *buf,
			size_t len, uint64_t data, fi_addr_t dest_addr,
			uint64_t addr, uint64_t key)
{
	struct fi_msg_rma msg;
	struct iovec msg_iov;
	struct fi_rma_iov rma_iov;

	msg_iov.iov_base = (void *)buf;
	msg_iov.iov_len = len;
	msg.msg_iov = &msg_iov;
	msg.iov_count = 1;

	rma_iov.addr = addr;
	rma_iov.key = key;
	rma_iov.len = len;
	msg.rma_iov = &rma_iov;
	msg.rma_iov_count = 1;

	msg.addr = dest_addr;
	msg.data = data;

	return sock_ep_rma_writemsg(ep, &msg, FI_INJECT | FI_REMOTE_CQ_DATA);
}
/* RMA operations table for the sockets provider; installed on endpoints
 * that enable FI_RMA.  All entries funnel through
 * sock_ep_rma_readmsg()/sock_ep_rma_writemsg(). */
struct fi_ops_rma sock_ep_rma = {
.size = sizeof(struct fi_ops_rma),
.read = sock_ep_rma_read,
.readv = sock_ep_rma_readv,
.readmsg = sock_ep_rma_readmsg,
.write = sock_ep_rma_write,
.writev = sock_ep_rma_writev,
.writemsg = sock_ep_rma_writemsg,
.inject = sock_ep_rma_inject,
.injectdata = sock_ep_rma_injectdata,
.writedata = sock_ep_rma_writedata,
};

Просмотреть файл

@ -0,0 +1,129 @@
/*
* Copyright (c) 2014 Intel Corporation, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <errno.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include "sock.h"
#include "sock_util.h"
/*
 * Allocate a zeroed receive entry for the given RX context.
 * Returns NULL on allocation failure.
 */
struct sock_rx_entry *sock_rx_new_entry(struct sock_rx_ctx *rx_ctx)
{
	/* FIXME: pool of rx_entry */
	struct sock_rx_entry *entry = calloc(1, sizeof(*entry));

	if (!entry)
		return NULL;

	SOCK_LOG_INFO("New rx_entry: %p, ctx: %p\n", entry, rx_ctx);
	dlist_init(&entry->entry);
	return entry;
}
/* Free a receive entry previously obtained from sock_rx_new_entry() or
 * sock_rx_new_buffered_entry().  Caller must have unlinked it first. */
void sock_rx_release_entry(struct sock_rx_entry *rx_entry)
{
SOCK_LOG_INFO("Releasing rx_entry: %p\n", rx_entry);
free(rx_entry);
}
/* Allocate a receive entry with `len` bytes of inline payload storage
 * (placed immediately after the struct) for buffering an unexpected
 * message.  Charges the context's buffered-byte budget and appends the
 * entry to rx_buffered_list.  Returns NULL if the budget is exceeded or
 * allocation fails. */
struct sock_rx_entry *sock_rx_new_buffered_entry(struct sock_rx_ctx *rx_ctx,
size_t len)
{
struct sock_rx_entry *rx_entry;
/* Enforce the per-context cap on buffered unexpected data. */
if (rx_ctx->buffered_len + len >= rx_ctx->attr.total_buffered_recv) {
SOCK_LOG_ERROR("Reached max buffered recv limit\n");
return NULL;
}
rx_entry = calloc(1, sizeof(struct sock_rx_entry) + len);
if (!rx_entry)
return NULL;
SOCK_LOG_INFO("New buffered entry:%p len: %lu, ctx: %p\n",
rx_entry, len, rx_ctx);
rx_entry->is_buffered = 1;
rx_entry->rx_op.dest_iov_len = 1;
/* Single iov pointing at the inline storage after the header. */
rx_entry->iov[0].iov.len = len;
rx_entry->iov[0].iov.addr = (uint64_t)((char*)rx_entry +
sizeof(struct sock_rx_entry));
rx_entry->total_len = len;
rx_ctx->buffered_len += len;
dlist_insert_tail(&rx_entry->entry, &rx_ctx->rx_buffered_list);
return rx_entry;
}
/* Bytes of capacity still unused in the entry's receive buffer. */
inline size_t sock_rx_avail_len(struct sock_rx_entry *rx_entry)
{
return rx_entry->total_len - rx_entry->used;
}
/* Find the first posted receive entry that matches (addr, tag).
 * Matching applies the entry's own ignore mask to the tag; FI_ADDR_UNSPEC
 * on either side matches any address.  Busy entries are skipped.
 * Returns NULL if nothing matches.  Caller is expected to hold the
 * context lock — NOTE(review): no locking is visible here; confirm at
 * call sites. */
struct sock_rx_entry *sock_rx_get_entry(struct sock_rx_ctx *rx_ctx,
uint64_t addr, uint64_t tag)
{
struct dlist_entry *entry;
struct sock_rx_entry *rx_entry;
for (entry = rx_ctx->rx_entry_list.next;
entry != &rx_ctx->rx_entry_list; entry = entry->next) {
rx_entry = container_of(entry, struct sock_rx_entry, entry);
if (rx_entry->is_busy)
continue;
if (((rx_entry->tag & ~rx_entry->ignore) ==
(tag & ~rx_entry->ignore)) &&
(rx_entry->addr == FI_ADDR_UNSPEC ||
addr == FI_ADDR_UNSPEC || rx_entry->addr == addr)) {
break;
}
}
/* Loop ran off the end: no match. */
if (entry == &rx_ctx->rx_entry_list)
rx_entry = NULL;
return rx_entry;
}

Просмотреть файл

@ -40,30 +40,31 @@
#include <stdio.h>
/* Log severity levels: higher sock_log_level => more verbose output. */
#define SOCK_ERROR (1)
#define SOCK_WARN  (2)
#define SOCK_INFO  (3)

extern int sock_log_level;

/* Fix: removed diff residue — duplicated SOCK_WARN/SOCK_INFO defines and
 * fragments of the old (<= comparison, no __LINE__) macro bodies that
 * were interleaved with the new versions. */

#define SOCK_LOG_INFO(...) do { \
	if (sock_log_level >= SOCK_INFO) { \
		fprintf(stderr, "[SOCK_INFO - %s:%d]: ", __func__, __LINE__); \
		fprintf(stderr, __VA_ARGS__); \
	} \
} while (0)

#define SOCK_LOG_WARN(...) do { \
	if (sock_log_level >= SOCK_WARN) { \
		fprintf(stderr, "[SOCK_WARN - %s:%d]: ", __func__, __LINE__); \
		fprintf(stderr, __VA_ARGS__); \
	} \
} while (0)

#define SOCK_LOG_ERROR(...) do { \
	if (sock_log_level >= SOCK_ERROR) { \
		fprintf(stderr, "[SOCK_ERROR - %s:%d]: ", __func__, __LINE__); \
		fprintf(stderr, __VA_ARGS__); \
	} \
} while (0)

#endif

Просмотреть файл

@ -0,0 +1,299 @@
/*
* Copyright (c) 2014 Intel Corporation, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include "sock.h"
#include "sock_util.h"
/* Indices into sock_wait.fd for the socketpair used by FI_WAIT_FD
 * objects: readers poll fd[WAIT_READ_FD]; sock_wait_signal() writes to
 * fd[WAIT_WRITE_FD]. */
enum {
WAIT_READ_FD = 0,
WAIT_WRITE_FD,
};
/* Export the native wait object behind a wait set (FI_GETWAIT).
 * For FI_WAIT_FD, copies the readable fd into *arg (caller provides an
 * int); for FI_WAIT_MUTEX_COND, fills a struct fi_mutex_cond.
 * Returns 0 on success, -FI_EINVAL for an unknown wait type. */
int sock_wait_get_obj(struct fid_wait *fid, void *arg)
{
struct fi_mutex_cond mut_cond;
struct sock_wait *wait;
wait = container_of(fid, struct sock_wait, wait_fid.fid);
switch (wait->type) {
case FI_WAIT_FD:
memcpy(arg,&wait->fd[WAIT_READ_FD], sizeof(int));
break;
case FI_WAIT_MUTEX_COND:
mut_cond.mutex = &wait->mutex;
mut_cond.cond = &wait->cond;
memcpy(arg, &mut_cond, sizeof(mut_cond));
break;
default:
SOCK_LOG_ERROR("Invalid wait obj type\n");
return -FI_EINVAL;
}
return 0;
}
/*
 * Initialize the native wait object for a wait set: a non-blocking
 * socketpair for FI_WAIT_FD, or a mutex/condvar pair for
 * FI_WAIT_MUTEX_COND.
 *
 * Bug fix: F_GETFL returns the file status flags as the call's return
 * value and takes no output argument.  The original called
 * fcntl(fd, F_GETFL, &flags), which never set `flags` (silently
 * dropping any pre-existing status flags) and ignored errors.
 *
 * Returns 0 on success, -errno on system-call failure, -FI_EINVAL for
 * an unknown wait object type.
 */
static int sock_wait_init(struct sock_wait *wait, enum fi_wait_obj type)
{
	long flags;

	wait->type = type;

	switch (type) {
	case FI_WAIT_FD:
		if (socketpair(AF_UNIX, SOCK_STREAM, 0, wait->fd))
			return -errno;

		flags = fcntl(wait->fd[WAIT_READ_FD], F_GETFL, 0);
		if (flags < 0 ||
		    fcntl(wait->fd[WAIT_READ_FD], F_SETFL, flags | O_NONBLOCK)) {
			close(wait->fd[WAIT_READ_FD]);
			close(wait->fd[WAIT_WRITE_FD]);
			return -errno;
		}
		break;
	case FI_WAIT_MUTEX_COND:
		pthread_mutex_init(&wait->mutex, NULL);
		pthread_cond_init(&wait->cond, NULL);
		break;
	default:
		SOCK_LOG_ERROR("Invalid wait object type\n");
		return -FI_EINVAL;
	}
	return 0;
}
/* Block until the wait set is signaled or `timeout` (ms) expires.
 * For manual-progress domains, first drives progress on every attached
 * CQ/counter and deducts the time spent from the timeout.
 * Returns 0 on wakeup, -FI_ETIMEDOUT on timeout, -FI_EINVAL for an
 * unknown wait type. */
static int sock_wait_wait(struct fid_wait *wait_fid, int timeout)
{
int err = 0;
struct sock_cq *cq;
struct sock_cntr *cntr;
struct timeval now;
struct sock_wait *wait;
double start_ms = 0.0, end_ms = 0.0;
struct dlist_entry *p, *head;
struct sock_fid_list *list_item;
wait = container_of(wait_fid, struct sock_wait, wait_fid);
if (wait->domain->progress_mode == FI_PROGRESS_MANUAL) {
if (timeout > 0) {
gettimeofday(&now, NULL);
start_ms = (double)now.tv_sec * 1000.0 +
(double)now.tv_usec / 1000.0;
}
/* Drive progress on every CQ/counter attached to this wait set. */
head = &wait->fid_list;
for (p = head->next; p != head; p = p->next) {
list_item = container_of(p, struct sock_fid_list, entry);
switch (list_item->fid->fclass) {
case FI_CLASS_CQ:
cq = container_of(list_item->fid,
struct sock_cq, cq_fid);
sock_cq_progress(cq);
break;
case FI_CLASS_CNTR:
cntr = container_of(list_item->fid,
struct sock_cntr, cntr_fid);
sock_cntr_progress(cntr);
break;
}
}
/* Subtract the time spent progressing from the caller's budget. */
if (timeout > 0) {
gettimeofday(&now, NULL);
end_ms = (double)now.tv_sec * 1000.0 +
(double)now.tv_usec / 1000.0;
timeout -= (end_ms - start_ms);
timeout = timeout < 0 ? 0 : timeout;
}
}
switch (wait->type) {
case FI_WAIT_FD:
/* fi_poll_fd: >0 = readable, 0 = timed out, <0 = error. */
err = fi_poll_fd(wait->fd[WAIT_READ_FD], timeout);
if (err > 0)
err = 0;
else if (err == 0)
err = -FI_ETIMEDOUT;
break;
case FI_WAIT_MUTEX_COND:
err = fi_wait_cond(&wait->cond,
&wait->mutex, timeout);
break;
default:
SOCK_LOG_ERROR("Invalid wait object type\n");
return -FI_EINVAL;
}
return err;
}
/* Wake up any thread blocked in sock_wait_wait(): write one byte to the
 * signal side of the socketpair (FI_WAIT_FD) or signal the condvar
 * (FI_WAIT_MUTEX_COND).  NOTE(review): the write() return value is
 * ignored; a full pipe would silently drop the wakeup. */
void sock_wait_signal(struct fid_wait *wait_fid)
{
struct sock_wait *wait;
static char c = 'a';
wait = container_of(wait_fid, struct sock_wait, wait_fid);
switch (wait->type) {
case FI_WAIT_FD:
write(wait->fd[WAIT_WRITE_FD], &c, 1);
break;
case FI_WAIT_MUTEX_COND:
pthread_cond_signal(&wait->cond);
break;
default:
SOCK_LOG_ERROR("Invalid wait object type\n");
return;
}
}
/* Wait-set operations table; only blocking wait is provided. */
static struct fi_ops_wait sock_wait_ops = {
.size = sizeof(struct fi_ops_wait),
.wait = sock_wait_wait,
};
/*
 * fid control entry point for wait sets.  Only FI_GETWAIT is supported,
 * which exports the native wait object via sock_wait_get_obj().
 * Any other command yields -FI_EINVAL.
 */
static int sock_wait_control(struct fid *fid, int command, void *arg)
{
	struct sock_wait *wait =
		container_of(fid, struct sock_wait, wait_fid.fid);

	if (command == FI_GETWAIT)
		return sock_wait_get_obj(&wait->wait_fid, arg);

	return -FI_EINVAL;
}
/*
 * Destroy a wait set: free every attached fid-tracking node, close the
 * socketpair (FI_WAIT_FD only), drop the domain reference, and free the
 * wait object.
 *
 * Bug fix: the list walk freed `list_item` and then evaluated
 * `p = p->next` in the loop header — but `p` is the dlist entry embedded
 * in the node just freed, so that read was a use-after-free.  The next
 * pointer is now saved before the node is released.
 */
int sock_wait_close(fid_t fid)
{
	struct sock_fid_list *list_item;
	struct dlist_entry *p, *next, *head;
	struct sock_wait *wait;

	wait = container_of(fid, struct sock_wait, wait_fid.fid);
	head = &wait->fid_list;
	for (p = head->next; p != head; p = next) {
		next = p->next;
		list_item = container_of(p, struct sock_fid_list, entry);
		free(list_item);
	}

	if (wait->type == FI_WAIT_FD) {
		close(wait->fd[WAIT_READ_FD]);
		close(wait->fd[WAIT_WRITE_FD]);
	}
	atomic_dec(&wait->domain->ref);
	free(wait);
	return 0;
}
/* Base fid operations for a wait set; control handles FI_GETWAIT. */
static struct fi_ops sock_wait_fi_ops = {
.size = sizeof(struct fi_ops),
.close = sock_wait_close,
.bind = fi_no_bind,
.control = sock_wait_control,
.ops_open = fi_no_ops_open,
};
/*
 * Validate wait-set creation attributes: the wait object must be one of
 * the supported kinds and no flags are defined.  Returns 0 when valid,
 * -FI_EINVAL otherwise.
 */
static int sock_verify_wait_attr(struct fi_wait_attr *attr)
{
	if (attr->wait_obj != FI_WAIT_UNSPEC &&
	    attr->wait_obj != FI_WAIT_FD &&
	    attr->wait_obj != FI_WAIT_MUTEX_COND) {
		SOCK_LOG_ERROR("Invalid wait object type\n");
		return -FI_EINVAL;
	}

	return attr->flags ? -FI_EINVAL : 0;
}
/*
 * Create a wait set on the given domain.
 *
 * Bug fix: when the caller explicitly requested FI_WAIT_FD or
 * FI_WAIT_MUTEX_COND, `wait_obj_type` was left UNINITIALIZED — only the
 * NULL-attr / FI_WAIT_UNSPEC case assigned it.  The requested type is
 * now honored.
 *
 * Returns 0 on success, -FI_EINVAL on bad attributes, -FI_ENOMEM on
 * allocation failure, or the error from sock_wait_init().  Takes a
 * domain reference released in sock_wait_close().
 */
int sock_wait_open(struct fid_domain *domain, struct fi_wait_attr *attr,
		struct fid_wait **waitset)
{
	int err;
	struct sock_wait *wait;
	struct sock_domain *dom;
	enum fi_wait_obj wait_obj_type;

	if (attr && sock_verify_wait_attr(attr))
		return -FI_EINVAL;

	dom = container_of(domain, struct sock_domain, dom_fid);

	if (!attr || attr->wait_obj == FI_WAIT_UNSPEC)
		wait_obj_type = FI_WAIT_FD;
	else
		wait_obj_type = attr->wait_obj;

	wait = calloc(1, sizeof(*wait));
	if (!wait)
		return -FI_ENOMEM;

	err = sock_wait_init(wait, wait_obj_type);
	if (err) {
		free(wait);
		return err;
	}

	wait->wait_fid.fid.fclass = FI_CLASS_WAIT;
	wait->wait_fid.fid.context = 0;
	wait->wait_fid.fid.ops = &sock_wait_fi_ops;
	wait->wait_fid.ops = &sock_wait_ops;
	wait->domain = dom;
	wait->type = wait_obj_type;
	atomic_inc(&dom->ref);
	*waitset = &wait->wait_fid;
	return 0;
}

Просмотреть файл

@ -40,25 +40,40 @@
#include <pthread.h>
#include "usdf_progress.h"
#include "usd.h"
/* Provider identity and version reported through fi_getinfo(). */
#define USDF_PROV_NAME "usnic"
#define USDF_MAJOR_VERS 1
#define USDF_MINOR_VERS 0
#define USDF_PROV_VERSION FI_VERSION(USDF_MAJOR_VERS, USDF_MINOR_VERS)
#define USDF_FI_NAME "usnic"
#define USDF_HDR_BUF_ENTRY 64
#define USDF_EP_CAP_PIO (1ULL << 63)
#define USDF_MAX_PEERS (16 * 1024)
/* Capability/mode bits advertised per endpoint type (DGRAM vs MSG). */
#define USDF_DGRAM_CAPS (FI_MSG | FI_SOURCE | FI_SEND | FI_RECV)
#define USDF_DGRAM_SUPP_MODE (FI_LOCAL_MR | FI_MSG_PREFIX)
#define USDF_DGRAM_REQ_MODE (FI_LOCAL_MR)
#define USDF_MSG_CAPS (FI_MSG | FI_SOURCE | FI_SEND | FI_RECV)
#define USDF_MSG_SUPP_MODE (FI_LOCAL_MR)
#define USDF_MSG_REQ_MODE (FI_LOCAL_MR)
/* usdf event flags */
#define USDF_EVENT_FLAG_ERROR (1ULL << 62)
#define USDF_EVENT_FLAG_FREE_BUF (1ULL << 63)
/*
 * TAILQ stuff that should exist
 */
/* Remove and mark the element as off-list (tqe_prev == NULL) so that
 * TAILQ_ON_LIST() can test membership without walking the list. */
#define TAILQ_REMOVE_MARK(head, elm, link) \
do { \
TAILQ_REMOVE(head, elm, link); \
(elm)->link.tqe_prev = NULL; \
} while (0)
#define TAILQ_ON_LIST(elm, link) ((elm)->link.tqe_prev != NULL)
struct usdf_domain;
struct usdf_dev_entry {
struct usd_device *ue_dev;
struct usd_device_attrs ue_dattr;
@ -73,9 +88,11 @@ extern struct usdf_usnic_info *__usdf_devinfo;
struct usdf_fabric {
struct fid_fabric fab_fid;
struct fi_fabric_attr fab_attr;
struct usd_device_attrs *fab_dev_attrs;
int fab_arp_sockfd;
atomic_t fab_refcnt;
LIST_HEAD(,usdf_domain) fab_domain_list;
/* progression */
pthread_t fab_thread;
@ -98,10 +115,25 @@ struct usdf_fabric {
/* Per-domain state for the usnic provider: the opened usd device, the
 * progress-engine work lists, and the RDM connection cache. */
struct usdf_domain {
struct fid_domain dom_fid;
struct usdf_fabric *dom_fabric;
struct fi_info *dom_info;
atomic_t dom_refcnt;
struct usdf_eq *dom_eq;
struct usd_device *dom_dev;
struct usd_device_attrs dom_dev_attrs;
/* Guards the progress lists below. */
pthread_spinlock_t dom_progress_lock;
TAILQ_HEAD(,usdf_tx) dom_tx_ready;
TAILQ_HEAD(,usdf_cq_hard) dom_hcq_list;
/* RDM connection lookup table plus a free list with its count/total. */
struct usdf_rdm_connection **dom_rdc_hashtab;
SLIST_HEAD(,usdf_rdm_connection) dom_rdc_free;
atomic_t dom_rdc_free_cnt;
size_t dom_rdc_total;
/* used only by connected endpoints */
struct usdf_ep **dom_peer_tab;
uint32_t dom_next_peer;
LIST_ENTRY(usdf_domain) dom_link;
};
/* Conversions between the public fid and the provider struct. */
#define dom_ftou(FDOM) container_of(FDOM, struct usdf_domain, dom_fid)
#define dom_utof(DOM) (&(DOM)->dom_fid)
@ -125,41 +157,174 @@ struct usdf_pep {
#define pep_ftou(FPEP) container_of(FPEP, struct usdf_pep, pep_fid)
#define pep_fidtou(FID) container_of(FID, struct usdf_pep, pep_fid.fid)
#define pep_utof(PEP) (&(PEP)->pep_fid)
#define pep_utofid(PEP) (&(PEP)->pep_fid.fid)
/* Transmit context.  The `t` union holds per-endpoint-type queue state:
 * `msg` for reliable connected endpoints, `rdm` for reliable datagram. */
struct usdf_tx {
struct fid_stx tx_fid;
atomic_t tx_refcnt;
struct usdf_domain *tx_domain;
TAILQ_ENTRY(usdf_tx) tx_link;
struct fi_tx_attr tx_attr;
struct usd_qp *tx_qp;
/* Per-type progress routine invoked by the domain progress engine. */
void (*tx_progress)(struct usdf_tx *tx);
union {
struct {
struct usdf_cq_hard *tx_hcq;
/* Work-queue-entry pool and the EP scheduling lists. */
struct usdf_msg_qe *tx_wqe_buf;
TAILQ_HEAD(,usdf_msg_qe) tx_free_wqe;
TAILQ_HEAD(,usdf_ep) tx_ep_ready;
TAILQ_HEAD(,usdf_ep) tx_ep_have_acks;
} msg;
struct {
struct usdf_cq_hard *tx_hcq;
atomic_t tx_next_msg_id;
/* Work-queue-entry pool and the RDM-connection scheduling lists. */
struct usdf_rdm_qe *tx_wqe_buf;
TAILQ_HEAD(,usdf_rdm_qe) tx_free_wqe;
TAILQ_HEAD(,usdf_rdm_connection) tx_rdc_ready;
TAILQ_HEAD(,usdf_rdm_connection) tx_rdc_have_acks;
} rdm;
} t;
};
/* Conversions between the public fid and the provider struct. */
#define tx_ftou(FEP) container_of(FEP, struct usdf_tx, tx_fid)
#define tx_fidtou(FID) container_of(FID, struct usdf_tx, tx_fid)
#define tx_utof(RX) (&(RX)->tx_fid)
#define tx_utofid(RX) (&(RX)->tx_fid.fid)
/* Receive context.  The `r` union holds per-endpoint-type queue state:
 * `msg` for reliable connected endpoints, `rdm` for reliable datagram. */
struct usdf_rx {
struct fid_ep rx_fid;
atomic_t rx_refcnt;
struct usdf_domain *rx_domain;
struct fi_rx_attr rx_attr;
struct usd_qp *rx_qp;
union {
struct {
struct usdf_cq_hard *rx_hcq;
/* Receive buffers plus the RQE pool and its free/posted lists. */
uint8_t *rx_bufs;
struct usdf_msg_qe *rx_rqe_buf;
TAILQ_HEAD(,usdf_msg_qe) rx_free_rqe;
TAILQ_HEAD(,usdf_msg_qe) rx_posted_rqe;
} msg;
struct {
int rx_sock;
struct usdf_cq_hard *rx_hcq;
struct usdf_tx *rx_tx;
uint8_t *rx_bufs;
struct usdf_rdm_qe *rx_rqe_buf;
TAILQ_HEAD(,usdf_rdm_qe) rx_free_rqe;
TAILQ_HEAD(,usdf_rdm_qe) rx_posted_rqe;
} rdm;
} r;
};
/* Conversions between the public fid and the provider struct. */
#define rx_ftou(FEP) container_of(FEP, struct usdf_rx, rx_fid)
#define rx_fidtou(FID) container_of(FID, struct usdf_rx, rx_fid)
#define rx_utof(RX) (&(RX)->rx_fid)
#define rx_utofid(RX) (&(RX)->rx_fid.fid)
struct usdf_ep {
struct fid_ep ep_fid;
struct usdf_domain *ep_domain;
atomic_t ep_refcnt;
uint64_t ep_caps;
uint64_t ep_mode;
int ep_sock;
int ep_conn_sock;
uint32_t ep_wqe;
uint32_t ep_wqe; /* requested queue sizes */
uint32_t ep_rqe;
struct usdf_domain *ep_domain;
struct usdf_av *ep_av;
struct usdf_cq *ep_wcq;
struct usdf_cq *ep_rcq;
struct usdf_eq *ep_eq;
struct usd_qp *ep_qp;
struct usd_dest *ep_dest;
struct usd_qp_attrs ep_qp_attrs;
void *ep_hdr_buf;
struct usd_udp_hdr **ep_hdr_ptr;
struct usdf_eq *ep_eq;
struct usdf_tx *ep_tx;
struct usdf_rx *ep_rx;
union {
struct {
struct usd_qp *ep_qp;
struct usdf_cq *ep_wcq;
struct usdf_cq *ep_rcq;
int ep_sock;
struct usdf_av *ep_av;
void *ep_hdr_buf;
struct usd_udp_hdr **ep_hdr_ptr;
} dg;
struct {
struct usdf_connreq *ep_connreq;
struct usd_dest *ep_dest;
uint32_t ep_rem_peer_id;
uint32_t ep_lcl_peer_id;
TAILQ_HEAD(,usdf_msg_qe) ep_posted_wqe;
TAILQ_HEAD(usdf_msg_qe_head ,usdf_msg_qe) ep_sent_wqe;
uint32_t ep_fairness_credits;
uint32_t ep_seq_credits;
uint16_t ep_next_tx_seq;
uint16_t ep_last_rx_ack;
int ep_send_nak;
struct usdf_msg_qe *ep_cur_recv;
uint16_t ep_next_rx_seq;
TAILQ_ENTRY(usdf_ep) ep_ack_link;
struct usdf_timer_entry *ep_ack_timer;
TAILQ_ENTRY(usdf_ep) ep_link;
} msg;
struct {
int ep_sock;
struct usdf_av *ep_av;
} rdm;
} e;
};
#define ep_ftou(FEP) container_of(FEP, struct usdf_ep, ep_fid)
#define ep_fidtou(FID) container_of(FID, struct usdf_ep, ep_fid.fid)
#define ep_utof(EP) (&(EP)->ep_fid)
#define ep_utofid(EP) (&(EP)->ep_fid.fid)
struct usdf_mr {
struct fid_mr mr_fid;
struct usd_mr *mr_mr;
};
struct usdf_cq_hard {
struct usdf_cq *cqh_cq;
struct usd_cq *cqh_ucq;
atomic_t cqh_refcnt;
void (*cqh_progress)(struct usdf_cq_hard *hcq);
void (*cqh_post)(struct usdf_cq_hard *hcq, void *context, size_t len);
TAILQ_ENTRY(usdf_cq_hard) cqh_link;
TAILQ_ENTRY(usdf_cq_hard) cqh_dom_link;
};
struct usdf_cq {
struct fid_cq cq_fid;
atomic_t cq_refcnt;
struct usdf_domain *cq_domain;
struct usd_cq *cq_cq;
struct fi_cq_attr cq_attr;
union {
struct {
struct usd_cq *cq_cq;
} hard;
struct {
void *cq_comps;
void *cq_end;
void *cq_head;
void *cq_tail;
TAILQ_HEAD(,usdf_cq_hard) cq_list;
} soft;
} c;
struct usd_completion cq_comp;
};
#define cq_ftou(FCQ) container_of(FCQ, struct usdf_cq, cq_fid)

Просмотреть файл

@ -58,7 +58,6 @@
#include "libnl_utils.h"
#include "usd.h"
#include "usd_queue.h"
#include "usd_dest.h"
#include "usdf.h"
#include "usdf_av.h"
@ -114,11 +113,27 @@ usdf_post_insert_request_error(struct usdf_av_insert *insert,
err_entry.data = req - (struct usdf_av_req *)(insert + 1);
err_entry.err = -req->avr_status;
usdf_eq_write_internal(av->av_eq, FI_COMPLETE,
usdf_eq_write_internal(av->av_eq, 0,
&err_entry, sizeof(err_entry),
USDF_EVENT_FLAG_ERROR);
}
static int
usdf_av_alloc_dest(struct usdf_dest **dest_o)
{
struct usdf_dest *dest;
dest = calloc(1, sizeof(**dest_o));
if (dest == NULL) {
return -errno;
}
SLIST_INIT(&dest->ds_rdm_rdc_list);
*dest_o = dest;
return 0;
}
/*
* Called by progression thread to look for AV completions on this domain
*/
@ -128,7 +143,7 @@ usdf_av_insert_progress(void *v)
int ret;
struct usdf_av_insert *insert;
struct usdf_fabric *fp;
struct usd_dest *dest;
struct usdf_dest *dest;
struct usdf_av_req *req;
struct usdf_av_req *tmpreq;
struct usd_device_attrs *dap;
@ -142,7 +157,7 @@ usdf_av_insert_progress(void *v)
TAILQ_FOREACH_SAFE(req, tmpreq, &insert->avi_req_list, avr_link) {
dest = req->avr_dest;
eth = &dest->ds_dest.ds_udp.u_hdr.uh_eth.ether_dhost[0];
eth = &dest->ds_dest.ds_dest.ds_udp.u_hdr.uh_eth.ether_dhost[0];
ret = usnic_arp_lookup(dap->uda_ifname,
req->avr_daddr_be, fp->fab_arp_sockfd, eth);
@ -153,7 +168,7 @@ usdf_av_insert_progress(void *v)
if (ret == 0) {
++insert->avi_successes;
*(struct usd_dest **)req->avr_fi_addr = dest;
*(struct usdf_dest **)req->avr_fi_addr = dest;
} else {
usdf_post_insert_request_error(insert, req);
}
@ -282,7 +297,7 @@ usdf_am_insert_async(struct fid_av *fav, const void *addr, size_t count,
ret = -FI_ENOMEM;
goto fail;
}
usd_fill_udp_dest(req->avr_dest, dap,
usd_fill_udp_dest(&req->avr_dest->ds_dest, dap,
sin->sin_addr.s_addr, sin->sin_port);
TAILQ_INSERT_TAIL(&insert->avi_req_list, req, avr_link);
@ -313,7 +328,8 @@ usdf_am_insert_sync(struct fid_av *fav, const void *addr, size_t count,
{
const struct sockaddr_in *sin;
struct usdf_av *av;
struct usd_dest *dest;
struct usd_dest *u_dest;
struct usdf_dest *dest = dest; // supress uninit
int ret_count;
int ret;
int i;
@ -327,16 +343,21 @@ usdf_am_insert_sync(struct fid_av *fav, const void *addr, size_t count,
ret_count = 0;
sin = addr;
/* XXX parallelize */
/* XXX parallelize, this will also eliminate u_dest silliness */
for (i = 0; i < count; i++) {
ret = usd_create_dest(av->av_domain->dom_dev,
ret = usdf_av_alloc_dest(&dest);
if (ret == 0) {
ret = usd_create_dest(av->av_domain->dom_dev,
sin->sin_addr.s_addr, sin->sin_port,
&dest);
if (ret != 0) {
fi_addr[i] = FI_ADDR_NOTAVAIL;
} else {
&u_dest);
}
if (ret == 0) {
dest->ds_dest = *u_dest;
free(u_dest);
fi_addr[i] = (fi_addr_t)dest;
++ret_count;
} else {
fi_addr[i] = FI_ADDR_NOTAVAIL;
}
++sin;
}
@ -348,7 +369,7 @@ static int
usdf_am_remove(struct fid_av *fav, fi_addr_t *fi_addr, size_t count,
uint64_t flags)
{
struct usd_dest *dest;
struct usdf_dest *dest;
struct usdf_av *av;
av = av_ftou(fav);
@ -358,8 +379,8 @@ usdf_am_remove(struct fid_av *fav, fi_addr_t *fi_addr, size_t count,
}
// XXX
dest = (struct usd_dest *)(uintptr_t)fi_addr;
usd_destroy_dest(dest);
dest = (struct usdf_dest *)(uintptr_t)fi_addr;
free(dest);
return 0;
}
@ -368,11 +389,11 @@ static int
usdf_am_lookup(struct fid_av *av, fi_addr_t fi_addr, void *addr,
size_t *addrlen)
{
struct usd_dest *dest;
struct usdf_dest *dest;
struct sockaddr_in sin;
size_t copylen;
dest = (struct usd_dest *)(uintptr_t)fi_addr;
dest = (struct usdf_dest *)(uintptr_t)fi_addr;
if (*addrlen < sizeof(sin)) {
copylen = *addrlen;
@ -381,7 +402,7 @@ usdf_am_lookup(struct fid_av *av, fi_addr_t fi_addr, void *addr,
}
sin.sin_family = AF_INET;
usd_expand_dest(dest, &sin.sin_addr.s_addr, &sin.sin_port);
usd_expand_dest(&dest->ds_dest, &sin.sin_addr.s_addr, &sin.sin_port);
memcpy(addr, &sin, copylen);
*addrlen = sizeof(sin);
@ -518,9 +539,6 @@ usdf_av_open(struct fid_domain *domain, struct fi_av_attr *attr,
struct usdf_domain *udp;
struct usdf_av *av;
if (attr->name != NULL) {
return -FI_ENOSYS;
}
if ((attr->flags & ~(FI_EVENT | FI_READ)) != 0) {
return -FI_ENOSYS;
}

Просмотреть файл

@ -36,13 +36,26 @@
#ifndef _USDF_AV_H_
#define _USDF_AV_H_
#include "usd_dest.h"
#define USDF_AV_MAX_ARPS 3
#define USDF_AV_ARP_INTERVAL 1000
struct usdf_rdm_connection;
/*
* libfabric version of dest
*/
struct usdf_dest {
struct usd_dest ds_dest;
SLIST_HEAD(,usdf_rdm_connection) ds_rdm_rdc_list;
};
/* struct used to track async insert requests */
struct usdf_av_req {
fi_addr_t *avr_fi_addr;
struct usd_dest *avr_dest;
struct usdf_dest *avr_dest;
int avr_status;
uint32_t avr_daddr_be;

Просмотреть файл

@ -46,6 +46,7 @@
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/epoll.h>
#include <rdma/fabric.h>
#include <rdma/fi_cm.h>
@ -58,90 +59,401 @@
#include "usnic_direct.h"
#include "usdf.h"
#include "usdf_endpoint.h"
#include "usdf_dgram.h"
#include "usdf_cm.h"
#include "usdf_msg.h"
#include "usdf_av.h"
#include "usdf_cm.h"
static struct fi_ops_msg usdf_dgram_conn_ops = {
.size = sizeof(struct fi_ops_msg),
.recv = usdf_dgram_recv,
.recvv = usdf_dgram_recvv,
.recvmsg = usdf_dgram_recvmsg,
.send = usdf_dgram_conn_send,
.sendv = usdf_dgram_sendv,
.sendmsg = usdf_dgram_sendmsg,
.inject = usdf_dgram_inject,
.senddata = usdf_dgram_senddata,
};
int
usdf_cm_dgram_connect(struct fid_ep *fep, const void *addr,
const void *param, size_t paramlen)
static void
usdf_cm_msg_connreq_cleanup(struct usdf_connreq *crp)
{
struct usdf_ep *ep;
const struct sockaddr_in *sin;
int ret;
struct usdf_pep *pep;
struct usdf_fabric *fp;
ep = ep_ftou(fep);
sin = addr;
ret = usd_create_dest(ep->ep_domain->dom_dev, sin->sin_addr.s_addr,
sin->sin_port, &ep->ep_dest);
if (!ret) {
ep->ep_fid.msg = &usdf_dgram_conn_ops;
ep = crp->cr_ep;
pep = crp->cr_pep;
if (pep != NULL) {
fp = pep->pep_fabric;
} else {
fp = ep->ep_domain->dom_fabric;
}
return ret;
if (crp->cr_pollitem.pi_rtn != NULL) {
epoll_ctl(fp->fab_epollfd, EPOLL_CTL_DEL, crp->cr_sockfd, NULL);
crp->cr_pollitem.pi_rtn = NULL;
}
if (crp->cr_sockfd != -1) {
close(crp->cr_sockfd);
crp->cr_sockfd = -1;
}
/* If there is a passive endpoint, recycle the crp */
if (pep != NULL) {
if (TAILQ_ON_LIST(crp, cr_link)) {
TAILQ_REMOVE(&pep->pep_cr_pending, crp, cr_link);
}
TAILQ_INSERT_TAIL(&pep->pep_cr_free, crp, cr_link);
} else {
free(crp);
}
}
static int
usdf_cm_msg_accept_complete(struct usdf_connreq *crp)
{
struct usdf_ep *ep;
struct fi_eq_cm_entry entry;
int ret;
ep = crp->cr_ep;
/* post EQ entry */
entry.fid = ep_utofid(ep);
entry.info = NULL;
ret = usdf_eq_write_internal(ep->ep_eq, FI_COMPLETE, &entry,
sizeof(entry), 0);
if (ret != sizeof(entry)) {
usdf_cm_msg_connreq_failed(crp, ret);
return 0;
}
usdf_cm_msg_connreq_cleanup(crp);
return 0;
}
int
usdf_cm_dgram_shutdown(struct fid_ep *ep, uint64_t flags)
usdf_cm_msg_accept(struct fid_ep *fep, const void *param, size_t paramlen)
{
return 0; // XXX
struct usdf_ep *ep;
struct usdf_rx *rx;
struct usdf_domain *udp;
struct usdf_fabric *fp;
struct usdf_connreq *crp;
struct usdf_connreq_msg *reqp;
struct usd_qp_impl *qp;
int ret;
int n;
ep = ep_ftou(fep);
udp = ep->ep_domain;
fp = udp->dom_fabric;
crp = ep->e.msg.ep_connreq;
if (crp == NULL) {
return -FI_ENOTCONN;
}
if (ep->ep_eq == NULL) {
return -FI_ENOEQ;
}
crp->cr_ep = ep;
reqp = (struct usdf_connreq_msg *)crp->cr_data;
ep->e.msg.ep_lcl_peer_id = ntohs(reqp->creq_peer_id);
/* start creating the dest early */
ret = usd_create_dest_with_mac(udp->dom_dev, reqp->creq_ipaddr,
reqp->creq_port, reqp->creq_mac,
&ep->e.msg.ep_dest);
if (ret != 0) {
goto fail;
}
ret = usdf_ep_msg_get_queues(ep);
if (ret != 0) {
goto fail;
}
rx = ep->ep_rx;
qp = to_qpi(rx->rx_qp);
/* allocate a peer ID */
ep->e.msg.ep_rem_peer_id = udp->dom_next_peer;
udp->dom_peer_tab[udp->dom_next_peer] = ep;
++udp->dom_next_peer;
crp->cr_ptr = crp->cr_data;
crp->cr_resid = sizeof(*reqp) + paramlen;
reqp->creq_peer_id = htons(ep->e.msg.ep_rem_peer_id);
reqp->creq_ipaddr = fp->fab_dev_attrs->uda_ipaddr_be;
reqp->creq_port =
qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;
memcpy(reqp->creq_mac, fp->fab_dev_attrs->uda_mac_addr, ETH_ALEN);
reqp->creq_result = htonl(0);
reqp->creq_datalen = htonl(paramlen);
memcpy(reqp->creq_data, param, paramlen);
n = write(crp->cr_sockfd, crp->cr_ptr, crp->cr_resid);
if (n == -1) {
usdf_cm_msg_connreq_cleanup(crp);
ret = -errno;
goto fail;
}
crp->cr_resid -= n;
if (crp->cr_resid == 0) {
usdf_cm_msg_accept_complete(crp);
} else {
// XXX set up epoll junk to send rest
}
return 0;
fail:
free(ep->e.msg.ep_dest);
/* XXX release queues */
return ret;
}
/*
* Connection request attempt failed
*/
void
usdf_cm_msg_connreq_failed(struct usdf_connreq *crp, int error)
{
struct usdf_pep *pep;
struct usdf_ep *ep;
struct usdf_eq *eq;
fid_t fid;
struct fi_eq_err_entry err;
pep = crp->cr_pep;
ep = crp->cr_ep;
if (ep != NULL) {
fid = ep_utofid(ep);
eq = ep->ep_eq;
ep->ep_domain->dom_peer_tab[ep->e.msg.ep_rem_peer_id] = NULL;
} else {
fid = pep_utofid(pep);
eq = pep->pep_eq;
}
err.fid = fid;
err.context = NULL;
err.data = 0;
err.err = -error;
err.prov_errno = 0;
err.err_data = NULL;
usdf_eq_write_internal(eq, 0, &err, sizeof(err), USDF_EVENT_FLAG_ERROR);
usdf_cm_msg_connreq_cleanup(crp);
}
/*
* read connection request response from the listener
*/
static int
usdf_cm_msg_connect_cb_rd(void *v)
{
struct usdf_connreq *crp;
struct usdf_ep *ep;
struct usdf_fabric *fp;
struct usdf_domain *udp;
struct usdf_connreq_msg *reqp;
struct fi_eq_cm_entry *entry;
size_t entry_len;
int ret;
crp = v;
ep = crp->cr_ep;
fp = ep->ep_domain->dom_fabric;
ret = read(crp->cr_sockfd, crp->cr_ptr, crp->cr_resid);
if (ret == -1) {
usdf_cm_msg_connreq_failed(crp, -errno);
return 0;
}
crp->cr_resid -= ret;
reqp = (struct usdf_connreq_msg *)crp->cr_data;
if (crp->cr_resid == 0 && crp->cr_ptr == crp->cr_data + sizeof(*reqp)) {
reqp->creq_datalen = ntohl(reqp->creq_datalen);
crp->cr_resid = reqp->creq_datalen;
}
/* if resid is 0 now, completely done */
if (crp->cr_resid == 0) {
ret = epoll_ctl(fp->fab_epollfd, EPOLL_CTL_DEL,
crp->cr_sockfd, NULL);
close(crp->cr_sockfd);
crp->cr_sockfd = -1;
entry_len = sizeof(*entry) + reqp->creq_datalen;
entry = malloc(entry_len);
if (entry == NULL) {
usdf_cm_msg_connreq_failed(crp, -errno);
return 0;
}
udp = ep->ep_domain;
ep->e.msg.ep_lcl_peer_id = ntohs(reqp->creq_peer_id);
ret = usd_create_dest_with_mac(udp->dom_dev, reqp->creq_ipaddr,
reqp->creq_port, reqp->creq_mac,
&ep->e.msg.ep_dest);
if (ret != 0) {
free(entry);
usdf_cm_msg_connreq_failed(crp, ret);
return 0;
}
entry->fid = ep_utofid(ep);
entry->info = NULL;
memcpy(entry->data, reqp->creq_data, reqp->creq_datalen);
ret = usdf_eq_write_internal(ep->ep_eq, FI_COMPLETE, entry,
entry_len, 0);
free(entry);
if (ret != entry_len) {
free(ep->e.msg.ep_dest);
ep->e.msg.ep_dest = NULL;
usdf_cm_msg_connreq_failed(crp, ret);
return 0;
}
usdf_cm_msg_connreq_cleanup(crp);
}
return 0;
}
/*
* Write connection request data to the listener
* Once everything is written, switch over into listening mode to
* capture the listener response.
*/
static int
usdf_cm_msg_connect_cb_wr(void *v)
{
struct usdf_connreq *crp;
struct usdf_ep *ep;
struct usdf_fabric *fp;
struct epoll_event ev;
int ret;
crp = v;
ep = crp->cr_ep;
fp = ep->ep_domain->dom_fabric;
ret = write(crp->cr_sockfd, crp->cr_ptr, crp->cr_resid);
if (ret == -1) {
usdf_cm_msg_connreq_failed(crp, -errno);
return 0;
}
crp->cr_resid -= ret;
if (crp->cr_resid == 0) {
crp->cr_pollitem.pi_rtn = usdf_cm_msg_connect_cb_rd;
ev.events = EPOLLIN;
ev.data.ptr = &crp->cr_pollitem;
ret = epoll_ctl(fp->fab_epollfd, EPOLL_CTL_MOD,
crp->cr_sockfd, &ev);
if (ret != 0) {
usdf_cm_msg_connreq_failed(crp, -errno);
return 0;
}
crp->cr_ptr = crp->cr_data;
crp->cr_resid = sizeof(struct usdf_connreq_msg);
}
return 0;
}
int
usdf_cm_msg_connect(struct fid_ep *fep, const void *addr,
const void *param, size_t paramlen)
{
struct usdf_connreq *crp;
struct usdf_ep *ep;
struct usdf_rx *rx;
struct usdf_domain *udp;
const struct sockaddr_in *sin;
struct epoll_event ev;
struct usdf_fabric *fp;
struct usdf_connreq_msg *reqp;
struct usd_qp_impl *qp;
int ret;
ep = ep_ftou(fep);
udp = ep->ep_domain;
fp = udp->dom_fabric;
sin = addr;
crp = NULL;
ep->ep_conn_sock = socket(AF_INET, SOCK_STREAM, 0);
if (ep->ep_conn_sock == -1) {
crp = calloc(1, sizeof(*crp) + sizeof(struct usdf_connreq_msg) +
paramlen);
if (crp == NULL) {
ret = -errno;
goto fail;
}
ret = fcntl(ep->ep_conn_sock, F_GETFL, 0);
crp->cr_sockfd = socket(AF_INET, SOCK_STREAM, 0);
if (crp->cr_sockfd == -1) {
ret = -errno;
goto fail;
}
ret = fcntl(crp->cr_sockfd, F_GETFL, 0);
if (ret == -1) {
ret = -errno;
goto fail;
}
ret = fcntl(ep->ep_conn_sock, F_SETFL, ret | O_NONBLOCK);
ret = fcntl(crp->cr_sockfd, F_SETFL, ret | O_NONBLOCK);
if (ret == -1) {
ret = -errno;
goto fail;
}
ret = connect(ep->ep_conn_sock, (struct sockaddr *)sin, sizeof(*sin));
ret = usdf_ep_msg_get_queues(ep);
if (ret != 0) {
goto fail;
}
rx = ep->ep_rx;
qp = to_qpi(rx->rx_qp);
ret = connect(crp->cr_sockfd, (struct sockaddr *)sin, sizeof(*sin));
if (ret != 0 && errno != EINPROGRESS) {
ret = -errno;
goto fail;
}
printf("connect in progress\n");
/* register for notification when connect completes */
crp->cr_pollitem.pi_rtn = usdf_cm_msg_connect_cb_wr;
crp->cr_pollitem.pi_context = crp;
ev.events = EPOLLOUT;
ev.data.ptr = &crp->cr_pollitem;
ret = epoll_ctl(fp->fab_epollfd, EPOLL_CTL_ADD, crp->cr_sockfd, &ev);
if (ret != 0) {
crp->cr_pollitem.pi_rtn = NULL;
ret = -errno;
goto fail;
}
/* allocate remote peer ID */
ep->e.msg.ep_rem_peer_id = udp->dom_next_peer;
udp->dom_peer_tab[udp->dom_next_peer] = ep;
++udp->dom_next_peer;
crp->cr_ep = ep;
reqp = (struct usdf_connreq_msg *)crp->cr_data;
crp->cr_ptr = crp->cr_data;
crp->cr_resid = sizeof(*reqp) + paramlen;
reqp->creq_peer_id = htons(ep->e.msg.ep_rem_peer_id);
reqp->creq_ipaddr = fp->fab_dev_attrs->uda_ipaddr_be;
reqp->creq_port =
qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;
memcpy(reqp->creq_mac, fp->fab_dev_attrs->uda_mac_addr, ETH_ALEN);
reqp->creq_datalen = htonl(paramlen);
memcpy(reqp->creq_data, param, paramlen);
return 0;
fail:
if (ep->ep_conn_sock != -1) {
close(ep->ep_conn_sock);
if (crp != NULL) {
if (crp->cr_sockfd != -1) {
close(crp->cr_sockfd);
}
free(crp);
}
usdf_ep_msg_release_queues(ep);
return ret;
}
@ -150,3 +462,50 @@ usdf_cm_msg_shutdown(struct fid_ep *ep, uint64_t flags)
{
return -FI_ENOSYS;
}
/*
* Check a message CQ for completions and progress the send engine as needed,
* create completions for the app if anything needs to be percolated up
*/
int
usdf_cq_msg_poll(struct usd_cq *ucq, struct usd_completion *comp)
{
return -EAGAIN;
}
/*
* Return local address of an EP
*/
int usdf_cm_rdm_getname(fid_t fid, void *addr, size_t *addrlen)
{
struct usdf_ep *ep;
struct usdf_rx *rx;
struct sockaddr_in sin;
size_t copylen;
ep = ep_fidtou(fid);
rx = ep->ep_rx;
copylen = sizeof(sin);
if (copylen > *addrlen) {
copylen = *addrlen;
}
*addrlen = sizeof(sin);
memset(&sin, 0, sizeof(sin));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr =
ep->ep_domain->dom_fabric->fab_dev_attrs->uda_ipaddr_be;
if (rx == NULL || rx->rx_qp == NULL) {
sin.sin_port = 0;
} else {
sin.sin_port = to_qpi(rx->rx_qp)->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;
}
memcpy(addr, &sin, copylen);
if (copylen < sizeof(sin)) {
return -FI_ETOOSMALL;
} else {
return 0;
}
}

Просмотреть файл

@ -38,18 +38,24 @@
#include <sys/queue.h>
struct usdf_connreq_msg {
uint32_t creq_data_len;
} __attribute__((packed));
#define USDF_MAX_CONN_DATA 256
struct usdf_connresp_msg {
uint32_t cresp_result;
uint32_t cresp_reason;
struct usdf_connreq_msg {
uint32_t creq_peer_id;
uint32_t creq_ipaddr;
uint32_t creq_port;
uint8_t creq_mac[ETH_ALEN];
uint8_t pad[8 - ETH_ALEN];
uint32_t creq_result;
uint32_t creq_reason;
uint32_t creq_datalen;
uint8_t creq_data[0];
} __attribute__((packed));
struct usdf_connreq {
int cr_sockfd;
struct usdf_pep *cr_pep;
struct usdf_ep *cr_ep;
TAILQ_ENTRY(usdf_connreq) cr_link;
struct usdf_poll_item cr_pollitem;
@ -57,7 +63,12 @@ struct usdf_connreq {
uint8_t *cr_ptr;
size_t cr_resid;
size_t cr_datalen;
uint8_t cr_data[0];
};
void usdf_cm_msg_connreq_failed(struct usdf_connreq *crp, int error);
int usdf_cm_rdm_getname(fid_t fid, void *addr, size_t *addrlen);
#endif /* _USDF_CM_H_ */

Просмотреть файл

@ -57,11 +57,14 @@
#include <rdma/fi_rma.h>
#include <rdma/fi_errno.h>
#include "fi.h"
#include "fi_enosys.h"
#include "usnic_direct.h"
#include "usd.h"
#include "usdf.h"
#include "usdf_av.h"
#include "usdf_progress.h"
#include "usdf_cq.h"
static ssize_t
usdf_cq_readerr(struct fid_cq *fcq, struct fi_cq_err_entry *entry,
@ -93,12 +96,23 @@ usdf_cq_sread(struct fid_cq *cq, void *buf, size_t count, const void *cond,
return -FI_ENOSYS;
}
static ssize_t
usdf_cq_read_context(struct fid_cq *fcq, void *buf, size_t count)
/*
* poll a hard CQ
* Since this routine is an inline and is always called with format as
* a constant, I am counting on the compiler optimizing away all the switches
* on format.
*/
static inline ssize_t
usdf_cq_read_common(struct fid_cq *fcq, void *buf, size_t count,
enum fi_cq_format format)
{
struct usdf_cq *cq;
struct fi_cq_entry *entry;
struct fi_cq_entry *last;
uint8_t *entry;
uint8_t *last;
size_t entry_len;
struct fi_cq_entry *ctx_entry;
struct fi_cq_msg_entry *msg_entry;
struct fi_cq_data_entry *data_entry;
ssize_t ret;
cq = cq_ftou(fcq);
@ -106,11 +120,26 @@ usdf_cq_read_context(struct fid_cq *fcq, void *buf, size_t count)
return -FI_EAVAIL;
}
switch (format) {
case FI_CQ_FORMAT_CONTEXT:
entry_len = sizeof(struct fi_cq_entry);
break;
case FI_CQ_FORMAT_MSG:
entry_len = sizeof(struct fi_cq_msg_entry);
break;
case FI_CQ_FORMAT_DATA:
entry_len = sizeof(struct fi_cq_data_entry);
break;
default:
return 0;
}
ret = 0;
entry = buf;
last = entry + count;
last = entry + (entry_len * count);
while (entry < last) {
ret = usd_poll_cq(cq->cq_cq, &cq->cq_comp);
ret = usd_poll_cq(cq->c.hard.cq_cq, &cq->cq_comp);
if (ret == -EAGAIN) {
ret = 0;
break;
@ -119,19 +148,56 @@ usdf_cq_read_context(struct fid_cq *fcq, void *buf, size_t count)
ret = -FI_EAVAIL;
break;
}
entry->op_context = cq->cq_comp.uc_context;
entry++;
switch (format) {
case FI_CQ_FORMAT_CONTEXT:
ctx_entry = (struct fi_cq_entry *)entry;
ctx_entry->op_context = cq->cq_comp.uc_context;
break;
case FI_CQ_FORMAT_MSG:
msg_entry = (struct fi_cq_msg_entry *)entry;
msg_entry->op_context = cq->cq_comp.uc_context;
msg_entry->flags = 0;
msg_entry->len = cq->cq_comp.uc_bytes;
break;
case FI_CQ_FORMAT_DATA:
data_entry = (struct fi_cq_data_entry *)entry;
data_entry->op_context = cq->cq_comp.uc_context;
data_entry->flags = 0;
data_entry->len = cq->cq_comp.uc_bytes;
data_entry->buf = 0; /* XXX */
data_entry->data = 0;
break;
default:
return 0;
}
entry += entry_len;
}
if (entry > (struct fi_cq_entry *)buf) {
return entry - (struct fi_cq_entry *)buf;
if (entry > (uint8_t *)buf) {
return (entry - (uint8_t *)buf) / entry_len;
} else {
return ret;
}
}
static ssize_t
usdf_cq_read_context(struct fid_cq *fcq, void *buf, size_t count)
{
return usdf_cq_read_common(fcq, buf, count, FI_CQ_FORMAT_CONTEXT);
}
static ssize_t
usdf_cq_read_msg(struct fid_cq *fcq, void *buf, size_t count)
{
return usdf_cq_read_common(fcq, buf, count, FI_CQ_FORMAT_MSG);
}
static ssize_t
usdf_cq_read_data(struct fid_cq *fcq, void *buf, size_t count)
{
return usdf_cq_read_common(fcq, buf, count, FI_CQ_FORMAT_DATA);
}
static ssize_t
usdf_cq_readfrom_context(struct fid_cq *fcq, void *buf, size_t count,
fi_addr_t *src_addr)
@ -151,7 +217,7 @@ usdf_cq_readfrom_context(struct fid_cq *fcq, void *buf, size_t count,
if (cq->cq_comp.uc_status != 0) {
return -FI_EAVAIL;
}
ucq = to_cqi(cq->cq_cq);
ucq = to_cqi(cq->c.hard.cq_cq);
ret = 0;
entry = buf;
@ -160,7 +226,7 @@ usdf_cq_readfrom_context(struct fid_cq *fcq, void *buf, size_t count,
cq_desc = (struct cq_desc *)((uint8_t *)ucq->ucq_desc_ring +
(ucq->ucq_next_desc << 4));
ret = usd_poll_cq(cq->cq_cq, &cq->cq_comp);
ret = usd_poll_cq(cq->c.hard.cq_cq, &cq->cq_comp);
if (ret == -EAGAIN) {
ret = 0;
break;
@ -174,13 +240,13 @@ usdf_cq_readfrom_context(struct fid_cq *fcq, void *buf, size_t count,
index = le16_to_cpu(cq_desc->completed_index) &
CQ_DESC_COMP_NDX_MASK;
ep = cq->cq_comp.uc_qp->uq_context;
hdr = ep->ep_hdr_ptr[index];
hdr = ep->e.dg.ep_hdr_ptr[index];
memset(&sin, 0, sizeof(sin));
sin.sin_addr.s_addr = hdr->uh_ip.saddr;
sin.sin_port = hdr->uh_udp.source;
ret = fi_av_insert(av_utof(ep->ep_av), &sin, 1,
ret = fi_av_insert(av_utof(ep->e.dg.ep_av), &sin, 1,
src_addr, 0, NULL);
if (ret != 1) {
*src_addr = FI_ADDR_NOTAVAIL;
@ -201,12 +267,174 @@ usdf_cq_readfrom_context(struct fid_cq *fcq, void *buf, size_t count,
}
}
static ssize_t
usdf_cq_read_msg(struct fid_cq *fcq, void *buf, size_t count)
/*****************************************************************
* "soft" CQ support
*****************************************************************/
static inline void
usdf_progress_hard_cq(struct usdf_cq_hard *hcq, enum fi_cq_format format)
{
int ret;
struct usd_completion comp;
void *entry;
size_t entry_size;
struct fi_cq_entry *ctx_entry;
struct fi_cq_msg_entry *msg_entry;
struct fi_cq_data_entry *data_entry;
struct usdf_cq *cq;
cq = hcq->cqh_cq;
do {
ret = usd_poll_cq(hcq->cqh_ucq, &comp);
if (ret == 0) {
entry = cq->c.soft.cq_head;
switch (format) {
case FI_CQ_FORMAT_CONTEXT:
entry_size = sizeof(*ctx_entry);
ctx_entry = (struct fi_cq_entry *)entry;
ctx_entry->op_context = cq->cq_comp.uc_context;
break;
case FI_CQ_FORMAT_MSG:
entry_size = sizeof(*msg_entry);
msg_entry = (struct fi_cq_msg_entry *)entry;
msg_entry->op_context = cq->cq_comp.uc_context;
msg_entry->flags = 0;
msg_entry->len = cq->cq_comp.uc_bytes;
break;
case FI_CQ_FORMAT_DATA:
entry_size = sizeof(*data_entry);
data_entry = (struct fi_cq_data_entry *)entry;
data_entry->op_context = cq->cq_comp.uc_context;
data_entry->flags = 0;
data_entry->len = cq->cq_comp.uc_bytes;
data_entry->buf = 0; /* XXX */
data_entry->data = 0;
break;
default:
return;
}
/* update with wrap */
entry = (uint8_t *)entry + entry_size;
if (entry != cq->c.soft.cq_end) {
cq->c.soft.cq_head = entry;
} else {
cq->c.soft.cq_head = cq->c.soft.cq_comps;
}
}
} while (ret != -EAGAIN);
}
void
usdf_progress_hard_cq_context(struct usdf_cq_hard *hcq)
{
usdf_progress_hard_cq(hcq, FI_CQ_FORMAT_CONTEXT);
}
void
usdf_progress_hard_cq_msg(struct usdf_cq_hard *hcq)
{
usdf_progress_hard_cq(hcq, FI_CQ_FORMAT_MSG);
}
void
usdf_progress_hard_cq_data(struct usdf_cq_hard *hcq)
{
usdf_progress_hard_cq(hcq, FI_CQ_FORMAT_DATA);
}
static inline void
usdf_cq_post_soft(struct usdf_cq_hard *hcq, void *context, size_t len,
enum fi_cq_format format)
{
void *entry;
size_t entry_size;
struct fi_cq_entry *ctx_entry;
struct fi_cq_msg_entry *msg_entry;
struct fi_cq_data_entry *data_entry;
struct usdf_cq *cq;
cq = hcq->cqh_cq;
entry = cq->c.soft.cq_head;
switch (format) {
case FI_CQ_FORMAT_CONTEXT:
entry_size = sizeof(*ctx_entry);
ctx_entry = (struct fi_cq_entry *)entry;
ctx_entry->op_context = context;
break;
case FI_CQ_FORMAT_MSG:
entry_size = sizeof(*msg_entry);
msg_entry = (struct fi_cq_msg_entry *)entry;
msg_entry->op_context = context;
msg_entry->flags = 0;
msg_entry->len = len;
break;
case FI_CQ_FORMAT_DATA:
entry_size = sizeof(*data_entry);
data_entry = (struct fi_cq_data_entry *)entry;
data_entry->op_context = context;
data_entry->flags = 0;
data_entry->len = len;
data_entry->buf = NULL;
data_entry->data = 0;
break;
default:
return;
}
/* update with wrap */
entry = (uint8_t *)entry + entry_size;
if (entry != cq->c.soft.cq_end) {
cq->c.soft.cq_head = entry;
} else {
cq->c.soft.cq_head = cq->c.soft.cq_comps;
}
}
void
usdf_cq_post_soft_context(struct usdf_cq_hard *hcq, void *context, size_t len)
{
usdf_cq_post_soft(hcq, context, len, FI_CQ_FORMAT_CONTEXT);
}
void
usdf_cq_post_soft_msg(struct usdf_cq_hard *hcq, void *context, size_t len)
{
usdf_cq_post_soft(hcq, context, len, FI_CQ_FORMAT_MSG);
}
void
usdf_cq_post_soft_data(struct usdf_cq_hard *hcq, void *context, size_t len)
{
usdf_cq_post_soft(hcq, context, len, FI_CQ_FORMAT_DATA);
}
ssize_t
usdf_cq_sread_soft(struct fid_cq *cq, void *buf, size_t count, const void *cond,
int timeout)
{
return -FI_ENOSYS;
}
/*
* poll a soft CQ
* This will loop over all the hard CQs within, collecting results.
* Since this routine is an inline and is always called with format as
* a constant, I am counting on the compiler optimizing away all the switches
* on format.
*/
static inline ssize_t
usdf_cq_read_common_soft(struct fid_cq *fcq, void *buf, size_t count,
enum fi_cq_format format)
{
struct usdf_cq *cq;
struct fi_cq_msg_entry *entry;
struct fi_cq_msg_entry *last;
uint8_t *entry;
uint8_t *last;
void *tail;
size_t entry_len;
ssize_t ret;
cq = cq_ftou(fcq);
@ -214,11 +442,94 @@ usdf_cq_read_msg(struct fid_cq *fcq, void *buf, size_t count)
return -FI_EAVAIL;
}
/* progress... */
usdf_domain_progress(cq->cq_domain);
switch (format) {
case FI_CQ_FORMAT_CONTEXT:
entry_len = sizeof(struct fi_cq_entry);
break;
case FI_CQ_FORMAT_MSG:
entry_len = sizeof(struct fi_cq_msg_entry);
break;
case FI_CQ_FORMAT_DATA:
entry_len = sizeof(struct fi_cq_data_entry);
break;
default:
return 0;
}
ret = 0;
entry = buf;
last = entry + (entry_len * count);
tail = cq->c.soft.cq_tail;
// XXX ... handle error comps
while (entry < last && tail != cq->c.soft.cq_head) {
memcpy(entry, tail, entry_len);
entry += entry_len;
tail = (uint8_t *)tail + entry_len;
if (tail == cq->c.soft.cq_end) {
tail = cq->c.soft.cq_comps;
}
}
cq->c.soft.cq_tail = tail;
if (entry > (uint8_t *)buf) {
return (entry - (uint8_t *)buf) / entry_len;
} else {
return ret;
}
}
static ssize_t
usdf_cq_read_context_soft(struct fid_cq *fcq, void *buf, size_t count)
{
return usdf_cq_read_common_soft(fcq, buf, count, FI_CQ_FORMAT_CONTEXT);
}
static ssize_t
usdf_cq_read_msg_soft(struct fid_cq *fcq, void *buf, size_t count)
{
return usdf_cq_read_common_soft(fcq, buf, count, FI_CQ_FORMAT_MSG);
}
static ssize_t
usdf_cq_read_data_soft(struct fid_cq *fcq, void *buf, size_t count)
{
return usdf_cq_read_common_soft(fcq, buf, count, FI_CQ_FORMAT_DATA);
}
static ssize_t
usdf_cq_readfrom_context_soft(struct fid_cq *fcq, void *buf, size_t count,
fi_addr_t *src_addr)
{
struct usdf_cq *cq;
struct usd_cq_impl *ucq;
struct fi_cq_entry *entry;
struct fi_cq_entry *last;
ssize_t ret;
struct cq_desc *cq_desc;
struct usdf_ep *ep;
struct sockaddr_in sin;
struct usd_udp_hdr *hdr;
uint16_t index;
cq = cq_ftou(fcq);
if (cq->cq_comp.uc_status != 0) {
return -FI_EAVAIL;
}
ucq = to_cqi(cq->c.hard.cq_cq);
ret = 0;
entry = buf;
last = entry + count;
while (entry < last) {
ret = usd_poll_cq(cq->cq_cq, &cq->cq_comp);
cq_desc = (struct cq_desc *)((uint8_t *)ucq->ucq_desc_ring +
(ucq->ucq_next_desc << 4));
ret = usd_poll_cq(cq->c.hard.cq_cq, &cq->cq_comp);
if (ret == -EAGAIN) {
ret = 0;
break;
@ -228,62 +539,40 @@ usdf_cq_read_msg(struct fid_cq *fcq, void *buf, size_t count)
break;
}
if (cq->cq_comp.uc_type == USD_COMPTYPE_RECV) {
index = le16_to_cpu(cq_desc->completed_index) &
CQ_DESC_COMP_NDX_MASK;
ep = cq->cq_comp.uc_qp->uq_context;
hdr = ep->e.dg.ep_hdr_ptr[index];
memset(&sin, 0, sizeof(sin));
sin.sin_addr.s_addr = hdr->uh_ip.saddr;
sin.sin_port = hdr->uh_udp.source;
ret = fi_av_insert(av_utof(ep->e.dg.ep_av), &sin, 1,
src_addr, 0, NULL);
if (ret != 1) {
*src_addr = FI_ADDR_NOTAVAIL;
}
++src_addr;
}
entry->op_context = cq->cq_comp.uc_context;
entry->flags = 0;
entry->len = cq->cq_comp.uc_bytes;
entry++;
}
if (entry > (struct fi_cq_msg_entry *)buf) {
return entry - (struct fi_cq_msg_entry *)buf;
if (entry > (struct fi_cq_entry *)buf) {
return entry - (struct fi_cq_entry *)buf;
} else {
return ret;
}
}
static ssize_t
usdf_cq_read_data(struct fid_cq *fcq, void *buf, size_t count)
{
struct usdf_cq *cq;
struct fi_cq_data_entry *entry;
struct fi_cq_data_entry *last;
ssize_t ret;
cq = cq_ftou(fcq);
if (cq->cq_comp.uc_status != 0) {
return -FI_EAVAIL;
}
ret = 0;
entry = buf;
last = entry + count;
while (entry < last) {
ret = usd_poll_cq(cq->cq_cq, &cq->cq_comp);
if (ret == -EAGAIN) {
ret = 0;
break;
}
if (cq->cq_comp.uc_status != 0) {
ret = -FI_EAVAIL;
break;
}
entry->op_context = cq->cq_comp.uc_context;
entry->flags = 0;
entry->len = cq->cq_comp.uc_bytes;
entry->buf = 0; /* XXX */
entry->data = 0;
entry++;
}
if (entry > (struct fi_cq_data_entry *)buf) {
return entry - (struct fi_cq_data_entry *)buf;
} else {
return ret;
}
}
/*****************************************************************
* common CQ support
*****************************************************************/
static const char *
usdf_cq_strerror(struct fid_cq *eq, int prov_errno, const void *err_data,
@ -294,31 +583,6 @@ usdf_cq_strerror(struct fid_cq *eq, int prov_errno, const void *err_data,
return buf;
}
static struct fi_ops_cq usdf_cq_context_ops = {
.size = sizeof(struct fi_ops_cq),
.read = usdf_cq_read_context,
.sread = usdf_cq_sread,
.readfrom = usdf_cq_readfrom_context,
.readerr = usdf_cq_readerr,
.strerror = usdf_cq_strerror
};
static struct fi_ops_cq usdf_cq_msg_ops = {
.size = sizeof(struct fi_ops_cq),
.read = usdf_cq_read_msg,
.sread = usdf_cq_sread,
.readerr = usdf_cq_readerr,
.strerror = usdf_cq_strerror
};
static struct fi_ops_cq usdf_cq_data_ops = {
.size = sizeof(struct fi_ops_cq),
.read = usdf_cq_read_data,
.sread = usdf_cq_sread,
.readerr = usdf_cq_readerr,
.strerror = usdf_cq_strerror
};
static int
usdf_cq_control(fid_t fid, int command, void *arg)
{
@ -329,13 +593,35 @@ static int
usdf_cq_close(fid_t fid)
{
struct usdf_cq *cq;
struct usdf_cq_hard *hcq;
int ret;
cq = container_of(fid, struct usdf_cq, cq_fid.fid);
if (cq->cq_cq) {
ret = usd_destroy_cq(cq->cq_cq);
if (ret != 0) {
return ret;
if (atomic_get(&cq->cq_refcnt) > 0) {
return -FI_EBUSY;
}
if (usdf_cq_is_soft(cq)) {
while (!TAILQ_EMPTY(&cq->c.soft.cq_list)) {
hcq = TAILQ_FIRST(&cq->c.soft.cq_list);
if (atomic_get(&hcq->cqh_refcnt) > 0) {
return -FI_EBUSY;
}
TAILQ_REMOVE(&cq->c.soft.cq_list, hcq, cqh_link);
if (hcq->cqh_ucq != NULL) {
ret = usd_destroy_cq(hcq->cqh_ucq);
if (ret != 0) {
return ret;
}
}
free(hcq);
}
} else {
if (cq->c.hard.cq_cq) {
ret = usd_destroy_cq(cq->c.hard.cq_cq);
if (ret != 0) {
return ret;
}
}
}
@ -343,21 +629,206 @@ usdf_cq_close(fid_t fid)
return 0;
}
static struct fi_ops_cq usdf_cq_context_ops = {
.size = sizeof(struct fi_ops_cq),
.read = usdf_cq_read_context,
.sread = usdf_cq_sread,
.readfrom = usdf_cq_readfrom_context,
.readerr = usdf_cq_readerr,
.strerror = usdf_cq_strerror
};
static struct fi_ops_cq usdf_cq_context_soft_ops = {
.size = sizeof(struct fi_ops_cq),
.read = usdf_cq_read_context_soft,
.sread = usdf_cq_sread_soft,
.readfrom = usdf_cq_readfrom_context_soft,
.readerr = usdf_cq_readerr,
.strerror = usdf_cq_strerror
};
static struct fi_ops_cq usdf_cq_msg_ops = {
.size = sizeof(struct fi_ops_cq),
.read = usdf_cq_read_msg,
.sread = usdf_cq_sread,
.readfrom = fi_no_cq_readfrom, /* XXX */
.readerr = usdf_cq_readerr,
.strerror = usdf_cq_strerror
};
static struct fi_ops_cq usdf_cq_msg_soft_ops = {
.size = sizeof(struct fi_ops_cq),
.read = usdf_cq_read_msg_soft,
.sread = usdf_cq_sread,
.readfrom = fi_no_cq_readfrom, /* XXX */
.readerr = usdf_cq_readerr,
.strerror = usdf_cq_strerror
};
static struct fi_ops_cq usdf_cq_data_ops = {
.size = sizeof(struct fi_ops_cq),
.read = usdf_cq_read_data,
.sread = usdf_cq_sread,
.readfrom = fi_no_cq_readfrom, /* XXX */
.readerr = usdf_cq_readerr,
.strerror = usdf_cq_strerror
};
static struct fi_ops_cq usdf_cq_data_soft_ops = {
.size = sizeof(struct fi_ops_cq),
.read = usdf_cq_read_data_soft,
.sread = usdf_cq_sread,
.readfrom = fi_no_cq_readfrom, /* XXX */
.readerr = usdf_cq_readerr,
.strerror = usdf_cq_strerror
};
static struct fi_ops usdf_cq_fi_ops = {
.size = sizeof(struct fi_ops),
.close = usdf_cq_close,
.control = usdf_cq_control,
};
/*
* Return true is this CQ is in "soft" (emulated) mode
*/
int
usdf_cq_is_soft(struct usdf_cq *cq)
{
struct fi_ops_cq *soft_ops;
switch (cq->cq_attr.format) {
case FI_CQ_FORMAT_CONTEXT:
soft_ops = &usdf_cq_context_soft_ops;
break;
case FI_CQ_FORMAT_MSG:
soft_ops = &usdf_cq_msg_soft_ops;
break;
case FI_CQ_FORMAT_DATA:
soft_ops = &usdf_cq_data_soft_ops;
break;
default:
return 0;
}
return cq->cq_fid.ops == soft_ops;
}
int
usdf_cq_make_soft(struct usdf_cq *cq)
{
struct fi_ops_cq *hard_ops;
struct fi_ops_cq *soft_ops;
struct usdf_cq_hard *hcq;
struct usd_cq *ucq;
size_t comp_size;
void (*rtn)(struct usdf_cq_hard *hcq);
switch (cq->cq_attr.format) {
case FI_CQ_FORMAT_CONTEXT:
hard_ops = &usdf_cq_context_ops;
soft_ops = &usdf_cq_context_soft_ops;
comp_size = sizeof(struct fi_cq_entry);
rtn = usdf_progress_hard_cq_context;
break;
case FI_CQ_FORMAT_MSG:
hard_ops = &usdf_cq_msg_ops;
soft_ops = &usdf_cq_msg_soft_ops;
comp_size = sizeof(struct fi_cq_msg_entry);
rtn = usdf_progress_hard_cq_msg;
break;
case FI_CQ_FORMAT_DATA:
hard_ops = &usdf_cq_data_ops;
soft_ops = &usdf_cq_data_soft_ops;
comp_size = sizeof(struct fi_cq_data_entry);
rtn = usdf_progress_hard_cq_data;
break;
default:
return 0;
}
if (cq->cq_fid.ops == hard_ops) {
/* save the CQ before we trash the union */
ucq = cq->c.hard.cq_cq;
/* fill in the soft part of union */
TAILQ_INIT(&cq->c.soft.cq_list);
cq->c.soft.cq_comps = calloc(cq->cq_attr.size, comp_size);
if (cq->c.soft.cq_comps == NULL) {
return -FI_ENOMEM;
}
cq->c.soft.cq_end = (void *)((uintptr_t)cq->c.soft.cq_comps +
(cq->cq_attr.size * comp_size));
cq->c.soft.cq_head = cq->c.soft.cq_comps;
cq->c.soft.cq_tail = cq->c.soft.cq_comps;
/* need to add hard queue to list? */
if (ucq != NULL) {
hcq = malloc(sizeof(*hcq));
if (hcq == NULL) {
free(cq->c.soft.cq_comps);
cq->c.hard.cq_cq = ucq; /* restore */
return -FI_ENOMEM;
}
hcq->cqh_cq = cq;
hcq->cqh_ucq = ucq;
hcq->cqh_progress = rtn;
atomic_init(&hcq->cqh_refcnt,
atomic_get(&cq->cq_refcnt));
TAILQ_INSERT_HEAD(&cq->c.soft.cq_list, hcq, cqh_link);
}
cq->cq_fid.ops = soft_ops;
}
return 0;
}
static int
usdf_cq_process_attr(struct fi_cq_attr *attr, struct usdf_domain *udp)
{
/* no wait object yet */
if (attr->wait_obj != FI_WAIT_NONE) {
return -FI_ENOSYS;
}
/* bound and default size */
if (attr->size > udp->dom_fabric->fab_dev_attrs->uda_max_cqe) {
return -FI_EINVAL;
}
if (attr->size == 0) {
attr->size = udp->dom_fabric->fab_dev_attrs->uda_max_cqe;
}
/* default format is FI_CQ_FORMAT_CONTEXT */
if (attr->format == FI_CQ_FORMAT_UNSPEC) {
attr->format = FI_CQ_FORMAT_CONTEXT;
}
return 0;
}
int
usdf_cq_create_cq(struct usdf_cq *cq)
{
return usd_create_cq(cq->cq_domain->dom_dev, cq->cq_attr.size, -1,
&cq->c.hard.cq_cq);
}
int
usdf_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
struct fid_cq **cq_o, void *context)
{
struct usdf_cq *cq;
struct usdf_domain *udp;
int ret;
if (attr->wait_obj != FI_WAIT_NONE) {
return -FI_ENOSYS;
udp = dom_ftou(domain);
ret = usdf_cq_process_attr(attr, udp);
if (ret != 0) {
return ret;
}
cq = calloc(1, sizeof(*cq));
@ -365,13 +836,7 @@ usdf_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
return -FI_ENOMEM;
}
cq->cq_domain = container_of(domain, struct usdf_domain, dom_fid);
ret = usd_create_cq(cq->cq_domain->dom_dev, attr->size, -1, &cq->cq_cq);
if (ret != 0) {
goto fail;
}
cq->cq_domain = udp;
cq->cq_fid.fid.fclass = FI_CLASS_CQ;
cq->cq_fid.fid.context = context;
cq->cq_fid.fid.ops = &usdf_cq_fi_ops;
@ -391,13 +856,14 @@ usdf_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
goto fail;
}
cq->cq_attr = *attr;
*cq_o = &cq->cq_fid;
return 0;
fail:
if (cq != NULL) {
if (cq->cq_cq != NULL) {
usd_destroy_cq(cq->cq_cq);
if (cq->c.hard.cq_cq != NULL) {
usd_destroy_cq(cq->c.hard.cq_cq);
}
free(cq);
}

Просмотреть файл

@ -0,0 +1,54 @@
/*
* Copyright (c) 2014, Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _USDF_CQ_H_
#define _USDF_CQ_H_
int usdf_cq_is_soft(struct usdf_cq *cq);
int usdf_cq_make_soft(struct usdf_cq *cq);
int usdf_cq_create_cq(struct usdf_cq *cq);
void usdf_progress_hard_cq_context(struct usdf_cq_hard *hcq);
void usdf_progress_hard_cq_msg(struct usdf_cq_hard *hcq);
void usdf_progress_hard_cq_data(struct usdf_cq_hard *hcq);
void usdf_cq_post_soft_context(struct usdf_cq_hard *hcq, void *context,
size_t len);
void usdf_cq_post_soft_msg(struct usdf_cq_hard *hcq, void *context,
size_t len);
void usdf_cq_post_soft_data(struct usdf_cq_hard *hcq, void *context,
size_t len);
#endif /* _USDF_CQ_H_ */

Просмотреть файл

@ -57,11 +57,12 @@
#include <rdma/fi_errno.h>
#include "fi.h"
#include "usnic_direct.h"
#include "usd.h"
#include "usd_post.h"
#include "usdf.h"
#include "usdf_dgram.h"
#include "usdf_av.h"
ssize_t
usdf_dgram_recv(struct fid_ep *fep, void *buf, size_t len,
@ -73,11 +74,11 @@ usdf_dgram_recv(struct fid_ep *fep, void *buf, size_t len,
uint32_t index;
ep = ep_ftou(fep);
qp = to_qpi(ep->ep_qp);
qp = to_qpi(ep->e.dg.ep_qp);
index = qp->uq_rq.urq_post_index;
rxd.urd_context = context;
rxd.urd_iov[0].iov_base = (uint8_t *)ep->ep_hdr_buf +
rxd.urd_iov[0].iov_base = (uint8_t *)ep->e.dg.ep_hdr_buf +
(index * USDF_HDR_BUF_ENTRY) +
(USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr));
rxd.urd_iov[0].iov_len = sizeof(struct usd_udp_hdr);
@ -86,16 +87,16 @@ usdf_dgram_recv(struct fid_ep *fep, void *buf, size_t len,
rxd.urd_iov_cnt = 2;
rxd.urd_next = NULL;
ep->ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
ep->e.dg.ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
index = (index + 1) & qp->uq_rq.urq_post_index_mask;
ep->ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
ep->e.dg.ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
return usd_post_recv(ep->ep_qp, &rxd);
return usd_post_recv(ep->e.dg.ep_qp, &rxd);
}
ssize_t
usdf_dgram_recvv(struct fid_ep *fep, const struct iovec *iov, void **desc,
size_t count, fi_addr_t src_addr, void *context)
size_t count, fi_addr_t src_addr, void *context)
{
struct usdf_ep *ep;
struct usd_recv_desc rxd;
@ -104,10 +105,10 @@ usdf_dgram_recvv(struct fid_ep *fep, const struct iovec *iov, void **desc,
int i;
ep = ep_ftou(fep);
qp = to_qpi(ep->ep_qp);
qp = to_qpi(ep->e.dg.ep_qp);
rxd.urd_context = context;
rxd.urd_iov[0].iov_base = ep->ep_hdr_buf +
rxd.urd_iov[0].iov_base = ep->e.dg.ep_hdr_buf +
qp->uq_rq.urq_post_index * USDF_HDR_BUF_ENTRY;
rxd.urd_iov[0].iov_len = sizeof(struct usd_udp_hdr);
memcpy(&rxd.urd_iov[1], iov, sizeof(*iov) * count);
@ -116,23 +117,30 @@ usdf_dgram_recvv(struct fid_ep *fep, const struct iovec *iov, void **desc,
index = qp->uq_rq.urq_post_index;
for (i = 0; i < count; ++i) {
ep->ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
ep->e.dg.ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
index = (index + 1) & qp->uq_rq.urq_post_index_mask;
}
return usd_post_recv(ep->ep_qp, &rxd);
return usd_post_recv(ep->e.dg.ep_qp, &rxd);
}
ssize_t
usdf_dgram_recvmsg(struct fid_ep *fep, const struct fi_msg *msg, uint64_t flags)
{
return usdf_dgram_recvv(fep, msg->msg_iov, msg->desc,
msg->iov_count, (fi_addr_t)msg->addr, msg->context);
}
static inline ssize_t
_usdf_dgram_send(struct usdf_ep *ep, struct usd_dest *dest,
_usdf_dgram_send(struct usdf_ep *ep, struct usdf_dest *dest,
const void *buf, size_t len, void *context)
{
if (len <= USD_SEND_MAX_COPY - sizeof(struct usd_udp_hdr)) {
return usd_post_send_one_copy(ep->ep_qp, dest, buf, len,
USD_SF_SIGNAL, context);
return usd_post_send_one_copy(ep->e.dg.ep_qp,
&dest->ds_dest, buf, len, USD_SF_SIGNAL, context);
} else {
return usd_post_send_one(ep->ep_qp, dest, buf, len,
USD_SF_SIGNAL, context);
return usd_post_send_one(ep->e.dg.ep_qp, &dest->ds_dest,
buf, len, USD_SF_SIGNAL, context);
}
}
@ -141,57 +149,170 @@ usdf_dgram_send(struct fid_ep *fep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, void *context)
{
struct usdf_ep *ep;
struct usd_dest *dest;
struct usdf_dest *dest;
ep = ep_ftou(fep);
dest = (struct usd_dest *)(uintptr_t) dest_addr;
dest = (struct usdf_dest *)(uintptr_t) dest_addr;
return _usdf_dgram_send(ep, dest, buf, len, context);
}
ssize_t
usdf_dgram_conn_send(struct fid_ep *fep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, void *context)
{
struct usdf_ep *ep;
ep = ep_ftou(fep);
return _usdf_dgram_send(ep, ep->ep_dest, buf, len, context);
}
ssize_t
usdf_dgram_senddata(struct fid_ep *ep, const void *buf, size_t len,
void *desc, uint64_t data, fi_addr_t dest_addr,
void *context)
usdf_dgram_senddata(struct fid_ep *fep, const void *buf, size_t len,
void *desc, uint64_t data, fi_addr_t dest_addr,
void *context)
{
return -FI_ENOSYS;
}
static ssize_t
_usdf_dgram_send_iov_copy(struct usdf_ep *ep, struct usd_dest *dest,
const struct iovec *iov, size_t count, void *context)
{
struct usd_wq *wq;
struct usd_qp_impl *qp;
struct usd_udp_hdr *hdr;
uint32_t last_post;
struct usd_wq_post_info *info;
uint8_t *copybuf;
size_t len;
unsigned i;
qp = to_qpi(ep->e.dg.ep_qp);
wq = &qp->uq_wq;
copybuf = wq->uwq_copybuf +
wq->uwq_post_index * USD_SEND_MAX_COPY;
hdr = (struct usd_udp_hdr *)copybuf;
memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr));
hdr->uh_udp.source =
qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;
len = sizeof(*hdr);
for (i = 0; i < count; i++) {
memcpy(copybuf + len, iov[i].iov_base, iov[i].iov_len);
len += iov[i].iov_len;
}
/* adjust lengths */
hdr->uh_ip.tot_len = htons(len - sizeof(struct ether_header));
hdr->uh_udp.len = htons(len - sizeof(struct ether_header) -
sizeof(struct iphdr));
last_post = _usd_post_send_one(wq, hdr, len, 1);
info = &wq->uwq_post_info[last_post];
info->wp_context = context;
info->wp_len = len;
return 0;
}
ssize_t
usdf_dgram_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
usdf_dgram_sendv(struct fid_ep *fep, const struct iovec *iov, void **desc,
size_t count, fi_addr_t dest_addr, void *context)
{
return -FI_ENOSYS;
struct usdf_ep *ep;
struct usd_dest *dest;
struct usd_wq *wq;
struct usd_qp_impl *qp;
struct usd_udp_hdr *hdr;
uint32_t last_post;
struct usd_wq_post_info *info;
uint8_t *copybuf;
size_t len;
struct iovec send_iov[USDF_DGRAM_MAX_SGE];
int i;
ep = ep_ftou(fep);
dest = (struct usd_dest *)(uintptr_t) dest_addr;
len = 0;
for (i = 0; i < count; i++) {
len += iov[i].iov_len;
}
if (len + sizeof(struct usd_udp_hdr) > USD_SEND_MAX_COPY) {
qp = to_qpi(ep->e.dg.ep_qp);
wq = &qp->uq_wq;
copybuf = wq->uwq_copybuf +
wq->uwq_post_index * USD_SEND_MAX_COPY;
hdr = (struct usd_udp_hdr *)copybuf;
memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr));
/* adjust lengths and insert source port */
hdr->uh_ip.tot_len = htons(len + sizeof(struct usd_udp_hdr) -
sizeof(struct ether_header));
hdr->uh_udp.len = htons((sizeof(struct usd_udp_hdr) -
sizeof(struct ether_header) -
sizeof(struct iphdr)) + len);
hdr->uh_udp.source =
qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;
send_iov[0].iov_base = hdr;
send_iov[0].iov_len = sizeof(*hdr);
memcpy(&send_iov[1], iov, sizeof(struct iovec) * count);
last_post = _usd_post_send_iov(wq, send_iov, count + 1, 1);
info = &wq->uwq_post_info[last_post];
info->wp_context = context;
info->wp_len = len;
} else {
_usdf_dgram_send_iov_copy(ep, dest, iov, count, context);
}
return 0;
}
ssize_t
usdf_dgram_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flags)
usdf_dgram_sendmsg(struct fid_ep *fep, const struct fi_msg *msg, uint64_t flags)
{
return -FI_ENOSYS;
return usdf_dgram_sendv(fep, msg->msg_iov, msg->desc, msg->iov_count,
(fi_addr_t)msg->addr, msg->context);
}
ssize_t
usdf_dgram_inject(struct fid_ep *ep, const void *buf, size_t len,
usdf_dgram_inject(struct fid_ep *fep, const void *buf, size_t len,
fi_addr_t dest_addr)
{
return -FI_ENOSYS;
}
struct usdf_ep *ep;
struct usdf_dest *dest;
struct usd_wq *wq;
struct usd_qp_impl *qp;
struct usd_udp_hdr *hdr;
uint32_t last_post;
struct usd_wq_post_info *info;
uint8_t *copybuf;
ssize_t
usdf_dgram_recvmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flags)
{
return -FI_ENOSYS;
if (len + sizeof(struct usd_udp_hdr) > USD_SEND_MAX_COPY) {
return -FI_ENOSPC;
}
ep = ep_ftou(fep);
dest = (struct usdf_dest *)(uintptr_t)dest_addr;
qp = to_qpi(ep->e.dg.ep_qp);
wq = &qp->uq_wq;
copybuf = wq->uwq_copybuf +
wq->uwq_post_index * USD_SEND_MAX_COPY;
hdr = (struct usd_udp_hdr *)copybuf;
memcpy(hdr, &dest->ds_dest.ds_dest.ds_udp.u_hdr, sizeof(*hdr));
hdr->uh_udp.source =
qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;
hdr->uh_ip.tot_len = htons(len + sizeof(*hdr)
- sizeof(struct ether_header));
hdr->uh_udp.len = htons(len + sizeof(*hdr) -
sizeof(struct ether_header) -
sizeof(struct iphdr));
memcpy(hdr + 1, buf, len);
last_post = _usd_post_send_one(wq, hdr, len + sizeof(*hdr), 1);
info = &wq->uwq_post_info[last_post];
info->wp_context = NULL;
info->wp_len = len;
return 0;
}
/*
@ -207,19 +328,20 @@ usdf_dgram_prefix_recv(struct fid_ep *fep, void *buf, size_t len,
uint32_t index;
ep = ep_ftou(fep);
qp = to_qpi(ep->ep_qp);
qp = to_qpi(ep->e.dg.ep_qp);
index = qp->uq_rq.urq_post_index;
rxd.urd_context = context;
rxd.urd_iov[0].iov_base = (uint8_t *)buf +
USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr);
rxd.urd_iov[0].iov_len = len;
rxd.urd_iov[0].iov_len = len -
(USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr));
rxd.urd_iov_cnt = 1;
rxd.urd_next = NULL;
ep->ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
ep->e.dg.ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
return usd_post_recv(ep->ep_qp, &rxd);
return usd_post_recv(ep->e.dg.ep_qp, &rxd);
}
ssize_t
@ -233,61 +355,131 @@ usdf_dgram_prefix_recvv(struct fid_ep *fep, const struct iovec *iov,
int i;
ep = ep_ftou(fep);
qp = to_qpi(ep->ep_qp);
qp = to_qpi(ep->e.dg.ep_qp);
rxd.urd_context = context;
memcpy(&rxd.urd_iov[0], iov, sizeof(*iov) * count);
rxd.urd_iov[0].iov_base = (uint8_t *)rxd.urd_iov[0].iov_base +
USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr);
rxd.urd_iov[0].iov_len -= (USDF_HDR_BUF_ENTRY -
sizeof(struct usd_udp_hdr));
rxd.urd_iov_cnt = count;
rxd.urd_next = NULL;
index = qp->uq_rq.urq_post_index;
for (i = 0; i < count; ++i) {
ep->ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
ep->e.dg.ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
index = (index + 1) & qp->uq_rq.urq_post_index_mask;
}
return usd_post_recv(ep->ep_qp, &rxd);
return usd_post_recv(ep->e.dg.ep_qp, &rxd);
}
ssize_t
usdf_dgram_prefix_recvmsg(struct fid_ep *fep, const struct fi_msg *msg, uint64_t flags)
{
return usdf_dgram_recvv(fep, msg->msg_iov, msg->desc,
msg->iov_count, (fi_addr_t)msg->addr, msg->context);
}
ssize_t
usdf_dgram_prefix_send(struct fid_ep *fep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, void *context)
void *desc, fi_addr_t dest_addr, void *context)
{
struct usdf_ep *ep;
struct usd_dest *dest;
struct usd_qp_impl *qp;
struct usd_udp_hdr *hdr;
struct usd_wq *wq;
uint32_t last_post;
struct usd_wq_post_info *info;
struct usdf_ep *ep;
struct usdf_dest *dest;
struct usd_qp_impl *qp;
struct usd_udp_hdr *hdr;
struct usd_wq *wq;
uint32_t last_post;
struct usd_wq_post_info *info;
ep = ep_ftou(fep);
dest = (struct usd_dest *)(uintptr_t)dest_addr;
ep = ep_ftou(fep);
dest = (struct usdf_dest *)(uintptr_t)dest_addr;
qp = to_qpi(ep->ep_qp);
wq = &qp->uq_wq;
qp = to_qpi(ep->e.dg.ep_qp);
wq = &qp->uq_wq;
hdr = (struct usd_udp_hdr *) buf - 1;
memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr));
hdr = (struct usd_udp_hdr *) buf - 1;
memcpy(hdr, &dest->ds_dest.ds_dest.ds_udp.u_hdr, sizeof(*hdr));
/* adjust lengths and insert source port */
hdr->uh_ip.tot_len = htons(len + sizeof(struct usd_udp_hdr) -
sizeof(struct ether_header));
hdr->uh_udp.len = htons((sizeof(struct usd_udp_hdr) -
sizeof(struct ether_header) -
sizeof(struct iphdr)) + len);
hdr->uh_udp.source =
qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;
/* adjust lengths and insert source port */
hdr->uh_ip.tot_len = htons(len + sizeof(struct usd_udp_hdr) -
sizeof(struct ether_header));
hdr->uh_udp.len = htons((sizeof(struct usd_udp_hdr) -
sizeof(struct ether_header) -
sizeof(struct iphdr)) + len);
hdr->uh_udp.source =
qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;
last_post = _usd_post_send_one(wq, hdr,
len + sizeof(struct usd_udp_hdr), 1);
last_post = _usd_post_send_one(wq, hdr,
len + sizeof(struct usd_udp_hdr), 1);
info = &wq->uwq_post_info[last_post];
info->wp_context = context;
info->wp_len = len;
info = &wq->uwq_post_info[last_post];
info->wp_context = context;
info->wp_len = len;
return 0;
return 0;
}
ssize_t
usdf_dgram_prefix_sendv(struct fid_ep *fep, const struct iovec *iov, void **desc,
size_t count, fi_addr_t dest_addr, void *context)
{
struct usdf_ep *ep;
struct usd_dest *dest;
struct usd_wq *wq;
struct usd_qp_impl *qp;
struct usd_udp_hdr *hdr;
uint32_t last_post;
struct usd_wq_post_info *info;
struct iovec send_iov[USDF_DGRAM_MAX_SGE];
size_t len;
unsigned i;
ep = ep_ftou(fep);
dest = (struct usd_dest *)(uintptr_t) dest_addr;
len = 0;
for (i = 0; i < count; i++) {
len += iov[i].iov_len;
}
if (len + sizeof(struct usd_udp_hdr) > USD_SEND_MAX_COPY) {
qp = to_qpi(ep->e.dg.ep_qp);
wq = &qp->uq_wq;
hdr = (struct usd_udp_hdr *) iov[0].iov_base - 1;
memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr));
/* adjust lengths and insert source port */
hdr->uh_ip.tot_len = htons(len + sizeof(struct usd_udp_hdr) -
sizeof(struct ether_header));
hdr->uh_udp.len = htons((sizeof(struct usd_udp_hdr) -
sizeof(struct ether_header) -
sizeof(struct iphdr)) + len);
hdr->uh_udp.source =
qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;
memcpy(send_iov, iov, sizeof(struct iovec) * count);
send_iov[0].iov_base = hdr;
send_iov[0].iov_len += sizeof(*hdr);
last_post = _usd_post_send_iov(wq, send_iov, count, 1);
info = &wq->uwq_post_info[last_post];
info->wp_context = context;
info->wp_len = len;
} else {
_usdf_dgram_send_iov_copy(ep, dest, iov, count, context);
}
return 0;
}
ssize_t
usdf_dgram_prefix_sendmsg(struct fid_ep *fep, const struct fi_msg *msg, uint64_t flags)
{
return usdf_dgram_prefix_sendv(fep, msg->msg_iov, msg->desc, msg->iov_count,
(fi_addr_t)msg->addr, msg->context);
}

Просмотреть файл

@ -36,10 +36,8 @@
#ifndef _USDF_DGRAM_H_
#define _USDF_DGRAM_H_
/* fi_ops_cm for DGRAM */
int usdf_cm_dgram_connect(struct fid_ep *ep, const void *addr,
const void *param, size_t paramlen);
int usdf_cm_dgram_shutdown(struct fid_ep *ep, uint64_t flags);
#define USDF_DGRAM_MAX_SGE 8
#define USDF_DGRAM_DFLT_SGE 4
/* fi_ops_msg for DGRAM */
ssize_t usdf_dgram_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
@ -50,8 +48,6 @@ ssize_t usdf_dgram_recvmsg(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags);
ssize_t usdf_dgram_send(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, void *context);
ssize_t usdf_dgram_conn_send(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, void *context);
ssize_t usdf_dgram_sendv(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, fi_addr_t dest_addr, void *context);
ssize_t usdf_dgram_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
@ -65,7 +61,13 @@ ssize_t usdf_dgram_prefix_recv(struct fid_ep *ep, void *buf, size_t len,
void *desc, fi_addr_t src_addr, void *context);
ssize_t usdf_dgram_prefix_recvv(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, fi_addr_t src_addr, void *context);
ssize_t usdf_dgram_prefix_recvmsg(struct fid_ep *fep, const struct fi_msg *msg,
uint64_t flags);
ssize_t usdf_dgram_prefix_send(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, void *context);
ssize_t usdf_dgram_prefix_sendv(struct fid_ep *fep, const struct iovec *iov,
void **desc, size_t count, fi_addr_t dest_addr, void *context);
ssize_t usdf_dgram_prefix_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags);
#endif /* _USDF_DGRAM_H_ */

Просмотреть файл

@ -55,6 +55,8 @@
#include "usnic_direct.h"
#include "usdf.h"
#include "usdf_rdm.h"
#include "usdf_timer.h"
static int
usdf_domain_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
@ -78,6 +80,81 @@ usdf_domain_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
return 0;
}
static void
usdf_dom_rdc_free_data(struct usdf_domain *udp)
{
struct usdf_rdm_connection *rdc;
int i;
if (udp->dom_rdc_hashtab != NULL) {
pthread_spin_lock(&udp->dom_progress_lock);
for (i = 0; i < USDF_RDM_HASH_SIZE; ++i) {
rdc = udp->dom_rdc_hashtab[i];
while (rdc != NULL) {
usdf_timer_reset(udp->dom_fabric,
rdc->dc_timer, 0);
rdc = rdc->dc_hash_next;
}
}
pthread_spin_unlock(&udp->dom_progress_lock);
/* XXX probably want a timeout here... */
while (atomic_get(&udp->dom_rdc_free_cnt) <
udp->dom_rdc_total) {
pthread_yield();
}
free(udp->dom_rdc_hashtab);
udp->dom_rdc_hashtab = NULL;
}
while (!SLIST_EMPTY(&udp->dom_rdc_free)) {
rdc = SLIST_FIRST(&udp->dom_rdc_free);
SLIST_REMOVE_HEAD(&udp->dom_rdc_free, dc_addr_link);
usdf_timer_free(udp->dom_fabric, rdc->dc_timer);
free(rdc);
}
}
static int
usdf_dom_rdc_alloc_data(struct usdf_domain *udp)
{
struct usdf_rdm_connection *rdc;
int ret;
int i;
udp->dom_rdc_hashtab = calloc(USDF_RDM_HASH_SIZE,
sizeof(*udp->dom_rdc_hashtab));
if (udp->dom_rdc_hashtab == NULL) {
return -FI_ENOMEM;
}
SLIST_INIT(&udp->dom_rdc_free);
atomic_init(&udp->dom_rdc_free_cnt, 0);
for (i = 0; i < USDF_RDM_FREE_BLOCK; ++i) {
rdc = calloc(1, sizeof(*rdc));
if (rdc == NULL) {
return -FI_ENOMEM;
}
ret = usdf_timer_alloc(usdf_rdm_rdc_timeout, rdc,
&rdc->dc_timer);
if (ret != 0) {
free(rdc);
return ret;
}
rdc->dc_flags = USDF_DCS_UNCONNECTED | USDF_DCF_NEW_RX;
rdc->dc_next_rx_seq = 0;
rdc->dc_next_tx_seq = 0;
rdc->dc_last_rx_ack = rdc->dc_next_tx_seq - 1;
TAILQ_INIT(&rdc->dc_wqe_posted);
TAILQ_INIT(&rdc->dc_wqe_sent);
SLIST_INSERT_HEAD(&udp->dom_rdc_free, rdc, dc_addr_link);
atomic_inc(&udp->dom_rdc_free_cnt);
}
udp->dom_rdc_total = USDF_RDM_FREE_BLOCK;
return 0;
}
static int
usdf_domain_close(fid_t fid)
{
@ -95,11 +172,14 @@ usdf_domain_close(fid_t fid)
return ret;
}
}
usdf_dom_rdc_free_data(udp);
if (udp->dom_eq != NULL) {
atomic_dec(&udp->dom_eq->eq_refcnt);
}
atomic_dec(&udp->dom_fabric->fab_refcnt);
LIST_REMOVE(udp, dom_link);
fi_freeinfo(udp->dom_info);
free(udp);
return 0;
@ -132,6 +212,8 @@ usdf_domain_open(struct fid_fabric *fabric, struct fi_info *info,
struct usdf_domain *udp;
struct usdf_usnic_info *dp;
struct usdf_dev_entry *dep;
struct sockaddr_in *sin;
size_t addrlen;
int d;
int ret;
@ -143,6 +225,27 @@ usdf_domain_open(struct fid_fabric *fabric, struct fi_info *info,
fp = fab_fidtou(fabric);
/*
* Make sure address format is good and matches this fabric
*/
switch (info->addr_format) {
case FI_SOCKADDR:
addrlen = sizeof(struct sockaddr);
break;
case FI_SOCKADDR_IN:
addrlen = sizeof(struct sockaddr_in);
break;
default:
ret = -FI_EINVAL;
goto fail;
}
sin = info->src_addr;
if (info->src_addrlen != addrlen || sin->sin_family != AF_INET ||
sin->sin_addr.s_addr != fp->fab_dev_attrs->uda_ipaddr_be) {
ret = -FI_EINVAL;
goto fail;
}
/* steal cached device from info if we can */
dp = __usdf_devinfo;
for (d = 0; d < dp->uu_num_devs; ++d) {
@ -169,7 +272,32 @@ usdf_domain_open(struct fid_fabric *fabric, struct fi_info *info,
udp->dom_fid.ops = &usdf_domain_ops;
udp->dom_fid.mr = &usdf_domain_mr_ops;
ret = pthread_spin_init(&udp->dom_progress_lock,
PTHREAD_PROCESS_PRIVATE);
if (ret != 0) {
ret = -ret;
goto fail;
}
TAILQ_INIT(&udp->dom_tx_ready);
TAILQ_INIT(&udp->dom_hcq_list);
udp->dom_info = fi_dupinfo(info);
if (udp->dom_info == NULL) {
ret = -FI_ENOMEM;
goto fail;
}
if (udp->dom_info->dest_addr != NULL) {
free(udp->dom_info->dest_addr);
udp->dom_info->dest_addr = NULL;
}
ret = usdf_dom_rdc_alloc_data(udp);
if (ret != 0) {
goto fail;
}
udp->dom_fabric = fp;
LIST_INSERT_HEAD(&fp->fab_domain_list, udp, dom_link);
atomic_init(&udp->dom_refcnt, 0);
atomic_inc(&fp->fab_refcnt);
@ -178,6 +306,13 @@ usdf_domain_open(struct fid_fabric *fabric, struct fi_info *info,
fail:
if (udp != NULL) {
if (udp->dom_info != NULL) {
fi_freeinfo(udp->dom_info);
}
if (udp->dom_dev != NULL) {
usd_close(udp->dom_dev);
}
usdf_dom_rdc_free_data(udp);
free(udp);
}
return ret;

Просмотреть файл

@ -58,90 +58,8 @@
#include "fi.h"
#include "fi_enosys.h"
#include "usnic_direct.h"
#include "usd.h"
#include "usdf.h"
#include "usdf_av.h"
#include "usdf_endpoint.h"
#include "usdf_progress.h"
static int
usdf_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
{
struct usdf_ep *ep;
ep = ep_fidtou(fid);
switch (bfid->fclass) {
case FI_CLASS_AV:
if (ep->ep_av != NULL) {
return -FI_EINVAL;
}
ep->ep_av = av_fidtou(bfid);
break;
case FI_CLASS_CQ:
if (flags & FI_SEND) {
if (ep->ep_wcq != NULL) {
return -FI_EINVAL;
}
ep->ep_wcq = cq_fidtou(bfid);
}
if (flags & FI_RECV) {
if (ep->ep_rcq != NULL) {
return -FI_EINVAL;
}
ep->ep_rcq = cq_fidtou(bfid);
}
break;
case FI_CLASS_EQ:
printf("bind EQ to ep!\n");
if (ep->ep_eq != NULL) {
return -FI_EINVAL;
}
ep->ep_eq = eq_fidtou(bfid);
atomic_inc(&ep->ep_eq->eq_refcnt);
break;
default:
return -FI_EINVAL;
}
return 0;
}
static int
usdf_ep_close(fid_t fid)
{
struct usdf_ep *ep;
ep = ep_fidtou(fid);
if (atomic_get(&ep->ep_refcnt) > 0) {
return -FI_EBUSY;
}
if (ep->ep_qp != NULL) {
usd_destroy_qp(ep->ep_qp);
}
atomic_dec(&ep->ep_domain->dom_refcnt);
if (ep->ep_eq != NULL) {
atomic_dec(&ep->ep_eq->eq_refcnt);
}
free(ep);
return 0;
}
struct fi_ops usdf_ep_ops = {
.size = sizeof(struct fi_ops),
.close = usdf_ep_close,
.bind = usdf_ep_bind,
.control = fi_no_control,
.ops_open = fi_no_ops_open
};
int
usdf_ep_port_bind(struct usdf_ep *ep, struct fi_info *info)
@ -151,13 +69,13 @@ usdf_ep_port_bind(struct usdf_ep *ep, struct fi_info *info)
int ret;
sin = (struct sockaddr_in *)info->src_addr;
ret = bind(ep->ep_sock, (struct sockaddr *)sin, sizeof(*sin));
ret = bind(ep->e.dg.ep_sock, (struct sockaddr *)sin, sizeof(*sin));
if (ret == -1) {
return -errno;
}
addrlen = sizeof(*sin);
ret = getsockname(ep->ep_sock, (struct sockaddr *)sin, &addrlen);
ret = getsockname(ep->e.dg.ep_sock, (struct sockaddr *)sin, &addrlen);
if (ret == -1) {
return -errno;
}
@ -174,6 +92,8 @@ usdf_endpoint_open(struct fid_domain *domain, struct fi_info *info,
return usdf_ep_dgram_open(domain, info, ep_o, context);
case FI_EP_MSG:
return usdf_ep_msg_open(domain, info, ep_o, context);
case FI_EP_RDM:
return usdf_ep_rdm_open(domain, info, ep_o, context);
default:
return -FI_ENODEV;
}

Просмотреть файл

@ -41,6 +41,10 @@ int usdf_ep_dgram_open(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context);
int usdf_ep_msg_open(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context);
int usdf_ep_rdm_open(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context);
int usdf_ep_msg_get_queues(struct usdf_ep *ep);
void usdf_ep_msg_release_queues(struct usdf_ep *ep);
extern struct fi_ops usdf_ep_ops;

Просмотреть файл

@ -63,9 +63,11 @@
#include "usdf.h"
#include "usdf_endpoint.h"
#include "usdf_dgram.h"
#include "usdf_av.h"
#include "usdf_cq.h"
static int
usdf_dgram_ep_enable(struct fid_ep *fep)
usdf_ep_dgram_enable(struct fid_ep *fep)
{
struct usdf_ep *ep;
struct usd_filter filt;
@ -75,18 +77,18 @@ usdf_dgram_ep_enable(struct fid_ep *fep)
ep = ep_ftou(fep);
filt.uf_type = USD_FTY_UDP_SOCK;
filt.uf_filter.uf_udp_sock.u_sock = ep->ep_sock;
filt.uf_filter.uf_udp_sock.u_sock = ep->e.dg.ep_sock;
if (ep->ep_caps & USDF_EP_CAP_PIO) {
ret = usd_create_qp(ep->ep_domain->dom_dev,
USD_QTR_UDP,
USD_QTY_PIO,
ep->ep_wcq->cq_cq,
ep->ep_rcq->cq_cq,
ep->e.dg.ep_wcq->c.hard.cq_cq,
ep->e.dg.ep_rcq->c.hard.cq_cq,
127, // XXX
127, // XXX
&filt,
&ep->ep_qp);
&ep->e.dg.ep_qp);
} else {
ret = -EAGAIN;
}
@ -95,33 +97,33 @@ usdf_dgram_ep_enable(struct fid_ep *fep)
ret = usd_create_qp(ep->ep_domain->dom_dev,
USD_QTR_UDP,
USD_QTY_NORMAL,
ep->ep_wcq->cq_cq,
ep->ep_rcq->cq_cq,
ep->e.dg.ep_wcq->c.hard.cq_cq,
ep->e.dg.ep_rcq->c.hard.cq_cq,
ep->ep_wqe,
ep->ep_rqe,
&filt,
&ep->ep_qp);
&ep->e.dg.ep_qp);
}
if (ret != 0) {
goto fail;
}
ep->ep_qp->uq_context = ep;
ep->e.dg.ep_qp->uq_context = ep;
/*
* Allocate a memory region big enough to hold a header for each
* RQ entry
*/
uqp = to_qpi(ep->ep_qp);
ep->ep_hdr_ptr = calloc(uqp->uq_rq.urq_num_entries,
sizeof(ep->ep_hdr_ptr[0]));
if (ep->ep_hdr_ptr == NULL) {
uqp = to_qpi(ep->e.dg.ep_qp);
ep->e.dg.ep_hdr_ptr = calloc(uqp->uq_rq.urq_num_entries,
sizeof(ep->e.dg.ep_hdr_ptr[0]));
if (ep->e.dg.ep_hdr_ptr == NULL) {
ret = -FI_ENOMEM;
goto fail;
}
ret = usd_alloc_mr(ep->ep_domain->dom_dev,
usd_get_recv_credits(ep->ep_qp) * USDF_HDR_BUF_ENTRY,
&ep->ep_hdr_buf);
usd_get_recv_credits(ep->e.dg.ep_qp) * USDF_HDR_BUF_ENTRY,
&ep->e.dg.ep_hdr_buf);
if (ret != 0) {
goto fail;
}
@ -129,18 +131,143 @@ usdf_dgram_ep_enable(struct fid_ep *fep)
return 0;
fail:
if (ep->ep_hdr_ptr != NULL) {
free(ep->ep_hdr_ptr);
if (ep->e.dg.ep_hdr_ptr != NULL) {
free(ep->e.dg.ep_hdr_ptr);
}
if (ep->ep_qp != NULL) {
usd_destroy_qp(ep->ep_qp);
if (ep->e.dg.ep_qp != NULL) {
usd_destroy_qp(ep->e.dg.ep_qp);
}
return ret;
}
static int
usdf_ep_dgram_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
{
struct usdf_ep *ep;
struct usdf_cq *cq;
int ret;
ep = ep_fidtou(fid);
switch (bfid->fclass) {
case FI_CLASS_AV:
if (ep->e.dg.ep_av != NULL) {
return -FI_EINVAL;
}
ep->e.dg.ep_av = av_fidtou(bfid);
break;
case FI_CLASS_CQ:
cq = cq_fidtou(bfid);
/* actually, could look through CQ list for a hard
* CQ with function usd_poll_cq() and use that... XXX
*/
if (usdf_cq_is_soft(cq)) {
return -FI_EINVAL;
}
if (cq->c.hard.cq_cq == NULL) {
ret = usdf_cq_create_cq(cq);
if (ret != 0) {
return ret;
}
}
if (flags & FI_SEND) {
if (ep->e.dg.ep_wcq != NULL) {
return -FI_EINVAL;
}
ep->e.dg.ep_wcq = cq;
atomic_inc(&cq->cq_refcnt);
}
if (flags & FI_RECV) {
if (ep->e.dg.ep_rcq != NULL) {
return -FI_EINVAL;
}
ep->e.dg.ep_rcq = cq;
atomic_inc(&cq->cq_refcnt);
}
break;
case FI_CLASS_EQ:
if (ep->ep_eq != NULL) {
return -FI_EINVAL;
}
ep->ep_eq = eq_fidtou(bfid);
atomic_inc(&ep->ep_eq->eq_refcnt);
break;
default:
return -FI_EINVAL;
}
return 0;
}
static void
usdf_ep_dgram_deref_cq(struct usdf_cq *cq)
{
struct usdf_cq_hard *hcq;
void (*rtn)(struct usdf_cq_hard *hcq);
if (cq == NULL) {
return;
}
atomic_dec(&cq->cq_refcnt);
switch (cq->cq_attr.format) {
case FI_CQ_FORMAT_CONTEXT:
rtn = usdf_progress_hard_cq_context;
break;
case FI_CQ_FORMAT_MSG:
rtn = usdf_progress_hard_cq_msg;
break;
case FI_CQ_FORMAT_DATA:
rtn = usdf_progress_hard_cq_data;
break;
default:
return;
}
if (usdf_cq_is_soft(cq)) {
TAILQ_FOREACH(hcq, &cq->c.soft.cq_list, cqh_link) {
if (hcq->cqh_progress == rtn) {
atomic_dec(&hcq->cqh_refcnt);
return;
}
}
}
}
static int
usdf_ep_dgram_close(fid_t fid)
{
struct usdf_ep *ep;
ep = ep_fidtou(fid);
if (atomic_get(&ep->ep_refcnt) > 0) {
return -FI_EBUSY;
}
if (ep->e.dg.ep_qp != NULL) {
usd_destroy_qp(ep->e.dg.ep_qp);
}
atomic_dec(&ep->ep_domain->dom_refcnt);
if (ep->ep_eq != NULL) {
atomic_dec(&ep->ep_eq->eq_refcnt);
}
usdf_ep_dgram_deref_cq(ep->e.dg.ep_wcq);
usdf_ep_dgram_deref_cq(ep->e.dg.ep_rcq);
free(ep);
return 0;
}
static struct fi_ops_ep usdf_base_dgram_ops = {
.size = sizeof(struct fi_ops_ep),
.enable = usdf_dgram_ep_enable,
.enable = usdf_ep_dgram_enable,
.cancel = fi_no_cancel,
.getopt = fi_no_getopt,
.setopt = fi_no_setopt,
@ -165,10 +292,10 @@ static struct fi_ops_msg usdf_dgram_prefix_ops = {
.size = sizeof(struct fi_ops_msg),
.recv = usdf_dgram_prefix_recv,
.recvv = usdf_dgram_prefix_recvv,
.recvmsg = usdf_dgram_recvmsg,
.send = usdf_dgram_send,
.sendv = usdf_dgram_sendv,
.sendmsg = usdf_dgram_sendmsg,
.recvmsg = usdf_dgram_prefix_recvmsg,
.send = usdf_dgram_prefix_send,
.sendv = usdf_dgram_prefix_sendv,
.sendmsg = usdf_dgram_prefix_sendmsg,
.inject = usdf_dgram_inject,
.senddata = usdf_dgram_senddata,
.injectdata = fi_no_msg_injectdata,
@ -176,8 +303,16 @@ static struct fi_ops_msg usdf_dgram_prefix_ops = {
static struct fi_ops_cm usdf_cm_dgram_ops = {
.size = sizeof(struct fi_ops_cm),
.connect = usdf_cm_dgram_connect,
.shutdown = usdf_cm_dgram_shutdown,
.connect = fi_no_connect,
.shutdown = fi_no_shutdown,
};
static struct fi_ops usdf_ep_dgram_ops = {
.size = sizeof(struct fi_ops),
.close = usdf_ep_dgram_close,
.bind = usdf_ep_dgram_bind,
.control = fi_no_control,
.ops_open = fi_no_ops_open
};
int
@ -199,8 +334,8 @@ usdf_ep_dgram_open(struct fid_domain *domain, struct fi_info *info,
return -FI_ENOMEM;
}
ep->ep_sock = socket(AF_INET, SOCK_DGRAM, 0);
if (ep->ep_sock == -1) {
ep->e.dg.ep_sock = socket(AF_INET, SOCK_DGRAM, 0);
if (ep->e.dg.ep_sock == -1) {
ret = -errno;
goto fail;
}
@ -216,7 +351,7 @@ usdf_ep_dgram_open(struct fid_domain *domain, struct fi_info *info,
ep->ep_fid.fid.fclass = FI_CLASS_EP;
ep->ep_fid.fid.context = context;
ep->ep_fid.fid.ops = &usdf_ep_ops;
ep->ep_fid.fid.ops = &usdf_ep_dgram_ops;
ep->ep_fid.ops = &usdf_base_dgram_ops;
ep->ep_fid.cm = &usdf_cm_dgram_ops;
ep->ep_domain = udp;
@ -225,12 +360,14 @@ usdf_ep_dgram_open(struct fid_domain *domain, struct fi_info *info,
if (info->tx_attr != NULL && info->tx_attr->size != 0) {
ep->ep_wqe = info->tx_attr->size;
} else {
ep->ep_wqe = udp->dom_dev_attrs.uda_max_send_credits;
ep->ep_wqe =
udp->dom_fabric->fab_dev_attrs->uda_max_send_credits;
}
if (info->rx_attr != NULL && info->rx_attr->size != 0) {
ep->ep_rqe = info->rx_attr->size;
} else {
ep->ep_rqe = udp->dom_dev_attrs.uda_max_recv_credits;
ep->ep_rqe =
udp->dom_fabric->fab_dev_attrs->uda_max_recv_credits;
}
if (ep->ep_mode & FI_MSG_PREFIX) {
@ -252,8 +389,8 @@ usdf_ep_dgram_open(struct fid_domain *domain, struct fi_info *info,
fail:
if (ep != NULL) {
if (ep->ep_sock != -1) {
close(ep->ep_sock);
if (ep->e.dg.ep_sock != -1) {
close(ep->e.dg.ep_sock);
}
free(ep);
}

Просмотреть файл

@ -62,10 +62,221 @@
#include "usd.h"
#include "usdf.h"
#include "usdf_endpoint.h"
#include "usdf_rudp.h"
#include "usdf_msg.h"
#include "usdf_cq.h"
#include "usdf_timer.h"
static int
usdf_msg_ep_getopt(fid_t fid, int level, int optname,
usdf_tx_msg_enable(struct usdf_tx *tx)
{
struct usdf_msg_qe *wqe;
struct usdf_domain *udp;
struct usdf_cq_hard *hcq;
struct usd_filter filt;
int ret;
int i;
udp = tx->tx_domain;
hcq = tx->t.msg.tx_hcq;
if (hcq == NULL) {
return -FI_ENOCQ;
}
/* XXX temp until we can allocate WQ and RQ independently */
filt.uf_type = USD_FTY_UDP;
filt.uf_filter.uf_udp.u_port = 0;
ret = usd_create_qp(udp->dom_dev,
USD_QTR_UDP,
USD_QTY_NORMAL,
hcq->cqh_ucq,
hcq->cqh_ucq,
udp->dom_fabric->fab_dev_attrs->uda_max_send_credits,
udp->dom_fabric->fab_dev_attrs->uda_max_recv_credits,
&filt,
&tx->tx_qp);
if (ret != 0) {
goto fail;
}
tx->tx_qp->uq_context = tx;
/* msg send queue */
tx->t.msg.tx_wqe_buf = malloc(tx->tx_attr.size *
sizeof(struct usdf_msg_qe));
if (tx->t.msg.tx_wqe_buf == NULL) {
ret = -errno;
goto fail;
}
/* populate free list */
TAILQ_INIT(&tx->t.msg.tx_free_wqe);
wqe = tx->t.msg.tx_wqe_buf;
for (i = 0; i < tx->tx_attr.size; ++i) {
TAILQ_INSERT_TAIL(&tx->t.msg.tx_free_wqe, wqe, ms_link);
++wqe;
}
return 0;
fail:
if (tx->t.msg.tx_wqe_buf != NULL) {
free(tx->t.msg.tx_wqe_buf);
tx->t.msg.tx_wqe_buf = NULL;
TAILQ_INIT(&tx->t.msg.tx_free_wqe);
}
if (tx->tx_qp != NULL) {
usd_destroy_qp(tx->tx_qp);
}
return ret;
}
static int
usdf_rx_msg_enable(struct usdf_rx *rx)
{
struct usdf_domain *udp;
struct usdf_cq_hard *hcq;
struct usdf_msg_qe *rqe;
struct usd_filter filt;
struct usd_qp_impl *qp;
uint8_t *ptr;
size_t mtu;
int ret;
int i;
udp = rx->rx_domain;
hcq = rx->r.msg.rx_hcq;
if (hcq == NULL) {
return -FI_ENOCQ;
}
/* XXX temp until we can allocate WQ and RQ independently */
filt.uf_type = USD_FTY_UDP;
filt.uf_filter.uf_udp.u_port = 0;
ret = usd_create_qp(udp->dom_dev,
USD_QTR_UDP,
USD_QTY_NORMAL,
hcq->cqh_ucq,
hcq->cqh_ucq,
udp->dom_fabric->fab_dev_attrs->uda_max_send_credits,
udp->dom_fabric->fab_dev_attrs->uda_max_recv_credits,
&filt,
&rx->rx_qp);
if (ret != 0) {
goto fail;
}
rx->rx_qp->uq_context = rx;
qp = to_qpi(rx->rx_qp);
/* receive buffers */
mtu = rx->rx_domain->dom_fabric->fab_dev_attrs->uda_mtu;
ret = usd_alloc_mr(rx->rx_domain->dom_dev,
qp->uq_rq.urq_num_entries * mtu,
(void **)&rx->r.msg.rx_bufs);
if (ret != 0) {
goto fail;
}
/* post all the buffers */
ptr = rx->r.msg.rx_bufs;
for (i = 0; i < qp->uq_rq.urq_num_entries - 1; ++i) {
usdf_msg_post_recv(rx, ptr, mtu);
ptr += mtu;
}
/* msg recv queue */
rx->r.msg.rx_rqe_buf = malloc(rx->rx_attr.size *
sizeof(struct usdf_msg_qe));
if (rx->r.msg.rx_rqe_buf == NULL) {
ret = -errno;
goto fail;
}
/* populate free list */
TAILQ_INIT(&rx->r.msg.rx_free_rqe);
rqe = rx->r.msg.rx_rqe_buf;
for (i = 0; i < rx->rx_attr.size; ++i) {
TAILQ_INSERT_TAIL(&rx->r.msg.rx_free_rqe, rqe, ms_link);
++rqe;
}
return 0;
fail:
if (rx->r.msg.rx_rqe_buf != NULL) {
free(rx->r.msg.rx_rqe_buf);
rx->r.msg.rx_rqe_buf = NULL;
TAILQ_INIT(&rx->r.msg.rx_free_rqe);
}
if (rx->r.msg.rx_bufs != NULL) {
usd_free_mr(rx->r.msg.rx_bufs);
rx->r.msg.rx_bufs = NULL;
}
if (rx->rx_qp != NULL) {
usd_destroy_qp(rx->rx_qp);
}
return ret;
}
/*
* release queue resources
*/
void
usdf_ep_msg_release_queues(struct usdf_ep *ep)
{
/* XXX */
}
/*
* Allocate any missing queue resources for this endpoint
*/
int
usdf_ep_msg_get_queues(struct usdf_ep *ep)
{
struct usdf_tx *tx;
struct usdf_rx *rx;
int ret;
/* Must have TX context at this point */
tx = ep->ep_tx;
if (tx == NULL) {
ret = -FI_EINVAL;
goto fail;
}
if (tx->tx_qp == NULL) {
ret = usdf_tx_msg_enable(tx);
if (ret != 0) {
goto fail;
}
}
/* Must have RX context at this point */
rx = ep->ep_rx;
if (rx == NULL) {
ret = -FI_EINVAL;
goto fail;
}
if (rx->rx_qp == NULL) {
ret = usdf_rx_msg_enable(rx);
if (ret != 0) {
goto fail;
}
}
return 0;
fail:
return ret;
}
static int
usdf_ep_msg_enable(struct fid_ep *fep)
{
return usdf_ep_msg_get_queues(ep_ftou(fep));
}
static int
usdf_ep_msg_getopt(fid_t fid, int level, int optname,
void *optval, size_t *optlen)
{
struct usdf_ep *ep;
@ -82,7 +293,7 @@ usdf_msg_ep_getopt(fid_t fid, int level, int optname,
}
static int
usdf_msg_ep_setopt(fid_t fid, int level, int optname,
usdf_ep_msg_setopt(fid_t fid, int level, int optname,
const void *optval, size_t optlen)
{
struct usdf_ep *ep;
@ -98,84 +309,291 @@ usdf_msg_ep_setopt(fid_t fid, int level, int optname,
return 0;
}
static int
usdf_msg_ep_enable(struct fid_ep *fep)
static ssize_t
usdf_ep_msg_cancel(fid_t fid, void *context)
{
struct usdf_ep *ep;
struct usd_filter filt;
struct usd_qp_impl *uqp;
return 0;
}
int
usdf_msg_fill_tx_attr(struct fi_tx_attr *txattr)
{
if (txattr->size > USDF_MSG_MAX_CTX_SIZE ||
txattr->iov_limit > USDF_MSG_MAX_SGE) {
return -FI_ENODATA;
}
if (txattr->size == 0) {
txattr->size = USDF_MSG_DFLT_CTX_SIZE;
}
if (txattr->iov_limit == 0) {
txattr->iov_limit = USDF_MSG_DFLT_SGE;
}
return 0;
}
int
usdf_msg_fill_rx_attr(struct fi_rx_attr *rxattr)
{
if (rxattr->size > USDF_MSG_MAX_CTX_SIZE ||
rxattr->iov_limit > USDF_MSG_MAX_SGE) {
return -FI_ENODATA;
}
if (rxattr->size == 0) {
rxattr->size = USDF_MSG_DFLT_CTX_SIZE;
}
if (rxattr->iov_limit == 0) {
rxattr->iov_limit = USDF_MSG_DFLT_SGE;
}
return 0;
}
/*
* Find a hard CQ within this soft CQ that services message EPs
*/
static struct usdf_cq_hard *
usdf_ep_msg_find_cqh(struct usdf_cq *cq)
{
struct usdf_cq_hard *hcq;
TAILQ_FOREACH(hcq, &cq->c.soft.cq_list, cqh_link) {
if (hcq->cqh_progress == usdf_msg_hcq_progress) {
return hcq;
}
}
return NULL;
}
static int
usdf_ep_msg_bind_cq(struct usdf_ep *ep, struct usdf_cq *cq, uint64_t flags)
{
struct usdf_cq_hard **hcqp;
struct usdf_cq_hard *hcq;
int ret;
ep = ep_ftou(fep);
filt.uf_type = USD_FTY_UDP_SOCK;
filt.uf_filter.uf_udp_sock.u_sock = ep->ep_sock;
ret = usd_create_qp(ep->ep_domain->dom_dev,
USD_QTR_UDP,
USD_QTY_NORMAL,
ep->ep_wcq->cq_cq,
ep->ep_rcq->cq_cq,
ep->ep_wqe,
ep->ep_rqe,
&filt,
&ep->ep_qp);
if (ret != 0) {
goto fail;
}
ep->ep_qp->uq_context = ep;
/*
* Allocate a memory region big enough to hold a header for each
* RQ entry
* The CQ is actually bound the RX or TX ctx, not the EP directly
*/
uqp = to_qpi(ep->ep_qp);
ep->ep_hdr_ptr = calloc(uqp->uq_rq.urq_num_entries,
sizeof(ep->ep_hdr_ptr[0]));
if (ep->ep_hdr_ptr == NULL) {
ret = -FI_ENOMEM;
goto fail;
if (flags & FI_SEND) {
/* if TX is shared, but bind directly */
if (ep->ep_tx->tx_fid.fid.fclass == FI_CLASS_STX_CTX) {
return -FI_EINVAL;
}
hcqp = &ep->ep_tx->t.msg.tx_hcq;
} else {
/* if RX is shared, but bind directly */
if (ep->ep_rx->rx_fid.fid.fclass == FI_CLASS_SRX_CTX) {
return -FI_EINVAL;
}
hcqp = &ep->ep_rx->r.msg.rx_hcq;
}
if (*hcqp != NULL) {
return -FI_EINVAL;
}
ret = usd_alloc_mr(ep->ep_domain->dom_dev,
usd_get_recv_credits(ep->ep_qp) * USDF_HDR_BUF_ENTRY,
&ep->ep_hdr_buf);
/* Make sure this CQ is "soft" */
ret = usdf_cq_make_soft(cq);
if (ret != 0) {
goto fail;
return ret;
}
/* Use existing msg CQ if present */
hcq = usdf_ep_msg_find_cqh(cq);
if (hcq == NULL) {
hcq = malloc(sizeof(*hcq));
if (hcq == NULL) {
return -errno;
}
ret = usd_create_cq(cq->cq_domain->dom_dev, 8195, /* XXX */
-1, &hcq->cqh_ucq);
if (ret != 0) {
goto fail;
}
hcq->cqh_cq = cq;
atomic_init(&hcq->cqh_refcnt, 0);
hcq->cqh_progress = usdf_msg_hcq_progress;
switch (cq->cq_attr.format) {
default:
case FI_CQ_FORMAT_CONTEXT:
hcq->cqh_post = usdf_cq_post_soft_context;
break;
case FI_CQ_FORMAT_MSG:
hcq->cqh_post = usdf_cq_post_soft_msg;
break;
case FI_CQ_FORMAT_DATA:
hcq->cqh_post = usdf_cq_post_soft_data;
break;
}
TAILQ_INSERT_TAIL(&cq->c.soft.cq_list, hcq, cqh_link);
/* add to domain progression list */
TAILQ_INSERT_TAIL(&ep->ep_domain->dom_hcq_list,
hcq, cqh_dom_link);
}
atomic_inc(&hcq->cqh_refcnt);
atomic_inc(&cq->cq_refcnt);
*hcqp = hcq;
return 0;
fail:
if (ep->ep_hdr_ptr != NULL) {
free(ep->ep_hdr_ptr);
}
if (ep->ep_qp != NULL) {
usd_destroy_qp(ep->ep_qp);
if (hcq != NULL) {
free(hcq);
}
return ret;
}
static ssize_t
usdf_msg_ep_cancel(fid_t fid, void *context)
static int
usdf_ep_msg_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
{
struct usdf_ep *ep;
struct usdf_cq *cq;
ep = ep_fidtou(fid);
switch (bfid->fclass) {
case FI_CLASS_CQ:
if (flags & FI_SEND) {
cq = cq_fidtou(bfid);
usdf_ep_msg_bind_cq(ep, cq, FI_SEND);
}
if (flags & FI_RECV) {
cq = cq_fidtou(bfid);
usdf_ep_msg_bind_cq(ep, cq, FI_RECV);
}
break;
case FI_CLASS_EQ:
if (ep->ep_eq != NULL) {
return -FI_EINVAL;
}
ep->ep_eq = eq_fidtou(bfid);
atomic_inc(&ep->ep_eq->eq_refcnt);
break;
default:
return -FI_EINVAL;
}
return 0;
}
static int
usdf_msg_rx_ctx_close(fid_t fid)
{
struct usdf_rx *rx;
struct usdf_cq_hard *hcq;
rx = rx_fidtou(fid);
if (atomic_get(&rx->rx_refcnt) > 0) {
return -FI_EBUSY;
}
hcq = rx->r.msg.rx_hcq;
if (hcq != NULL) {
atomic_dec(&hcq->cqh_refcnt);
atomic_dec(&hcq->cqh_cq->cq_refcnt);
}
if (rx->rx_qp != NULL) {
usd_free_mr(rx->r.msg.rx_bufs);
free(rx->r.msg.rx_rqe_buf);
usd_destroy_qp(rx->rx_qp);
}
atomic_dec(&rx->rx_domain->dom_refcnt);
free(rx);
return 0;
}
static int
usdf_msg_tx_ctx_close(fid_t fid)
{
struct usdf_tx *tx;
struct usdf_cq_hard *hcq;
tx = tx_fidtou(fid);
if (atomic_get(&tx->tx_refcnt) > 0) {
return -FI_EBUSY;
}
hcq = tx->t.msg.tx_hcq;
if (hcq != NULL) {
atomic_dec(&hcq->cqh_refcnt);
atomic_dec(&hcq->cqh_cq->cq_refcnt);
}
if (tx->tx_qp != NULL) {
free(tx->t.msg.tx_wqe_buf);
usd_destroy_qp(tx->tx_qp);
}
atomic_dec(&tx->tx_domain->dom_refcnt);
free(tx);
return 0;
}
static int
usdf_ep_msg_close(fid_t fid)
{
struct usdf_ep *ep;
ep = ep_fidtou(fid);
if (atomic_get(&ep->ep_refcnt) > 0) {
return -FI_EBUSY;
}
if (ep->ep_rx != NULL) {
atomic_dec(&ep->ep_rx->rx_refcnt);
if (rx_utofid(ep->ep_rx)->fclass == FI_CLASS_RX_CTX) {
(void) usdf_msg_rx_ctx_close(rx_utofid(ep->ep_rx));
}
}
if (ep->ep_tx != NULL) {
atomic_dec(&ep->ep_tx->tx_refcnt);
if (tx_utofid(ep->ep_tx)->fclass == FI_CLASS_TX_CTX) {
(void) usdf_msg_tx_ctx_close(tx_utofid(ep->ep_tx));
}
}
atomic_dec(&ep->ep_domain->dom_refcnt);
if (ep->ep_eq != NULL) {
atomic_dec(&ep->ep_eq->eq_refcnt);
}
usdf_timer_free(ep->ep_domain->dom_fabric, ep->e.msg.ep_ack_timer);
free(ep);
return 0;
}
static struct fi_ops_ep usdf_base_msg_ops = {
.size = sizeof(struct fi_ops_ep),
.enable = usdf_msg_ep_enable,
.cancel = usdf_msg_ep_cancel,
.getopt = usdf_msg_ep_getopt,
.setopt = usdf_msg_ep_setopt,
.enable = usdf_ep_msg_enable,
.cancel = usdf_ep_msg_cancel,
.getopt = usdf_ep_msg_getopt,
.setopt = usdf_ep_msg_setopt,
.tx_ctx = fi_no_tx_ctx,
.rx_ctx = fi_no_rx_ctx,
};
static struct fi_ops_cm usdf_cm_msg_ops = {
.size = sizeof(struct fi_ops_cm),
.getname = fi_no_getname,
.getpeer = fi_no_getpeer,
.connect = usdf_cm_msg_connect,
.listen = fi_no_listen,
.accept = usdf_cm_msg_accept,
.reject = fi_no_reject,
.shutdown = usdf_cm_msg_shutdown,
.join = fi_no_join,
.leave = fi_no_leave,
};
static struct fi_ops_msg usdf_msg_ops = {
@ -191,58 +609,130 @@ static struct fi_ops_msg usdf_msg_ops = {
.injectdata = fi_no_msg_injectdata,
};
static struct fi_ops usdf_ep_msg_ops = {
.size = sizeof(struct fi_ops),
.close = usdf_ep_msg_close,
.bind = usdf_ep_msg_bind,
.control = fi_no_control,
.ops_open = fi_no_ops_open
};
int
usdf_ep_msg_open(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep_o, void *context)
{
struct usdf_domain *udp;
struct usdf_fabric *fp;
struct usdf_tx *tx;
struct usdf_rx *rx;
struct usdf_ep *ep;
int ret;
if ((info->caps & ~USDF_DGRAM_CAPS) != 0) {
return -FI_EBADF;
ep = NULL;
rx = NULL;
tx = NULL;
if ((info->caps & ~USDF_MSG_CAPS) != 0) {
return -FI_EBADFLAGS;
}
udp = dom_ftou(domain);
fp = udp->dom_fabric;
ep = calloc(1, sizeof(*ep));
if (ep == NULL) {
return -FI_ENOMEM;
/* allocate peer table if not done */
if (udp->dom_peer_tab == NULL) {
udp->dom_peer_tab = calloc(USDF_MAX_PEERS, sizeof(ep));
}
ep->ep_sock = socket(AF_INET, SOCK_DGRAM, 0);
if (ep->ep_sock == -1) {
if (udp->dom_peer_tab == NULL) {
ret = -errno;
goto fail;
}
if (info->src_addr != NULL) {
if (info->addr_format == FI_SOCKADDR ||
info->addr_format == FI_SOCKADDR_IN) {
ret = usdf_ep_port_bind(ep, info);
if (ret != 0) {
goto fail;
}
}
ep = calloc(1, sizeof(*ep));
if (ep == NULL) {
ret = -errno;
goto fail;
}
ep->ep_fid.fid.fclass = FI_CLASS_EP;
ep->ep_fid.fid.context = context;
ep->ep_fid.fid.ops = &usdf_ep_ops;
ep->ep_fid.fid.ops = &usdf_ep_msg_ops;
ep->ep_fid.ops = &usdf_base_msg_ops;
ep->ep_fid.cm = &usdf_cm_msg_ops;
ep->ep_fid.msg = &usdf_msg_ops;
ep->ep_domain = udp;
ep->ep_caps = info->caps;
ep->ep_mode = info->mode;
if (info->tx_attr != NULL && info->tx_attr->size != 0) {
ep->ep_wqe = info->tx_attr->size;
} else {
ep->ep_wqe = udp->dom_dev_attrs.uda_max_send_credits;
ep->e.msg.ep_connreq = info->connreq;
ep->e.msg.ep_seq_credits = USDF_RUDP_SEQ_CREDITS;
TAILQ_INIT(&ep->e.msg.ep_posted_wqe);
TAILQ_INIT(&ep->e.msg.ep_sent_wqe);
--ep->e.msg.ep_last_rx_ack;
ret = usdf_timer_alloc(usdf_msg_ep_timeout, ep,
&ep->e.msg.ep_ack_timer);
if (ret != 0) {
goto fail;
}
if (info->rx_attr != NULL && info->rx_attr->size != 0) {
ep->ep_rqe = info->rx_attr->size;
} else {
ep->ep_rqe = udp->dom_dev_attrs.uda_max_recv_credits;
/* implicitly create TX context if not to be shared */
if (info->ep_attr == NULL ||
info->ep_attr->tx_ctx_cnt != FI_SHARED_CONTEXT) {
tx = calloc(1, sizeof(*tx));
if (tx == NULL) {
ret = -errno;
goto fail;
}
tx->tx_fid.fid.fclass = FI_CLASS_TX_CTX;
atomic_init(&tx->tx_refcnt, 0);
tx->tx_domain = udp;
tx->tx_progress = usdf_msg_tx_progress;
atomic_inc(&udp->dom_refcnt);
if (info->tx_attr != NULL) {
ret = usdf_msg_fill_tx_attr(info->tx_attr);
if (ret != 0) {
goto fail;
}
tx->tx_attr = *info->tx_attr;
} else {
ret = usdf_msg_fill_tx_attr(&tx->tx_attr);
}
TAILQ_INIT(&tx->t.msg.tx_free_wqe);
TAILQ_INIT(&tx->t.msg.tx_ep_ready);
TAILQ_INIT(&tx->t.msg.tx_ep_have_acks);
ep->ep_tx = tx;
atomic_inc(&tx->tx_refcnt);
atomic_inc(&udp->dom_refcnt);
}
TAILQ_INIT(&ep->e.msg.ep_posted_wqe);
/* implicitly create RX context if not to be shared */
if (info->ep_attr == NULL ||
info->ep_attr->rx_ctx_cnt != FI_SHARED_CONTEXT) {
rx = calloc(1, sizeof(*rx));
if (rx == NULL) {
ret = -errno;
goto fail;
}
rx->rx_fid.fid.fclass = FI_CLASS_RX_CTX;
atomic_init(&rx->rx_refcnt, 0);
rx->rx_domain = udp;
atomic_inc(&udp->dom_refcnt);
if (info->rx_attr != NULL) {
ret = usdf_msg_fill_rx_attr(info->rx_attr);
if (ret != 0) {
goto fail;
}
rx->rx_attr = *info->rx_attr;
} else {
ret = usdf_msg_fill_rx_attr(&rx->rx_attr);
}
TAILQ_INIT(&rx->r.msg.rx_free_rqe);
TAILQ_INIT(&rx->r.msg.rx_posted_rqe);
ep->ep_rx = rx;
atomic_inc(&rx->rx_refcnt);
}
atomic_init(&ep->ep_refcnt, 0);
@ -250,11 +740,18 @@ usdf_ep_msg_open(struct fid_domain *domain, struct fi_info *info,
*ep_o = ep_utof(ep);
return 0;
fail:
if (rx != NULL) {
free(rx);
atomic_dec(&udp->dom_refcnt);
}
if (tx != NULL) {
free(tx);
atomic_dec(&udp->dom_refcnt);
}
if (ep != NULL) {
if (ep->ep_sock != -1) {
close(ep->ep_sock);
if (ep->e.msg.ep_ack_timer != NULL) {
usdf_timer_free(fp, ep->e.msg.ep_ack_timer);
}
free(ep);
}

Просмотреть файл

@ -0,0 +1,808 @@
/*
* Copyright (c) 2014, Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <asm/types.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <netinet/in.h>
#include <poll.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <rdma/fabric.h>
#include <rdma/fi_cm.h>
#include <rdma/fi_domain.h>
#include <rdma/fi_prov.h>
#include <rdma/fi_endpoint.h>
#include <rdma/fi_rma.h>
#include <rdma/fi_errno.h>
#include "fi.h"
#include "fi_enosys.h"
#include "usd.h"
#include "usdf.h"
#include "usdf_endpoint.h"
#include "usdf_rudp.h"
#include "usdf_cq.h"
#include "usdf_cm.h"
#include "usdf_av.h"
#include "usdf_timer.h"
#include "usdf_rdm.h"
static int
usdf_tx_rdm_enable(struct usdf_tx *tx)
{
struct usdf_rdm_qe *wqe;
struct usdf_domain *udp;
struct usdf_cq_hard *hcq;
struct usd_filter filt;
int ret;
int i;
udp = tx->tx_domain;
hcq = tx->t.rdm.tx_hcq;
if (hcq == NULL) {
return -FI_ENOCQ;
}
/* XXX temp until we can allocate WQ and RQ independently */
filt.uf_type = USD_FTY_UDP;
filt.uf_filter.uf_udp.u_port = 0;
ret = usd_create_qp(udp->dom_dev,
USD_QTR_UDP,
USD_QTY_NORMAL,
hcq->cqh_ucq,
hcq->cqh_ucq,
udp->dom_fabric->fab_dev_attrs->uda_max_send_credits,
udp->dom_fabric->fab_dev_attrs->uda_max_recv_credits,
&filt,
&tx->tx_qp);
if (ret != 0) {
goto fail;
}
tx->tx_qp->uq_context = tx;
/* rdm send queue */
tx->t.rdm.tx_wqe_buf = malloc(tx->tx_attr.size *
sizeof(struct usdf_rdm_qe));
if (tx->t.rdm.tx_wqe_buf == NULL) {
ret = -errno;
goto fail;
}
/* populate free list */
TAILQ_INIT(&tx->t.rdm.tx_free_wqe);
wqe = tx->t.rdm.tx_wqe_buf;
for (i = 0; i < tx->tx_attr.size; ++i) {
TAILQ_INSERT_TAIL(&tx->t.rdm.tx_free_wqe, wqe, rd_link);
++wqe;
}
return 0;
fail:
if (tx->t.rdm.tx_wqe_buf != NULL) {
free(tx->t.rdm.tx_wqe_buf);
tx->t.rdm.tx_wqe_buf = NULL;
TAILQ_INIT(&tx->t.rdm.tx_free_wqe);
}
if (tx->tx_qp != NULL) {
usd_destroy_qp(tx->tx_qp);
}
return ret;
}
static int
usdf_rx_rdm_enable(struct usdf_rx *rx)
{
struct usdf_domain *udp;
struct usdf_cq_hard *hcq;
struct usdf_rdm_qe *rqe;
struct usd_filter filt;
struct usd_qp_impl *qp;
uint8_t *ptr;
size_t mtu;
int ret;
int i;
udp = rx->rx_domain;
hcq = rx->r.rdm.rx_hcq;
if (hcq == NULL) {
return -FI_ENOCQ;
}
/* XXX temp until we can allocate WQ and RQ independently */
filt.uf_type = USD_FTY_UDP_SOCK;
filt.uf_filter.uf_udp_sock.u_sock = rx->r.rdm.rx_sock;
ret = usd_create_qp(udp->dom_dev,
USD_QTR_UDP,
USD_QTY_NORMAL,
hcq->cqh_ucq,
hcq->cqh_ucq,
udp->dom_fabric->fab_dev_attrs->uda_max_send_credits,
udp->dom_fabric->fab_dev_attrs->uda_max_recv_credits,
&filt,
&rx->rx_qp);
if (ret != 0) {
goto fail;
}
rx->rx_qp->uq_context = rx;
qp = to_qpi(rx->rx_qp);
/* receive buffers */
mtu = rx->rx_domain->dom_fabric->fab_dev_attrs->uda_mtu;
ret = usd_alloc_mr(rx->rx_domain->dom_dev,
qp->uq_rq.urq_num_entries * mtu,
(void **)&rx->r.rdm.rx_bufs);
if (ret != 0) {
goto fail;
}
/* post all the buffers */
ptr = rx->r.rdm.rx_bufs;
for (i = 0; i < qp->uq_rq.urq_num_entries - 1; ++i) {
usdf_rdm_post_recv(rx, ptr, mtu);
ptr += mtu;
}
/* rdm recv queue */
rx->r.rdm.rx_rqe_buf = malloc(rx->rx_attr.size *
sizeof(struct usdf_rdm_qe));
if (rx->r.rdm.rx_rqe_buf == NULL) {
ret = -errno;
goto fail;
}
/* populate free list */
TAILQ_INIT(&rx->r.rdm.rx_free_rqe);
rqe = rx->r.rdm.rx_rqe_buf;
for (i = 0; i < rx->rx_attr.size; ++i) {
TAILQ_INSERT_TAIL(&rx->r.rdm.rx_free_rqe, rqe, rd_link);
++rqe;
}
return 0;
fail:
if (rx->r.rdm.rx_rqe_buf != NULL) {
free(rx->r.rdm.rx_rqe_buf);
rx->r.rdm.rx_rqe_buf = NULL;
TAILQ_INIT(&rx->r.rdm.rx_free_rqe);
}
if (rx->r.rdm.rx_bufs != NULL) {
usd_free_mr(rx->r.rdm.rx_bufs);
rx->r.rdm.rx_bufs = NULL;
}
if (rx->rx_qp != NULL) {
usd_destroy_qp(rx->rx_qp);
}
return ret;
}
/*
* release queue resources
*/
void
usdf_ep_rdm_release_queues(struct usdf_ep *ep)
{
/* XXX */
}
/*
* Allocate any missing queue resources for this endpoint
*/
int
usdf_ep_rdm_get_queues(struct usdf_ep *ep)
{
struct usdf_tx *tx;
struct usdf_rx *rx;
int ret;
/* Must have TX context at this point */
tx = ep->ep_tx;
if (tx == NULL) {
ret = -FI_EINVAL;
goto fail;
}
if (tx->tx_qp == NULL) {
ret = usdf_tx_rdm_enable(tx);
if (ret != 0) {
goto fail;
}
}
/* Must have RX context at this point */
rx = ep->ep_rx;
if (rx == NULL) {
ret = -FI_EINVAL;
goto fail;
}
if (rx->rx_qp == NULL) {
ret = usdf_rx_rdm_enable(rx);
if (ret != 0) {
goto fail;
}
}
return 0;
fail:
return ret;
}
static int
usdf_ep_rdm_enable(struct fid_ep *fep)
{
return usdf_ep_rdm_get_queues(ep_ftou(fep));
}
static int
usdf_ep_rdm_getopt(fid_t fid, int level, int optname,
void *optval, size_t *optlen)
{
struct usdf_ep *ep;
ep = ep_fidtou(fid);
(void)ep;
switch (level) {
case FI_OPT_ENDPOINT:
return -FI_ENOPROTOOPT;
default:
return -FI_ENOPROTOOPT;
}
return 0;
}
static int
usdf_ep_rdm_setopt(fid_t fid, int level, int optname,
const void *optval, size_t optlen)
{
struct usdf_ep *ep;
ep = ep_fidtou(fid);
(void)ep;
switch (level) {
case FI_OPT_ENDPOINT:
return -FI_ENOPROTOOPT;
default:
return -FI_ENOPROTOOPT;
}
return 0;
}
static ssize_t
usdf_ep_rdm_cancel(fid_t fid, void *context)
{
return 0;
}
int
usdf_rdm_fill_tx_attr(struct fi_tx_attr *txattr)
{
if (txattr->size > USDF_RDM_MAX_CTX_SIZE ||
txattr->iov_limit > USDF_RDM_MAX_SGE) {
return -FI_ENODATA;
}
if (txattr->size == 0) {
txattr->size = USDF_RDM_DFLT_CTX_SIZE;
}
if (txattr->iov_limit == 0) {
txattr->iov_limit = USDF_RDM_DFLT_SGE;
}
return 0;
}
int
usdf_rdm_fill_rx_attr(struct fi_rx_attr *rxattr)
{
if (rxattr->size > USDF_RDM_MAX_CTX_SIZE ||
rxattr->iov_limit > USDF_RDM_MAX_SGE) {
return -FI_ENODATA;
}
if (rxattr->size == 0) {
rxattr->size = USDF_RDM_DFLT_CTX_SIZE;
}
if (rxattr->iov_limit == 0) {
rxattr->iov_limit = USDF_RDM_DFLT_SGE;
}
return 0;
}
/*
* Find a hard CQ within this soft CQ that services message EPs
*/
static struct usdf_cq_hard *
usdf_ep_rdm_find_cqh(struct usdf_cq *cq)
{
struct usdf_cq_hard *hcq;
TAILQ_FOREACH(hcq, &cq->c.soft.cq_list, cqh_link) {
if (hcq->cqh_progress == usdf_rdm_hcq_progress) {
return hcq;
}
}
return NULL;
}
static int
usdf_ep_rdm_bind_cq(struct usdf_ep *ep, struct usdf_cq *cq, uint64_t flags)
{
struct usdf_cq_hard **hcqp;
struct usdf_cq_hard *hcq;
int ret;
/*
* The CQ is actually bound the RX or TX ctx, not the EP directly
*/
if (flags & FI_SEND) {
/* if TX is shared, but bind directly */
if (ep->ep_tx->tx_fid.fid.fclass == FI_CLASS_STX_CTX) {
return -FI_EINVAL;
}
hcqp = &ep->ep_tx->t.rdm.tx_hcq;
} else {
/* if RX is shared, but bind directly */
if (ep->ep_rx->rx_fid.fid.fclass == FI_CLASS_SRX_CTX) {
return -FI_EINVAL;
}
hcqp = &ep->ep_rx->r.rdm.rx_hcq;
}
if (*hcqp != NULL) {
return -FI_EINVAL;
}
/* Make sure this CQ is "soft" */
ret = usdf_cq_make_soft(cq);
if (ret != 0) {
return ret;
}
/* Use existing rdm CQ if present */
hcq = usdf_ep_rdm_find_cqh(cq);
if (hcq == NULL) {
hcq = malloc(sizeof(*hcq));
if (hcq == NULL) {
return -errno;
}
ret = usd_create_cq(cq->cq_domain->dom_dev, 8195, /* XXX */
-1, &hcq->cqh_ucq);
if (ret != 0) {
goto fail;
}
hcq->cqh_cq = cq;
atomic_init(&hcq->cqh_refcnt, 0);
hcq->cqh_progress = usdf_rdm_hcq_progress;
switch (cq->cq_attr.format) {
default:
case FI_CQ_FORMAT_CONTEXT:
hcq->cqh_post = usdf_cq_post_soft_context;
break;
case FI_CQ_FORMAT_MSG:
hcq->cqh_post = usdf_cq_post_soft_msg;
break;
case FI_CQ_FORMAT_DATA:
hcq->cqh_post = usdf_cq_post_soft_data;
break;
}
TAILQ_INSERT_TAIL(&cq->c.soft.cq_list, hcq, cqh_link);
/* add to domain progression list */
TAILQ_INSERT_TAIL(&ep->ep_domain->dom_hcq_list,
hcq, cqh_dom_link);
}
atomic_inc(&hcq->cqh_refcnt);
atomic_inc(&cq->cq_refcnt);
*hcqp = hcq;
return 0;
fail:
if (hcq != NULL) {
free(hcq);
}
return ret;
}
static int
usdf_ep_rdm_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
{
struct usdf_ep *ep;
struct usdf_cq *cq;
ep = ep_fidtou(fid);
switch (bfid->fclass) {
case FI_CLASS_AV:
if (ep->e.rdm.ep_av != NULL) {
return -FI_EINVAL;
}
ep->e.rdm.ep_av = av_fidtou(bfid);
break;
case FI_CLASS_CQ:
if (flags & FI_SEND) {
cq = cq_fidtou(bfid);
usdf_ep_rdm_bind_cq(ep, cq, FI_SEND);
}
if (flags & FI_RECV) {
cq = cq_fidtou(bfid);
usdf_ep_rdm_bind_cq(ep, cq, FI_RECV);
}
break;
case FI_CLASS_EQ:
if (ep->ep_eq != NULL) {
return -FI_EINVAL;
}
ep->ep_eq = eq_fidtou(bfid);
atomic_inc(&ep->ep_eq->eq_refcnt);
break;
default:
return -FI_EINVAL;
}
return 0;
}
/*
* XXX clean up pending transmits
*/
static int
usdf_rdm_rx_ctx_close(fid_t fid)
{
struct usdf_rx *rx;
struct usdf_cq_hard *hcq;
rx = rx_fidtou(fid);
if (atomic_get(&rx->rx_refcnt) > 0) {
return -FI_EBUSY;
}
hcq = rx->r.rdm.rx_hcq;
if (hcq != NULL) {
atomic_dec(&hcq->cqh_refcnt);
atomic_dec(&hcq->cqh_cq->cq_refcnt);
}
if (rx->r.rdm.rx_sock != -1) {
close(rx->r.rdm.rx_sock);
}
if (rx->rx_qp != NULL) {
usd_free_mr(rx->r.rdm.rx_bufs);
free(rx->r.rdm.rx_rqe_buf);
usd_destroy_qp(rx->rx_qp);
}
atomic_dec(&rx->rx_domain->dom_refcnt);
free(rx);
return 0;
}
/*
* XXX clean up pending receives
*/
static int
usdf_rdm_tx_ctx_close(fid_t fid)
{
struct usdf_tx *tx;
struct usdf_cq_hard *hcq;
tx = tx_fidtou(fid);
if (atomic_get(&tx->tx_refcnt) > 0) {
return -FI_EBUSY;
}
hcq = tx->t.rdm.tx_hcq;
if (hcq != NULL) {
atomic_dec(&hcq->cqh_refcnt);
atomic_dec(&hcq->cqh_cq->cq_refcnt);
}
if (tx->tx_qp != NULL) {
free(tx->t.rdm.tx_wqe_buf);
usd_destroy_qp(tx->tx_qp);
}
atomic_dec(&tx->tx_domain->dom_refcnt);
free(tx);
return 0;
}
int
usdf_rx_rdm_port_bind(struct usdf_rx *rx, struct fi_info *info)
{
struct sockaddr_in *sin;
struct sockaddr_in src;
socklen_t addrlen;
int ret;
if (info->src_addr != NULL) {
if (info->addr_format != FI_SOCKADDR &&
info->addr_format != FI_SOCKADDR_IN) {
return -FI_EINVAL;
}
sin = (struct sockaddr_in *)info->src_addr;
} else {
memset(&src, 0, sizeof(src));
sin = &src;
sin->sin_family = AF_INET;
sin->sin_addr.s_addr =
rx->rx_domain->dom_fabric->fab_dev_attrs->uda_ipaddr_be;
}
rx->r.rdm.rx_sock = socket(AF_INET, SOCK_DGRAM, 0);
if (rx->r.rdm.rx_sock == -1) {
return -errno;
}
ret = bind(rx->r.rdm.rx_sock, (struct sockaddr *)sin, sizeof(*sin));
if (ret == -1) {
return -errno;
}
addrlen = sizeof(*sin);
ret = getsockname(rx->r.rdm.rx_sock, (struct sockaddr *)sin, &addrlen);
if (ret == -1) {
return -errno;
}
return 0;
}
static int
usdf_ep_rdm_close(fid_t fid)
{
struct usdf_ep *ep;
ep = ep_fidtou(fid);
if (atomic_get(&ep->ep_refcnt) > 0) {
return -FI_EBUSY;
}
if (ep->ep_rx != NULL) {
atomic_dec(&ep->ep_rx->rx_refcnt);
if (rx_utofid(ep->ep_rx)->fclass == FI_CLASS_RX_CTX) {
(void) usdf_rdm_rx_ctx_close(rx_utofid(ep->ep_rx));
}
}
if (ep->ep_tx != NULL) {
atomic_dec(&ep->ep_tx->tx_refcnt);
if (tx_utofid(ep->ep_tx)->fclass == FI_CLASS_TX_CTX) {
(void) usdf_rdm_tx_ctx_close(tx_utofid(ep->ep_tx));
}
}
atomic_dec(&ep->ep_domain->dom_refcnt);
if (ep->ep_eq != NULL) {
atomic_dec(&ep->ep_eq->eq_refcnt);
}
free(ep);
return 0;
}
/*
 * fi_ops_ep vtable for RDM endpoints.  Shared TX/RX contexts are not
 * supported, so tx_ctx/rx_ctx are the fi_no_* stubs.
 */
static struct fi_ops_ep usdf_base_rdm_ops = {
	.size = sizeof(struct fi_ops_ep),
	.enable = usdf_ep_rdm_enable,
	.cancel = usdf_ep_rdm_cancel,
	.getopt = usdf_ep_rdm_getopt,
	.setopt = usdf_ep_rdm_setopt,
	.tx_ctx = fi_no_tx_ctx,
	.rx_ctx = fi_no_rx_ctx,
};
/*
 * Connection-management vtable.  RDM endpoints are connectionless, so
 * only getname is implemented; every connection-oriented entry point is
 * an fi_no_* stub.
 */
static struct fi_ops_cm usdf_cm_rdm_ops = {
	.size = sizeof(struct fi_ops_cm),
	.getname = usdf_cm_rdm_getname,
	.getpeer = fi_no_getpeer,
	.connect = fi_no_connect,
	.listen = fi_no_listen,
	.accept = fi_no_accept,
	.reject = fi_no_reject,
	.shutdown = fi_no_shutdown,
	.join = fi_no_join,
	.leave = fi_no_leave,
};
/*
 * Message send/receive vtable for RDM endpoints.  injectdata is the only
 * unimplemented operation (fi_no_msg_injectdata stub).
 */
static struct fi_ops_msg usdf_rdm_ops = {
	.size = sizeof(struct fi_ops_msg),
	.recv = usdf_rdm_recv,
	.recvv = usdf_rdm_recvv,
	.recvmsg = usdf_rdm_recvmsg,
	.send = usdf_rdm_send,
	.sendv = usdf_rdm_sendv,
	.sendmsg = usdf_rdm_sendmsg,
	.inject = usdf_rdm_inject,
	.senddata = usdf_rdm_senddata,
	.injectdata = fi_no_msg_injectdata,
};
/*
 * Base fid operations for the RDM endpoint object: close and bind are
 * implemented; control and ops_open are unsupported stubs.
 */
static struct fi_ops usdf_ep_rdm_ops = {
	.size = sizeof(struct fi_ops),
	.close = usdf_ep_rdm_close,
	.bind = usdf_ep_rdm_bind,
	.control = fi_no_control,
	.ops_open = fi_no_ops_open
};
/*
 * Open an RDM endpoint on the given domain.  Unless the hints request
 * shared contexts (FI_SHARED_CONTEXT), a private TX context and a private
 * RX context are created implicitly and bound to the endpoint.  On
 * success *ep_o receives the new endpoint fid.
 *
 * Returns 0 on success, or a negative fabric error code; on failure all
 * partially-created objects are released.
 *
 * BUG FIXES vs. previous version:
 *  - 'ep' was leaked on every failure path after its allocation.
 *  - the return value of usdf_rdm_fill_tx_attr()/usdf_rdm_fill_rx_attr()
 *    was assigned but never checked when filling the default attrs.
 */
int
usdf_ep_rdm_open(struct fid_domain *domain, struct fi_info *info,
	    struct fid_ep **ep_o, void *context)
{
	struct usdf_domain *udp;
	struct usdf_tx *tx;
	struct usdf_rx *rx;
	struct usdf_ep *ep;
	int ret;

	ep = NULL;
	rx = NULL;
	tx = NULL;

	if ((info->caps & ~USDF_RDM_CAPS) != 0) {
		return -FI_EBADFLAGS;
	}

	udp = dom_ftou(domain);

	/* allocate peer table (array of EP pointers) if not done */
	if (udp->dom_peer_tab == NULL) {
		udp->dom_peer_tab = calloc(USDF_MAX_PEERS, sizeof(ep));
	}
	if (udp->dom_peer_tab == NULL) {
		ret = -errno;
		goto fail;
	}

	ep = calloc(1, sizeof(*ep));
	if (ep == NULL) {
		ret = -errno;
		goto fail;
	}

	ep->ep_fid.fid.fclass = FI_CLASS_EP;
	ep->ep_fid.fid.context = context;
	ep->ep_fid.fid.ops = &usdf_ep_rdm_ops;
	ep->ep_fid.ops = &usdf_base_rdm_ops;
	ep->ep_fid.cm = &usdf_cm_rdm_ops;
	ep->ep_fid.msg = &usdf_rdm_ops;
	ep->ep_domain = udp;
	ep->ep_caps = info->caps;
	ep->ep_mode = info->mode;

	/* implicitly create TX context if not to be shared */
	if (info->ep_attr == NULL ||
	    info->ep_attr->tx_ctx_cnt != FI_SHARED_CONTEXT) {
		tx = calloc(1, sizeof(*tx));
		if (tx == NULL) {
			ret = -errno;
			goto fail;
		}
		tx->tx_fid.fid.fclass = FI_CLASS_TX_CTX;
		atomic_init(&tx->tx_refcnt, 0);
		tx->tx_domain = udp;
		tx->tx_progress = usdf_rdm_tx_progress;
		atomic_init(&tx->t.rdm.tx_next_msg_id, 1);
		atomic_inc(&udp->dom_refcnt);

		if (info->tx_attr != NULL) {
			ret = usdf_rdm_fill_tx_attr(info->tx_attr);
			if (ret != 0) {
				goto fail;
			}
			tx->tx_attr = *info->tx_attr;
		} else {
			/* was ignored before: validate the defaults too */
			ret = usdf_rdm_fill_tx_attr(&tx->tx_attr);
			if (ret != 0) {
				goto fail;
			}
		}
		TAILQ_INIT(&tx->t.rdm.tx_free_wqe);
		TAILQ_INIT(&tx->t.rdm.tx_rdc_ready);
		TAILQ_INIT(&tx->t.rdm.tx_rdc_have_acks);

		ep->ep_tx = tx;
		atomic_inc(&tx->tx_refcnt);
	}

	/* implicitly create RX context if not to be shared */
	if (info->ep_attr == NULL ||
	    info->ep_attr->rx_ctx_cnt != FI_SHARED_CONTEXT) {
		rx = calloc(1, sizeof(*rx));
		if (rx == NULL) {
			ret = -errno;
			goto fail;
		}
		rx->rx_fid.fid.fclass = FI_CLASS_RX_CTX;
		atomic_init(&rx->rx_refcnt, 0);
		rx->rx_domain = udp;
		rx->r.rdm.rx_tx = tx;
		rx->r.rdm.rx_sock = -1;
		atomic_inc(&udp->dom_refcnt);

		ret = usdf_rx_rdm_port_bind(rx, info);
		if (ret != 0) {
			goto fail;
		}

		if (info->rx_attr != NULL) {
			ret = usdf_rdm_fill_rx_attr(info->rx_attr);
			if (ret != 0) {
				goto fail;
			}
			rx->rx_attr = *info->rx_attr;
		} else {
			/* was ignored before: validate the defaults too */
			ret = usdf_rdm_fill_rx_attr(&rx->rx_attr);
			if (ret != 0) {
				goto fail;
			}
		}
		TAILQ_INIT(&rx->r.rdm.rx_free_rqe);
		TAILQ_INIT(&rx->r.rdm.rx_posted_rqe);

		ep->ep_rx = rx;
		atomic_inc(&rx->rx_refcnt);
	}

	atomic_init(&ep->ep_refcnt, 0);
	atomic_inc(&udp->dom_refcnt);

	*ep_o = ep_utof(ep);
	return 0;

fail:
	if (rx != NULL) {
		if (rx->r.rdm.rx_sock != -1) {
			close(rx->r.rdm.rx_sock);
		}
		free(rx);
		atomic_dec(&udp->dom_refcnt);
	}
	if (tx != NULL) {
		free(tx);
		atomic_dec(&udp->dom_refcnt);
	}
	free(ep);	/* was leaked before; free(NULL) is a no-op */
	return ret;
}

Просмотреть файл

@ -566,6 +566,9 @@ usdf_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
/*
* Allocate and initialize event ring
*/
if (attr->size == 0) {
attr->size = 1024; // XXX
}
eq->eq_ev_ring = calloc(attr->size, sizeof(*eq->eq_ev_ring));
eq->eq_ev_buf = calloc(attr->size, sizeof(*eq->eq_ev_buf));
if (eq->eq_ev_ring == NULL || eq->eq_ev_buf == NULL) {

Просмотреть файл

@ -60,6 +60,7 @@
#include <rdma/fi_errno.h>
#include "fi.h"
#include "fi_enosys.h"
#include "prov.h"
#include "usnic_direct.h"
#include "libnl_utils.h"
@ -68,6 +69,9 @@
#include "fi_usnic.h"
#include "usdf_progress.h"
#include "usdf_timer.h"
#include "usdf_dgram.h"
#include "usdf_msg.h"
#include "usdf_rdm.h"
struct usdf_usnic_info *__usdf_devinfo;
@ -108,8 +112,12 @@ usdf_validate_hints(struct fi_info *hints, struct usd_device_attrs *dap)
fattrp = hints->fabric_attr;
if (fattrp != NULL) {
if (fattrp->prov_version != 0 &&
fattrp->prov_version != USDF_PROV_VERSION) {
return -FI_ENODATA;
}
if (fattrp->prov_name != NULL &&
strcmp(fattrp->prov_name, USDF_FI_NAME) != 0) {
strcmp(fattrp->prov_name, USDF_PROV_NAME) != 0) {
return -FI_ENODATA;
}
if (fattrp->name != NULL &&
@ -122,16 +130,15 @@ usdf_validate_hints(struct fi_info *hints, struct usd_device_attrs *dap)
}
static int
usdf_fill_addr_info(struct fi_info *fi, struct fi_info *hints,
usdf_fill_addr_info(struct fi_info *fi, uint32_t addr_format,
struct sockaddr_in *src, struct sockaddr_in *dest,
struct usd_device_attrs *dap)
{
struct sockaddr_in *sin;
int ret;
/* If hints speficied, we already validated requested addr_format */
if (hints != NULL && hints->addr_format != FI_FORMAT_UNSPEC) {
fi->addr_format = hints->addr_format;
if (addr_format != FI_FORMAT_UNSPEC) {
fi->addr_format = addr_format;
} else {
fi->addr_format = FI_SOCKADDR_IN;
}
@ -192,6 +199,8 @@ usdf_fill_info_dgram(
struct fi_tx_attr *txattr;
struct fi_rx_attr *rxattr;
struct fi_ep_attr *eattrp;
uint32_t addr_format;
size_t entries;
int ret;
/* check that we are capable of what's requested */
@ -214,12 +223,14 @@ usdf_fill_info_dgram(
if (hints != NULL) {
fi->mode = hints->mode & USDF_DGRAM_SUPP_MODE;
addr_format = hints->addr_format;
} else {
fi->mode = USDF_DGRAM_SUPP_MODE;
addr_format = FI_FORMAT_UNSPEC;
}
fi->ep_type = FI_EP_DGRAM;
ret = usdf_fill_addr_info(fi, hints, src, dest, dap);
ret = usdf_fill_addr_info(fi, addr_format, src, dest, dap);
if (ret != 0) {
goto fail;
}
@ -234,22 +245,72 @@ usdf_fill_info_dgram(
/* TX attrs */
txattr = fi->tx_attr;
txattr->size = dap->uda_max_send_credits;
if (hints != NULL &&
hints->tx_attr != NULL &&
hints->tx_attr->size != 0 &&
hints->tx_attr->size < txattr->size) {
txattr->size = hints->tx_attr->size;
txattr->iov_limit = USDF_DGRAM_DFLT_SGE;
txattr->size = dap->uda_max_send_credits / USDF_DGRAM_DFLT_SGE;
if (hints != NULL && hints->tx_attr != NULL) {
if (hints->tx_attr->iov_limit > USDF_MSG_MAX_SGE) {
ret = -FI_ENODATA;
goto fail;
}
if (hints->tx_attr->iov_limit != 0) {
txattr->iov_limit = hints->tx_attr->iov_limit;
entries = hints->tx_attr->size * txattr->iov_limit;
if (entries > dap->uda_max_send_credits) {
ret = -FI_ENODATA;
goto fail;
} else if (entries == 0) {
txattr->size = dap->uda_max_send_credits /
txattr->iov_limit;
} else {
txattr->size = hints->tx_attr->size;
}
} else if (hints->tx_attr->size != 0) {
txattr->size = hints->tx_attr->size;
if (txattr->size > dap->uda_max_send_credits) {
ret = -FI_ENODATA;
goto fail;
}
entries = txattr->size * txattr->iov_limit;
if (entries > dap->uda_max_send_credits) {
txattr->iov_limit = dap->uda_max_send_credits /
txattr->size;
}
}
}
/* RX attrs */
rxattr = fi->rx_attr;
rxattr->size = dap->uda_max_recv_credits;
if (hints != NULL &&
hints->rx_attr != NULL &&
hints->rx_attr->size != 0 &&
hints->rx_attr->size < rxattr->size) {
rxattr->size = hints->rx_attr->size;
rxattr->iov_limit = USDF_DGRAM_DFLT_SGE;
rxattr->size = dap->uda_max_recv_credits / USDF_DGRAM_DFLT_SGE;
if (hints != NULL && hints->rx_attr != NULL) {
if (hints->rx_attr->iov_limit > USDF_MSG_MAX_SGE) {
ret = -FI_ENODATA;
goto fail;
}
if (hints->rx_attr->iov_limit != 0) {
rxattr->iov_limit = hints->rx_attr->iov_limit;
entries = hints->rx_attr->size * rxattr->iov_limit;
if (entries > dap->uda_max_recv_credits) {
ret = -FI_ENODATA;
goto fail;
} else if (entries == 0) {
rxattr->size = dap->uda_max_recv_credits /
rxattr->iov_limit;
} else {
rxattr->size = hints->rx_attr->size;
}
} else if (hints->rx_attr->size != 0) {
rxattr->size = hints->rx_attr->size;
if (rxattr->size > dap->uda_max_recv_credits) {
ret = -FI_ENODATA;
goto fail;
}
entries = rxattr->size * rxattr->iov_limit;
if (entries > dap->uda_max_recv_credits) {
rxattr->iov_limit = dap->uda_max_recv_credits /
rxattr->size;
}
}
}
/* endpoint attrs */
@ -267,7 +328,7 @@ usdf_fill_info_dgram(
dattrp = fi->domain_attr;
dattrp->threading = FI_THREAD_UNSPEC;
dattrp->control_progress = FI_PROGRESS_AUTO;
dattrp->data_progress = FI_PROGRESS_AUTO;
dattrp->data_progress = FI_PROGRESS_MANUAL;
/* add to tail of list */
if (*fi_first == NULL) {
@ -301,6 +362,7 @@ usdf_fill_info_msg(
struct fi_tx_attr *txattr;
struct fi_rx_attr *rxattr;
struct fi_ep_attr *eattrp;
uint32_t addr_format;
int ret;
/* check that we are capable of what's requested */
@ -323,13 +385,15 @@ usdf_fill_info_msg(
if (hints != NULL) {
fi->mode = hints->mode & USDF_MSG_SUPP_MODE;
addr_format = hints->addr_format;
} else {
fi->mode = USDF_MSG_SUPP_MODE;
addr_format = FI_FORMAT_UNSPEC;
}
fi->ep_type = FI_EP_MSG;
ret = usdf_fill_addr_info(fi, hints, src, dest, dap);
ret = usdf_fill_addr_info(fi, addr_format, src, dest, dap);
if (ret != 0) {
goto fail;
}
@ -344,28 +408,21 @@ usdf_fill_info_msg(
/* TX attrs */
txattr = fi->tx_attr;
txattr->size = dap->uda_max_send_credits;
if (hints != NULL &&
hints->tx_attr != NULL &&
hints->tx_attr->size != 0 &&
hints->tx_attr->size < txattr->size) {
txattr->size = hints->tx_attr->size;
if (hints != NULL && hints->tx_attr != NULL) {
*txattr = *hints->tx_attr;
}
usdf_msg_fill_tx_attr(txattr);
/* RX attrs */
rxattr = fi->rx_attr;
rxattr->size = dap->uda_max_recv_credits;
if (hints != NULL &&
hints->rx_attr != NULL &&
hints->rx_attr->size != 0 &&
hints->rx_attr->size < rxattr->size) {
rxattr->size = hints->rx_attr->size;
if (hints != NULL && hints->rx_attr != NULL) {
*rxattr = *hints->rx_attr;
}
usdf_msg_fill_rx_attr(rxattr);
/* endpoint attrs */
eattrp = fi->ep_attr;
eattrp->max_msg_size = dap->uda_mtu -
sizeof(struct usd_udp_hdr);
eattrp->max_msg_size = USDF_MSG_MAX_MSG;
eattrp->protocol = FI_PROTO_RUDP;
eattrp->tx_ctx_cnt = 1;
eattrp->rx_ctx_cnt = 1;
@ -374,7 +431,109 @@ usdf_fill_info_msg(
dattrp = fi->domain_attr;
dattrp->threading = FI_THREAD_UNSPEC;
dattrp->control_progress = FI_PROGRESS_AUTO;
dattrp->data_progress = FI_PROGRESS_AUTO;
dattrp->data_progress = FI_PROGRESS_MANUAL;
/* add to tail of list */
if (*fi_first == NULL) {
*fi_first = fi;
} else {
(*fi_last)->next = fi;
}
*fi_last = fi;
return 0;
fail:
if (fi != NULL) {
fi_freeinfo(fi);
}
return ret;
}
static int
usdf_fill_info_rdm(
struct fi_info *hints,
struct sockaddr_in *src,
struct sockaddr_in *dest,
struct usd_device_attrs *dap,
struct fi_info **fi_first,
struct fi_info **fi_last)
{
struct fi_info *fi;
struct fi_fabric_attr *fattrp;
struct fi_domain_attr *dattrp;
struct fi_tx_attr *txattr;
struct fi_rx_attr *rxattr;
struct fi_ep_attr *eattrp;
uint32_t addr_format;
int ret;
/* check that we are capable of what's requested */
if ((hints->caps & ~USDF_RDM_CAPS) != 0) {
return -FI_ENODATA;
}
/* app must support these modes */
if ((hints->mode & USDF_RDM_REQ_MODE) != USDF_RDM_REQ_MODE) {
return -FI_ENODATA;
}
fi = fi_allocinfo_internal();
if (fi == NULL) {
ret = -FI_ENOMEM;
goto fail;
}
fi->caps = USDF_RDM_CAPS;
if (hints != NULL) {
fi->mode = hints->mode & USDF_RDM_SUPP_MODE;
addr_format = hints->addr_format;
} else {
fi->mode = USDF_RDM_SUPP_MODE;
addr_format = FI_FORMAT_UNSPEC;
}
fi->ep_type = FI_EP_RDM;
ret = usdf_fill_addr_info(fi, addr_format, src, dest, dap);
if (ret != 0) {
goto fail;
}
/* fabric attrs */
fattrp = fi->fabric_attr;
fattrp->name = strdup(dap->uda_devname);
if (fattrp->name == NULL) {
ret = -FI_ENOMEM;
goto fail;
}
/* TX attrs */
txattr = fi->tx_attr;
if (hints != NULL && hints->tx_attr != NULL) {
*txattr = *hints->tx_attr;
}
usdf_rdm_fill_tx_attr(txattr);
/* RX attrs */
rxattr = fi->rx_attr;
if (hints != NULL && hints->rx_attr != NULL) {
*rxattr = *hints->rx_attr;
}
usdf_rdm_fill_rx_attr(rxattr);
/* endpoint attrs */
eattrp = fi->ep_attr;
eattrp->max_msg_size = USDF_RDM_MAX_MSG;
eattrp->protocol = FI_PROTO_RUDP;
eattrp->tx_ctx_cnt = 1;
eattrp->rx_ctx_cnt = 1;
/* domain attrs */
dattrp = fi->domain_attr;
dattrp->threading = FI_THREAD_UNSPEC;
dattrp->control_progress = FI_PROGRESS_AUTO;
dattrp->data_progress = FI_PROGRESS_MANUAL;
/* add to tail of list */
if (*fi_first == NULL) {
@ -561,6 +720,14 @@ usdf_getinfo(uint32_t version, const char *node, const char *service,
goto fail;
}
}
if (ep_type == FI_EP_RDM || ep_type == FI_EP_UNSPEC) {
ret = usdf_fill_info_rdm(hints, src, dest, dap,
&fi_first, &fi_last);
if (ret != 0 && ret != -FI_ENODATA) {
goto fail;
}
}
}
if (fi_first != NULL) {
@ -690,6 +857,17 @@ usdf_fabric_open(struct fi_fabric_attr *fattrp, struct fid_fabric **fabric,
}
fp->fab_epollfd = -1;
fp->fab_arp_sockfd = -1;
LIST_INIT(&fp->fab_domain_list);
fp->fab_attr.fabric = fab_utof(fp);
fp->fab_attr.name = strdup(fattrp->name);
fp->fab_attr.prov_name = strdup(USDF_PROV_NAME);
fp->fab_attr.prov_version = USDF_PROV_VERSION;
if (fp->fab_attr.name == NULL ||
fp->fab_attr.prov_name == NULL) {
ret = -FI_ENOMEM;
goto fail;
}
fp->fab_fid.fid.fclass = FI_CLASS_FABRIC;
fp->fab_fid.fid.context = context;
@ -726,6 +904,7 @@ usdf_fabric_open(struct fi_fabric_attr *fattrp, struct fid_fabric **fabric,
goto fail;
}
/* initialize timer subsystem */
ret = usdf_timer_init(fp);
if (ret != 0) {
goto fail;
@ -746,7 +925,9 @@ usdf_fabric_open(struct fi_fabric_attr *fattrp, struct fid_fabric **fabric,
}
atomic_init(&fp->fab_refcnt, 0);
*fabric = &fp->fab_fid;
fattrp->fabric = fab_utof(fp);
fattrp->prov_version = USDF_PROV_VERSION;
*fabric = fab_utof(fp);
return 0;
fail:
@ -766,20 +947,20 @@ fail:
return ret;
}
static void usdf_fini(void)
{
}
static struct fi_provider usdf_ops = {
.name = USDF_FI_NAME,
.version = FI_VERSION(0, 7),
.name = USDF_PROV_NAME,
.version = USDF_PROV_VERSION,
.fi_version = FI_VERSION(FI_MAJOR_VERSION, FI_MINOR_VERSION),
.getinfo = usdf_getinfo,
.fabric = usdf_fabric_open,
.cleanup = usdf_fini
};
static void __attribute__((constructor))
usdf_ini(void)
{
(void) fi_register(&usdf_ops);
}
static void __attribute__((destructor))
usdf_fini(void)
USNIC_INI
{
return (&usdf_ops);
}

Просмотреть файл

@ -57,81 +57,153 @@
#include <rdma/fi_errno.h>
#include "fi.h"
#include "usnic_direct.h"
#include "usd.h"
#include "usd_post.h"
#include "usdf.h"
#include "usdf_rudp.h"
#include "usdf_msg.h"
#include "usdf_timer.h"
#include "usdf_progress.h"
static inline void
usdf_msg_ep_ready(struct usdf_ep *ep)
{
struct usdf_tx *tx;
tx = ep->ep_tx;
if (!TAILQ_ON_LIST(ep, e.msg.ep_link)) {
ep->e.msg.ep_fairness_credits = USDF_MSG_FAIRNESS_CREDITS;
TAILQ_INSERT_TAIL(&tx->t.msg.tx_ep_ready, ep, e.msg.ep_link);
/* Make sure TX is on domain ready list */
if (!TAILQ_ON_LIST(tx, tx_link)) {
TAILQ_INSERT_TAIL(&tx->tx_domain->dom_tx_ready,
tx, tx_link);
}
}
}
static inline void
usdf_msg_rewind_qe(struct usdf_msg_qe *qe, size_t rewind, size_t mtu)
{
size_t cur_resid;
size_t cur_iov;
size_t bytes;
size_t len;
if (qe->ms_resid == 0) {
bytes = qe->ms_length % mtu;
cur_resid = 0;
} else {
bytes = mtu;
cur_resid = qe->ms_iov_resid;
}
bytes += (rewind - 1) * mtu;
qe->ms_resid += bytes;
cur_iov = qe->ms_cur_iov;
while (bytes > 0) {
len = qe->ms_iov[cur_iov].iov_len - cur_resid;
if (len >= bytes) {
len = bytes;
cur_resid += len;
} else {
--cur_iov;
cur_resid = 0;
}
bytes -= len;
}
qe->ms_cur_iov = cur_iov;
qe->ms_cur_ptr = qe->ms_iov[cur_iov].iov_base +
qe->ms_iov[cur_iov].iov_len - cur_resid;
qe->ms_iov_resid = cur_resid;
}
/*
* semi-native rx buffer post, i want to eventually avoid using the
* vnic_*() calls
*/
static inline int
_usdf_msg_post_recv(struct usdf_rx *rx, void *buf, size_t len)
{
struct usd_rq *rq;
struct vnic_rq *vrq;
struct rq_enet_desc *desc;
struct usd_qp_impl *qp;
qp = to_qpi(rx->rx_qp);
rq = &qp->uq_rq;
vrq = &rq->urq_vnic_rq;
rq->urq_context[rq->urq_post_index] = buf;
rq->urq_post_index = (rq->urq_post_index + 1)
& rq->urq_post_index_mask;
desc = vnic_rq_next_desc(vrq);
rq_enet_desc_enc(desc, (dma_addr_t) buf,
RQ_ENET_TYPE_ONLY_SOP, len);
wmb();
vnic_rq_post(vrq, buf, 0, (dma_addr_t) buf, len, 0);
return 0;
}
/*
* Allow external access to the inline
*/
int
usdf_msg_post_recv(struct usdf_rx *rx, void *buf, size_t len)
{
return _usdf_msg_post_recv(rx, buf, len);
}
ssize_t
usdf_msg_recv(struct fid_ep *fep, void *buf, size_t len,
void *desc, fi_addr_t src_addr, void *context)
{
struct usdf_ep *ep;
struct usd_qp_impl *qp;
struct usd_recv_desc rxd;
uint32_t index;
struct usdf_rx *rx;
struct usdf_msg_qe *rqe;
struct usdf_domain *udp;
ep = ep_ftou(fep);
qp = to_qpi(ep->ep_qp);
rx = ep->ep_rx;
udp = ep->ep_domain;
index = qp->uq_rq.urq_post_index;
rxd.urd_context = context;
rxd.urd_iov[0].iov_base = (uint8_t *)ep->ep_hdr_buf +
(index * USDF_HDR_BUF_ENTRY) +
(USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr));
rxd.urd_iov[0].iov_len = sizeof(struct usd_udp_hdr);
rxd.urd_iov[1].iov_base = buf;
rxd.urd_iov[1].iov_len = len;
rxd.urd_iov_cnt = 2;
rxd.urd_next = NULL;
if (TAILQ_EMPTY(&rx->r.msg.rx_free_rqe)) {
return -FI_EAGAIN;
}
ep->ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
index = (index + 1) & qp->uq_rq.urq_post_index_mask;
ep->ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
pthread_spin_lock(&udp->dom_progress_lock);
return usd_post_recv(ep->ep_qp, &rxd);
rqe = TAILQ_FIRST(&rx->r.msg.rx_free_rqe);
TAILQ_REMOVE(&rx->r.msg.rx_free_rqe, rqe, ms_link);
rqe->ms_context = context;
rqe->ms_iov[0].iov_base = buf;
rqe->ms_iov[0].iov_len = len;
rqe->ms_last_iov = 0;
rqe->ms_cur_iov = 0;
rqe->ms_cur_ptr = buf;
rqe->ms_iov_resid = len;
rqe->ms_length = 0;
TAILQ_INSERT_TAIL(&rx->r.msg.rx_posted_rqe, rqe, ms_link);
pthread_spin_unlock(&udp->dom_progress_lock);
return 0;
}
ssize_t
usdf_msg_recvv(struct fid_ep *fep, const struct iovec *iov, void **desc,
size_t count, fi_addr_t src_addr, void *context)
{
struct usdf_ep *ep;
struct usd_recv_desc rxd;
struct usd_qp_impl *qp;
uint32_t index;
int i;
ep = ep_ftou(fep);
qp = to_qpi(ep->ep_qp);
rxd.urd_context = context;
rxd.urd_iov[0].iov_base = ep->ep_hdr_buf +
qp->uq_rq.urq_post_index * USDF_HDR_BUF_ENTRY;
rxd.urd_iov[0].iov_len = sizeof(struct usd_udp_hdr);
memcpy(&rxd.urd_iov[1], iov, sizeof(*iov) * count);
rxd.urd_iov_cnt = count + 1;
rxd.urd_next = NULL;
index = qp->uq_rq.urq_post_index;
for (i = 0; i < count; ++i) {
ep->ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
index = (index + 1) & qp->uq_rq.urq_post_index_mask;
}
return usd_post_recv(ep->ep_qp, &rxd);
}
static inline ssize_t
_usdf_msg_send(struct usdf_ep *ep, struct usd_dest *dest,
const void *buf, size_t len, fi_addr_t dest_addr, void *context)
{
if (len <= USD_SEND_MAX_COPY - sizeof(struct usd_udp_hdr)) {
return usd_post_send_one_copy(ep->ep_qp, dest, buf, len,
USD_SF_SIGNAL, context);
} else {
return usd_post_send_one(ep->ep_qp, dest, buf, len,
USD_SF_SIGNAL, context);
}
return -FI_ENOSYS;
}
ssize_t
@ -139,20 +211,48 @@ usdf_msg_send(struct fid_ep *fep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, void *context)
{
struct usdf_ep *ep;
struct usd_dest *dest;
int ret;
struct usdf_tx *tx;
struct usdf_msg_qe *wqe;
struct usdf_domain *udp;
ep = ep_ftou(fep);
tx = ep->ep_tx;
udp = ep->ep_domain;
dest = (struct usd_dest *)(uintptr_t)dest_addr;
return _usdf_msg_send(ep, dest, buf, len, dest_addr, context);
if (TAILQ_EMPTY(&tx->t.msg.tx_free_wqe)) {
return -FI_EAGAIN;
}
return ret;
pthread_spin_lock(&udp->dom_progress_lock);
wqe = TAILQ_FIRST(&tx->t.msg.tx_free_wqe);
TAILQ_REMOVE(&tx->t.msg.tx_free_wqe, wqe, ms_link);
wqe->ms_context = context;
wqe->ms_iov[0].iov_base = (void *)buf;
wqe->ms_iov[0].iov_len = len;
wqe->ms_last_iov = 0;
wqe->ms_cur_iov = 0;
wqe->ms_cur_ptr = buf;
wqe->ms_iov_resid = len;
wqe->ms_resid = len;
wqe->ms_length = len;
/* add send to EP, and add EP to TX list if not present */
TAILQ_INSERT_TAIL(&ep->e.msg.ep_posted_wqe, wqe, ms_link);
usdf_msg_ep_ready(ep);
pthread_spin_unlock(&udp->dom_progress_lock);
usdf_domain_progress(udp);
return 0;
}
ssize_t
usdf_msg_senddata(struct fid_ep *ep, const void *buf, size_t len,
void *desc, uint64_t data, fi_addr_t dest_addr, void *context)
usdf_msg_senddata(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, fi_addr_t dest_addr, void *context)
{
return -FI_ENOSYS;
}
@ -183,60 +283,640 @@ usdf_msg_recvmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flags)
return -FI_ENOSYS;
}
/*
* Versions that rely on user to reserve space for header at start of buffer
*/
ssize_t
usdf_msg_prefix_recv(struct fid_ep *fep, void *buf, size_t len,
void *desc, fi_addr_t src_addr, void *context)
static void
usdf_msg_send_complete(struct usdf_ep *ep, struct usdf_msg_qe *wqe)
{
struct usdf_ep *ep;
struct usd_qp_impl *qp;
struct usd_recv_desc rxd;
uint32_t index;
TAILQ_REMOVE(&ep->e.msg.ep_posted_wqe, wqe, ms_link);
ep = ep_ftou(fep);
qp = to_qpi(ep->ep_qp);
index = qp->uq_rq.urq_post_index;
rxd.urd_context = context;
rxd.urd_iov[0].iov_base = (uint8_t *)buf +
USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr);
rxd.urd_iov[0].iov_len = len;
rxd.urd_iov_cnt = 1;
rxd.urd_next = NULL;
ep->ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
return usd_post_recv(ep->ep_qp, &rxd);
wqe->ms_last_seq = ep->e.msg.ep_next_tx_seq - 1;
TAILQ_INSERT_TAIL(&ep->e.msg.ep_sent_wqe, wqe, ms_link);
}
ssize_t
usdf_msg_prefix_recvv(struct fid_ep *fep, const struct iovec *iov,
void **desc, size_t count, fi_addr_t src_addr, void *context)
static inline void
usdf_msg_send_segment(struct usdf_tx *tx, struct usdf_ep *ep)
{
struct usdf_ep *ep;
struct usd_recv_desc rxd;
struct usd_qp_impl *qp;
struct usdf_msg_qe *msg;
struct rudp_pkt *hdr;
struct usd_wq *wq;
uint32_t index;
int i;
size_t cur_iov;
size_t cur_resid;
size_t resid;
const uint8_t *cur_ptr;
const uint8_t *send_ptr;
size_t sge_len;
uint8_t *ptr;
struct usd_wq_post_info *info;
ep = ep_ftou(fep);
qp = to_qpi(ep->ep_qp);
msg = TAILQ_FIRST(&ep->e.msg.ep_posted_wqe);
wq = &(to_qpi(tx->tx_qp)->uq_wq);
rxd.urd_context = context;
memcpy(&rxd.urd_iov[0], iov, sizeof(*iov) * count);
rxd.urd_iov[0].iov_base = (uint8_t *)rxd.urd_iov[0].iov_base +
USDF_HDR_BUF_ENTRY - sizeof(struct usd_udp_hdr);
index = wq->uwq_post_index;
hdr = (struct rudp_pkt *)(wq->uwq_copybuf + index * USD_SEND_MAX_COPY);
rxd.urd_iov_cnt = count;
rxd.urd_next = NULL;
memcpy(hdr, &ep->e.msg.ep_dest->ds_dest.ds_udp.u_hdr,
sizeof(struct usd_udp_hdr));
hdr->msg.src_peer_id = htons(ep->e.msg.ep_lcl_peer_id);
index = qp->uq_rq.urq_post_index;
for (i = 0; i < count; ++i) {
ep->ep_hdr_ptr[index] = rxd.urd_iov[0].iov_base;
index = (index + 1) & qp->uq_rq.urq_post_index_mask;
resid = msg->ms_resid;
cur_iov = msg->ms_cur_iov;
cur_ptr = msg->ms_cur_ptr;
cur_resid = msg->ms_iov_resid;
/* save first seq for message */
if (cur_iov == 0 && cur_resid == msg->ms_iov[0].iov_len) {
msg->ms_first_seq = ep->e.msg.ep_next_tx_seq;
}
return usd_post_recv(ep->ep_qp, &rxd);
if (resid < USD_SEND_MAX_COPY - sizeof(*hdr)) {
hdr->msg.opcode = htons(RUDP_OP_LAST);
hdr->msg.m.rc_data.length = htons(resid);
hdr->msg.m.rc_data.seqno = htons(ep->e.msg.ep_next_tx_seq);
++ep->e.msg.ep_next_tx_seq;
ptr = (uint8_t *)(hdr + 1);
while (resid > 0) {
memcpy(ptr, cur_ptr, cur_resid);
ptr += msg->ms_iov_resid;
resid -= msg->ms_iov_resid;
++cur_iov;
cur_ptr = msg->ms_iov[cur_iov].iov_base;
cur_resid = msg->ms_iov[cur_iov].iov_len;
}
/* add packet lengths */
sge_len = resid;
hdr->hdr.uh_ip.tot_len = htons(
sge_len + sizeof(struct rudp_pkt) -
sizeof(struct ether_header));
hdr->hdr.uh_udp.len = htons(
(sizeof(struct rudp_pkt) -
sizeof(struct ether_header) -
sizeof(struct iphdr)) + sge_len);
index = _usd_post_send_one(wq, hdr,
resid + sizeof(*hdr), 1);
} else {
struct vnic_wq *vwq;
u_int8_t offload_mode = 0, eop;
u_int16_t mss = 7, header_length = 0, vlan_tag = 0;
u_int8_t vlan_tag_insert = 0, loopback = 0, fcoe_encap = 0;
struct wq_enet_desc *desc;
size_t space;
size_t num_sge;
size_t sent;
vwq = &wq->uwq_vnic_wq;
desc = wq->uwq_next_desc;
space = ep->ep_domain->dom_fabric->fab_dev_attrs->uda_mtu -
sizeof(*hdr);
num_sge = 1;
/* encode header desc */
eop = 0;
wq_enet_desc_enc(desc, (uintptr_t)hdr, sizeof(*hdr),
mss, header_length, offload_mode, eop, 0, fcoe_encap,
vlan_tag_insert, vlan_tag, loopback);
do {
desc = (struct wq_enet_desc *)
((uintptr_t)wq->uwq_desc_ring + (index << 4));
index = (index + 1) & wq->uwq_post_index_mask;
send_ptr = cur_ptr;
if (cur_resid >= space) {
sge_len = space;
eop = 1;
cur_resid -= sge_len;
cur_ptr += sge_len;
} else {
sge_len = cur_resid;
if (num_sge == USDF_MSG_MAX_SGE - 1 ||
cur_resid == resid) {
eop = 1;
}
++cur_iov;
cur_ptr = msg->ms_iov[cur_iov].iov_base;
cur_resid = msg->ms_iov[cur_iov].iov_len;
}
wq_enet_desc_enc(desc, (uintptr_t)send_ptr, sge_len,
mss, header_length, offload_mode, eop, eop,
fcoe_encap, vlan_tag_insert,
vlan_tag, loopback);
++num_sge;
space -= sge_len;
resid -= sge_len;
} while (space > 0 && num_sge <= USDF_MSG_MAX_SGE && resid > 0);
/* add packet lengths */
sent = ep->ep_domain->dom_fabric->fab_dev_attrs->uda_mtu -
space;
hdr->hdr.uh_ip.tot_len = htons(
sent + sizeof(struct rudp_pkt) -
sizeof(struct ether_header));
hdr->hdr.uh_udp.len = htons(
(sizeof(struct rudp_pkt) -
sizeof(struct ether_header) -
sizeof(struct iphdr)) + sent);
if (0) {
if ((random() % 177) == 0 && resid == 0) {
hdr->hdr.uh_eth.ether_type = 0;
//printf("BORK seq %u\n", ep->e.msg.ep_next_tx_seq);
}
}
if (resid == 0) {
hdr->msg.opcode = htons(RUDP_OP_LAST);
} else {
hdr->msg.opcode = htons(RUDP_OP_FIRST);
}
hdr->msg.m.rc_data.length = htons(sent);
hdr->msg.m.rc_data.seqno = htons(ep->e.msg.ep_next_tx_seq);
++ep->e.msg.ep_next_tx_seq;
wmb();
iowrite64(index, &vwq->ctrl->posted_index);
wq->uwq_next_desc = (struct wq_enet_desc *)
((uintptr_t)wq->uwq_desc_ring + (index << 4));
wq->uwq_post_index = (index + 1) & wq->uwq_post_index_mask;
wq->uwq_send_credits -= num_sge;
}
info = &wq->uwq_post_info[index];
info->wp_context = tx;
info->wp_len = sge_len;
/* If send complete, remove from send list */
if (resid == 0) {
usdf_msg_send_complete(ep, msg);
} else {
msg->ms_resid = resid;
msg->ms_iov_resid = cur_resid;
msg->ms_cur_iov = cur_iov;
msg->ms_cur_ptr = cur_ptr;
}
/* set ACK timer */
usdf_timer_set(ep->ep_domain->dom_fabric, ep->e.msg.ep_ack_timer,
USDF_RUDP_ACK_TIMEOUT);
}
static inline void
usdf_msg_send_ack(struct usdf_tx *tx, struct usdf_ep *ep)
{
struct rudp_pkt *hdr;
struct usd_wq *wq;
uint32_t last_post;
struct usd_wq_post_info *info;
uint16_t seq;
wq = &(to_qpi(tx->tx_qp)->uq_wq);
hdr = (struct rudp_pkt *) (wq->uwq_copybuf +
wq->uwq_post_index * USD_SEND_MAX_COPY);
memcpy(hdr, &ep->e.msg.ep_dest->ds_dest.ds_udp.u_hdr,
sizeof(struct usd_udp_hdr));
hdr->msg.src_peer_id = htons(ep->e.msg.ep_lcl_peer_id);
if (ep->e.msg.ep_send_nak) {
hdr->msg.opcode = htons(RUDP_OP_NAK);
seq = ep->e.msg.ep_next_rx_seq;
hdr->msg.m.nak.nak_seq = htons(seq);
ep->e.msg.ep_send_nak = 0;
} else {
hdr->msg.opcode = htons(RUDP_OP_ACK);
seq = ep->e.msg.ep_next_rx_seq - 1;
hdr->msg.m.ack.ack_seq = htons(seq);
}
/* add packet lengths */
hdr->hdr.uh_ip.tot_len = htons(
sizeof(struct rudp_pkt) -
sizeof(struct ether_header));
hdr->hdr.uh_udp.len = htons(sizeof(struct rudp_pkt) -
sizeof(struct ether_header) - sizeof(struct iphdr));
last_post = _usd_post_send_one(wq, hdr, sizeof(*hdr), 1);
info = &wq->uwq_post_info[last_post];
info->wp_context = tx;
info->wp_len = 0;
}
/*
* If this TX has sends to do and is not on domain ready list, then
* this completion means we can go back on the domain ready list
*/
static void
usdf_msg_send_completion(struct usd_completion *comp)
{
struct usdf_tx *tx;
tx = comp->uc_context;
if (!TAILQ_EMPTY(&tx->t.msg.tx_ep_ready) &&
!TAILQ_ON_LIST(tx, tx_link)) {
TAILQ_INSERT_TAIL(&tx->tx_domain->dom_tx_ready, tx, tx_link);
}
}
/*
* Keep progressing sends on this queue until:
* a) no more send credits on the queue (it's full)
* or
* b) all endpoints are complete or blocked awaiting ACKs
*/
void
usdf_msg_tx_progress(struct usdf_tx *tx)
{
struct usdf_ep *ep;
struct usd_qp_impl *qp;
qp = to_qpi(tx->tx_qp);
while (qp->uq_wq.uwq_send_credits > 1 &&
!TAILQ_EMPTY(&tx->t.msg.tx_ep_have_acks)) {
ep = TAILQ_FIRST(&tx->t.msg.tx_ep_have_acks);
TAILQ_REMOVE_MARK(&tx->t.msg.tx_ep_have_acks,
ep, e.msg.ep_ack_link);
usdf_msg_send_ack(tx, ep);
}
while (qp->uq_wq.uwq_send_credits > 1 &&
!TAILQ_EMPTY(&tx->t.msg.tx_ep_ready)) {
ep = TAILQ_FIRST(&tx->t.msg.tx_ep_ready);
/*
* Send next segment on this EP. This will also remove the
* current send from the EP send list if it completes
*/
usdf_msg_send_segment(tx, ep);
--ep->e.msg.ep_seq_credits;
if (TAILQ_EMPTY(&ep->e.msg.ep_posted_wqe)) {
TAILQ_REMOVE_MARK(&tx->t.msg.tx_ep_ready,
ep, e.msg.ep_link);
} else {
--ep->e.msg.ep_fairness_credits;
if (ep->e.msg.ep_seq_credits == 0) {
TAILQ_REMOVE_MARK(&tx->t.msg.tx_ep_ready,
ep, e.msg.ep_link);
ep->e.msg.ep_fairness_credits =
USDF_MSG_FAIRNESS_CREDITS;
/* fairness credits exhausted, go to back of the line */
} else if (ep->e.msg.ep_fairness_credits == 0) {
TAILQ_REMOVE(&tx->t.msg.tx_ep_ready,
ep, e.msg.ep_link);
TAILQ_INSERT_TAIL(&tx->t.msg.tx_ep_ready,
ep, e.msg.ep_link);
ep->e.msg.ep_fairness_credits =
USDF_MSG_FAIRNESS_CREDITS;
}
}
}
}
static void inline
usdf_msg_recv_complete(struct usdf_ep *ep, struct usdf_msg_qe *rqe)
{
struct usdf_cq_hard *hcq;
hcq = ep->ep_rx->r.msg.rx_hcq;
hcq->cqh_post(hcq, rqe->ms_context, rqe->ms_length);
TAILQ_INSERT_HEAD(&ep->ep_rx->r.msg.rx_free_rqe, rqe, ms_link);
}
static inline void
usdf_msg_ep_has_ack(struct usdf_ep *ep)
{
struct usdf_tx *tx;
struct usdf_domain *udp;
if (!TAILQ_ON_LIST(ep, e.msg.ep_ack_link)) {
tx = ep->ep_tx;
udp = ep->ep_domain;
TAILQ_INSERT_TAIL(&tx->t.msg.tx_ep_have_acks, ep,
e.msg.ep_ack_link);
/* Add TX to domain list if not present */
if (!TAILQ_ON_LIST(tx, tx_link)) {
TAILQ_INSERT_TAIL(&udp->dom_tx_ready, tx, tx_link);
}
}
}
static inline int
usdf_msg_check_seq(struct usdf_ep *ep, struct rudp_pkt *pkt)
{
uint16_t seq;
int ret;
seq = ntohs(pkt->msg.m.rc_data.seqno);
/* Drop bad seq, send NAK if seq from the future */
if (seq != ep->e.msg.ep_next_rx_seq) {
if (RUDP_SEQ_GT(seq, ep->e.msg.ep_next_rx_seq)) {
ep->e.msg.ep_send_nak = 1;
}
ret = -1;
} else {
++ep->e.msg.ep_next_rx_seq;
ret = 0;
}
usdf_msg_ep_has_ack(ep);
return ret;
}
static inline void
usdf_msg_process_ack(struct usdf_ep *ep, uint16_t seq)
{
struct usdf_cq_hard *hcq;
struct usdf_msg_qe *wqe;
uint16_t max_ack;
unsigned credits;
/* don't try to ACK what we don't think we've sent */
max_ack = ep->e.msg.ep_next_tx_seq - 1;
if (RUDP_SEQ_GT(seq, max_ack)) {
seq = max_ack;
}
hcq = ep->ep_tx->t.msg.tx_hcq;
while (!TAILQ_EMPTY(&ep->e.msg.ep_sent_wqe)) {
wqe = TAILQ_FIRST(&ep->e.msg.ep_sent_wqe);
if (RUDP_SEQ_LE(wqe->ms_last_seq, seq)) {
TAILQ_REMOVE(&ep->e.msg.ep_sent_wqe, wqe, ms_link);
hcq->cqh_post(hcq, wqe->ms_context, wqe->ms_length);
TAILQ_INSERT_HEAD(&ep->ep_tx->t.msg.tx_free_wqe,
wqe, ms_link);
} else {
break;
}
}
credits = RUDP_SEQ_DIFF(seq, ep->e.msg.ep_last_rx_ack);
if (ep->e.msg.ep_seq_credits == 0 && credits > 0 &&
!TAILQ_EMPTY(&ep->e.msg.ep_posted_wqe)) {
usdf_msg_ep_ready(ep);
}
ep->e.msg.ep_seq_credits += credits;
ep->e.msg.ep_last_rx_ack = seq;
/* If all ACKed, cancel timer, else reset it */
if (seq == max_ack) {
usdf_timer_cancel(ep->ep_domain->dom_fabric,
ep->e.msg.ep_ack_timer);
} else {
usdf_timer_reset(ep->ep_domain->dom_fabric,
ep->e.msg.ep_ack_timer, USDF_RUDP_ACK_TIMEOUT);
}
}
/*
 * Process a NAK for sequence number 'seq': rewind the transmit stream
 * so that 'seq' becomes the next sequence transmitted, and restart
 * sending with a full credit allotment.
 */
static inline void
usdf_process_nak(struct usdf_ep *ep, uint16_t seq)
{
struct usdf_msg_qe *wqe;
size_t rewind;
/* Ignore NAKs of future packets */
if (RUDP_SEQ_GE(seq, ep->e.msg.ep_next_tx_seq)) {
return;
}
/*
 * Move any WQEs that contain NAKed sequences back to the
 * posted list. We set ms_resid == 0 here because final set to zero
 * is optimized out of the fastpath
 */
while (!TAILQ_EMPTY(&ep->e.msg.ep_sent_wqe)) {
wqe = TAILQ_LAST(&ep->e.msg.ep_sent_wqe, usdf_msg_qe_head);
TAILQ_REMOVE(&ep->e.msg.ep_sent_wqe, wqe, ms_link);
wqe->ms_resid = 0;
TAILQ_INSERT_HEAD(&ep->e.msg.ep_posted_wqe, wqe, ms_link);
}
/* NOTE(review): assumes the posted list is non-empty here; the guard
 * above (seq < ep_next_tx_seq) implies unACKed data exists, but an
 * explicit NULL check would be cheap insurance - confirm invariant */
wqe = TAILQ_FIRST(&ep->e.msg.ep_posted_wqe);
/* reset WQE to old sequence # */
/* ms_resid == 0 means the head WQE had been fully sent, so measure
 * the rewind from its last sequence; otherwise it was mid-flight and
 * the rewind is measured from the next-to-send sequence */
if (wqe->ms_resid == 0) {
rewind = RUDP_SEQ_DIFF(wqe->ms_last_seq, seq) + 1;
} else {
rewind = RUDP_SEQ_DIFF(ep->e.msg.ep_next_tx_seq, seq);
}
if (rewind > 0) {
/* full credit refill on retransmit, then back the cursor up by
 * 'rewind' MTU-sized segments and make the endpoint runnable */
ep->e.msg.ep_seq_credits = USDF_RUDP_SEQ_CREDITS;
ep->e.msg.ep_next_tx_seq = seq;
usdf_msg_rewind_qe(wqe, rewind,
ep->ep_domain->dom_fabric->fab_dev_attrs->uda_mtu -
sizeof(struct rudp_pkt));
usdf_msg_ep_ready(ep);
}
}
/*
 * ACK-timer expiration handler for a message endpoint.
 *
 * Treats the timeout as a self-generated NAK for the first
 * unacknowledged sequence number, forcing retransmission from there.
 * TX state is touched, so the domain progress lock is held throughout.
 */
void
usdf_msg_ep_timeout(void *vep)
{
	struct usdf_ep *ep = vep;
	struct usdf_domain *dom = ep->ep_domain;
	uint16_t first_unacked = ep->e.msg.ep_last_rx_ack + 1;

	pthread_spin_lock(&dom->dom_progress_lock);
	usdf_process_nak(ep, first_unacked);
	pthread_spin_unlock(&dom->dom_progress_lock);
}
/*
 * RX handler for an incoming ACK control packet: extract the wire
 * sequence number and run cumulative-ACK processing.
 */
static inline void
usdf_msg_rx_ack(struct usdf_ep *ep, struct rudp_pkt *pkt)
{
	usdf_msg_process_ack(ep, ntohs(pkt->msg.m.ack.ack_seq));
}
/*
 * RX handler for an incoming NAK control packet: first apply ACK
 * processing at the NAKed sequence, then rewind transmission to it.
 */
static inline void
usdf_msg_rx_nak(struct usdf_ep *ep, struct rudp_pkt *pkt)
{
	uint16_t nak_seq = ntohs(pkt->msg.m.nak.nak_seq);

	usdf_msg_process_ack(ep, nak_seq);
	usdf_process_nak(ep, nak_seq);
}
/*
 * Handle a receive on a queue servicing a message endpoint
 */
/*
 * Dispatches one RX completion: validates the sending peer, then acts
 * on the RUDP opcode (ACK/NAK control, or FIRST/LAST data segments,
 * scatter-copying payload into the current receive queue entry).
 * The packet buffer is always reposted before returning - every path,
 * including the successful ones, falls through to the 'dropit' label.
 */
static inline void
usdf_msg_handle_recv(struct usdf_domain *udp, struct usd_completion *comp)
{
struct rudp_pkt *pkt;
struct usdf_msg_qe *rqe;
struct usdf_ep *ep;
struct usd_qp *qp;
struct usdf_rx *rx;
uint32_t peer_id;
uint32_t opcode;
uint8_t *rx_ptr;
uint8_t *rqe_ptr;
size_t cur_iov;
size_t iov_resid;
size_t rxlen;
size_t copylen;
int ret;
pkt = comp->uc_context;
opcode = ntohs(pkt->msg.opcode);
peer_id = ntohs(pkt->msg.src_peer_id);
/* NOTE(review): bound check uses '>' - if dom_peer_tab holds exactly
 * USDF_MAX_PEERS entries this should be '>='; confirm table size */
if (peer_id > USDF_MAX_PEERS) {
qp = comp->uc_qp;
rx = qp->uq_context;
goto dropit;
}
ep = udp->dom_peer_tab[peer_id];
if (ep == NULL) {
/* unknown peer - still need an rx to repost the buffer on */
qp = comp->uc_qp;
rx = qp->uq_context;
goto dropit;
}
rx = ep->ep_rx;
/* NOTE(review): RUDP_OP_ONLY (FIRST|LAST) and RUDP_OP_MID match no
 * case below and are silently dropped - verify the sender never
 * emits them, or that handling exists elsewhere */
switch (opcode) {
case RUDP_OP_ACK:
usdf_msg_rx_ack(ep, pkt);
break;
case RUDP_OP_NAK:
usdf_msg_rx_nak(ep, pkt);
break;
case RUDP_OP_FIRST:
/* first segment of a multi-packet message */
ret = usdf_msg_check_seq(ep, pkt);
if (ret == -1) {
goto dropit;
}
/* bind a posted RQE to this in-progress message if none yet */
rqe = ep->e.msg.ep_cur_recv;
if (rqe == NULL) {
if (TAILQ_EMPTY(&rx->r.msg.rx_posted_rqe)) {
goto dropit;
}
rqe = TAILQ_FIRST(&rx->r.msg.rx_posted_rqe);
TAILQ_REMOVE(&rx->r.msg.rx_posted_rqe, rqe, ms_link);
ep->e.msg.ep_cur_recv = rqe;
}
/* payload starts immediately after the RUDP header */
rx_ptr = (uint8_t *)(pkt + 1);
rxlen = ntohs(pkt->msg.m.rc_data.length);
rqe->ms_length += rxlen;
rqe_ptr = (uint8_t *)rqe->ms_cur_ptr;
iov_resid = rqe->ms_iov_resid;
cur_iov = rqe->ms_cur_iov;
/* scatter payload across the RQE's iovec list */
while (rxlen > 0) {
copylen = MIN(rxlen, iov_resid);
memcpy(rqe_ptr, rx_ptr, copylen);
rx_ptr += copylen;
rxlen -= copylen;
iov_resid -= copylen;
if (iov_resid == 0) {
if (cur_iov == rqe->ms_last_iov) {
break;
}
++cur_iov;
rqe_ptr = rqe->ms_iov[cur_iov].iov_base;
iov_resid = rqe->ms_iov[cur_iov].iov_len;
} else {
rqe_ptr += copylen;
}
}
/* NOTE(review): the advanced rqe_ptr/iov_resid/cur_iov are not
 * written back into the rqe here, so a subsequent segment would
 * restart from the saved cursor - verify against upstream (lines
 * may be missing from this diff view) */
break;
case RUDP_OP_LAST:
/* final segment - same copy logic, then complete the receive */
ret = usdf_msg_check_seq(ep, pkt);
if (ret == -1) {
goto dropit;
}
rqe = ep->e.msg.ep_cur_recv;
if (rqe == NULL) {
rqe = TAILQ_FIRST(&rx->r.msg.rx_posted_rqe);
if (rqe == NULL) {
goto dropit;
}
TAILQ_REMOVE(&rx->r.msg.rx_posted_rqe, rqe, ms_link);
ep->e.msg.ep_cur_recv = rqe;
}
rx_ptr = (uint8_t *)(pkt + 1);
rxlen = ntohs(pkt->msg.m.rc_data.length);
rqe->ms_length += rxlen;
rqe_ptr = (uint8_t *)rqe->ms_cur_ptr;
iov_resid = rqe->ms_iov_resid;
cur_iov = rqe->ms_cur_iov;
while (rxlen > 0) {
copylen = MIN(rxlen, iov_resid);
memcpy(rqe_ptr, rx_ptr, copylen);
rx_ptr += copylen;
rxlen -= copylen;
iov_resid -= copylen;
if (iov_resid == 0) {
if (cur_iov == rqe->ms_last_iov) {
break;
}
++cur_iov;
rqe_ptr = rqe->ms_iov[cur_iov].iov_base;
iov_resid = rqe->ms_iov[cur_iov].iov_len;
} else {
rqe_ptr += copylen;
}
}
/* rxlen > 0 means the RQE ran out of buffer space: the message
 * was truncated; otherwise deliver the completed receive */
if (rxlen > 0) {
rqe->ms_length -= rxlen;
/* printf("RQE truncated XXX\n"); */
} else {
usdf_msg_recv_complete(ep, rqe);
}
break;
default:
break;
}
dropit:
/* repost buffer */
_usdf_msg_post_recv(rx, pkt,
rx->rx_domain->dom_fabric->fab_dev_attrs->uda_mtu);
}
/*
 * Process message completions
 *
 * Drains every pending completion from the hard CQ and dispatches each
 * one to the send- or receive-side handler by completion type.
 */
void
usdf_msg_hcq_progress(struct usdf_cq_hard *hcq)
{
	struct usd_completion comp;

	for (;;) {
		if (usd_poll_cq(hcq->cqh_ucq, &comp) == -EAGAIN) {
			break;
		}

		if (comp.uc_type == USD_COMPTYPE_SEND) {
			usdf_msg_send_completion(&comp);
		} else if (comp.uc_type == USD_COMPTYPE_RECV) {
			usdf_msg_handle_recv(hcq->cqh_cq->cq_domain, &comp);
		}
	}
}

Просмотреть файл

@ -36,9 +36,54 @@
#ifndef _USDF_MSG_H_
#define _USDF_MSG_H_
#define USDF_MSG_CAPS (FI_MSG | FI_SOURCE | FI_SEND | FI_RECV)
#define USDF_MSG_SUPP_MODE (FI_LOCAL_MR)
#define USDF_MSG_REQ_MODE (FI_LOCAL_MR)
#define USDF_MSG_MAX_SGE 8
#define USDF_MSG_DFLT_SGE 8
#define USDF_MSG_MAX_CTX_SIZE 1024
#define USDF_MSG_DFLT_CTX_SIZE 128
#define USDF_MSG_MAX_MSG UINT_MAX
#define USDF_MSG_FAIRNESS_CREDITS 16
#define USDF_MSG_RUDP_SEQ_CREDITS 256
struct usdf_msg_qe {
void *ms_context;
struct iovec ms_iov[USDF_MSG_MAX_SGE];
size_t ms_last_iov;
size_t ms_length;
uint16_t ms_first_seq;
uint16_t ms_last_seq;
size_t ms_cur_iov;
const uint8_t *ms_cur_ptr;
size_t ms_resid; /* amount remaining in entire msg */
size_t ms_iov_resid; /* amount remaining in current iov */
TAILQ_ENTRY(usdf_msg_qe) ms_link;
};
int usdf_msg_post_recv(struct usdf_rx *rx, void *buf, size_t len);
int usdf_msg_fill_tx_attr(struct fi_tx_attr *txattr);
int usdf_msg_fill_rx_attr(struct fi_rx_attr *rxattr);
int usdf_cq_msg_poll(struct usd_cq *ucq, struct usd_completion *comp);
void usdf_msg_ep_timeout(void *vep);
void usdf_msg_hcq_progress(struct usdf_cq_hard *hcq);
void usdf_msg_tx_progress(struct usdf_tx *tx);
/* fi_ops_cm for RC */
int usdf_cm_msg_connect(struct fid_ep *ep, const void *addr,
const void *param, size_t paramlen);
int usdf_cm_msg_accept(struct fid_ep *fep, const void *param, size_t paramlen);
int usdf_cm_msg_shutdown(struct fid_ep *ep, uint64_t flags);
/* fi_ops_msg for RC */

Просмотреть файл

@ -58,10 +58,12 @@
#include "fi.h"
#include "fi_enosys.h"
#include "fi_usnic.h"
#include "usnic_direct.h"
#include "usd.h"
#include "usdf.h"
#include "usdf_cm.h"
#include "usdf_msg.h"
int
usdf_pep_bind(fid_t fid, fid_t bfid, uint64_t flags)
@ -73,7 +75,6 @@ usdf_pep_bind(fid_t fid, fid_t bfid, uint64_t flags)
switch (bfid->fclass) {
case FI_CLASS_EQ:
printf("bind EQ!\n");
if (pep->pep_eq != NULL) {
return -FI_EINVAL;
}
@ -88,12 +89,100 @@ printf("bind EQ!\n");
return 0;
}
/*
* Report an error to the PEP's EQ
*/
static void
usdf_pep_accept_error(struct usdf_pep *pep, int error)
static struct fi_info *
usdf_pep_conn_info(struct usdf_connreq *crp)
{
struct fi_info *ip;
struct usdf_pep *pep;
struct sockaddr_in *sin;
struct usdf_fabric *fp;
struct usdf_domain *udp;
struct usd_device_attrs *dap;
struct usdf_connreq_msg *reqp;
pep = crp->cr_pep;
fp = pep->pep_fabric;
udp = LIST_FIRST(&fp->fab_domain_list);
dap = fp->fab_dev_attrs;
reqp = (struct usdf_connreq_msg *)crp->cr_data;
/* If there is a domain, just copy info from there */
if (udp != NULL) {
ip = fi_dupinfo(udp->dom_info);
if (ip == NULL) {
return NULL;
}
/* no domains yet, make an info suitable for creating one */
} else {
ip = fi_allocinfo_internal();
if (ip == NULL) {
return NULL;
}
ip->caps = USDF_MSG_CAPS;
ip->mode = USDF_MSG_SUPP_MODE;
ip->ep_type = FI_EP_MSG;
ip->addr_format = FI_SOCKADDR_IN;
ip->src_addrlen = sizeof(struct sockaddr_in);
sin = calloc(1, ip->src_addrlen);
if (sin == NULL) {
goto fail;
}
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = dap->uda_ipaddr_be;
ip->src_addr = sin;
ip->ep_attr->protocol = FI_PROTO_RUDP;
ip->fabric_attr->fabric = fab_utof(fp);
ip->fabric_attr->name = strdup(fp->fab_attr.name);
ip->fabric_attr->prov_name = strdup(fp->fab_attr.prov_name);
ip->fabric_attr->prov_version = fp->fab_attr.prov_version;
if (ip->fabric_attr->name == NULL ||
ip->fabric_attr->prov_name == NULL) {
goto fail;
}
}
/* fill in dest addr */
ip->dest_addrlen = ip->src_addrlen;
sin = calloc(1, ip->dest_addrlen);
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = reqp->creq_ipaddr;
sin->sin_port = reqp->creq_port;
ip->connreq = crp;
return ip;
fail:
fi_freeinfo(ip);
return NULL;
}
/*
* Remove connection request from epoll list if not done already.
* crp->cr_pollitem.pi_rtn is non-NULL when epoll() is active
*/
static int
usdf_pep_creq_epoll_del(struct usdf_connreq *crp)
{
int ret;
struct usdf_pep *pep;
pep = crp->cr_pep;
if (crp->cr_pollitem.pi_rtn != NULL) {
ret = epoll_ctl(pep->pep_fabric->fab_epollfd, EPOLL_CTL_DEL,
crp->cr_sockfd, NULL);
crp->cr_pollitem.pi_rtn = NULL;
if (ret != 0) {
ret = -errno;
}
} else {
ret = 0;
}
return ret;
}
static int
@ -102,6 +191,9 @@ usdf_pep_read_connreq(void *v)
struct usdf_connreq *crp;
struct usdf_pep *pep;
struct usdf_connreq_msg *reqp;
struct fi_eq_cm_entry *entry;
size_t entry_len;
int ret;
int n;
crp = v;
@ -109,25 +201,51 @@ usdf_pep_read_connreq(void *v)
n = read(crp->cr_sockfd, crp->cr_ptr, crp->cr_resid);
if (n == -1) {
usdf_pep_accept_error(pep, -errno);
// XXX DEL epoll item
close(crp->cr_sockfd);
TAILQ_REMOVE(&pep->pep_cr_pending, crp, cr_link);
usdf_cm_msg_connreq_failed(crp, -errno);
return 0;
}
crp->cr_ptr += n;
crp->cr_resid -= n;
reqp = (struct usdf_connreq_msg *)crp->cr_data;
if (crp->cr_resid == 0 && crp->cr_ptr == crp->cr_data + sizeof(*reqp)) {
reqp = (struct usdf_connreq_msg *)crp->cr_data;
crp->cr_resid = ntohl(reqp->creq_data_len);
reqp->creq_datalen = ntohl(reqp->creq_datalen);
crp->cr_resid = reqp->creq_datalen;
}
/* if resid is 0 now, completely done */
if (crp->cr_resid == 0) {
// DEL epoll_wait
// create CONNREQ EQ entry
ret = usdf_pep_creq_epoll_del(crp);
if (ret != 0) {
usdf_cm_msg_connreq_failed(crp, ret);
return 0;
}
/* create CONNREQ EQ entry */
entry_len = sizeof(*entry) + reqp->creq_datalen;
entry = malloc(entry_len);
if (entry == NULL) {
usdf_cm_msg_connreq_failed(crp, -errno);
return 0;
}
entry->fid = &pep->pep_fid.fid;
entry->info = usdf_pep_conn_info(crp);
if (entry->info == NULL) {
free(entry);
usdf_cm_msg_connreq_failed(crp, -FI_ENOMEM);
return 0;
}
memcpy(entry->data, reqp->creq_data, reqp->creq_datalen);
ret = usdf_eq_write_internal(pep->pep_eq, FI_CONNREQ, entry,
entry_len, 0);
free(entry);
if (ret != entry_len) {
usdf_cm_msg_connreq_failed(crp, ret);
return 0;
}
}
return 0;
@ -149,21 +267,21 @@ usdf_pep_listen_cb(void *v)
socklen = sizeof(sin);
s = accept(pep->pep_sock, &sin, &socklen);
if (s == -1) {
usdf_pep_accept_error(pep, -errno);
/* ignore early failure */
return 0;
}
printf("connreq on %p, s = %d (%x)!\n", pep, s, sin.sin_addr.s_addr);
crp = NULL;
pthread_spin_lock(&pep->pep_cr_lock);
if (!TAILQ_EMPTY(&pep->pep_cr_free)) {
crp = TAILQ_FIRST(&pep->pep_cr_free);
TAILQ_REMOVE(&pep->pep_cr_free, crp, cr_link);
TAILQ_REMOVE_MARK(&pep->pep_cr_free, crp, cr_link);
TAILQ_NEXT(crp, cr_link) = NULL;
}
pthread_spin_unlock(&pep->pep_cr_lock);
/* no room for request, just drop it */
if (crp == NULL) {
// send response?
/* XXX send response? */
close(s);
return 0;
}
@ -181,9 +299,8 @@ printf("connreq on %p, s = %d (%x)!\n", pep, s, sin.sin_addr.s_addr);
ret = epoll_ctl(pep->pep_fabric->fab_epollfd, EPOLL_CTL_ADD,
crp->cr_sockfd, &ev);
if (ret == -1) {
usdf_pep_accept_error(pep, -errno);
close(crp->cr_sockfd);
TAILQ_INSERT_TAIL(&pep->pep_cr_free, crp, cr_link);
crp->cr_pollitem.pi_rtn = NULL;
usdf_cm_msg_connreq_failed(crp, -errno);
return 0;
}
@ -208,7 +325,7 @@ usdf_pep_listen(struct fid_pep *fpep)
ret = -errno;
}
pep->pep_pollitem.pi_rtn = &usdf_pep_listen_cb;
pep->pep_pollitem.pi_rtn = usdf_pep_listen_cb;
pep->pep_pollitem.pi_context = pep;
ev.events = EPOLLIN;
ev.data.ptr = &pep->pep_pollitem;
@ -226,12 +343,6 @@ usdf_pep_cancel(fid_t fid, void *context)
return -FI_EINVAL;
}
int
usdf_pep_accept(struct fid_ep *ep, const void *param, size_t paramlen)
{
return 0;
}
int
usdf_pep_reject(struct fid_pep *pep, fi_connreq_t connreq,
const void *param, size_t paramlen)
@ -264,9 +375,6 @@ usdf_pep_grow_backlog(struct usdf_pep *pep)
size_t extra;
extra = sizeof(struct usdf_connreq_msg) + pep->pep_cr_max_data;
if (extra < sizeof(struct usdf_connresp_msg)) {
extra = sizeof(struct usdf_connresp_msg);
}
while (pep->pep_cr_alloced < pep->pep_backlog) {
crp = calloc(1, sizeof(*crp) + extra);
@ -316,6 +424,8 @@ static struct fi_ops_ep usdf_pep_base_ops = {
.cancel = usdf_pep_cancel,
.getopt = fi_no_getopt,
.setopt = fi_no_setopt,
.tx_ctx = fi_no_tx_ctx,
.rx_ctx = fi_no_rx_ctx,
};
static struct fi_ops_cm usdf_pep_cm_ops = {
@ -324,7 +434,7 @@ static struct fi_ops_cm usdf_pep_cm_ops = {
.getpeer = fi_no_getpeer,
.connect = fi_no_connect,
.listen = usdf_pep_listen,
.accept = usdf_pep_accept,
.accept = fi_no_accept,
.reject = usdf_pep_reject,
.shutdown = fi_no_shutdown,
.join = fi_no_join,

Просмотреть файл

@ -111,7 +111,7 @@ usdf_fabric_progression_thread(void *v)
}
n = epoll_wait(epfd, &ev, 1, sleep_time);
if (n == -1) {
if (fp->fab_exit || (n == -1 && errno != EINTR)) {
pthread_exit(NULL);
}
@ -126,9 +126,31 @@ usdf_fabric_progression_thread(void *v)
/* call timer progress each wakeup */
usdf_timer_progress(fp);
if (fp->fab_exit) {
pthread_exit(NULL);
}
}
}
/*
* Progress operations in this domain
*/
void
usdf_domain_progress(struct usdf_domain *udp)
{
struct usdf_tx *tx;
struct usdf_cq_hard *hcq;
/* one big hammer lock... */
pthread_spin_lock(&udp->dom_progress_lock);
TAILQ_FOREACH(hcq, &udp->dom_hcq_list, cqh_dom_link) {
hcq->cqh_progress(hcq);
}
while (!TAILQ_EMPTY(&udp->dom_tx_ready)) {
tx = TAILQ_FIRST(&udp->dom_tx_ready);
TAILQ_REMOVE_MARK(&udp->dom_tx_ready, tx, tx_link);
tx->tx_progress(tx);
}
pthread_spin_unlock(&udp->dom_progress_lock);
}

Просмотреть файл

@ -42,9 +42,11 @@ struct usdf_poll_item {
};
struct usdf_fabric;
struct usdf_domain;
void *usdf_fabric_progression_thread(void *v);
int usdf_fabric_wake_thread(struct usdf_fabric *fp);
int usdf_fabric_progression_cb(void *v);
void usdf_domain_progress(struct usdf_domain *udp);
#endif /* _USDF_PROGRESS_H_ */

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,162 @@
/*
* Copyright (c) 2014, Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _USDF_RDM_H_
#define _USDF_RDM_H_
/* capability/mode bits and sizing limits for the usNIC RDM endpoint */
#define USDF_RDM_CAPS (FI_MSG | FI_SOURCE | FI_SEND | FI_RECV)
#define USDF_RDM_SUPP_MODE (FI_LOCAL_MR)
#define USDF_RDM_REQ_MODE (FI_LOCAL_MR)
#define USDF_RDM_MAX_SGE 8
#define USDF_RDM_DFLT_SGE 8
#define USDF_RDM_MAX_CTX_SIZE 1024
#define USDF_RDM_DFLT_CTX_SIZE 128
#define USDF_RDM_MAX_MSG UINT_MAX
/* allocation/hashing parameters for the connection cache */
#define USDF_RDM_FREE_BLOCK (16 * 1024)
#define USDF_RDM_HASH_SIZE (64 * 1024)
#define USDF_RDM_HASH_MASK (USDF_RDM_HASH_SIZE - 1)
#define USDF_RDM_FAIRNESS_CREDITS 16
#define USDF_RDM_RUDP_SEQ_CREDITS 256
#define USDF_RDM_RDC_TIMEOUT 1000 /* ms */
/* one queued send or receive work entry for the RDM path */
struct usdf_rdm_qe {
void *rd_context;
uint32_t rd_msg_id_be;
struct iovec rd_iov[USDF_RDM_MAX_SGE];
size_t rd_last_iov;
size_t rd_length;
size_t rd_cur_iov;
const uint8_t *rd_cur_ptr;
size_t rd_resid; /* amount remaining in entire rdm */
size_t rd_iov_resid; /* amount remaining in current iov */
TAILQ_ENTRY(usdf_rdm_qe) rd_link;
struct usdf_rdm_connection *rd_conn;
};
/*
 * RDM connection state
 */
enum {
USDF_DCS_UNCONNECTED = 0,
USDF_DCS_CONNECTING = 1,
USDF_DCS_CONNECTED = 2
};
/* dc_flags layout: low two bits hold the state enum above */
#define USDF_DCF_STATE_BITS 0x03
#define USDF_DCF_NEW_RX 0x04
/*
 * We're only connectionless to the app.
 * This connection struct is used to manage messages in flight.
 */
struct usdf_rdm_connection {
atomic_t dc_refcnt;
struct usdf_tx *dc_tx;
struct usd_udp_hdr dc_hdr;
uint16_t dc_flags;
struct usdf_timer_entry *dc_timer;
/* RX state */
uint32_t dc_rx_msg_id;
struct usdf_rdm_qe *dc_cur_rqe;
uint16_t dc_next_rx_seq;
uint16_t dc_send_nak;
uint32_t dc_ack_msg_id;
uint16_t dc_ack_seq;
TAILQ_ENTRY(usdf_rdm_connection) dc_ack_link;
/* TX state */
struct usdf_dest *dc_dest;
TAILQ_HEAD(,usdf_rdm_qe) dc_wqe_posted;
TAILQ_HEAD(,usdf_rdm_qe) dc_wqe_sent;
uint16_t dc_next_tx_seq;
uint16_t dc_last_rx_ack;
size_t dc_fairness_credits;
size_t dc_seq_credits;
TAILQ_ENTRY(usdf_rdm_connection) dc_tx_link;
SLIST_ENTRY(usdf_rdm_connection) dc_addr_link;
struct usdf_rdm_connection *dc_hash_next;
};
/* RDM helpers: buffer posting, attribute fill-in, progress hooks */
int usdf_rdm_post_recv(struct usdf_rx *rx, void *buf, size_t len);
int usdf_rdm_fill_tx_attr(struct fi_tx_attr *txattr);
int usdf_rdm_fill_rx_attr(struct fi_rx_attr *rxattr);
int usdf_cq_rdm_poll(struct usd_cq *ucq, struct usd_completion *comp);
void usdf_rdm_rdc_timeout(void *vrdc);
void usdf_rdm_hcq_progress(struct usdf_cq_hard *hcq);
void usdf_rdm_tx_progress(struct usdf_tx *tx);
/* fi_ops_cm for RC */
int usdf_cm_rdm_connect(struct fid_ep *ep, const void *addr,
const void *param, size_t paramlen);
int usdf_cm_rdm_accept(struct fid_ep *fep, const void *param, size_t paramlen);
int usdf_cm_rdm_shutdown(struct fid_ep *ep, uint64_t flags);
/* fi_ops_rdm for RC */
ssize_t usdf_rdm_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr, void *context);
ssize_t usdf_rdm_recvv(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, fi_addr_t src_addr, void *context);
ssize_t usdf_rdm_recvmsg(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags);
ssize_t usdf_rdm_send(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t src_addr, void *context);
ssize_t usdf_rdm_sendv(struct fid_ep *ep, const struct iovec *iov,
void **desc, size_t count, fi_addr_t src_addr, void *context);
ssize_t usdf_rdm_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags);
ssize_t usdf_rdm_senddata(struct fid_ep *ep, const void *buf, size_t len,
void *desc, uint64_t data, fi_addr_t src_addr, void *context);
ssize_t usdf_rdm_inject(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t src_addr);
#endif /* _USDF_RDM_H_ */

Просмотреть файл

@ -0,0 +1,109 @@
/*
* Copyright (c) 2014, Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _USDF_RUDP_H_
#define _USDF_RUDP_H_
#include "usnic_direct.h"
/* max unACKed sequence numbers a sender may have outstanding */
#define USDF_RUDP_SEQ_CREDITS 256
/* retransmit/ACK timer period */
#define USDF_RUDP_ACK_TIMEOUT 5 /* ms */
/*
 * Serial-number arithmetic on 16-bit sequence numbers: the modular
 * difference is reinterpreted as signed so comparisons remain correct
 * across wraparound (RFC 1982 style).
 */
#define RUDP_SEQ_DIFF(A, B) ((int16_t)((u_int16_t)(A) - (u_int16_t)(B)))
#define RUDP_SEQ_LT(A, B) (RUDP_SEQ_DIFF((A), (B)) < 0)
#define RUDP_SEQ_LE(A, B) (RUDP_SEQ_DIFF((A), (B)) <= 0)
#define RUDP_SEQ_GT(A, B) (RUDP_SEQ_DIFF((A), (B)) > 0)
#define RUDP_SEQ_GE(A, B) (RUDP_SEQ_DIFF((A), (B)) >= 0)
/* same wraparound-safe comparisons for 32-bit message IDs */
#define RUDP_MSGID_DIFF(A, B) ((int32_t)((u_int32_t)(A) - (u_int32_t)(B)))
#define RUDP_MSGID_LT(A, B) (RUDP_MSGID_DIFF((A), (B)) < 0)
#define RUDP_MSGID_LE(A, B) (RUDP_MSGID_DIFF((A), (B)) <= 0)
#define RUDP_MSGID_GT(A, B) (RUDP_MSGID_DIFF((A), (B)) > 0)
#define RUDP_MSGID_GE(A, B) (RUDP_MSGID_DIFF((A), (B)) >= 0)
/* wire opcodes; data opcodes are a bitmask of FIRST and LAST */
enum {
/* data messages (a bitmask of FIRST and LAST) */
RUDP_OP_MID = 0x00,
RUDP_OP_FIRST = 0x01,
RUDP_OP_LAST = 0x02,
RUDP_OP_ONLY = 0x03,
/* control messages */
RUDP_OP_CONNECT_REQ = 0x81,
RUDP_OP_CONNECT_RESP = 0x82,
RUDP_OP_NAK = 0x83,
RUDP_OP_ACK = 0x84,
};
#define RUDP_OP_DATA_MASK (RUDP_OP_FIRST | RUDP_OP_LAST)
/* payload header of an RC data message (trailing comments give the
 * running byte offset within the packed struct) */
struct rudp_rc_data_msg {
u_int32_t offset; /* 4 */
u_int16_t rkey; /* 8 */
u_int16_t length; /* 10 */
u_int16_t seqno; /* 12 */
u_int16_t rdma_id; /* 14 */
} __attribute__ ((__packed__));
/* common RUDP message header followed by a per-opcode body */
struct rudp_msg {
u_int16_t opcode;
u_int16_t src_peer_id;
u_int32_t msg_id;
union {
struct rudp_rc_data_msg rc_data;
struct {
u_int16_t dst_peer_id;
} connect_req;
struct {
u_int16_t dst_peer_id;
} connect_resp;
struct {
u_int16_t ack_seq;
} ack;
struct {
u_int16_t nak_seq;
u_int32_t seq_mask;
} nak;
} __attribute__ ((__packed__)) m;
} __attribute__ ((__packed__));
/* full on-wire packet: UDP encapsulation header plus RUDP message */
struct rudp_pkt {
struct usd_udp_hdr hdr;
struct rudp_msg msg;
} __attribute__ ((__packed__));
#endif /* _USDF_RUDP_H_ */

Просмотреть файл

@ -131,21 +131,21 @@ usdf_timer_cancel(struct usdf_fabric *fp, struct usdf_timer_entry *entry)
* be called again until usdf_timer_set() is called again to re-set it.
* usdf_timer_set() is safe to call from timer service routine.
*/
int
usdf_timer_set(struct usdf_fabric *fp, struct usdf_timer_entry *entry,
static inline int
_usdf_timer_do_set(struct usdf_fabric *fp, struct usdf_timer_entry *entry,
uint32_t ms)
{
int ret;
unsigned bucket;
pthread_spin_lock(&fp->fab_timer_lock);
/* If no timers active, cur_bucket_ms may need catchup */
if (fp->fab_active_timer_count == 0) {
++fp->fab_active_timer_count;
if (fp->fab_active_timer_count == 1) {
fp->fab_cur_bucket_ms = usdf_get_ms();
ret = usdf_fabric_wake_thread(fp);
if (ret != 0) {
goto out;
--fp->fab_active_timer_count;
return ret;
}
}
@ -156,21 +156,47 @@ usdf_timer_set(struct usdf_fabric *fp, struct usdf_timer_entry *entry,
// we could make "overflow" bucket...
if (ms >= USDF_NUM_TIMER_BUCKETS) {
ret = -FI_EINVAL;
goto out;
--fp->fab_active_timer_count;
return -FI_EINVAL;
}
bucket = (fp->fab_cur_bucket + ms) & (USDF_NUM_TIMER_BUCKETS - 1);
LIST_INSERT_HEAD(&fp->fab_timer_buckets[bucket], entry, te_link);
entry->te_flags |= USDF_TF_QUEUED;
++fp->fab_active_timer_count;
ret = 0;
return 0;
}
out:
int
usdf_timer_set(struct usdf_fabric *fp, struct usdf_timer_entry *entry,
uint32_t ms)
{
int ret;
pthread_spin_lock(&fp->fab_timer_lock);
if (entry->te_flags & USDF_TF_QUEUED) {
ret = 0;
} else {
ret = _usdf_timer_do_set(fp, entry, ms);
}
pthread_spin_unlock(&fp->fab_timer_lock);
return ret;
}
int
usdf_timer_reset(struct usdf_fabric *fp, struct usdf_timer_entry *entry,
uint32_t ms)
{
int ret;
pthread_spin_lock(&fp->fab_timer_lock);
ret = _usdf_timer_do_set(fp, entry, ms);
pthread_spin_unlock(&fp->fab_timer_lock);
return ret;
}
static inline void
usdf_run_bucket(struct usdf_fabric *fp, struct usdf_timer_bucket *bp)
{

Просмотреть файл

@ -61,6 +61,8 @@ void usdf_timer_free(struct usdf_fabric *fp, struct usdf_timer_entry *entry);
int usdf_timer_set(struct usdf_fabric *fp, struct usdf_timer_entry *entry,
uint32_t timeout);
int usdf_timer_reset(struct usdf_fabric *fp, struct usdf_timer_entry *entry,
uint32_t timeout);
void usdf_timer_cancel(struct usdf_fabric *fp, struct usdf_timer_entry *entry);

Просмотреть файл

@ -60,7 +60,7 @@
#define USD_SF_ISSET(flags, flagname) \
((flags >> USD_SFS_##flagname) & 1)
#define USD_SEND_MAX_COPY 1024
#define USD_SEND_MAX_COPY 992
#define USD_MAX_CQ_GROUP 1024
#define USD_MAX_PRESEND 4

Просмотреть файл

@ -87,11 +87,38 @@ usd_desc_to_rq_comp(
CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK;
if (bytes_written_flags & CQ_ENET_RQ_DESC_FLAGS_TRUNCATED ||
(edesc->flags & ipudpok) != ipudpok) {
if (edesc->flags & CQ_ENET_RQ_DESC_FLAGS_FCS_OK ||
bytes_written != 0)
if (((edesc->flags & CQ_ENET_RQ_DESC_FLAGS_FCS_OK) == 0) &&
bytes_written == 0) {
size_t rcvbuf_len;
dma_addr_t bus_addr;
u16 len;
u8 type;
uint16_t i;
i = q_index;
rcvbuf_len = 0;
do {
rq_enet_desc_dec( (struct rq_enet_desc *)
((uintptr_t)rq->urq_vnic_rq.ring.descs + (i<<4)),
&bus_addr, &type, &len);
rcvbuf_len += len;
i = (i - 1) & rq->urq_post_index_mask;
} while (type == RQ_ENET_TYPE_NOT_SOP);
/*
* If only the paddings to meet 64-byte minimum eth frame
* requirement are truncated, do not mark packet as
* error due to truncation.
* The usnic hdr should not be split into multiple receive buffer
*/
if (ntohs(((struct usd_udp_hdr *)bus_addr)->uh_ip.tot_len)
+ sizeof(struct ether_header) > rcvbuf_len)
comp->uc_status = USD_COMPSTAT_ERROR_TRUNC;
else
comp->uc_status = USD_COMPSTAT_SUCCESS;
} else {
comp->uc_status = USD_COMPSTAT_ERROR_CRC;
else
comp->uc_status = USD_COMPSTAT_ERROR_TRUNC;
}
} else {
comp->uc_status = USD_COMPSTAT_SUCCESS;
}

Просмотреть файл

@ -100,7 +100,6 @@ usd_post_recv(
vnic_rq_post(vrq, iovp[0].iov_base, 0,
(dma_addr_t) iovp[0].iov_base, iovp[0].iov_len, 0);
for (i = 1; i < recv_list->urd_iov_cnt; ++i) {
rq->urq_context[rq->urq_post_index] = recv_list->urd_context;

Просмотреть файл

@ -43,6 +43,8 @@
#ifndef _USD_POST_H_
#define _USD_POST_H_
#include <sys/uio.h>
#include "usd.h"
#include "usd_util.h"
@ -94,7 +96,6 @@ _usd_post_send_two(
struct vnic_wq *vwq;
uint32_t index;
struct wq_enet_desc *desc;
uint64_t wr;
u_int8_t offload_mode = 0, eop;
u_int16_t mss = 7, header_length = 0, vlan_tag = 0;
u_int8_t vlan_tag_insert = 0, loopback = 0, fcoe_encap = 0;
@ -119,8 +120,7 @@ _usd_post_send_two(
vlan_tag_insert, vlan_tag, loopback);
wmb();
wr = vnic_cached_posted_index((dma_addr_t)hdr, hdrlen, index);
iowrite64(wr, &vwq->ctrl->posted_index);
iowrite32(index, &vwq->ctrl->posted_index);
wq->uwq_next_desc = (struct wq_enet_desc *)
((uintptr_t)wq->uwq_desc_ring + (index<<4));
@ -130,4 +130,51 @@ _usd_post_send_two(
return index;
}
/*
* Consume iov count credits, assumes that iov[0] includes usnic header
*/
static inline uint32_t
_usd_post_send_iov(
struct usd_wq *wq,
const struct iovec *iov,
size_t count,
u_int8_t cq_entry)
{
struct vnic_wq *vwq;
uint32_t index;
struct wq_enet_desc *desc;
u_int8_t offload_mode = 0;
u_int16_t mss = 7, header_length = 0, vlan_tag = 0;
u_int8_t vlan_tag_insert = 0, loopback = 0, fcoe_encap = 0;
unsigned i;
vwq = &wq->uwq_vnic_wq;
desc = wq->uwq_next_desc;
index = wq->uwq_post_index;
for (i = 0; i < count - 1; i++) {
wq_enet_desc_enc(desc, (uintptr_t)(iov[i].iov_base),
iov[i].iov_len, mss, header_length, offload_mode,
0, 0, fcoe_encap, vlan_tag_insert, vlan_tag, loopback);
desc = (struct wq_enet_desc *) ((uintptr_t)wq->uwq_desc_ring
+ (index<<4));
index = (index+1) & wq->uwq_post_index_mask;
}
wq_enet_desc_enc(desc, (uintptr_t)(iov[i].iov_base),
iov[i].iov_len, mss, header_length, offload_mode,
1, cq_entry, fcoe_encap, vlan_tag_insert, vlan_tag, loopback);
wmb();
iowrite32(index, &vwq->ctrl->posted_index);
wq->uwq_next_desc = (struct wq_enet_desc *)
((uintptr_t)wq->uwq_desc_ring + (index<<4));
wq->uwq_post_index = (index+1) & wq->uwq_post_index_mask;
wq->uwq_send_credits -= count;
return index;
}
#endif /* _USD_POST_H_ */

Просмотреть файл

@ -224,9 +224,59 @@ usd_post_send_two_copy_udp_normal(
return 0;
}
static int
usd_post_send_iov_udp_normal(struct usd_qp *uqp,
struct usd_dest *dest, const struct iovec* iov,
size_t iov_count, uint32_t flags, void *context)
{
struct usd_qp_impl *qp;
struct usd_udp_hdr *hdr;
struct usd_wq *wq;
uint32_t last_post;
uint8_t *copybuf;
struct usd_wq_post_info *info;
struct iovec send_iov[USD_SEND_MAX_SGE + 1];
size_t len;
unsigned i;
qp = to_qpi(uqp);
wq = &qp->uq_wq;
copybuf = wq->uwq_copybuf + wq->uwq_post_index * USD_SEND_MAX_COPY;
for (i = 0, len = 0; i < iov_count; i++) {
len += iov[i].iov_len;
}
hdr = (struct usd_udp_hdr *)copybuf;
memcpy(hdr, &dest->ds_dest.ds_udp.u_hdr, sizeof(*hdr));
/* adjust lengths and insert source port */
hdr->uh_ip.tot_len = htons(len + sizeof(struct usd_udp_hdr) -
sizeof(struct ether_header));
hdr->uh_udp.len = htons((sizeof(struct usd_udp_hdr) -
sizeof(struct ether_header) -
sizeof(struct iphdr)) + len);
hdr->uh_udp.source =
qp->uq_attrs.uqa_local_addr.ul_addr.ul_udp.u_addr.sin_port;
send_iov[0].iov_base = hdr;
send_iov[0].iov_len = sizeof(*hdr);
memcpy(&send_iov[1], iov, sizeof(struct iovec) * iov_count);
last_post = _usd_post_send_iov(wq, send_iov, iov_count + 1,
USD_SF_ISSET(flags, SIGNAL));
info = &wq->uwq_post_info[last_post];
info->wp_context = context;
info->wp_len = len;
return 0;
}
struct usd_qp_ops usd_qp_ops_udp_normal = {
.qo_post_send_one = usd_post_send_one_udp_normal,
.qo_post_send_one_prefixed = usd_post_send_one_prefixed_udp_normal,
.qo_post_send_one_copy = usd_post_send_one_copy_udp_normal,
.qo_post_send_two_copy = usd_post_send_two_copy_udp_normal,
.qo_post_send_iov = usd_post_send_iov_udp_normal,
};

Просмотреть файл

@ -1116,6 +1116,16 @@ usd_create_qp(
}
rq->urq_state |= USD_QS_FILTER_ALLOC;
/* Fill in some attrs */
switch (transport) {
case USD_QTR_UDP:
qp->uq_attrs.uqa_hdr_len = sizeof(struct usd_udp_hdr);
break;
case USD_QTR_RAW:
qp->uq_attrs.uqa_hdr_len = 0;
break;
}
/*
* Now, do the type-specific configuration
*/
@ -1133,16 +1143,6 @@ usd_create_qp(
break;
}
/* Fill in some attrs */
switch (transport) {
case USD_QTR_UDP:
qp->uq_attrs.uqa_hdr_len = sizeof(struct usd_udp_hdr);
break;
case USD_QTR_RAW:
qp->uq_attrs.uqa_hdr_len = 0;
break;
}
*uqp_o = to_usdqp(qp);
return 0;

Просмотреть файл

@ -53,6 +53,7 @@
#define USD_MAX_DEVICES 8
#define USD_MAX_DEVNAME 16
#define USD_RECV_MAX_SGE 8
#define USD_SEND_MAX_SGE 8
enum usd_link_state {
USD_LINK_DOWN,
@ -147,6 +148,9 @@ struct usd_qp_ops {
int (*qo_post_send_two_copy)(struct usd_qp *qp,
struct usd_dest *dest, const void *hdr, size_t hdrlen,
const void *pkt, size_t pktlen, uint32_t flags, void *context);
int (*qo_post_send_iov)(struct usd_qp *qp,
struct usd_dest *dest, const struct iovec* iov,
size_t iov_count, uint32_t flags, void *context);
};
/*
@ -604,11 +608,16 @@ usd_post_send_two_copy(struct usd_qp *qp, struct usd_dest *dest,
/*
* Post an N-buffer send
* All buffers must be in registered memory.
* Requires iov_len + 1 send credits
* Requires iov_count + 1 send credits
*/
int usd_post_send_sge(struct usd_qp *qp, struct usd_dest *dest,
const struct iovec *iov, size_t iov_len, uint32_t flags, void *context);
/*
 * Post a scatter/gather send described by an iovec array.
 * Thin dispatcher: forwards to the QP-type-specific implementation
 * installed in qp->uq_ops (see e.g. usd_qp_ops_udp_normal).
 */
static inline int
usd_post_send_iov(struct usd_qp *qp, struct usd_dest *dest,
	const struct iovec *iov, size_t iov_count, uint32_t flags,
	void *context)
{
	int (*post_fn)(struct usd_qp *, struct usd_dest *,
		const struct iovec *, size_t, uint32_t, void *);

	post_fn = qp->uq_ops.qo_post_send_iov;
	return post_fn(qp, dest, iov, iov_count, flags, context);
}
/****************************************************************
* enum-to-string utility functions (for prettyprinting)
****************************************************************/

Просмотреть файл

@ -54,14 +54,28 @@
#include <rdma/fi_endpoint.h>
#include <rdma/fi_rma.h>
#include <rdma/fi_errno.h>
#include "fi.h"
#include "fi_enosys.h"
#include "prov.h"
#define PROV_NAME "verbs"
#define PROV_VERS FI_VERSION(0,7)
#define VERBS_PROV_NAME "verbs"
#define VERBS_PROV_VERS FI_VERSION(1,0)
#define PROV_WARN(fmt, ...) \
do { fprintf(stderr, "%s:%s: " fmt, PACKAGE, PROV_NAME, ##__VA_ARGS__); } while (0)
#define VERBS_WARN(fmt, ...) \
do { fprintf(stderr, "%s:%s: " fmt, PACKAGE, VERBS_PROV_NAME, ##__VA_ARGS__); } while (0)
#define VERBS_MSG_SIZE (1ULL << 31)
#define VERBS_IB_PREFIX "IB-0x"
#define VERBS_IWARP_FABRIC "Ethernet-iWARP"
#define VERBS_ANY_FABRIC "Any RDMA fabric"
#define VERBS_CAPS (FI_MSG | FI_RMA | FI_ATOMICS | FI_READ | FI_WRITE | \
FI_SEND | FI_RECV | FI_REMOTE_READ | FI_REMOTE_WRITE | \
FI_REMOTE_CQ_DATA | FI_REMOTE_COMPLETE)
#define VERBS_MODE (FI_LOCAL_MR | FI_PROV_MR_ATTR)
#define VERBS_MSG_ORDER (FI_ORDER_RAR | FI_ORDER_RAW | FI_ORDER_RAS | \
FI_ORDER_WAW | FI_ORDER_WAS | FI_ORDER_SAW | FI_ORDER_SAS )
struct fi_ibv_fabric {
struct fid_fabric fabric_fid;
@ -119,6 +133,53 @@ static char def_send_sge[16] = "4";
static char def_recv_sge[16] = "4";
static char def_inline_data[16] = "64";
/* Provider-default fabric attributes.  The fabric name is overwritten
 * per-device by fi_ibv_fill_info_attr(). */
const struct fi_fabric_attr verbs_fabric_attr = {
.name = VERBS_PROV_NAME,
.prov_version = VERBS_PROV_VERS,
};
/* Provider-default domain attributes.  Key/immediate-data sizes are
 * bounded by the corresponding libibverbs structure fields; the object
 * counts are replaced with the device's real limits once it is queried. */
const struct fi_domain_attr verbs_domain_attr = {
.threading = FI_THREAD_SAFE,
.control_progress = FI_PROGRESS_AUTO,
.data_progress = FI_PROGRESS_AUTO,
.mr_key_size = sizeof_field(struct ibv_sge, lkey),
.cq_data_size = sizeof_field(struct ibv_send_wr, imm_data),
.max_ep_tx_ctx = 1,
.max_ep_rx_ctx = 1,
};
/* Provider-default endpoint attributes; used both as the template copied
 * into returned fi_info and as the limit set checked against hints.
 * max_order_war_size is 0: write-after-read ordering is not offered. */
const struct fi_ep_attr verbs_ep_attr = {
.protocol_version = 1,
.max_msg_size = VERBS_MSG_SIZE,
.total_buffered_recv = 0,
.msg_prefix_size = 0,
.max_order_raw_size = VERBS_MSG_SIZE,
.max_order_war_size = 0,
.max_order_waw_size = VERBS_MSG_SIZE,
.mem_tag_format = 0,
.msg_order = VERBS_MSG_ORDER,
.tx_ctx_cnt = 1,
.rx_ctx_cnt = 1,
};
/* Provider-default receive-context attributes (no buffered recv). */
const struct fi_rx_attr verbs_rx_attr = {
.caps = VERBS_CAPS,
.mode = VERBS_MODE,
.msg_order = VERBS_MSG_ORDER,
.total_buffered_recv = 0,
.size = 256,
.iov_limit = 8,
};
/* Provider-default transmit-context attributes.  inject_size is 0 until
 * the provider implements inject (see matching TODO in
 * fi_ibv_fill_info_attr()). */
const struct fi_tx_attr verbs_tx_attr = {
.caps = VERBS_CAPS,
.mode = VERBS_MODE,
.msg_order = VERBS_MSG_ORDER,
.inject_size = 0,
.size = 256,
.iov_limit = 8,
};
static int fi_ibv_sockaddr_len(struct sockaddr *addr)
{
if (!addr)
@ -136,9 +197,174 @@ static int fi_ibv_sockaddr_len(struct sockaddr *addr)
}
}
static int fi_ibv_check_hints(struct fi_info *hints)
static int fi_ibv_check_fabric_attr(struct fi_fabric_attr *attr)
{
switch (hints->ep_type) {
if (attr->name && !(!strcmp(attr->name, VERBS_ANY_FABRIC) ||
!strncmp(attr->name, VERBS_IB_PREFIX, strlen(VERBS_IB_PREFIX)) ||
!strcmp(attr->name, VERBS_IWARP_FABRIC)))
return -FI_ENODATA;
if (attr->prov_name && strcmp(attr->prov_name, VERBS_PROV_NAME))
return -FI_ENODATA;
if (attr->prov_version > VERBS_PROV_VERS)
return -FI_ENODATA;
return 0;
}
/*
 * Validate caller-requested domain attributes against what the verbs
 * provider can deliver.  Returns 0 when acceptable, -FI_ENODATA otherwise.
 */
static int fi_ibv_check_domain_attr(struct fi_domain_attr *attr)
{
	/* Any threading model the provider's FI_THREAD_SAFE default covers. */
	if (attr->threading != FI_THREAD_UNSPEC &&
	    attr->threading != FI_THREAD_SAFE &&
	    attr->threading != FI_THREAD_PROGRESS) {
		VERBS_WARN("Invalid threading model\n");
		return -FI_ENODATA;
	}

	if (attr->control_progress != FI_PROGRESS_UNSPEC &&
	    attr->control_progress != FI_PROGRESS_AUTO &&
	    attr->control_progress != FI_PROGRESS_MANUAL) {
		VERBS_WARN("Given control progress mode not supported\n");
		return -FI_ENODATA;
	}

	if (attr->data_progress != FI_PROGRESS_UNSPEC &&
	    attr->data_progress != FI_PROGRESS_AUTO &&
	    attr->data_progress != FI_PROGRESS_MANUAL) {
		VERBS_WARN("Given data progress mode not supported!\n");
		return -FI_ENODATA;
	}

	/* MR key and CQ immediate-data sizes are limited by the verbs
	 * wire structures. */
	if (attr->mr_key_size > sizeof_field(struct ibv_sge, lkey))
		return -FI_ENODATA;

	if (attr->cq_data_size > sizeof_field(struct ibv_send_wr, imm_data))
		return -FI_ENODATA;

	return 0;
}
static int fi_ibv_check_ep_attr(struct fi_ep_attr *attr)
{
switch (attr->protocol) {
case FI_PROTO_UNSPEC:
case FI_PROTO_RDMA_CM_IB_RC:
case FI_PROTO_IWARP:
case FI_PROTO_IB_UD:
break;
default:
return -FI_ENODATA;
}
if (attr->protocol_version > 1)
return -FI_ENODATA;
if (attr->max_msg_size > verbs_ep_attr.max_msg_size)
return -FI_ENODATA;
if (attr->total_buffered_recv) {
VERBS_WARN("Buffered Recv not supported\n");
return -FI_ENODATA;
}
if (attr->max_order_raw_size > verbs_ep_attr.max_order_raw_size) {
VERBS_WARN("max_order_raw_size exceeds supported size\n");
return -FI_ENODATA;
}
if (attr->max_order_war_size) {
VERBS_WARN("max_order_war_size exceeds supported size\n");
return -FI_ENODATA;
}
if (attr->max_order_waw_size > verbs_ep_attr.max_order_waw_size) {
VERBS_WARN("max_order_waw_size exceeds supported size\n");
return -FI_ENODATA;
}
if (attr->msg_order & ~(verbs_ep_attr.msg_order)) {
VERBS_WARN("Given msg ordering not supported\n");
return -FI_ENODATA;
}
if (attr->tx_ctx_cnt > verbs_ep_attr.tx_ctx_cnt) {
VERBS_WARN("tx_ctx_cnt exceeds supported size\n");
return -FI_ENODATA;
}
if (attr->rx_ctx_cnt > verbs_ep_attr.rx_ctx_cnt) {
VERBS_WARN("rx_ctx_cnt exceeds supported size\n");
return -FI_ENODATA;
}
return 0;
}
static int fi_ibv_check_rx_attr(struct fi_rx_attr *attr)
{
if (attr->caps & ~(verbs_rx_attr.caps)) {
VERBS_WARN("Given rx_attr->caps not supported\n");
return -FI_ENODATA;
}
if ((attr->mode & verbs_rx_attr.mode) != verbs_rx_attr.mode) {
VERBS_WARN("Given rx_attr->mode not supported\n");
return -FI_ENODATA;
}
if (attr->msg_order & ~(verbs_rx_attr.msg_order)) {
VERBS_WARN("Given rx_attr->msg_order not supported\n");
return -FI_ENODATA;
}
if (attr->total_buffered_recv > verbs_rx_attr.total_buffered_recv) {
VERBS_WARN("Given rx_attr->total_buffered_recv exceeds supported size\n");
return -FI_ENODATA;
}
return 0;
}
static int fi_ibv_check_tx_attr(struct fi_tx_attr *attr)
{
if (attr->caps & ~(verbs_tx_attr.caps)) {
VERBS_WARN("Given tx_attr->caps not supported\n");
return -FI_ENODATA;
}
if ((attr->mode & verbs_tx_attr.mode) != verbs_tx_attr.mode) {
VERBS_WARN("Given tx_attr->mode not supported\n");
return -FI_ENODATA;
}
if (attr->msg_order & ~(verbs_tx_attr.msg_order)) {
VERBS_WARN("Given tx_attr->msg_order not supported\n");
return -FI_ENODATA;
}
if (attr->inject_size > verbs_tx_attr.inject_size) {
VERBS_WARN("Given tx_attr->inject_size exceeds supported size\n");
return -FI_ENODATA;
}
return 0;
}
static int fi_ibv_check_info(struct fi_info *info)
{
int ret;
switch (info->ep_type) {
case FI_EP_UNSPEC:
case FI_EP_MSG:
break;
@ -146,24 +372,64 @@ static int fi_ibv_check_hints(struct fi_info *hints)
return -FI_ENODATA;
}
if (hints->ep_attr) {
switch (hints->ep_attr->protocol) {
case FI_PROTO_UNSPEC:
case FI_PROTO_RDMA_CM_IB_RC:
case FI_PROTO_IWARP:
case FI_PROTO_IB_UD:
break;
default:
return -FI_ENODATA;
}
if (!(info->caps & VERBS_CAPS) && info->caps)
return -FI_ENODATA;
if (info->fabric_attr) {
ret = fi_ibv_check_fabric_attr(info->fabric_attr);
if (ret)
return ret;
}
if (!(hints->caps & (FI_MSG | FI_RMA)) && hints->caps)
return -FI_ENODATA;
if (info->domain_attr) {
ret = fi_ibv_check_domain_attr(info->domain_attr);
if (ret)
return ret;
}
if (hints->fabric_attr && hints->fabric_attr->name &&
strcmp(hints->fabric_attr->name, "RDMA"))
if (info->ep_attr) {
ret = fi_ibv_check_ep_attr(info->ep_attr);
if (ret)
return ret;
}
if (info->rx_attr) {
ret = fi_ibv_check_rx_attr(info->rx_attr);
if (ret)
return ret;
}
if (info->tx_attr) {
ret = fi_ibv_check_tx_attr(info->tx_attr);
if (ret)
return ret;
}
return 0;
}
/*
 * Verify that hinted per-domain object counts fit within the limits the
 * HCA reported via ibv_query_device().  Returns 0 on success,
 * -FI_ENODATA (after a warning) when any count is too large.
 */
static int fi_ibv_check_dev_limits(struct fi_domain_attr *domain_attr,
		struct ibv_device_attr *device_attr)
{
	const char *err = NULL;

	if (domain_attr->cq_cnt > device_attr->max_cq)
		err = "cq_cnt exceeds supported size\n";
	else if (domain_attr->ep_cnt > device_attr->max_qp)
		err = "ep_cnt exceeds supported size\n";
	else if (domain_attr->tx_ctx_cnt > device_attr->max_qp)
		err = "domain_attr: tx_ctx_cnt exceeds supported size\n";
	else if (domain_attr->rx_ctx_cnt > device_attr->max_qp)
		err = "domain_attr: rx_ctx_cnt exceeds supported size\n";

	if (err) {
		VERBS_WARN("%s", err);
		return -FI_ENODATA;
	}

	return 0;
}
@ -210,7 +476,7 @@ static int fi_ibv_fi_to_rai(struct fi_info *fi, uint64_t flags, struct rdma_addr
}
static int fi_ibv_rai_to_fi(struct rdma_addrinfo *rai, struct fi_info *hints,
struct fi_info *fi)
struct fi_info *fi)
{
// fi->sa_family = rai->ai_family;
if (rai->ai_qp_type == IBV_QPT_RC || rai->ai_port_space == RDMA_PS_TCP) {
@ -241,6 +507,90 @@ static int fi_ibv_rai_to_fi(struct rdma_addrinfo *rai, struct fi_info *hints,
return 0;
}
/*
 * Populate the fi_info attribute structures with the verbs provider
 * defaults, then override them with the real capabilities of the device
 * behind ctx.  A NULL ctx means "no specific device yet": only the
 * generic fabric/provider names are filled in.
 *
 * Allocates fabric/domain name strings owned by fi (freed with fi).
 * Returns 0 on success or a negative fi_errno value.
 */
static int fi_ibv_fill_info_attr(struct ibv_context *ctx, struct fi_info *hints,
		struct fi_info *fi)
{
	struct ibv_device_attr device_attr;
	struct ibv_port_attr port_attr;
	union ibv_gid gid;
	size_t name_len;
	int ret;

	/* Start from the provider-wide defaults. */
	*(fi->fabric_attr) = verbs_fabric_attr;
	*(fi->domain_attr) = verbs_domain_attr;
	*(fi->ep_attr) = verbs_ep_attr;
	*(fi->tx_attr) = verbs_tx_attr;
	*(fi->rx_attr) = verbs_rx_attr;

	if (!(fi->fabric_attr->prov_name = strdup(VERBS_PROV_NAME)))
		return -FI_ENOMEM;

	if (!ctx) {
		if (!(fi->fabric_attr->name = strdup(VERBS_ANY_FABRIC)))
			return -FI_ENOMEM;
		return 0;
	}

	/* BUGFIX: ibv_query_gid() result was previously ignored; on failure
	 * an uninitialized gid would have been formatted into the IB fabric
	 * name below. */
	ret = ibv_query_gid(ctx, 1, 0, &gid);
	if (ret)
		return -errno;

	ret = ibv_query_device(ctx, &device_attr);
	if (ret)
		return -errno;

	ret = ibv_query_port(ctx, 1, &port_attr);
	if (ret)
		return -errno;

	/* Honor hinted per-domain object counts against device limits. */
	if (hints && hints->domain_attr) {
		ret = fi_ibv_check_dev_limits(hints->domain_attr, &device_attr);
		if (ret)
			return ret;
	}

	switch (ctx->device->transport_type) {
	case IBV_TRANSPORT_IB:
		/* IB fabrics are named by their subnet prefix. */
		name_len = strlen(VERBS_IB_PREFIX) + INET6_ADDRSTRLEN;
		if (!(fi->fabric_attr->name = calloc(1, name_len + 1)))
			return -FI_ENOMEM;
		snprintf(fi->fabric_attr->name, name_len, VERBS_IB_PREFIX "%lx",
			gid.global.subnet_prefix);
		break;
	case IBV_TRANSPORT_IWARP:
		/* BUGFIX: strdup() was previously unchecked here, unlike the
		 * IB branch above. */
		if (!(fi->fabric_attr->name = strdup(VERBS_IWARP_FABRIC)))
			return -FI_ENOMEM;
		break;
	default:
		VERBS_WARN("Unknown transport type\n");
		return -FI_ENODATA;
	}

	if (!(fi->domain_attr->name = strdup(ctx->device->name)))
		return -FI_ENOMEM;

	fi->domain_attr->cq_cnt = device_attr.max_cq;
	fi->domain_attr->ep_cnt = device_attr.max_qp;
	fi->domain_attr->tx_ctx_cnt = device_attr.max_qp;
	fi->domain_attr->rx_ctx_cnt = device_attr.max_qp;

	switch (ctx->device->transport_type) {
	case IBV_TRANSPORT_IWARP:
		fi->ep_attr->protocol = FI_PROTO_IWARP;
		break;
	case IBV_TRANSPORT_IB:
		fi->ep_attr->protocol = FI_PROTO_RDMA_CM_IB_RC;
		break;
	default:
		return -FI_ENODATA;
	}

	fi->ep_attr->protocol_version = 1;
	fi->ep_attr->max_msg_size = port_attr.max_msg_sz;
	// TODO Give a real size once verbs provider supports inject
	fi->ep_attr->inject_size = 0;

	return 0;
}
static int
fi_ibv_getepinfo(const char *node, const char *service,
uint64_t flags, struct fi_info *hints,
@ -251,7 +601,7 @@ fi_ibv_getepinfo(const char *node, const char *service,
int ret;
if (hints) {
ret = fi_ibv_check_hints(hints);
ret = fi_ibv_check_info(hints);
if (ret)
return ret;
@ -282,22 +632,13 @@ fi_ibv_getepinfo(const char *node, const char *service,
ret = -errno;
goto err2;
}
rdma_freeaddrinfo(rai);
if ((*id)->verbs) {
if (!(fi->domain_attr->name = strdup((*id)->verbs->device->name))) {
ret = -FI_ENOMEM;
goto err3;
}
}
// TODO: Get a real name here
if (!(fi->fabric_attr->name = strdup("RDMA"))) {
ret = -FI_ENOMEM;
ret = fi_ibv_fill_info_attr((*id)->verbs, hints, fi);
if (ret)
goto err3;
}
*info = fi;
rdma_freeaddrinfo(rai);
return 0;
err3:
@ -310,7 +651,7 @@ err1:
}
static int fi_ibv_getinfo(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info)
uint64_t flags, struct fi_info *hints, struct fi_info **info)
{
struct rdma_cm_id *id;
int ret;
@ -1488,6 +1829,7 @@ fi_ibv_eq_readerr(struct fid_eq *eq, struct fi_eq_err_entry *entry,
return sizeof(*entry);
}
/* TODO: This should copy the listening fi_info as the base */
static struct fi_info *
fi_ibv_eq_cm_getinfo(struct fi_ibv_fabric *fab, struct rdma_cm_event *event)
{
@ -1498,12 +1840,7 @@ fi_ibv_eq_cm_getinfo(struct fi_ibv_fabric *fab, struct rdma_cm_event *event)
return NULL;
fi->ep_type = FI_EP_MSG;
fi->caps = FI_MSG | FI_RMA;
if (event->id->verbs->device->transport_type == IBV_TRANSPORT_IWARP) {
fi->ep_attr->protocol = FI_PROTO_IWARP;
} else {
fi->ep_attr->protocol = FI_PROTO_RDMA_CM_IB_RC;
}
fi->caps = VERBS_CAPS;
fi->src_addrlen = fi_ibv_sockaddr_len(rdma_get_local_addr(event->id));
if (!(fi->src_addr = malloc(fi->src_addrlen)))
@ -1515,14 +1852,7 @@ fi_ibv_eq_cm_getinfo(struct fi_ibv_fabric *fab, struct rdma_cm_event *event)
goto err;
memcpy(fi->dest_addr, rdma_get_peer_addr(event->id), fi->dest_addrlen);
if (!(fi->fabric_attr->name = strdup("RDMA")))
goto err;
if (!(fi->fabric_attr->prov_name = strdup(PROV_NAME)))
goto err;
fi->fabric_attr->prov_version = PROV_VERS;
if (!(fi->domain_attr->name = strdup(event->id->verbs->device->name)))
goto err;
fi_ibv_fill_info_attr(event->id->verbs, NULL, fi);
fi->connreq = event->id;
return fi;
@ -2360,8 +2690,10 @@ static struct fi_ops_fabric fi_ibv_ops_fabric = {
int fi_ibv_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric, void *context)
{
struct fi_ibv_fabric *fab;
int ret;
if (strcmp(attr->name, "RDMA"))
ret = fi_ibv_check_fabric_attr(attr);
if (ret)
return -FI_ENODATA;
fab = calloc(1, sizeof(*fab));
@ -2376,18 +2708,20 @@ int fi_ibv_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric, void
return 0;
}
static void fi_ibv_fini(void)
{
}
static struct fi_provider fi_ibv_prov = {
.name = PROV_NAME,
.version = PROV_VERS,
.name = VERBS_PROV_NAME,
.version = VERBS_PROV_VERS,
.fi_version = FI_VERSION(FI_MAJOR_VERSION, FI_MINOR_VERSION),
.getinfo = fi_ibv_getinfo,
.fabric = fi_ibv_fabric,
.cleanup = fi_ibv_fini
};
static void __attribute__((constructor)) fi_ibv_ini(void)
{
(void) fi_register(&fi_ibv_prov);
}
static void __attribute__((destructor)) fi_ibv_fini(void)
VERBS_INI
{
return &fi_ibv_prov;
}

Просмотреть файл

@ -44,6 +44,7 @@
#include <rdma/fi_errno.h>
#include "fi.h"
#include "prov.h"
#ifdef HAVE_LIBDL
#include <dlfcn.h>
@ -57,38 +58,47 @@ struct fi_prov {
struct fi_provider *provider;
};
static struct fi_prov *prov_head, *prov_tail;
static struct fi_prov *fi_getprov(const char *prov_name);
static struct fi_prov *prov_head, *prov_tail;
static volatile int init = 0;
static pthread_mutex_t ini_lock = PTHREAD_MUTEX_INITIALIZER;
__attribute__((visibility ("default")))
int fi_register_provider_(uint32_t fi_version, struct fi_provider *provider)
static int fi_register_provider(struct fi_provider *provider)
{
struct fi_prov *prov;
int ret;
if (FI_MAJOR(fi_version) != FI_MAJOR_VERSION ||
FI_MINOR(fi_version) > FI_MINOR_VERSION)
return -FI_ENOSYS;
if (!provider)
return -FI_EINVAL;
if (FI_MAJOR(provider->fi_version) != FI_MAJOR_VERSION ||
FI_MINOR(provider->fi_version) > FI_MINOR_VERSION) {
ret = -FI_ENOSYS;
goto cleanup;
}
/* If a provider with this name is already registered:
* - if the new provider has a lower version number, just fail
* to register it
* - otherwise, just overwrite the old prov entry
* If the provider is a new/unique name, calloc() a new prov entry.
*/
prov = fi_getprov(provider->name);
if (prov) {
if (FI_VERSION_GE(prov->provider->version, provider->version))
return -FI_EALREADY;
/* If we have two versions of the same provider,
* keep the most recent
*/
if (FI_VERSION_GE(prov->provider->version, provider->version)) {
ret = -FI_EALREADY;
goto cleanup;
}
prov->provider->cleanup();
prov->provider = provider;
return 0;
}
prov = calloc(sizeof *prov, 1);
if (!prov)
return -FI_ENOMEM;
if (!prov) {
ret = -FI_ENOMEM;
goto cleanup;
}
prov->provider = provider;
if (prov_tail)
@ -97,8 +107,11 @@ int fi_register_provider_(uint32_t fi_version, struct fi_provider *provider)
prov_head = prov;
prov_tail = prov;
return 0;
cleanup:
provider->cleanup();
return ret;
}
default_symver(fi_register_provider_, fi_register_provider);
#ifdef HAVE_LIBDL
static int lib_filter(const struct dirent *entry)
@ -111,13 +124,26 @@ static int lib_filter(const struct dirent *entry)
else
return 0;
}
#endif
static void __attribute__((constructor)) fi_ini(void)
static void fi_ini(void)
{
pthread_mutex_lock(&ini_lock);
if (init)
goto unlock;
fi_register_provider(VERBS_INIT);
fi_register_provider(PSM_INIT);
fi_register_provider(SOCKETS_INIT);
fi_register_provider(USNIC_INIT);
#ifdef HAVE_LIBDL
struct dirent **liblist;
int n, want_warn = 0;
char *lib, *extdir = getenv("FI_EXTDIR");
void *dlhandle;
struct fi_provider* (*inif)(void);
if (extdir) {
/* Warn if user specified $FI_EXTDIR, but there's a
@ -130,7 +156,7 @@ static void __attribute__((constructor)) fi_ini(void)
/* If dlopen fails, assume static linking and just return
without error */
if (dlopen(NULL, RTLD_NOW) == NULL) {
return;
goto done;
}
n = scandir(extdir, &liblist, lib_filter, NULL);
@ -139,13 +165,14 @@ static void __attribute__((constructor)) fi_ini(void)
FI_WARN("scandir error reading %s: %s\n",
extdir, strerror(errno));
}
return;
goto done;
}
while (n--) {
if (asprintf(&lib, "%s/%s", extdir, liblist[n]->d_name) < 0) {
FI_WARN("asprintf failed to allocate memory\n");
return;
free(liblist[n]);
goto done;
}
dlhandle = dlopen(lib, RTLD_NOW);
@ -154,14 +181,26 @@ static void __attribute__((constructor)) fi_ini(void)
free(liblist[n]);
free(lib);
inif = dlsym(dlhandle, "fi_prov_ini");
if (inif == NULL)
FI_WARN("dlsym: %s\n", dlerror());
else
fi_register_provider((inif)());
}
free(liblist);
}
done:
#endif
init = 1;
unlock:
pthread_mutex_unlock(&ini_lock);
}
/*
 * Library destructor: walk the registered-provider list and give each
 * provider a chance to clean up before the library is unloaded.
 */
static void __attribute__((destructor)) fi_fini(void)
{
	struct fi_prov *prov;

	prov = prov_head;
	while (prov) {
		prov->provider->cleanup();
		prov = prov->next;
	}
}
static struct fi_prov *fi_getprov(const char *prov_name)
@ -182,7 +221,10 @@ int fi_getinfo_(uint32_t version, const char *node, const char *service,
{
struct fi_prov *prov;
struct fi_info *tail, *cur;
int ret = -ENOSYS;
int ret = -FI_ENOSYS;
if (!init)
fi_ini();
*info = tail = NULL;
for (prov = prov_head; prov; prov = prov->next) {
@ -345,6 +387,9 @@ int fi_fabric_(struct fi_fabric_attr *attr, struct fid_fabric **fabric, void *co
if (!attr || !attr->prov_name || !attr->name)
return -FI_EINVAL;
if (!init)
fi_ini();
prov = fi_getprov(attr->prov_name);
if (!prov || !prov->provider->fabric)
return -FI_ENODEV;

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше