
libfabric: update to (just past) 1.0rc1

Updated to GitHub ofiwg/libfabric@6b005d0d19.
This commit is contained in:
Jeff Squyres 2015-02-03 10:11:38 -08:00
parent 7299cc3ab9
commit 3dc0abfbc4
99 changed files with 2606 additions and 1249 deletions

View file

@ -18,6 +18,10 @@ else !HAVE_LD_VERSION_SCRIPT
libfabric_version_script =
endif !HAVE_LD_VERSION_SCRIPT
rdmaincludedir = $(includedir)/rdma
rdmainclude_HEADERS =
# internal utility functions shared by in-tree providers:
common_srcs = \
src/common.c \
@ -31,10 +35,12 @@ src_libfabric_la_SOURCES = \
include/fi_enosys.h \
include/fi_indexer.h \
include/fi_list.h \
include/fi_log.h \
include/fi_rbuf.h \
include/prov.h \
src/fabric.c \
src/fi_tostr.c \
src/log.c \
$(common_srcs)
if HAVE_SOCKETS
@ -154,7 +160,7 @@ libusnic_direct_sources = \
_usnic_files = \
$(libusnic_direct_sources) \
prov/usnic/src/fi_usnic.h \
prov/usnic/src/fi_ext_usnic.h \
prov/usnic/src/usdf.h \
prov/usnic/src/usdf_av.c \
prov/usnic/src/usdf_av.h \
@ -189,6 +195,9 @@ _usnic_cppflags = \
-DHAVE_LIBNL3=$(HAVE_LIBNL3) $(USNIC_LIBNL_CPPFLAGS) \
-I$(top_srcdir)/prov/usnic/src/usnic_direct
rdmainclude_HEADERS += \
prov/usnic/src/fi_ext_usnic.h
if HAVE_USNIC_DL
pkglib_LTLIBRARIES += libusnic-fi.la
libusnic_fi_la_CPPFLAGS = $(AM_CPPFLAGS) $(_usnic_cppflags)
@ -242,9 +251,7 @@ src_libfabric_la_LDFLAGS = -version-info 1 -export-dynamic \
$(libfabric_version_script)
src_libfabric_la_DEPENDENCIES = $(srcdir)/libfabric.map
rdmaincludedir = $(includedir)/rdma
rdmainclude_HEADERS = \
rdmainclude_HEADERS += \
$(top_srcdir)/include/rdma/fabric.h \
$(top_srcdir)/include/rdma/fi_atomic.h \
$(top_srcdir)/include/rdma/fi_cm.h \

View file

@ -1,7 +1,7 @@
This README is for userspace RDMA fabric library.
Version Libfabric v0.0.2
Released on 2015-01-24
Version Libfabric v1.0.0-rc1
Released on 2015-02-03
Building
========

View file

@ -1,5 +1,9 @@
/* config.h.in. Generated from configure.ac by autoheader. */
/* defined to 1 if libfabric was configured with --enable-debug, 0 otherwise
*/
#undef ENABLE_DEBUG
/* Set to 1 to use c11 atomic functions */
#undef HAVE_ATOMICS

View file

@ -32,6 +32,15 @@ sub subst {
$copy =~ s/\@VERSION\@/Libfabric v$version/g;
$copy =~ s/\@DATE\@/$today/g;
# Note that there appears to be a bug in some versions of Pandoc
# that will escape the appearance of @ in generated man pages
# (e.g., in the "@VERSION@" that appears in the man page version
# field). So rather than be clever in the regexp's above, do the
# simple/clear thing and repeat the same regexp's as above, but
# with double-escaped @'s.
$copy =~ s/\\\@VERSION\\\@/Libfabric v$version/g;
$copy =~ s/\\\@DATE\\\@/$today/g;
if ($copy ne $orig) {
print "*** VERSION/DATE-ifying $file...\n";
open(OUT, ">$file") || die "Can't write to $file: $!";

View file

@ -1,7 +1,7 @@
dnl Process this file with autoconf to produce a configure script.
AC_PREREQ(2.57)
AC_INIT([libfabric], [0.0.2], [ofiwg@lists.openfabrics.org])
AC_INIT([libfabric], [1.0.0-rc1], [ofiwg@lists.openfabrics.org])
AC_CONFIG_SRCDIR([src/fabric.c])
AC_CONFIG_AUX_DIR(config)
AC_CONFIG_MACRO_DIR(config)
@ -13,8 +13,13 @@ AC_ARG_ENABLE([debug],
[AS_HELP_STRING([--enable-debug],
[Enable debugging @<:@default=no@:>@])
],
[CFLAGS="$CFLAGS -g -O0 -Wall"],
[enable_debug=no])
[CFLAGS="$CFLAGS -g -O0 -Wall"
dbg=1],
[enable_debug=no
dbg=0])
AC_DEFINE_UNQUOTED([ENABLE_DEBUG],[$dbg],
[defined to 1 if libfabric was configured with --enable-debug, 0 otherwise])
dnl Fix autoconf's habit of adding -g -O2 by default
AS_IF([test -z "$CFLAGS"],

View file

@ -190,6 +190,11 @@ size_t fi_datatype_size(enum fi_datatype datatype);
uint64_t fi_tag_bits(uint64_t mem_tag_format);
uint64_t fi_tag_format(uint64_t tag_bits);
int fi_send_allowed(uint64_t caps);
int fi_recv_allowed(uint64_t caps);
int fi_rma_initiate_allowed(uint64_t caps);
int fi_rma_target_allowed(uint64_t caps);
#define RDMA_CONF_DIR SYSCONFDIR "/" RDMADIR
#define FI_CONF_DIR RDMA_CONF_DIR "/fabric"

View file

@ -235,6 +235,8 @@ static struct fi_ops_ep X = {
.setopt = fi_no_setopt,
.tx_ctx = fi_no_tx_ctx,
.rx_ctx = fi_no_rx_ctx,
.rx_size_left = fi_no_rx_size_left,
.tx_size_left = fi_no_tx_size_left,
};
*/
int fi_no_enable(struct fid_ep *ep);
@ -243,12 +245,14 @@ int fi_no_getopt(fid_t fid, int level, int optname,
void *optval, size_t *optlen);
int fi_no_setopt(fid_t fid, int level, int optname,
const void *optval, size_t optlen);
int fi_no_tx_ctx(struct fid_sep *sep, int index,
int fi_no_tx_ctx(struct fid_ep *sep, int index,
struct fi_tx_attr *attr, struct fid_ep **tx_ep,
void *context);
int fi_no_rx_ctx(struct fid_sep *sep, int index,
int fi_no_rx_ctx(struct fid_ep *sep, int index,
struct fi_rx_attr *attr, struct fid_ep **rx_ep,
void *context);
ssize_t fi_no_rx_size_left(struct fid_ep *ep);
ssize_t fi_no_tx_size_left(struct fid_ep *ep);
/*
static struct fi_ops_msg X = {
@ -262,8 +266,6 @@ static struct fi_ops_msg X = {
.inject = fi_no_msg_inject,
.senddata = fi_no_msg_senddata,
.injectdata = fi_no_msg_injectdata,
.rx_size_left = fi_no_msg_rx_size_left,
.tx_size_left = fi_no_msg_tx_size_left,
};
*/
ssize_t fi_no_msg_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
@ -284,8 +286,6 @@ ssize_t fi_no_msg_senddata(struct fid_ep *ep, const void *buf, size_t len, void
uint64_t data, fi_addr_t dest_addr, void *context);
ssize_t fi_no_msg_injectdata(struct fid_ep *ep, const void *buf, size_t len,
uint64_t data, fi_addr_t dest_addr);
ssize_t fi_no_msg_rx_size_left(struct fid_ep *ep);
ssize_t fi_no_msg_tx_size_left(struct fid_ep *ep);
/*
static struct fi_ops_wait X = {

View file

@ -0,0 +1,65 @@
/*
* Copyright (c) 2015, Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#if !defined(FI_LOG_H)
#define FI_LOG_H
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
extern int fi_log_level;
void fi_log_init(void);
void fi_warn_impl(const char *prov, const char *fmt, ...);
void fi_log_impl(int level, const char *prov, const char *fmt, ...);
void fi_debug_impl(const char *prov, const char *fmt, ...);
/* Callers are responsible for including their own trailing "\n". Non-provider
* code should pass prov=NULL.
*/
#define FI_WARN(prov, ...) fi_warn_impl(prov, __VA_ARGS__)
#define FI_LOG(level, prov, ...) \
do { \
if ((level) <= fi_log_level) \
fi_log_impl(level, prov, __VA_ARGS__); \
} while (0)
#if ENABLE_DEBUG
# define FI_DEBUG(prov, ...) fi_debug_impl(prov, __VA_ARGS__)
#else
# define FI_DEBUG(prov, ...) do {} while (0)
#endif
#endif /* !defined(FI_LOG_H) */
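A minimal usage sketch of these macros (the provider name "sockets" and the function are illustrative; FI_LOG only emits when the requested level is at or below fi_log_level, and FI_DEBUG compiles to a no-op unless ENABLE_DEBUG is 1):

    #include "fi_log.h"

    static int open_device(const char *name)
    {
            /* always emitted */
            FI_WARN("sockets", "failed to open %s\n", name);

            /* emitted only when fi_log_level is 2 or higher */
            FI_LOG(2, "sockets", "retrying open of %s\n", name);

            /* compiled out entirely unless ENABLE_DEBUG is 1 */
            FI_DEBUG("sockets", "open_device(%s) called\n", name);

            return -1;
    }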

View file

@ -277,4 +277,10 @@ static inline size_t rbfdsread(struct ringbuffd *rbfd, void *buf, size_t len,
return ret;
}
static inline size_t rbfdwait(struct ringbuffd *rbfd, int timeout)
{
return fi_poll_fd(rbfd->fd[RB_READ_FD], timeout);
}
#endif /* RBUF_H */

View file

@ -77,7 +77,6 @@ struct fid_cntr;
struct fid_ep;
struct fid_pep;
struct fid_stx;
struct fid_sep;
struct fid_mr;
typedef struct fid *fid_t;
@ -96,7 +95,8 @@ typedef struct fid *fid_t;
#define FI_MSG (1ULL << 1)
#define FI_RMA (1ULL << 2)
#define FI_TAGGED (1ULL << 3)
#define FI_ATOMICS (1ULL << 4)
#define FI_ATOMIC (1ULL << 4)
#define FI_ATOMICS FI_ATOMIC
#define FI_DYNAMIC_MR (1ULL << 7)
#define FI_NAMED_RX_CTX (1ULL << 8)
#define FI_BUFFERED_RECV (1ULL << 9)
@ -121,6 +121,7 @@ typedef struct fid *fid_t;
#define FI_WRITE (1ULL << 17)
#define FI_RECV (1ULL << 18)
#define FI_SEND (1ULL << 19)
#define FI_TRANSMIT FI_SEND
#define FI_REMOTE_READ (1ULL << 20)
#define FI_REMOTE_WRITE (1ULL << 21)
@ -133,6 +134,7 @@ typedef struct fid *fid_t;
#define FI_MORE (1ULL << 29)
#define FI_PEEK (1ULL << 30)
#define FI_TRIGGER (1ULL << 31)
#define FI_FENCE (1ULL << 32)
struct fi_ioc {
@ -220,6 +222,7 @@ enum {
#define FI_LOCAL_MR (1ULL << 1)
#define FI_PROV_MR_ATTR (1ULL << 2)
#define FI_MSG_PREFIX (1ULL << 3)
#define FI_ASYNC_IOV (1ULL << 4)
struct fi_tx_attr {
uint64_t caps;
@ -230,6 +233,7 @@ struct fi_tx_attr {
size_t inject_size;
size_t size;
size_t iov_limit;
size_t rma_iov_limit;
};
struct fi_rx_attr {
@ -435,6 +439,7 @@ enum fi_type {
FI_TYPE_AV_TYPE,
FI_TYPE_ATOMIC_TYPE,
FI_TYPE_ATOMIC_OP,
FI_TYPE_VERSION,
};
char *fi_tostr(const void *data, enum fi_type datatype);

View file

@ -120,7 +120,7 @@ struct fi_ops_domain {
int (*endpoint)(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context);
int (*scalable_ep)(struct fid_domain *domain, struct fi_info *info,
struct fid_sep **sep, void *context);
struct fid_ep **sep, void *context);
int (*cntr_open)(struct fid_domain *domain, struct fi_cntr_attr *attr,
struct fid_cntr **cntr, void *context);
int (*poll_open)(struct fid_domain *domain, struct fi_poll_attr *attr,
@ -223,6 +223,11 @@ static inline uint64_t fi_mr_key(struct fid_mr *mr)
return mr->key;
}
static inline int fi_mr_bind(struct fid_mr *mr, struct fid *bfid, uint64_t flags)
{
return mr->fid.ops->bind(&mr->fid, bfid, flags);
}
static inline int
fi_av_open(struct fid_domain *domain, struct fi_av_attr *attr,
struct fid_av **av, void *context)

View file

@ -71,12 +71,14 @@ struct fi_ops_ep {
void *optval, size_t *optlen);
int (*setopt)(fid_t fid, int level, int optname,
const void *optval, size_t optlen);
int (*tx_ctx)(struct fid_sep *sep, int index,
int (*tx_ctx)(struct fid_ep *sep, int index,
struct fi_tx_attr *attr, struct fid_ep **tx_ep,
void *context);
int (*rx_ctx)(struct fid_sep *sep, int index,
int (*rx_ctx)(struct fid_ep *sep, int index,
struct fi_rx_attr *attr, struct fid_ep **rx_ep,
void *context);
ssize_t (*rx_size_left)(struct fid_ep *ep);
ssize_t (*tx_size_left)(struct fid_ep *ep);
};
struct fi_ops_msg {
@ -99,8 +101,6 @@ struct fi_ops_msg {
uint64_t data, fi_addr_t dest_addr, void *context);
ssize_t (*injectdata)(struct fid_ep *ep, const void *buf, size_t len,
uint64_t data, fi_addr_t dest_addr);
ssize_t (*rx_size_left)(struct fid_ep *ep);
ssize_t (*tx_size_left)(struct fid_ep *ep);
};
struct fi_ops_cm;
@ -139,12 +139,6 @@ struct fid_stx {
struct fi_ops_ep *ops;
};
struct fid_sep {
struct fid fid;
struct fi_ops_ep *ops;
struct fi_ops_cm *cm;
};
#ifndef FABRIC_DIRECT
static inline int
@ -163,7 +157,7 @@ fi_endpoint(struct fid_domain *domain, struct fi_info *info,
static inline int
fi_scalable_ep(struct fid_domain *domain, struct fi_info *info,
struct fid_sep **sep, void *context)
struct fid_ep **sep, void *context)
{
return domain->ops->scalable_ep(domain, info, sep, context);
}
@ -178,7 +172,7 @@ static inline int fi_pep_bind(struct fid_pep *pep, struct fid *bfid, uint64_t fl
return pep->fid.ops->bind(&pep->fid, bfid, flags);
}
static inline int fi_scalable_ep_bind(struct fid_sep *sep, struct fid *bfid, uint64_t flags)
static inline int fi_scalable_ep_bind(struct fid_ep *sep, struct fid *bfid, uint64_t flags)
{
return sep->fid.ops->bind(&sep->fid, bfid, flags);
}
@ -211,17 +205,29 @@ fi_getopt(fid_t fid, int level, int optname,
}
static inline int
fi_tx_context(struct fid_sep *sep, int index, struct fi_tx_attr *attr,
fi_tx_context(struct fid_ep *ep, int index, struct fi_tx_attr *attr,
struct fid_ep **tx_ep, void *context)
{
return sep->ops->tx_ctx(sep, index, attr, tx_ep, context);
return ep->ops->tx_ctx(ep, index, attr, tx_ep, context);
}
static inline int
fi_rx_context(struct fid_sep *sep, int index, struct fi_rx_attr *attr,
fi_rx_context(struct fid_ep *ep, int index, struct fi_rx_attr *attr,
struct fid_ep **rx_ep, void *context)
{
return sep->ops->rx_ctx(sep, index, attr, rx_ep, context);
return ep->ops->rx_ctx(ep, index, attr, rx_ep, context);
}
static inline ssize_t
fi_rx_size_left(struct fid_ep *ep)
{
return ep->ops->rx_size_left(ep);
}
static inline ssize_t
fi_tx_size_left(struct fid_ep *ep)
{
return ep->ops->tx_size_left(ep);
}
static inline int
@ -298,18 +304,6 @@ fi_injectdata(struct fid_ep *ep, const void *buf, size_t len,
return ep->msg->injectdata(ep, buf, len, data, dest_addr);
}
static inline ssize_t
fi_rx_size_left(struct fid_ep *ep)
{
return ep->msg->rx_size_left(ep);
}
static inline ssize_t
fi_tx_size_left(struct fid_ep *ep)
{
return ep->msg->tx_size_left(ep);
}
#else // FABRIC_DIRECT
#include <rdma/fi_direct_endpoint.h>
#endif

View file

@ -1,5 +1,6 @@
/*
* Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@ -41,6 +42,8 @@ extern "C" {
/* FI directly mapped errno values */
#define FI_SUCCESS 0
//#define FI_EPERM EPERM /* Operation not permitted */
#define FI_ENOENT ENOENT /* No such file or directory */
//#define FI_ESRCH ESRCH /* No such process */
@ -183,6 +186,8 @@ extern "C" {
#define FI_ENOEQ 261 /* Missing or unavailable event queue */
#define FI_EDOMAIN 262 /* Invalid resource domain */
#define FI_ENOCQ 263 /* Missing or unavailable completion queue */
#define FI_ECRC 264 /* CRC error */
#define FI_ETRUNC 265 /* Truncation error */
const char *fi_strerror(int errnum);

View file

@ -43,6 +43,7 @@ extern "C" {
#endif
#define FI_CLAIM (1ULL << 0)
#define FI_DISCARD FI_CANCEL
struct fi_msg_tagged {
const struct iovec *msg_iov;

View file

@ -1,6 +1,6 @@
Name: libfabric
Version: 0.0.2
Release: 1%{?dist}
Version: 1.0.0-rc1
Release: 1.rc1%{?dist}
Summary: User-space RDMA Fabric Interfaces
Group: System Environment/Libraries
License: GPLv2 or BSD

View file

@ -1,6 +1,6 @@
Name: libfabric
Version: @VERSION@
Release: 1%{?dist}
Release: 1.rc1%{?dist}
Summary: User-space RDMA Fabric Interfaces
Group: System Environment/Libraries
License: GPLv2 or BSD

View file

@ -1,4 +1,4 @@
.TH fabric 7 "2014\-12\-15" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fabric 7 "2014\-12\-15" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
Fabric Interface Library

View file

@ -1,4 +1,4 @@
.TH fi_av 3 "2014-11-14" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_av 3 "2015\-02\-01" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
fi_av - Address vector operations
@ -37,7 +37,7 @@ int\ fi_av_insertsym(struct\ fid_av\ *av,\ const\ char\ *node,
\ \ \ \ size_t\ nodecnt,\ const\ char\ *service,\ size_t\ svccnt,
\ \ \ \ fi_addr_t\ *fi_addr,\ uint64_t\ flags,\ void\ *context);
int\ fi_av_remove(struct\ fid_av\ *av,\ fi_addr_t\ fi_addr,\ size_t\ count,
int\ fi_av_remove(struct\ fid_av\ *av,\ fi_addr_t\ *fi_addr,\ size_t\ count,
\ \ \ \ uint64_t\ flags);
int\ fi_av_lookup(struct\ fid_av\ *av,\ fi_addr_t\ fi_addr,
@ -227,6 +227,11 @@ Note that any events queued on an event queue referencing the AV are
left untouched.
It is recommended that callers retrieve all events associated with the
AV before closing it.
.PP
When closing the address vector, there must be no opened endpoints
associated with the AV.
If resources are still associated with the AV when attempting to close,
the call will return -FI_EBUSY.
.SS fi_av_bind
.PP
Associates an event queue with the AV.

View file

@ -1,4 +1,4 @@
.TH fi_cm 3 "2015\-01\-01" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_cm 3 "2015\-01\-01" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
fi_cm - Connection management operations

View file

@ -1,4 +1,4 @@
.TH fi_cntr 3 "2014\-12\-15" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_cntr 3 "2015\-01\-29" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
fi_cntr - Completion and event counter operations
@ -146,8 +146,11 @@ This field is ignored if wait_obj is not FI_WAIT_SET.
.SS fi_close
.PP
The fi_close call releases all resources associated with a counter.
The counter must not be bound to any other resources prior to being
freed.
When closing the counter, there must be no opened endpoints, transmit
contexts, receive contexts or memory regions associated with the
counter.
If resources are still associated with the counter when attempting to
close, the call will return -FI_EBUSY.
.SS fi_cntr_control
.PP
The fi_cntr_control call is used to access provider or implementation

View file

@ -1,10 +1,12 @@
.TH fi_cq 3 "2014\-12\-15" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_cq 3 "2015\-02\-01" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
fi_cq - Completion queue operations
.PP
fi_cq_open / fi_close : Open/close a completion queue
.PP
fi_control : Control CQ operation or attributes.
.PP
fi_cq_read / fi_cq_readfrom / fi_cq_readerr : Read a completion from a
completion queue
.PP
@ -78,6 +80,10 @@ information.
.PP
\f[I]flags\f[] : Additional flags to apply to the operation
.PP
\f[I]command\f[] : Command of control operation to perform on CQ.
.PP
\f[I]arg\f[] : Optional control argument
.PP
\f[I]cond\f[] : Condition that must be met before a completion is
generated
.PP
@ -288,8 +294,12 @@ This field is ignored if wait_obj is not FI_WAIT_SET.
.PP
The fi_close call releases all resources associated with a completion
queue.
The CQ must not be bound to any other resources prior to being closed.
Any completions which remain on the CQ when it is closed are lost.
.PP
When closing the CQ, there must be no opened endpoints, transmit
contexts, or receive contexts associated with the CQ.
If resources are still associated with the CQ when attempting to close,
the call will return -FI_EBUSY.
.SS fi_control
.PP
The fi_control call is used to access provider or implementation
@ -298,11 +308,13 @@ Access to the CQ should be serialized across all calls when fi_control
is invoked, as it may redirect the implementation of CQ operations.
The following control commands are usable with an CQ.
.PP
*FI_GETWAIT (void *\f[I])\f[] : This command allows the user to retrieve
\f[I]FI_GETWAIT (void **)\f[] : This command allows the user to retrieve
the low-level wait object associated with the CQ.
The format of the wait-object is specified during CQ creation, through
the CQ attributes.
See fi_eq.3 for addition details using control with FI_GETWAIT.
The fi_control arg parameter should be an address where a pointer to the
returned wait object will be written.
See fi_eq.3 for additional details on using fi_control with FI_GETWAIT.
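A rough sketch of the call described above, assuming the CQ was opened with wait_obj set to FI_WAIT_FD so that the returned wait object is a file descriptor:

    int fd, ret;

    /* arg is the address where the wait object (here, an fd) is written */
    ret = fi_control(&cq->fid, FI_GETWAIT, &fd);
    if (ret)
            return ret;

    /* fd may now be passed to poll()/select() to wait for completions */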
.SS fi_cq_read / fi_cq_readfrom
.PP
The fi_cq_read and fi_cq_readfrom operations perform a non-blocking read

View file

@ -1,4 +1,4 @@
.TH fi_direct 7 "2014\-11\-21" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_direct 7 "2014\-11\-21" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
Direct fabric provider access

View file

@ -1,4 +1,4 @@
.TH fi_domain 3 "2015\-01\-12" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_domain 3 "2015\-02\-01" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
fi_domain - Open a fabric access domain
@ -81,8 +81,8 @@ then memory registration requests complete synchronously.
.PP
The fi_close call is used to release all resources associated with a
domain or interface.
All items associated with the opened domain must be released prior to
calling fi_close.
All objects associated with the opened domain must be released prior to
calling fi_close, otherwise the call will return -FI_EBUSY.
.SH DOMAIN ATTRIBUTES
.PP
The \f[C]fi_domain_attr\f[] structure defines the set of attributes
@ -238,9 +238,11 @@ allocated below the fabric interfaces.
provider requires the use of an application thread to complete an
asynchronous request.
When manual progress is set, the provider will attempt to advance an
asynchronous operation forward when the application invokes any event
queue read or wait operation where the completion will be reported.
Progress also occurs when the application processes a poll or wait set.
asynchronous operation forward when the application attempts to wait on
or read an event queue, completion queue, or counter where the completed
operation will be reported.
Progress also occurs when the application processes a poll or wait set
that has been associated with the event or completion queue.
.PP
Only wait operations defined by the fabric interface will result in an
operation progressing.
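As an illustrative sketch of manual progress, an application keeps outstanding operations moving simply by reading the completion queue, even when it does not expect a completion yet:

    struct fi_cq_entry entry;
    ssize_t ret;

    do {
            /* each read gives the provider a chance to progress
             * outstanding asynchronous operations */
            ret = fi_cq_read(cq, &entry, 1);
    } while (ret == -FI_EAGAIN);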

View file

@ -1,4 +1,4 @@
.TH fi_endpoint 3 "2015\-01\-16" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_endpoint 3 "2015\-02\-01" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
fi_endpoint - Fabric endpoint operations
@ -19,6 +19,11 @@ Associate a scalable endpoint with an address vector
.RS
.RE
.TP
.B fi_pep_bind
Associate a passive endpoint with an event queue
.RS
.RE
.TP
.B fi_enable
Transitions an endpoint into an active state.
.RS
@ -48,6 +53,12 @@ Get or set endpoint options.
Open a transmit or receive context.
.RS
.RE
.TP
.B fi_rx_size_left / fi_tx_size_left
Query the lower bound on how many RX/TX operations may be posted without
an operation returning -FI_EAGAIN.
.RS
.RE
.SH SYNOPSIS
.IP
.nf
@ -60,16 +71,16 @@ int\ fi_endpoint(struct\ fid_domain\ *domain,\ struct\ fi_info\ *info,
\ \ \ \ struct\ fid_ep\ **ep,\ void\ *context);
int\ fi_scalable_ep(struct\ fid_domain\ *domain,\ struct\ fi_info\ *info,
\ \ \ \ struct\ fid_sep\ **ep,\ void\ *context);
\ \ \ \ struct\ fid_ep\ **sep,\ void\ *context);
int\ fi_passive_ep(struct\ fi_fabric\ *fabric,\ struct\ fi_info\ *info,
\ \ \ \ struct\ fid_pep\ **pep,\ void\ *context);
int\ fi_tx_context(struct\ fid_ep\ *ep,\ int\ index,
int\ fi_tx_context(struct\ fid_ep\ *sep,\ int\ index,
\ \ \ \ struct\ fi_tx_attr\ *attr,\ struct\ fid_ep\ **tx_ep,
\ \ \ \ void\ *context);
int\ fi_rx_context(struct\ fid_ep\ *ep,\ int\ index,
int\ fi_rx_context(struct\ fid_ep\ *sep,\ int\ index,
\ \ \ \ struct\ fi_rx_attr\ *attr,\ struct\ fid_ep\ **rx_ep,
\ \ \ \ void\ *context);
@ -85,7 +96,9 @@ int\ fi_close(struct\ fid\ *ep);
int\ fi_ep_bind(struct\ fid_ep\ *ep,\ struct\ fid\ *fid,\ uint64_t\ flags);
int\ fi_scalable_ep_bind(struct\ fid_sep\ *sep,\ struct\ fid\ *fid,\ uint64_t\ flags);
int\ fi_scalable_ep_bind(struct\ fid_ep\ *sep,\ struct\ fid\ *fid,\ uint64_t\ flags);
int\ fi_pep_bind(struct\ fid_pep\ *pep,\ struct\ fid\ *fid,\ uint64_t\ flags);
int\ fi_enable(struct\ fid_ep\ *ep);
@ -100,6 +113,10 @@ int\ fi_getopt(struct\ fid_\ *ep,\ int\ level,\ int\ optname,
int\ fi_setopt(struct\ fid\ *ep,\ int\ level,\ int\ optname,
\ \ \ \ const\ void\ *optval,\ size_t\ optlen);
ssize_t\ fi_rx_size_left(struct\ fid_ep\ *ep);
ssize_t\ fi_tx_size_left(struct\ fid_ep\ *ep);
\f[]
.fi
.SH ARGUMENTS
@ -115,6 +132,8 @@ opened, obtained from fi_getinfo.
.PP
\f[I]sep\f[] : A scalable fabric endpoint.
.PP
\f[I]pep\f[] : A passive fabric endpoint.
.PP
\f[I]fid\f[] : Fabric identifier of an associated resource.
.PP
\f[I]context\f[] : Context associated with the endpoint or asynchronous
@ -142,9 +161,19 @@ incoming connection requests.
Active endpoints belong to access domains and can perform data
transfers.
.PP
Data transfer interfaces are bound to active endpoints.
Active endpoints may be connection-oriented or connectionless, and may
provide data reliability.
The data transfer interfaces -- messages (fi_msg), tagged messages
(fi_tagged), RMA (fi_rma), and atomics (fi_atomic) -- are associated
with active endpoints.
In basic configurations, an active endpoint has transmit and receive
queues.
In general, operations that generate traffic on the fabric are posted to
the transmit queue.
This includes all RMA and atomic operations, along with sent messages
and sent tagged messages.
Operations that post buffers for receiving incoming data are submitted
to the receive queue.
.PP
Active endpoints are created in the disabled state.
They must transition into an enabled state before accepting data
@ -188,6 +217,11 @@ fi_info connreq must reference the corresponding request.
.SS fi_close
.PP
Closes an endpoint and release all resources associated with it.
.PP
When closing a scalable endpoint, there must be no opened transmit
contexts, or receive contexts associated with the scalable endpoint.
If resources are still associated with the scalable endpoint when
attempting to close, the call will return -FI_EBUSY.
.SS fi_ep_bind
.PP
fi_ep_bind is used to associate an endpoint with hardware resources.
@ -207,9 +241,10 @@ This is specified using fi_ep_bind flags.
The following flags may be used separately or OR\[aq]ed together when
binding an endpoint to a completion domain CQ.
.PP
\f[I]FI_SEND\f[] : Directs the completion of outbound data transfer
\f[I]FI_TRANSMIT\f[] : Directs the completion of outbound data transfer
requests to the specified completion queue.
This includes send message, RMA, and atomic operations.
The FI_SEND flag may be used interchangeably.
.PP
\f[I]FI_RECV\f[] : Directs the notification of inbound data transfers to
the specified completion queue.
@ -285,6 +320,10 @@ successful RMA write or atomic operation is initiated from a remote
endpoint that targets the given endpoint.
.PP
Connectionless endpoints must be bound to a single address vector.
If an endpoint is using a shared transmit and/or receive context, the
shared contexts must be bound to the endpoint.
CQs, counters, AV, and shared contexts must be bound to endpoints before
they are enabled.
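A minimal binding sketch consistent with the flags above, assuming an endpoint, CQ, and AV have already been opened:

    int ret;

    /* transmit completions (sends, RMA, atomics) and receive completions
     * are directed to the same CQ; FI_SEND may be used in place of
     * FI_TRANSMIT */
    ret = fi_ep_bind(ep, &cq->fid, FI_TRANSMIT | FI_RECV);
    if (ret)
            return ret;

    /* connectionless endpoints must also be bound to an address vector */
    ret = fi_ep_bind(ep, &av->fid, 0);
    if (ret)
            return ret;

    /* all bindings must be in place before enabling the endpoint */
    ret = fi_enable(ep);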
.SS fi_scalable_ep_bind
.PP
fi_scalable_ep_bind is used to associate a scalable endpoint with an
@ -314,6 +353,9 @@ The endpoint must have been configured to support cancelable operations
Canceling an operation causes the fabric provider to search for the
operation and, if it is still pending, complete it as having been
canceled.
If multiple outstanding operations match the context parameter, only one
will be canceled.
In this case, the operation which is canceled is provider specific.
The cancel operation will complete within a bounded period of time.
.SS fi_alias
.PP
@ -373,6 +415,26 @@ needed on receives posted after the value has been changed.
It is recommended that applications that want to override the default
MIN_MULTI_RECV value set this option before enabling the corresponding
endpoint.
.SS fi_rx_size_left
.PP
The fi_rx_size_left call returns a lower bound on the number of receive
operations that may be posted to the given endpoint without that
operation returning -FI_EAGAIN.
Depending on the specific details of the subsequently posted receive
operations (e.g., number of iov entries, which receive function is
called, etc.)
, it may be possible to post more receive operations than originally
indicated by fi_rx_size_left.
.SS fi_tx_size_left
.PP
The fi_tx_size_left call returns a lower bound on the number of transmit
operations that may be posted to the given endpoint without that
operation returning -FI_EAGAIN.
Depending on the specific details of the subsequently posted transmit
operations (e.g., number of iov entries, which transmit function is
called, etc.)
, it may be possible to post more transmit operations than originally
indicated by fi_tx_size_left.
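A short sketch of how these calls are meant to be used; the returned value is only a lower bound, so a post may still succeed after the reported count has been consumed:

    static ssize_t try_send(struct fid_ep *ep, const void *buf, size_t len,
                            void *desc, fi_addr_t dest_addr, void *context)
    {
            /* at least this many transmits can be posted without -FI_EAGAIN */
            if (fi_tx_size_left(ep) < 1)
                    return -FI_EAGAIN;      /* drain completions first */

            return fi_send(ep, buf, len, desc, dest_addr, context);
    }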
.SH ENDPOINT ATTRIBUTES
.PP
The fi_ep_attr structure defines the set of attributes associated with
@ -755,6 +817,7 @@ struct\ fi_tx_attr\ {
\ \ \ \ size_t\ \ \ \ inject_size;
\ \ \ \ size_t\ \ \ \ size;
\ \ \ \ size_t\ \ \ \ iov_limit;
\ \ \ \ size_t\ \ \ \ rma_iov_limit;
};
\f[]
.fi
@ -796,6 +859,17 @@ operation.
.PP
\f[I]iov_limit\f[] : This is the maximum number of IO vectors
(scatter-gather elements) that a single posted operation may reference.
.PP
\f[I]rma_iov_limit\f[] : This is the maximum number of RMA IO vectors
(scatter-gather elements) that an RMA or atomic operation may reference.
The rma_iov_limit corresponds to the rma_iov_count values in RMA and
atomic operations.
See struct fi_msg_rma and struct fi_msg_atomic in fi_rma.3 and
fi_atomic.3, for additional details.
This limit applies to both the number of RMA IO vectors that may be
specified when initiating an operation from the local endpoint, as well
as the maximum number of IO vectors that may be carried in a single
request from a remote endpoint.
.SS fi_rx_context
.PP
Receive contexts are independent receive queues for receiving incoming
@ -896,12 +970,14 @@ processing, with the potential cost of serializing access across
multiple endpoints.
Support for sharable contexts is domain specific.
.PP
Conceptually, sharable contexts are transmit queues that may be accessed
by many endpoints.
Conceptually, sharable transmit contexts are transmit queues that may be
accessed by many endpoints.
The use of a shared transmit context is mostly opaque to an application.
Applications must allocate and bind shared transmit contexts to
endpoints, but otherwise transmit operations are posted directly to the
endpoint.
endpoints, but operations are posted directly to the endpoint.
Shared transmit contexts are not associated with completion queues or
counters.
Completed operations are posted to the CQs bound to the endpoint.
An endpoint may only be associated with a single shared transmit
context.
.PP
@ -909,7 +985,13 @@ Unlike shared transmit contexts, applications interact directly with
shared receive contexts.
Users post receive buffers directly to a shared receive context, with
the buffers usable by any endpoint bound to the shared receive context.
An endpoint may only be associated with a single receive context.
Shared receive contexts are not associated with completion queues or
counters.
Completed receive operations are posted to the CQs bound to the
endpoint.
An endpoint may only be associated with a single receive context, and
all connectionless endpoints associated with a shared receive context must
also share the same address vector.
.PP
Endpoints associated with a shared transmit context may use dedicated
receive contexts, and vice-versa.
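A minimal sketch of the scalable endpoint calls whose prototypes appear in the synopsis above (NULL attributes are assumed to be acceptable here and are used only for brevity):

    struct fid_ep *sep, *tx_ep, *rx_ep;
    int ret;

    ret = fi_scalable_ep(domain, info, &sep, NULL);
    if (ret)
            return ret;

    /* open index-addressable transmit and receive contexts on the
     * scalable endpoint */
    ret = fi_tx_context(sep, 0, NULL, &tx_ep, NULL);
    if (!ret)
            ret = fi_rx_context(sep, 0, NULL, &rx_ep, NULL);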

View file

@ -1,10 +1,12 @@
.TH fi_eq 3 "2015\-01\-01" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_eq 3 "2015\-02\-01" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
fi_eq - Event queue operations
.PP
fi_eq_open / fi_close : Open/close an event queue
.PP
fi_control : Control operation of EQ
.PP
fi_eq_read / fi_eq_readerr : Read an event from an event queue
.PP
fi_eq_write : Writes an event to an event queue
@ -66,6 +68,10 @@ information.
.PP
\f[I]flags\f[] : Additional flags to apply to the operation
.PP
\f[I]command\f[] : Command of control operation to perform on EQ.
.PP
\f[I]arg\f[] : Optional control argument
.PP
\f[I]prov_errno\f[] : Provider specific error value
.PP
\f[I]err_data\f[] : Provider specific error data related to a completion
@ -160,8 +166,10 @@ This field is ignored if wait_obj is not FI_WAIT_SET.
.SS fi_close
.PP
The fi_close call releases all resources associated with an event queue.
The EQ must not be bound to any other resources prior to being closed.
Any events which remain on the EQ when it is closed are lost.
.PP
The EQ must not be bound to any other objects prior to being closed,
otherwise the call will return -FI_EBUSY.
.SS fi_control
.PP
The fi_control call is used to access provider or implementation

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_errno 3 "2015\-01\-08" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_errno 3 "2015\-01\-08" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
fi_errno - fabric errors

View file

@ -1,4 +1,4 @@
.TH fi_fabric 3 "2015\-01\-08" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_fabric 3 "2015\-01\-24" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
fi_fabric - Fabric domain operations
@ -90,6 +90,10 @@ uint64_t flags
.PP
\f[I]FI_TYPE_MSG_ORDER\f[] : struct fi_ep_attr::msg_order field
.PP
\f[I]FI_TYPE_VERSION\f[] : Returns the library version of libfabric in
string form.
The data parameter is ignored.
.PP
fi_tostr() will return a pointer to an internal libfabric buffer that
should not be modified, and will be overwritten the next time fi_tostr()
is invoked.

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_getinfo 3 "2015\-01\-20" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_getinfo 3 "2015\-02\-01" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
fi_getinfo / fi_freeinfo - Obtain / free fabric interface information
@ -250,11 +250,11 @@ send and receive tagged messages.
Applications can use the FI_SEND and FI_RECV flags to optimize an
endpoint as send-only or receive-only.
.PP
\f[I]FI_ATOMICS\f[] : Specifies that the endpoint supports some set of
\f[I]FI_ATOMIC\f[] : Specifies that the endpoint supports some set of
atomic operations.
Endpoints supporting this capability support operations defined by
struct fi_ops_atomic.
In the absence of any relevant flags, FI_ATOMICS implies the ability to
In the absence of any relevant flags, FI_ATOMIC implies the ability to
initiate and be the target of remote atomic reads and writes.
Applications can use the FI_READ, FI_WRITE, FI_REMOTE_READ, and
FI_REMOTE_WRITE flags to restrict the types of atomic operations
@ -306,11 +306,11 @@ data, which may adversely affect performance.
.PP
\f[I]FI_READ\f[] : Indicates that the user requires an endpoint capable
of initiating reads against remote memory regions.
Remote reads include some RMA and atomic operations.
This flag requires that FI_RMA and/or FI_ATOMIC be set.
.PP
\f[I]FI_WRITE\f[] : Indicates that the user requires an endpoint capable
of initiating writes against remote memory regions.
Remote writes include some RMA and most atomic operations.
This flag requires that FI_RMA and/or FI_ATOMIC be set.
.PP
\f[I]FI_SEND\f[] : Indicates that the user requires an endpoint capable
of sending message data transfers.
@ -324,12 +324,11 @@ message functionality.
.PP
\f[I]FI_REMOTE_READ\f[] : Indicates that the user requires an endpoint
capable of receiving read memory operations from remote endpoints.
Remote read operations include some RMA and atomic operations.
This flag requires that FI_RMA and/or FI_ATOMIC be set.
.PP
\f[I]FI_REMOTE_WRITE\f[] : Indicates that the user requires an endpoint
capable of receiving write memory operations from remote endpoints.
Remote write operations include some RMA operations and most atomic
operations.
This flag requires that FI_RMA and/or FI_ATOMIC be set.
.PP
\f[I]FI_REMOTE_CQ_DATA\f[] : Applications may include a small message
with a data transfer that is placed directly into a remote event queue
@ -362,6 +361,15 @@ assumption that fi_cancel will not be used by the application.
triggered operations.
Endpoints supporting this capability must meet the usage model as described
by fi_trigger.3.
.PP
\f[I]FI_FENCE\f[] : Indicates that the endpoint supports the FI_FENCE
flag on data transfer operations.
Support requires tracking that all previous transmit requests to a
specified remote endpoint complete prior to initiating the fenced
operation.
Fenced operations are often used to enforce ordering between operations
that are not otherwise guaranteed by the underlying provider or
protocol.
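A hedged sketch of requesting a set of the capabilities above through fi_getinfo hints (fi_allocinfo/fi_freeinfo are assumed to be available for managing the hints structure):

    struct fi_info *hints, *info;
    int ret;

    hints = fi_allocinfo();
    if (!hints)
            return -FI_ENOMEM;

    /* messaging plus RMA/atomic initiator capabilities */
    hints->caps = FI_MSG | FI_RMA | FI_ATOMIC | FI_READ | FI_WRITE;

    ret = fi_getinfo(FI_VERSION(1, 0), NULL, NULL, 0, hints, &info);
    fi_freeinfo(hints);
    if (ret)
            return ret;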
.SH MODE
.PP
The operational mode bits are used to convey requirements that an
@ -449,6 +457,18 @@ associated with a registration request, and the resulting memory region
will start at a base address of 0.
Applications can request that providers select MR attributes by forcing
this bit set after fi_getinfo returns.
.PP
\f[I]FI_ASYNC_IOV\f[] : Applications can reference multiple data buffers
as part of a single transmit operation through the use of IO vectors
(SGEs).
Typically, the contents of an IO vector are copied by the provider into
an internal buffer area, or directly to the underlying hardware.
However, when a large number of IOV entries are supported, IOV buffering
may have a negative impact on performance and memory consumption.
The FI_ASYNC_IOV mode indicates that the application must provide the
buffering needed for the IO vectors.
When set, an application must not modify an IO vector until the
associated operation has completed.
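An illustrative fragment of what FI_ASYNC_IOV requires from the application (buffer and context names are made up):

    struct iovec iov[2];
    ssize_t ret;

    iov[0].iov_base = hdr;  iov[0].iov_len = hdr_len;
    iov[1].iov_base = data; iov[1].iov_len = data_len;

    ret = fi_sendv(ep, iov, NULL, 2, dest_addr, &send_ctx);

    /* with FI_ASYNC_IOV set in the mode bits, neither iov[] nor the
     * buffers it references may be modified until the completion for
     * &send_ctx has been retrieved */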
.SH ENDPOINT TYPES
.PP
\f[I]FI_EP_UNSPEC\f[] : The type of endpoint is not specified.

View file

@ -1,4 +1,4 @@
.TH fi_mr 3 "2014\-12\-19" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_mr 3 "2015\-01\-29" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
fi_mr - Memory region operations
@ -14,8 +14,8 @@ memory region
fi_mr_key : Return the remote key needed to access a registered memory
region
.PP
fi_mr_bind : Associate a registered memory region with an event
collector.
fi_mr_bind : Associate a registered memory region with a completion
queue or counter.
.SH SYNOPSIS
.IP
.nf
@ -39,7 +39,7 @@ void\ *\ fi_mr_desc(struct\ fid_mr\ *mr);
uint64_t\ fi_mr_key(struct\ fid_mr\ *mr);
int\ fi_mr_bind(struct\ fid_mr\ *mr,\ struct\ fid\ *ec,\ uint64_t\ flags);
int\ fi_mr_bind(struct\ fid_mr\ *mr,\ struct\ fid\ *bfid,\ uint64_t\ flags);
\f[]
.fi
.SH ARGUMENTS
@ -48,7 +48,7 @@ int\ fi_mr_bind(struct\ fid_mr\ *mr,\ struct\ fid\ *ec,\ uint64_t\ flags);
.PP
\f[I]mr\f[] : Memory region
.PP
\f[I]ec\f[] : Event queue or counter
\f[I]bfid\f[] : Fabric identifier of an associated resource.
.PP
\f[I]context\f[] : User specified context associated with the memory
region.
Fi_close is used to release all resources associated with registering
a memory region.
Once unregistered, further access to the registered memory is not
guaranteed.
.PP
When closing the MR, there must be no opened endpoints or counters
associated with the MR.
If resources are still associated with the MR when attempting to close,
the call will return -FI_EBUSY.
.SS fi_mr_desc / fi_mr_key
.PP
The local memory descriptor and remote protection key associated with a
@ -244,14 +249,14 @@ The memory registration must have completed successfully before invoking
these calls.
.SS fi_mr_bind
.PP
The fi_mr_bind function associates a memory region with an event counter
or queue, for providers that support the generation of events based on
fabric operations.
The fi_mr_bind function associates a memory region with a counter, for
providers that support the generation of completions based on fabric
operations.
The type of events tracked against the memory region is based on the
bitwise OR of the following flags.
.PP
\f[I]FI_WRITE\f[] : Generates an event whenever a remote RMA write or
atomic operation modify the memory region.
\f[I]FI_REMOTE_WRITE\f[] : Generates an event whenever a remote RMA
write or atomic operation modifies the memory region.
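A rough sketch of the binding described above, assuming mr and cntr were opened earlier:

    /* count remote RMA writes and atomics that modify this region */
    ret = fi_mr_bind(mr, &cntr->fid, FI_REMOTE_WRITE);
    if (ret)
            return ret;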
.SH FLAGS
.PP
The following flags are usable with fi_mr_reg, fi_mr_regv,

Просмотреть файл

@ -1,4 +1,4 @@
.TH fi_msg 3 "2015\-01\-23" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_msg 3 "2015\-01\-28" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
fi_msg - Message data transfer operations
@ -42,10 +42,6 @@ ssize_t\ fi_inject(struct\ fid_ep\ *ep,\ void\ *buf,\ size_t\ len,
ssize_t\ fi_senddata(struct\ fid_ep\ *ep,\ void\ *buf,\ size_t\ len,
\ \ \ \ void\ *desc,\ uint64_t\ data,\ fi_addr_t\ dest_addr,\ void\ *context);
ssize_t\ fi_rx_size_left(struct\ fid_ep\ *ep);
ssize_t\ fi_tx_size_left(struct\ fid_ep\ *ep);
\f[]
.fi
.SH ARGUMENTS
@ -101,10 +97,6 @@ asynchronously.
Users should not touch the posted data buffer(s) until the receive
operation has completed.
.PP
The "size_left" functions -- fi_rx_size_left, fi_tx_size_left -- return
a lower bound on the number of receive/send operations that may be
posted to the given endpoint without returning -FI_EAGAIN.
.PP
Completed message operations are reported to the user through one or
more event collectors associated with the endpoint.
Users provide context which are associated with each operation, and is
@ -181,26 +173,6 @@ The fi_recvmsg call supports posting buffers over both connected and
unconnected endpoints, with the ability to control the receive operation
per call through the use of flags.
The fi_recvmsg function takes a struct fi_msg as input.
.SS fi_rx_size_left
.PP
The fi_rx_size_left call returns a lower bound on the number of receive
operations that may be posted to the given endpoint without that
operation returning -FI_EAGAIN.
Depending on the specific details of the subsequently posted receive
operations (e.g., number of iov entries, which receive function is
called, etc.)
, it may be possible to post more receive operations than originally
indicated by fi_rx_size_left.
.SS fi_tx_size_left
.PP
The fi_tx_size_left call returns a lower bound on the number of send
operations that may be posted to the given endpoint without that
operation returning -FI_EAGAIN.
Depending on the specific details of the subsequently posted send
operations (e.g., number of iov entries, which send function is called,
etc.)
, it may be possible to post more send operations than originally
indicated by fi_tx_size_left.
.SH FLAGS
.PP
The fi_recvmsg and fi_sendmsg calls allow the user to specify flags
@ -254,6 +226,11 @@ FI_OPT_MIN_MULTI_RECV).
\f[I]FI_REMOTE_COMPLETE\f[] : Applies to fi_sendmsg.
Indicates that a completion should not be generated until the operation
has completed on the remote side.
.PP
\f[I]FI_FENCE\f[] : Applies to transmits.
Indicates that the requested operation, also known as the fenced
operation, be deferred until all previous operations targeting the same
target endpoint have completed.
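A sketch of passing the flag through fi_sendmsg (iov and ctx setup abbreviated; field names follow struct fi_msg):

    struct fi_msg msg = {
            .msg_iov   = iov,
            .desc      = NULL,
            .iov_count = 1,
            .addr      = dest_addr,
            .context   = &ctx,
            .data      = 0,
    };

    /* defer this send until all prior transmits to the same target
     * endpoint have completed */
    ret = fi_sendmsg(ep, &msg, FI_FENCE);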
.SH RETURN VALUE
.PP
Returns 0 on success.

View file

@ -1,4 +1,4 @@
.TH fi_poll 3 "2015\-01\-06" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_poll 3 "2015\-01\-29" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
fi_poll - Polling and wait set operations
@ -82,7 +82,7 @@ The use of this field is reserved and must be set to 0 by the caller.
.PP
The fi_close call releases all resources associated with a poll set.
The poll set must not be associated with any other resources prior to
being closed.
being closed, otherwise the call will return -FI_EBUSY.
.SS fi_poll_add
.PP
Associates an event queue or counter with a poll set.
@ -150,7 +150,7 @@ The use of this field is reserved and must be set to 0 by the caller.
.PP
The fi_close call releases all resources associated with a wait set.
The wait set must not be bound to any other opened resources prior to
being closed.
being closed, otherwise the call will return -FI_EBUSY.
.SS fi_wait
.PP
Waits on a wait set until one or more of its underlying wait objects is

View file

@ -1,4 +1,4 @@
.TH fi_rma 3 "2015\-01\-07" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_rma 3 "2015\-01\-28" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
fi_rma - Remote memory access operations
@ -231,6 +231,10 @@ data into a local buffer and transfer out of that buffer.
\f[I]FI_REMOTE_COMPLETE\f[] : Applies to fi_writemsg.
Indicates that a completion should not be generated until the operation
has completed on the remote side.
.PP
\f[I]FI_FENCE\f[] : Indicates that the requested operation, also known
as the fenced operation, be deferred until all previous operations
targeting the same target endpoint have completed.
.SH RETURN VALUE
.PP
Returns 0 on success.

View file

@ -1 +1 @@
.so man3/fi_msg.3
.so man3/fi_endpoint.3

View file

@ -1,4 +1,4 @@
.TH fi_tagged 3 "2015\-01\-06" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_tagged 3 "2015\-02\-01" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
fi_tagged - Tagged data transfer operations
@ -224,6 +224,10 @@ When set, FI_CLAIM indicates that when a search successfully finds a
matching message, the message is claimed by caller.
Subsequent searches cannot find the same message, although they may
match other messages that have the same tag.
.PP
An application can request that a buffered message be discarded by using
the FI_DISCARD flag as part of the search.
When set, FI_DISCARD indicates that any matching message be dropped.
.SH FLAGS
.PP
The fi_trecvmsg and fi_tsendmsg calls allow the user to specify flags
@ -265,12 +269,21 @@ data into a local buffer and transfer out of that buffer.
Indicates that a completion should not be generated until the operation
has completed on the remote side.
.PP
\f[I]FI_FENCE\f[] : Applies to transmits.
Indicates that the requested operation, also known as the fenced
operation, be deferred until all previous operations targeting the same
target endpoint have completed.
.PP
The following flags may be used with fi_tsearch.
.PP
\f[I]FI_CLAIM\f[] : Indicates that when a search successfully finds a
matching message, the message is claimed by caller.
Subsequent searches cannot find the same message, although they may
match other messages that have the same tag.
.PP
\f[I]FI_DISCARD\f[] : Indicates that if a search successfully finds a
matching message, that the message is discarded by the provider, as the
data is not needed by the application.
.SH RETURN VALUE
.PP
The tagged send and receive calls return 0 on success.

View file

@ -1,4 +1,4 @@
.TH fi_trigger 3 "2015\-01\-01" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_trigger 3 "2015\-01\-01" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
fi_trigger - Triggered operations

View file

@ -1 +1 @@
.so man3/fi_msg.3
.so man3/fi_endpoint.3

View file

@ -1,4 +1,4 @@
.TH fi_version 3 "2015\-01\-08" "Libfabric Programmer\[aq]s Manual" "\@VERSION\@"
.TH fi_version 3 "2015\-01\-08" "Libfabric Programmer\[aq]s Manual" "Libfabric v1.0.0-rc1"
.SH NAME
.PP
fi_version - Version of the library interfaces

View file

@ -40,62 +40,6 @@ extern "C" {
#define PSM_PFX "libfabric:psm"
#define PSMX_FREE_LIST_INIT(head, tail, type, count) \
do { \
int i; \
type *item; \
head = tail = NULL; \
for (i=0; i<count; i++) { \
item = calloc(sizeof(type), 1); \
if (!item) {\
fprintf(stderr, "%s: out of memory.\n", __func__); \
exit(-1); \
} \
item->next = head; \
head = item; \
if (!tail) \
tail = head; \
} \
} while (0)
#define PSMX_FREE_LIST_GET(head, tail, type, item) \
do { \
if (head) { \
item = head; \
head = head->next; \
if (!head) \
tail = head; \
item->next = NULL; \
} \
else { \
item = calloc(sizeof(type), 1); \
if (!item) {\
fprintf(stderr, "%s: out of memory.\n", __func__); \
exit(-1); \
} \
} \
} while (0)
#define PSMX_FREE_LIST_PUT(head, tail, type, item) \
do { \
memset(item, 0, sizeof(type)); \
if (tail) \
tail->next = item; \
else \
head = tail = item; \
} while (0)
#define PSMX_FREE_LIST_FINALIZE(head, tail, type) \
do { \
type *next; \
while (head) { \
next = head->next; \
free(head); \
head = next; \
} \
tail = NULL; \
} while (0)
#define PSMX_TIME_OUT 120
#define PSMX_OP_FLAGS (FI_INJECT | FI_MULTI_RECV | FI_EVENT | \
@ -253,6 +197,7 @@ struct psmx_multi_recv {
struct psmx_fid_fabric {
struct fid_fabric fabric;
struct psmx_fid_domain *active_domain;
};
struct psmx_fid_domain {
@ -306,13 +251,7 @@ struct psmx_cq_event {
} cqe;
int error;
uint64_t source;
struct psmx_cq_event *next;
};
struct psmx_cq_event_queue {
struct psmx_cq_event *head;
struct psmx_cq_event *tail;
size_t count;
struct slist_entry list_entry;
};
struct psmx_fid_wait {
@ -344,11 +283,13 @@ struct psmx_fid_cq {
struct psmx_fid_domain *domain;
int format;
int entry_size;
struct psmx_cq_event_queue event_queue;
struct psmx_cq_event_queue free_list;
size_t event_count;
struct slist event_queue;
struct slist free_list;
struct psmx_cq_event *pending_error;
struct psmx_fid_wait *wait;
int wait_cond;
int wait_is_local;
};
enum psmx_triggered_op {
@ -489,6 +430,7 @@ struct psmx_fid_cntr {
uint64_t counter_last_read;
uint64_t error_counter_last_read;
struct psmx_fid_wait *wait;
int wait_is_local;
struct psmx_trigger *trigger;
pthread_mutex_t trigger_lock;
};
@ -656,6 +598,12 @@ static inline void psmx_cntr_inc(struct psmx_fid_cntr *cntr)
psmx_wait_signal((struct fid_wait *)cntr->wait);
}
static inline void psmx_progress(struct psmx_fid_domain *domain)
{
psmx_cq_poll_mq(NULL, domain, NULL, 0, NULL);
psmx_am_progress(domain);
}
ssize_t _psmx_send(struct fid_ep *ep, const void *buf, size_t len,
void *desc, fi_addr_t dest_addr, void *context,
uint64_t flags);

Просмотреть файл

@ -1096,11 +1096,28 @@ static ssize_t psmx_atomic_readwritemsg(struct fid_ep *ep,
size_t result_count,
uint64_t flags)
{
if (!msg || msg->iov_count != 1)
void *buf;
size_t count;
if (!msg)
return -EINVAL;
return _psmx_atomic_readwrite(ep, msg->msg_iov[0].addr,
msg->msg_iov[0].count,
if (msg->op == FI_ATOMIC_READ) {
if (result_count != 1)
return -EINVAL;
buf = NULL;
count = resultv[0].count;
}
else {
if (msg->iov_count != 1)
return -EINVAL;
buf = msg->msg_iov[0].addr;
count = msg->msg_iov[0].count;
}
return _psmx_atomic_readwrite(ep, buf, count,
msg->desc ? msg->desc[0] : NULL,
resultv[0].addr,
result_desc ? result_desc[0] : NULL,

Просмотреть файл

@ -193,12 +193,19 @@ void psmx_cntr_add_trigger(struct psmx_fid_cntr *cntr, struct psmx_trigger *trig
psmx_cntr_check_trigger(cntr);
}
#define PSMX_CNTR_POLL_THRESHOLD 100
static uint64_t psmx_cntr_read(struct fid_cntr *cntr)
{
struct psmx_fid_cntr *cntr_priv;
static int poll_cnt = 0;
cntr_priv = container_of(cntr, struct psmx_fid_cntr, cntr);
if (poll_cnt++ == PSMX_CNTR_POLL_THRESHOLD) {
psmx_progress(cntr_priv->domain);
poll_cnt = 0;
}
cntr_priv->counter_last_read = cntr_priv->counter;
return cntr_priv->counter_last_read;
@ -264,8 +271,7 @@ static int psmx_cntr_wait(struct fid_cntr *cntr, uint64_t threshold, int timeout
break;
}
else {
psmx_cq_poll_mq(NULL, cntr_priv->domain, NULL, 0, NULL);
psmx_am_progress(cntr_priv->domain);
psmx_progress(cntr_priv->domain);
}
if (cntr_priv->counter >= threshold)
@ -293,6 +299,9 @@ static int psmx_cntr_close(fid_t fid)
cntr = container_of(fid, struct psmx_fid_cntr, cntr.fid);
if (cntr->wait && cntr->wait_is_local)
fi_close((fid_t)cntr->wait);
pthread_mutex_destroy(&cntr->trigger_lock);
free(cntr);
@ -351,6 +360,7 @@ int psmx_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
struct psmx_fid_cntr *cntr_priv;
struct psmx_fid_wait *wait = NULL;
struct fi_wait_attr wait_attr;
int wait_is_local = 0;
int events;
uint64_t flags;
int err;
@ -392,6 +402,7 @@ int psmx_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
&wait_attr, (struct fid_wait **)&wait);
if (err)
return err;
wait_is_local = 1;
break;
default:
@ -407,6 +418,7 @@ int psmx_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
cntr_priv->domain = domain_priv;
cntr_priv->events = events;
cntr_priv->wait = wait;
cntr_priv->wait_is_local = wait_is_local;
cntr_priv->flags = flags;
cntr_priv->cntr.fid.fclass = FI_CLASS_CNTR;
cntr_priv->cntr.fid.context = context;

View file

@ -32,40 +32,25 @@
#include "psmx.h"
#define PSMX_CQ_EMPTY(cq) (!cq->event_queue.head)
void psmx_cq_enqueue_event(struct psmx_fid_cq *cq, struct psmx_cq_event *event)
{
struct psmx_cq_event_queue *ceq = &cq->event_queue;
if (ceq->tail) {
ceq->tail->next = event;
ceq->tail = event;
}
else {
ceq->head = ceq->tail = event;
}
ceq->count++;
slist_insert_tail(&event->list_entry, &cq->event_queue);
cq->event_count++;
if (cq->wait)
psmx_wait_signal((struct fid_wait *)cq->wait);
}
static struct psmx_cq_event *psmx_cq_dequeue_event(struct psmx_fid_cq *cq)
{
struct psmx_cq_event_queue *ceq = &cq->event_queue;
struct psmx_cq_event *event;
struct slist_entry *entry;
if (!ceq->head)
if (slist_empty(&cq->event_queue))
return NULL;
event = ceq->head;
ceq->head = event->next;
ceq->count--;
if (!ceq->head)
ceq->tail = NULL;
entry = slist_remove_head(&cq->event_queue);
cq->event_count--;
event->next = NULL;
return event;
return container_of(entry, struct psmx_cq_event, list_entry);
}
struct psmx_cq_event *psmx_cq_create_event(struct psmx_fid_cq *cq,
@ -76,7 +61,17 @@ struct psmx_cq_event *psmx_cq_create_event(struct psmx_fid_cq *cq,
{
struct psmx_cq_event *event;
PSMX_FREE_LIST_GET(cq->free_list.head, cq->free_list.tail, struct psmx_cq_event, event);
if (!slist_empty(&cq->free_list)) {
event = container_of(slist_remove_head(&cq->free_list),
struct psmx_cq_event, list_entry);
}
else {
event = calloc(1, sizeof(*event));
if (!event) {
fprintf(stderr, "%s: out of memory.\n", __func__);
exit(-1);
}
}
if ((event->error = !!err)) {
event->cqe.err.op_context = op_context;
@ -117,7 +112,7 @@ struct psmx_cq_event *psmx_cq_create_event(struct psmx_fid_cq *cq,
break;
default:
fprintf(stderr, "%s: unsupported CC format %d\n", __func__, cq->format);
fprintf(stderr, "%s: unsupported CQ format %d\n", __func__, cq->format);
return NULL;
}
@ -168,8 +163,18 @@ static struct psmx_cq_event *psmx_cq_create_event_from_status(
event = event_in;
}
else {
PSMX_FREE_LIST_GET(cq->free_list.head, cq->free_list.tail,
struct psmx_cq_event, event);
if (!slist_empty(&cq->free_list)) {
event = container_of(slist_remove_head(&cq->free_list),
struct psmx_cq_event, list_entry);
}
else {
event = calloc(1, sizeof(*event));
if (!event) {
fprintf(stderr, "%s: out of memory.\n", __func__);
exit(-1);
}
}
event->error = !!psm_status->error_code;
}
@ -458,7 +463,7 @@ static ssize_t psmx_cq_readfrom(struct fid_cq *cq, void *buf, size_t count,
cq_priv = container_of(cq, struct psmx_fid_cq, cq);
if (PSMX_CQ_EMPTY(cq_priv) || !buf) {
if (slist_empty(&cq_priv->event_queue) || !buf) {
ret = psmx_cq_poll_mq(cq_priv, cq_priv->domain,
(struct psmx_cq_event *)buf, count, src_addr);
if (ret > 0)
@ -482,10 +487,8 @@ static ssize_t psmx_cq_readfrom(struct fid_cq *cq, void *buf, size_t count,
if (psmx_cq_get_event_src_addr(cq_priv, event, src_addr))
*src_addr = FI_ADDR_NOTAVAIL;
PSMX_FREE_LIST_PUT(cq_priv->free_list.head,
cq_priv->free_list.tail,
struct psmx_cq_event,
event);
memset(event, 0, sizeof(*event));
slist_insert_tail(&event->list_entry, &cq_priv->free_list);
read_count++;
buf += cq_priv->entry_size;
@ -595,7 +598,7 @@ static ssize_t psmx_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count,
threshold = 1;
/* NOTE: "cond" is only a hint, not a mandatory condition. */
event_count = cq_priv->event_queue.count;
event_count = cq_priv->event_count;
if (event_count < threshold) {
if (cq_priv->wait) {
psmx_wait_wait((struct fid_wait *)cq_priv->wait, timeout);
@ -607,7 +610,7 @@ static ssize_t psmx_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count,
break;
/* CQ may be updated asynchronously by the AM handlers */
if (cq_priv->event_queue.count > event_count)
if (cq_priv->event_count > event_count)
break;
if (timeout < 0)
@ -641,19 +644,20 @@ static const char *psmx_cq_strerror(struct fid_cq *cq, int prov_errno, const voi
static int psmx_cq_close(fid_t fid)
{
struct psmx_fid_cq *cq;
struct slist_entry *entry;
struct psmx_cq_event *item;
cq = container_of(fid, struct psmx_fid_cq, cq.fid);
PSMX_FREE_LIST_FINALIZE(cq->free_list.head, cq->free_list.tail, struct psmx_cq_event);
if (cq->wait) {
if (cq->wait->type == FI_WAIT_FD) {
close(cq->wait->fd[0]);
close(cq->wait->fd[1]);
}
free(cq->wait);
while (!slist_empty(&cq->free_list)) {
entry = slist_remove_head(&cq->free_list);
item = container_of(entry, struct psmx_cq_event, list_entry);
free(item);
}
if (cq->wait && cq->wait_is_local)
fi_close((fid_t)cq->wait);
free(cq);
return 0;
@ -703,9 +707,12 @@ int psmx_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
struct psmx_fid_domain *domain_priv;
struct psmx_fid_cq *cq_priv;
struct psmx_fid_wait *wait = NULL;
struct psmx_cq_event *event;
struct fi_wait_attr wait_attr;
int wait_is_local = 0;
int entry_size;
int err;
int i;
domain_priv = container_of(domain, struct psmx_fid_domain, domain);
switch (attr->format) {
@ -758,6 +765,7 @@ int psmx_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
&wait_attr, (struct fid_wait **)&wait);
if (err)
return err;
wait_is_local = 1;
break;
default:
@ -792,14 +800,25 @@ int psmx_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
cq_priv->wait = wait;
if (wait)
cq_priv->wait_cond = attr->wait_cond;
cq_priv->wait_is_local = wait_is_local;
cq_priv->cq.fid.fclass = FI_CLASS_CQ;
cq_priv->cq.fid.context = context;
cq_priv->cq.fid.ops = &psmx_fi_ops;
cq_priv->cq.ops = &psmx_cq_ops;
PSMX_FREE_LIST_INIT(cq_priv->free_list.head, cq_priv->free_list.tail,
struct psmx_cq_event, 64);
slist_init(&cq_priv->event_queue);
slist_init(&cq_priv->free_list);
#define PSMX_FREE_LIST_SIZE 64
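/* Pre-populating the free list lets the completion path reuse event
 * structures without allocating; psmx_cq_create_event_from_status() above
 * falls back to calloc() only when this pool is exhausted, and
 * psmx_cq_readfrom() returns consumed events to the list. */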
for (i=0; i<PSMX_FREE_LIST_SIZE; i++) {
event = calloc(1, sizeof(*event));
if (!event) {
fprintf(stderr, "%s: out of memory.\n", __func__);
exit(-1);
}
slist_insert_tail(&event->list_entry, &cq_priv->free_list);
}
*cq = &cq_priv->cq;
return 0;

View file

@ -64,6 +64,7 @@ static int psmx_domain_close(fid_t fid)
if (err != PSM_OK)
psm_ep_close(domain->psm_ep, PSM_EP_CLOSE_FORCE, 0);
domain->fabric->active_domain = NULL;
free(domain);
return 0;
@ -90,6 +91,7 @@ static struct fi_ops_domain psmx_domain_ops = {
int psmx_domain_open(struct fid_fabric *fabric, struct fi_info *info,
struct fid_domain **domain, void *context)
{
struct psmx_fid_fabric *fabric_priv;
struct psmx_fid_domain *domain_priv;
struct psm_ep_open_opts opts;
psm_uuid_t uuid;
@ -97,6 +99,12 @@ int psmx_domain_open(struct fid_fabric *fabric, struct fi_info *info,
psmx_debug("%s\n", __func__);
fabric_priv = container_of(fabric, struct psmx_fid_fabric, fabric);
if (fabric_priv->active_domain) {
psmx_debug("%s: a domain has been opened for the fabric\n");
return -EBUSY;
}
if (!info->domain_attr->name || strncmp(info->domain_attr->name, "psm", 3))
return -EINVAL;
@ -112,7 +120,7 @@ int psmx_domain_open(struct fid_fabric *fabric, struct fi_info *info,
domain_priv->domain.ops = &psmx_domain_ops;
domain_priv->domain.mr = &psmx_mr_ops;
domain_priv->mode = info->mode;
domain_priv->fabric = container_of(fabric, struct psmx_fid_fabric, fabric);
domain_priv->fabric = fabric_priv;
psm_ep_open_opts_get_defaults(&opts);
@ -154,6 +162,7 @@ int psmx_domain_open(struct fid_fabric *fabric, struct fi_info *info,
goto err_out_close_ep;
}
fabric_priv->active_domain = domain_priv;
*domain = &domain_priv->domain;
return 0;
@ -171,6 +180,10 @@ err_out:
int psmx_domain_check_features(struct psmx_fid_domain *domain, int ep_cap)
{
int rma_target;
rma_target = fi_rma_target_allowed(ep_cap);
if ((ep_cap & PSMX_CAPS) != ep_cap)
return -EINVAL;
@ -180,10 +193,10 @@ int psmx_domain_check_features(struct psmx_fid_domain *domain, int ep_cap)
if ((ep_cap & FI_MSG) && domain->msg_ep)
return -EBUSY;
if ((ep_cap & FI_RMA) && domain->rma_ep)
if ((ep_cap & FI_RMA) && rma_target && domain->rma_ep)
return -EBUSY;
if ((ep_cap & FI_ATOMICS) && domain->atomics_ep)
if ((ep_cap & FI_ATOMICS) && rma_target && domain->atomics_ep)
return -EBUSY;
return 0;
@ -192,6 +205,7 @@ int psmx_domain_check_features(struct psmx_fid_domain *domain, int ep_cap)
int psmx_domain_enable_ep(struct psmx_fid_domain *domain, struct psmx_fid_ep *ep)
{
uint64_t ep_cap = 0;
int rma_target;
if (ep)
ep_cap = ep->caps;
@ -214,10 +228,12 @@ int psmx_domain_enable_ep(struct psmx_fid_domain *domain, struct psmx_fid_ep *ep
domain->am_initialized = 1;
}
if (ep_cap & FI_RMA)
rma_target = fi_rma_target_allowed(ep_cap);
if ((ep_cap & FI_RMA) && rma_target)
domain->rma_ep = ep;
if (ep_cap & FI_ATOMICS)
if ((ep_cap & FI_ATOMICS) && rma_target)
domain->atomics_ep = ep;
if (ep_cap & FI_TAGGED)

View file

@ -237,11 +237,6 @@ static int psmx_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
return 0;
}
static inline int psmx_ep_progress(struct psmx_fid_ep *ep)
{
return psmx_cq_poll_mq(NULL, ep->domain, NULL, 0, NULL);
}
static int psmx_ep_control(fid_t fid, int command, void *arg)
{
struct fi_alias *alias;
@ -293,6 +288,8 @@ static struct fi_ops_ep psmx_ep_ops = {
.enable = psmx_ep_enable,
.tx_ctx = fi_no_tx_ctx,
.rx_ctx = fi_no_rx_ctx,
.rx_size_left = fi_no_rx_size_left,
.tx_size_left = fi_no_tx_size_left,
};
int psmx_ep_open(struct fid_domain *domain, struct fi_info *info,

View file

@ -53,7 +53,7 @@ static int psmx_reserve_tag_bits(int *caps, uint64_t *max_tag_value)
psmx_debug("%s: unable to reserve tag bit for FI_MSG support.\n"
"ADVICE: please reduce the asked max_tag_value, "
"or remove FI_MSG from the asked capabilities, "
"or set SFI_PSM_AM_MSG=1 to use an alternative (but less "
"or set OFI_PSM_AM_MSG=1 to use an alternative (but less "
"optimized) message queue implementation.\n",
__func__);
return -1;
@ -62,7 +62,7 @@ static int psmx_reserve_tag_bits(int *caps, uint64_t *max_tag_value)
psmx_debug("%s: unable to reserve tag bit for FI_MSG support. "
"FI_MSG is removed from the capabilities.\n"
"ADVICE: please reduce the asked max_tag_value, "
"or set SFI_PSM_AM_MSG=1 to use an alternative (but less "
"or set OFI_PSM_AM_MSG=1 to use an alternative (but less "
"optimized) message queue implementation.\n",
__func__);
ret_caps &= ~FI_MSG;
@ -77,7 +77,7 @@ static int psmx_reserve_tag_bits(int *caps, uint64_t *max_tag_value)
psmx_debug("%s: unable to reserve tag bit for tagged RMA acceleration.\n"
"ADVICE: please reduce the asked max_tag_value, "
"or remove FI_RMA from the asked capabilities, "
"or set SFI_PSM_TAGGED_RMA=0 to disable RMA acceleration.\n",
"or set OFI_PSM_TAGGED_RMA=0 to disable RMA acceleration.\n",
__func__);
return -1;
}
@ -85,7 +85,7 @@ static int psmx_reserve_tag_bits(int *caps, uint64_t *max_tag_value)
psmx_debug("%s: unable to reserve tag bit for tagged RMA acceleration. "
"FI_RMA is removed from the capabilities.\n"
"ADVICE: please reduce the asked max_tag_value, "
"or set SFI_PSM_TAGGED_RMA=0 to disable RMA acceleration.\n",
"or set OFI_PSM_TAGGED_RMA=0 to disable RMA acceleration.\n",
__func__);
ret_caps &= ~FI_RMA;
}
@ -369,12 +369,12 @@ PSM_INI
psmx_debug("%s\n", __func__);
psmx_env.name_server = psmx_get_int_env("SFI_PSM_NAME_SERVER", 0);
psmx_env.am_msg = psmx_get_int_env("SFI_PSM_AM_MSG", 0);
psmx_env.tagged_rma = psmx_get_int_env("SFI_PSM_TAGGED_RMA", 0);
psmx_env.debug = psmx_get_int_env("SFI_PSM_DEBUG", 0);
psmx_env.warning = psmx_get_int_env("SFI_PSM_WARNING", 1);
psmx_env.uuid = getenv("SFI_PSM_UUID");
psmx_env.name_server = psmx_get_int_env("OFI_PSM_NAME_SERVER", 0);
psmx_env.am_msg = psmx_get_int_env("OFI_PSM_AM_MSG", 0);
psmx_env.tagged_rma = psmx_get_int_env("OFI_PSM_TAGGED_RMA", 0);
psmx_env.debug = psmx_get_int_env("OFI_PSM_DEBUG", 0);
psmx_env.warning = psmx_get_int_env("OFI_PSM_WARNING", 1);
psmx_env.uuid = getenv("OFI_PSM_UUID");
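/* Illustrative note (assumption, not part of this change): applications set
 * these knobs in the environment before the provider initializes, e.g.
 * setenv("OFI_PSM_AM_MSG", "1", 1) ahead of the first fi_getinfo() call. */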
psm_error_register_handler(NULL, PSM_ERRHANDLER_NO_HANDLER);
@ -388,12 +388,12 @@ PSM_INI
return NULL;
}
check_version = psmx_get_int_env("SFI_PSM_VERSION_CHECK", 1);
check_version = psmx_get_int_env("OFI_PSM_VERSION_CHECK", 1);
if (check_version && major != PSM_VERNO_MAJOR) {
fprintf(stderr, "%s: PSM version mismatch: header %d.%d, library %d.%d.\n",
__func__, PSM_VERNO_MAJOR, PSM_VERNO_MINOR, major, minor);
fprintf(stderr, "\tSet envar SFI_PSM_VERSION_CHECK=0 to bypass version check.\n");
fprintf(stderr, "\tSet envar OFI_PSM_VERSION_CHECK=0 to bypass version check.\n");
return NULL;
}

View file

@ -359,7 +359,5 @@ struct fi_ops_msg psmx_msg_ops = {
.inject = psmx_inject,
.senddata = fi_no_msg_senddata,
.injectdata = fi_no_msg_injectdata,
.rx_size_left = fi_no_msg_rx_size_left,
.tx_size_left = fi_no_msg_tx_size_left,
};

View file

@ -626,7 +626,5 @@ struct fi_ops_msg psmx_msg2_ops = {
.inject = psmx_inject2,
.senddata = fi_no_msg_senddata,
.injectdata = fi_no_msg_injectdata,
.rx_size_left = fi_no_msg_rx_size_left,
.tx_size_left = fi_no_msg_tx_size_left,
};

View file

@ -82,7 +82,7 @@ static int psmx_poll_poll(struct fid_poll *pollset, void **context, int count)
poll_priv = container_of(pollset, struct psmx_fid_poll, poll.fid);
psmx_cq_poll_mq(NULL, poll_priv->domain, NULL, 0, NULL);
psmx_progress(poll_priv->domain);
head = &poll_priv->poll_list_head;
for (p = head->next; p != head && ret_count < count; p = p->next) {
@ -90,7 +90,7 @@ static int psmx_poll_poll(struct fid_poll *pollset, void **context, int count)
switch (list_item->fid->fclass) {
case FI_CLASS_CQ:
cq = container_of(list_item->fid, struct psmx_fid_cq, cq);
if (cq->event_queue.count) {
if (cq->event_count) {
*context++ = cq->cq.fid.context;
ret_count++;
}

View file

@ -82,10 +82,10 @@ static void psmx_name_server_cleanup(void *args)
/*************************************************************
* A simple name resolution mechanism for client-server style
* applications. The server side has to run first. The client
* side then passes the server name as the first parameter
* side then passes the server name as the "node" parameter
* of fi_getinfo call and the resulting provider info should
* have the transport address of the server in the dest_addr
* field. Both side has to use the same UUID.
* have the transport address of the server in the "dest_addr"
* field. Both sides have to use the same UUID.
*************************************************************/
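A minimal client-side sketch of the flow described above (illustrative only,
not part of this patch; the function name and error handling are hypothetical).
The server's hostname is passed as the "node" argument of fi_getinfo(), and the
resolved transport address comes back in "dest_addr":

#include <stdio.h>
#include <rdma/fabric.h>

static int resolve_psm_server(const char *server_name)
{
	struct fi_info *info;
	int ret;

	/* No FI_SOURCE flag, so "node" names the remote server to resolve. */
	ret = fi_getinfo(FI_VERSION(1, 0), server_name, NULL, 0, NULL, &info);
	if (ret) {
		fprintf(stderr, "fi_getinfo failed: %d\n", ret);
		return ret;
	}

	/* info->dest_addr / info->dest_addrlen now carry the server's transport
	 * address; both sides must also agree on the UUID (see OFI_PSM_UUID). */
	printf("resolved %zu bytes of destination address\n", info->dest_addrlen);

	fi_freeinfo(info);
	return 0;
}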
void *psmx_name_server(void *args)
{
@ -181,7 +181,7 @@ void *psmx_resolve_name(const char *servername, int port)
n = getaddrinfo(servername, service, &hints, &res);
if (n < 0) {
fprintf(stderr, "%s:(%s:%d):%s\n", __func__, servername, port, gai_strerror(n));
psmx_debug("%s:(%s:%d):%s\n", __func__, servername, port, gai_strerror(n));
free(service);
return NULL;
}
@ -200,7 +200,7 @@ void *psmx_resolve_name(const char *servername, int port)
free(service);
if (sockfd < 0) {
fprintf(stderr, "%s: couldn't connect to %s:%d\n", __func__, servername, port);
psmx_debug("%s: couldn't connect to %s:%d\n", __func__, servername, port);
return NULL;
}

View file

@ -32,6 +32,78 @@
#include "psmx.h"
/* It is necessary to have a separate thread making progress in order
* for the wait functions to succeed. This thread is only created when
* wait functions are called. In order to minimize performance
* impact, it only goes active during the time when wait calls are
* blocked.
*/
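/* Illustrative note: this thread matters for blocking calls such as
 * fi_cq_sread() or fi_cntr_wait() on objects opened with a wait object;
 * psmx_wait_wait() below enables it on entry and disables it again once the
 * blocking call returns, so PSM only gets polled while a caller is parked. */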
static pthread_t psmx_wait_thread;
static pthread_mutex_t psmx_wait_mutex;
static pthread_cond_t psmx_wait_cond;
static volatile int psmx_wait_thread_ready = 0;
static volatile int psmx_wait_thread_enabled = 0;
static volatile int psmx_wait_thread_busy = 0;
static void *psmx_wait_progress(void *args)
{
struct psmx_fid_domain *domain = args;
psmx_wait_thread_ready = 1;
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
while (1) {
pthread_mutex_lock(&psmx_wait_mutex);
pthread_cond_wait(&psmx_wait_cond, &psmx_wait_mutex);
pthread_mutex_unlock(&psmx_wait_mutex);
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
psmx_wait_thread_busy = 1;
while (psmx_wait_thread_enabled)
psmx_progress(domain);
psmx_wait_thread_busy = 0;
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
}
return NULL;
}
static void psmx_wait_start_progress(struct psmx_fid_domain *domain)
{
pthread_attr_t attr;
int err;
if (!domain)
return;
if (!psmx_wait_thread) {
pthread_mutex_init(&psmx_wait_mutex, NULL);
pthread_cond_init(&psmx_wait_cond, NULL);
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
err = pthread_create(&psmx_wait_thread, &attr, psmx_wait_progress, (void *)domain);
if (err)
fprintf(stderr, "%s: cannot create wait progress thread\n", __func__);
pthread_attr_destroy(&attr);
while (!psmx_wait_thread_ready)
;
}
psmx_wait_thread_enabled = 1;
pthread_cond_signal(&psmx_wait_cond);
}
static void psmx_wait_stop_progress(void)
{
psmx_wait_thread_enabled = 0;
while (psmx_wait_thread_busy)
;
}
int psmx_wait_get_obj(struct psmx_fid_wait *wait, void *arg)
{
void *obj_ptr;
@ -76,6 +148,9 @@ int psmx_wait_wait(struct fid_wait *wait, int timeout)
int err = 0;
wait_priv = container_of(wait, struct psmx_fid_wait, wait.fid);
psmx_wait_start_progress(wait_priv->fabric->active_domain);
switch (wait_priv->type) {
case FI_WAIT_UNSPEC:
/* TODO: optimized custom wait */
@ -98,6 +173,8 @@ int psmx_wait_wait(struct fid_wait *wait, int timeout)
break;
}
psmx_wait_stop_progress();
return err;
}

View file

@ -58,22 +58,24 @@
#define SOCK_EP_MAX_MSG_SZ (1<<23)
#define SOCK_EP_MAX_INJECT_SZ ((1<<8) - 1)
#define SOCK_EP_MAX_BUFF_RECV (1<<23)
#define SOCK_EP_MAX_ORDER_RAW_SZ (0)
#define SOCK_EP_MAX_ORDER_WAR_SZ (0)
#define SOCK_EP_MAX_ORDER_WAW_SZ (0)
#define SOCK_EP_MAX_BUFF_RECV (1<<20)
#define SOCK_EP_MAX_ORDER_RAW_SZ SOCK_EP_MAX_MSG_SZ
#define SOCK_EP_MAX_ORDER_WAR_SZ SOCK_EP_MAX_MSG_SZ
#define SOCK_EP_MAX_ORDER_WAW_SZ SOCK_EP_MAX_MSG_SZ
#define SOCK_EP_MEM_TAG_FMT (0)
#define SOCK_EP_MAX_EP_CNT (128)
#define SOCK_EP_MAX_TX_CNT (16)
#define SOCK_EP_MAX_RX_CNT (16)
#define SOCK_EP_MAX_IOV_LIMIT (8)
#define SOCK_EP_MAX_TX_CTX_SZ (1<<12)
#define SOCK_EP_TX_SZ (256)
#define SOCK_EP_TX_ENTRY_SZ (256)
#define SOCK_EP_MIN_MULTI_RECV (64)
#define SOCK_EP_MAX_ATOMIC_SZ (512)
#define SOCK_EP_MAX_ATOMIC_SZ (256)
#define SOCK_EP_MAX_CTX_BITS (16)
#define SOCK_PE_POLL_TIMEOUT (100000)
#define SOCK_PE_MAX_ENTRIES (128)
#define SOCK_PE_MIN_ENTRIES (1)
#define SOCK_EQ_DEF_SZ (1<<8)
#define SOCK_CQ_DEF_SZ (1<<8)
@ -82,13 +84,18 @@
#define SOCK_CQ_DATA_SIZE (sizeof(uint64_t))
#define SOCK_TAG_SIZE (sizeof(uint64_t))
#define SOCK_PEP_LISTENER_TIMEOUT (10000)
#define SOCK_CM_COMM_TIMEOUT (5000)
#define SOCK_EP_MAX_RETRY (5)
#define SOCK_EP_MAX_CM_DATA_SZ (256)
#define SOCK_EP_RDM_CAP (FI_MSG | FI_RMA | FI_TAGGED | FI_ATOMICS | FI_DYNAMIC_MR | \
FI_NAMED_RX_CTX | FI_BUFFERED_RECV | FI_DIRECTED_RECV | \
FI_INJECT | FI_MULTI_RECV | FI_SOURCE | FI_READ | FI_WRITE | \
FI_RECV | FI_SEND | FI_REMOTE_READ | FI_REMOTE_WRITE | \
FI_REMOTE_CQ_DATA | FI_COMPLETION | FI_REMOTE_SIGNAL | \
FI_REMOTE_COMPLETE | FI_PEEK | FI_CANCEL)
#define SOCK_EP_RDM_CAP (FI_MSG | FI_RMA | FI_TAGGED | FI_ATOMICS | \
FI_DYNAMIC_MR | FI_NAMED_RX_CTX | FI_BUFFERED_RECV | \
FI_DIRECTED_RECV | FI_INJECT | FI_MULTI_RECV | \
FI_SOURCE | FI_READ | FI_WRITE | FI_RECV | FI_SEND | \
FI_REMOTE_READ | FI_REMOTE_WRITE | FI_REMOTE_CQ_DATA | \
FI_COMPLETION | FI_REMOTE_SIGNAL | FI_REMOTE_COMPLETE | \
FI_MORE | FI_CANCEL | FI_FENCE)
#define SOCK_EP_MSG_CAP SOCK_EP_RDM_CAP
@ -96,14 +103,10 @@
FI_NAMED_RX_CTX | FI_BUFFERED_RECV | FI_DIRECTED_RECV | \
FI_INJECT | FI_MULTI_RECV | FI_SOURCE | FI_RECV | FI_SEND | \
FI_REMOTE_CQ_DATA | FI_COMPLETION | FI_REMOTE_SIGNAL | \
FI_REMOTE_COMPLETE | FI_PEEK | FI_CANCEL)
#define SOCK_DEF_OPS (FI_SEND | FI_RECV | \
FI_BUFFERED_RECV | FI_READ | FI_WRITE | \
FI_REMOTE_READ | FI_REMOTE_WRITE)
#define SOCK_DGRAM_DEF_OPS (FI_SEND | FI_RECV | FI_BUFFERED_RECV)
FI_REMOTE_COMPLETE | FI_MORE | FI_CANCEL | \
FI_FENCE)
#define SOCK_DEF_OPS (FI_SEND | FI_RECV | FI_BUFFERED_RECV)
#define SOCK_EP_MSG_ORDER (FI_ORDER_RAR | FI_ORDER_RAW | FI_ORDER_RAS| \
FI_ORDER_WAR | FI_ORDER_WAW | FI_ORDER_WAS | \
@ -117,6 +120,8 @@
#define SOCK_MAJOR_VERSION 1
#define SOCK_MINOR_VERSION 0
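/* Inject path is taken only when FI_INJECT is requested without FI_FENCE. */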
#define SOCK_INJECT_OK(_flgs) (((_flgs) & FI_INJECT) && !((_flgs) & FI_FENCE))
struct sock_fabric{
struct fid_fabric fab_fid;
atomic_t ref;
@ -137,6 +142,7 @@ struct sock_conn_map {
int size;
struct sock_domain *domain;
fastlock_t lock;
struct sockaddr_storage curr_addr;
};
struct sock_domain {
@ -145,6 +151,7 @@ struct sock_domain {
struct sock_fabric *fab;
fastlock_t lock;
atomic_t ref;
short ep_count;
struct sock_eq *eq;
struct sock_eq *mr_eq;
@ -155,7 +162,7 @@ struct sock_domain {
struct sock_conn_map r_cmap;
pthread_t listen_thread;
int listening;
int service;
char service[NI_MAXSERV];
int signal_fds[2];
struct sockaddr_storage src_addr;
};
@ -195,7 +202,8 @@ struct sock_mr {
struct sock_av_addr {
struct sockaddr_storage addr;
uint8_t valid;
uint8_t reserved[7];
uint16_t rem_ep_id;
uint8_t reserved[5];
};
struct sock_av_table_hdr {
@ -366,18 +374,17 @@ struct sock_comp {
};
struct sock_ep {
union {
struct fid_ep ep;
struct fid_sep sep;
struct fid_pep pep;
} fid;
struct fid_ep ep;
size_t fclass;
uint64_t op_flags;
uint8_t connected;
uint8_t tx_shared;
uint8_t rx_shared;
uint16_t ep_id;
uint16_t rem_ep_id;
uint16_t buffered_len;
uint16_t min_multi_recv;
char reserved[4];
atomic_t ref;
struct sock_comp comp;
@ -407,24 +414,25 @@ struct sock_ep {
struct sockaddr_in *dest_addr;
fi_addr_t conn_addr;
uint16_t key;
int socket;
pthread_t listener_thread;
int do_listen;
};
struct sock_pep {
struct fid_pep pep;
struct fid_pep pep;
struct sock_fabric *sock_fab;
struct sock_domain *dom;
int do_listen;
pthread_t listener_thread;
int signal_fds[2];
int socket;
int listener_sock_fd;
struct sockaddr_in src_addr;
struct fi_info info;
int sock_fd;
char service[NI_MAXSERV];
struct sock_eq *eq;
struct sock_cq *send_cq;
struct sock_cq *recv_cq;
uint64_t op_flags;
uint64_t pep_cap;
struct sock_eq *eq;
};
struct sock_rx_entry {
@ -432,6 +440,7 @@ struct sock_rx_entry {
uint8_t is_buffered;
uint8_t is_busy;
uint8_t is_claimed;
uint8_t is_complete;
uint8_t reserved[5];
uint64_t used;
@ -523,10 +532,10 @@ struct sock_tx_ctx {
struct sock_msg_hdr{
uint8_t version;
uint8_t op_type;
uint16_t rx_id;
uint16_t pe_entry_id;
uint8_t rx_id;
uint8_t dest_iov_len;
uint8_t reserved[1];
uint16_t ep_id;
uint16_t pe_entry_id;
uint64_t flags;
uint64_t msg_len;
@ -660,7 +669,7 @@ struct sock_pe_entry{
struct sock_pe{
struct sock_domain *domain;
int num_free_entries;
struct sock_pe_entry pe_table[SOCK_PE_MAX_ENTRIES];
fastlock_t lock;
@ -700,10 +709,16 @@ struct sock_cq {
sock_cq_report_fn report_completion;
};
struct sock_conn_req {
int type;
struct sock_conn_hdr {
uint8_t type;
uint8_t reserved[7];
fid_t c_fid;
fid_t s_fid;
};
struct sock_conn_req {
struct sock_conn_hdr hdr;
uint16_t ep_id;
struct fi_info info;
struct sockaddr_in src_addr;
struct sockaddr_in dest_addr;
@ -712,14 +727,20 @@ struct sock_conn_req {
struct fi_ep_attr ep_attr;
struct fi_domain_attr domain_attr;
struct fi_fabric_attr fabric_attr;
struct sockaddr_in from_addr;
char user_data[0];
};
struct sock_conn_response {
struct sock_conn_hdr hdr;
char user_data[0];
};
enum {
SOCK_CONNREQ,
SOCK_ACCEPT,
SOCK_REJECT,
SOCK_CONNECTED,
SOCK_SHUTDOWN
SOCK_CONN_REQ,
SOCK_CONN_ACCEPT,
SOCK_CONN_REJECT,
SOCK_CONN_SHUTDOWN,
};
int sock_verify_info(struct fi_info *hints);
@ -757,19 +778,20 @@ struct sock_conn *sock_ep_lookup_conn(struct sock_ep *ep);
int sock_rdm_ep(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context);
int sock_rdm_sep(struct fid_domain *domain, struct fi_info *info,
struct fid_sep **sep, void *context);
struct fid_ep **sep, void *context);
int sock_dgram_ep(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context);
int sock_dgram_sep(struct fid_domain *domain, struct fi_info *info,
struct fid_sep **sep, void *context);
struct fid_ep **sep, void *context);
int sock_msg_ep(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context);
int sock_msg_sep(struct fid_domain *domain, struct fi_info *info,
struct fid_sep **sep, void *context);
struct fid_ep **sep, void *context);
int sock_msg_passive_ep(struct fid_fabric *fabric, struct fi_info *info,
struct fid_pep **pep, void *context);
int sock_ep_enable(struct fid_ep *ep);
int sock_stx_ctx(struct fid_domain *domain,
@ -791,8 +813,7 @@ ssize_t sock_eq_report_event(struct sock_eq *sock_eq, uint32_t event,
const void *buf, size_t len, uint64_t flags);
ssize_t sock_eq_report_error(struct sock_eq *sock_eq, fid_t fid, void *context,
int err, int prov_errno, void *err_data);
int sock_eq_openwait(struct sock_eq *eq, char *service);
struct fi_info * sock_ep_msg_process_info(struct sock_conn_req *req);
int sock_eq_openwait(struct sock_eq *eq, const char *service);
int sock_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
struct fid_cntr **cntr, void *context);
@ -816,10 +837,10 @@ struct sock_mr *sock_mr_verify_desc(struct sock_domain *domain, void *desc,
struct sock_mr * sock_mr_get_entry(struct sock_domain *domain, uint16_t key);
struct sock_rx_ctx *sock_rx_ctx_alloc(struct fi_rx_attr *attr, void *context);
struct sock_rx_ctx *sock_rx_ctx_alloc(const struct fi_rx_attr *attr, void *context);
void sock_rx_ctx_free(struct sock_rx_ctx *rx_ctx);
struct sock_tx_ctx *sock_tx_ctx_alloc(struct fi_tx_attr *attr, void *context);
struct sock_tx_ctx *sock_tx_ctx_alloc(const struct fi_tx_attr *attr, void *context);
void sock_tx_ctx_free(struct sock_tx_ctx *tx_ctx);
void sock_tx_ctx_start(struct sock_tx_ctx *tx_ctx);
void sock_tx_ctx_write(struct sock_tx_ctx *tx_ctx, const void *buf, size_t len);
@ -842,8 +863,8 @@ fi_addr_t _sock_av_lookup(struct sock_av *av, struct sockaddr *addr);
fi_addr_t sock_av_get_fiaddr(struct sock_av *av, struct sock_conn *conn);
fi_addr_t sock_av_lookup_key(struct sock_av *av, int key);
struct sock_conn *sock_av_lookup_addr(struct sock_av *av, fi_addr_t addr);
int sock_av_compare_addr(struct sock_av *av,
fi_addr_t addr1, fi_addr_t addr2);
int sock_av_compare_addr(struct sock_av *av, fi_addr_t addr1, fi_addr_t addr2);
uint16_t sock_av_lookup_ep_id(struct sock_av *av, fi_addr_t addr);
struct sock_conn *sock_conn_map_lookup_key(struct sock_conn_map *conn_map,
@ -866,6 +887,8 @@ void sock_pe_add_tx_ctx(struct sock_pe *pe, struct sock_tx_ctx *ctx);
void sock_pe_add_rx_ctx(struct sock_pe *pe, struct sock_rx_ctx *ctx);
int sock_pe_progress_rx_ctx(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx);
int sock_pe_progress_tx_ctx(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx);
void sock_pe_remove_tx_ctx(struct sock_tx_ctx *tx_ctx);
void sock_pe_remove_rx_ctx(struct sock_rx_ctx *rx_ctx);
void sock_pe_finalize(struct sock_pe *pe);

View file

@ -74,7 +74,7 @@ static ssize_t sock_ep_tx_atomic(struct fid_ep *ep,
switch (ep->fid.fclass) {
case FI_CLASS_EP:
sock_ep = container_of(ep, struct sock_ep, fid.ep);
sock_ep = container_of(ep, struct sock_ep, ep);
tx_ctx = sock_ep->tx_ctx;
break;
@ -97,11 +97,13 @@ static ssize_t sock_ep_tx_atomic(struct fid_ep *ep,
} else {
conn = sock_av_lookup_addr(tx_ctx->av, msg->addr);
}
assert(conn);
if (!conn)
return -FI_EAGAIN;
src_len = 0;
datatype_sz = fi_datatype_size(msg->datatype);
if (flags & FI_INJECT) {
if (SOCK_INJECT_OK(flags)) {
for (i=0; i< msg->iov_count; i++) {
src_len += (msg->msg_iov[i].count * datatype_sz);
}
@ -130,7 +132,7 @@ static ssize_t sock_ep_tx_atomic(struct fid_ep *ep,
tx_op.atomic.res_iov_len = result_count;
tx_op.atomic.cmp_iov_len = compare_count;
if (flags & FI_INJECT)
if (SOCK_INJECT_OK(flags))
tx_op.src_iov_len = src_len;
else
tx_op.src_iov_len = msg->iov_count;
@ -147,7 +149,8 @@ static ssize_t sock_ep_tx_atomic(struct fid_ep *ep,
sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(uint64_t));
}
if (flags & FI_INJECT) {
src_len = 0;
if (SOCK_INJECT_OK(flags)) {
for (i=0; i< msg->iov_count; i++) {
sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].addr,
msg->msg_iov[i].count * datatype_sz);
@ -469,6 +472,7 @@ static int sock_ep_atomic_valid(struct fid_ep *ep, enum fi_datatype datatype,
switch(datatype){
case FI_FLOAT:
case FI_DOUBLE:
case FI_LONG_DOUBLE:
if (op == FI_BOR || op == FI_BAND ||
op == FI_BXOR || op == FI_MSWAP)
return -FI_ENOENT;
@ -476,7 +480,6 @@ static int sock_ep_atomic_valid(struct fid_ep *ep, enum fi_datatype datatype,
case FI_FLOAT_COMPLEX:
case FI_DOUBLE_COMPLEX:
case FI_LONG_DOUBLE:
case FI_LONG_DOUBLE_COMPLEX:
return -FI_ENOENT;
default:

View file

@ -133,6 +133,25 @@ struct sock_conn *sock_av_lookup_addr(struct sock_av *av,
return sock_conn_map_lookup_key(av->cmap, av->key[idx]);
}
uint16_t sock_av_lookup_ep_id(struct sock_av *av, fi_addr_t addr)
{
int index = ((uint64_t)addr & av->mask);
struct sock_av_addr *av_addr;
if (index >= av->table_hdr->stored || index < 0) {
return AF_INET;
}
if (!av->cmap) {
SOCK_LOG_ERROR("EP with no AV bound\n");
return 0;
}
av_addr = idm_lookup(&av->addr_idm, index);
return av_addr->rem_ep_id;
}
static inline void sock_av_report_success(struct sock_av *av,
int *index, uint64_t flags)
{
@ -158,6 +177,7 @@ static int sock_check_table_in(struct sock_av *_av, struct sockaddr_in *addr,
char sa_ip[INET_ADDRSTRLEN];
struct sock_av_addr *av_addr;
size_t new_count, table_sz;
uint16_t rem_ep_id;
if ((_av->attr.flags & FI_EVENT) && !_av->eq)
return -FI_ENOEQ;
@ -166,8 +186,13 @@ static int sock_check_table_in(struct sock_av *_av, struct sockaddr_in *addr,
for (i = 0; i < count; i++) {
for (j = 0; j < _av->table_hdr->stored; j++) {
av_addr = &_av->table[j];
if (memcmp(&av_addr->addr, &addr[i],
sizeof(struct sockaddr_in)) == 0) {
rem_ep_id = ((struct sockaddr_in*)&addr[i])->sin_family;
((struct sockaddr_in*)&addr[i])->sin_family = AF_INET;
if ((memcmp(&av_addr->addr, &addr[i],
sizeof(struct sockaddr_in)) == 0) &&
av_addr->rem_ep_id == rem_ep_id) {
SOCK_LOG_INFO("Found addr in shared av\n");
if (idm_set(&_av->addr_idm, _av->key[j], av_addr) < 0) {
if (fi_addr)
@ -215,6 +240,9 @@ static int sock_check_table_in(struct sock_av *_av, struct sockaddr_in *addr,
}
}
rem_ep_id = ((struct sockaddr_in*)&addr[i])->sin_family;
((struct sockaddr_in*)&addr[i])->sin_family = AF_INET;
av_addr = &_av->table[_av->table_hdr->stored];
memcpy(sa_ip, inet_ntoa((&addr[i])->sin_addr), INET_ADDRSTRLEN);
SOCK_LOG_INFO("AV-INSERT:src_addr: family: %d, IP is %s, port: %d\n",
@ -222,6 +250,7 @@ static int sock_check_table_in(struct sock_av *_av, struct sockaddr_in *addr,
ntohs(((struct sockaddr_in*)&addr[i])->sin_port));
memcpy(&av_addr->addr, &addr[i], sizeof(struct sockaddr_in));
av_addr->rem_ep_id = rem_ep_id;
if (idm_set(&_av->addr_idm, _av->table_hdr->stored, av_addr) < 0) {
if (fi_addr)
fi_addr[i] = FI_ADDR_NOTAVAIL;

View file

@ -42,6 +42,7 @@
#include <sys/types.h>
#include "sock.h"
#include "sock_util.h"
const struct fi_cntr_attr sock_cntr_attr = {
.events = FI_CNTR_EVENTS_COMP,
@ -56,6 +57,10 @@ int sock_cntr_progress(struct sock_cntr *cntr)
struct sock_rx_ctx *rx_ctx;
struct dlist_entry *entry;
if (cntr->domain->progress_mode == FI_PROGRESS_AUTO &&
!sock_progress_thread_wait)
return 0;
for (entry = cntr->tx_list.next; entry != &cntr->tx_list;
entry = entry->next) {
tx_ctx = container_of(entry, struct sock_tx_ctx, cntr_entry);
@ -74,8 +79,7 @@ static uint64_t sock_cntr_read(struct fid_cntr *cntr)
{
struct sock_cntr *_cntr;
_cntr = container_of(cntr, struct sock_cntr, cntr_fid);
if (_cntr->domain->progress_mode == FI_PROGRESS_MANUAL)
sock_cntr_progress(_cntr);
sock_cntr_progress(_cntr);
return atomic_get(&_cntr->value);
}

View file

@ -202,16 +202,21 @@ ssize_t sock_comm_peek(struct sock_conn *conn, void *buf, size_t len)
int sock_comm_buffer_init(struct sock_conn *conn)
{
int optval;
uint64_t flags;
socklen_t size = SOCK_COMM_BUF_SZ;
socklen_t optlen = sizeof(socklen_t);
optval = 1;
setsockopt(conn->sock_fd, IPPROTO_TCP, TCP_NODELAY,
&optval, sizeof optval);
flags = fcntl(conn->sock_fd, F_GETFL, 0);
fcntl(conn->sock_fd, F_SETFL, flags | O_NONBLOCK);
rbinit(&conn->inbuf, SOCK_COMM_BUF_SZ);
rbinit(&conn->outbuf, SOCK_COMM_BUF_SZ);
setsockopt(conn->sock_fd, SOL_SOCKET, SO_RCVBUF, &size, optlen);
setsockopt(conn->sock_fd, SOL_SOCKET, SO_SNDBUF, &size, optlen);
@ -221,10 +226,10 @@ int sock_comm_buffer_init(struct sock_conn *conn)
optlen = sizeof(socklen_t);
getsockopt(conn->sock_fd, SOL_SOCKET, SO_SNDBUF, &size, &optlen);
SOCK_LOG_INFO("SO_SNDBUF: %d\n", size);
return 0;
}
void sock_comm_buffer_finalize(struct sock_conn *conn)
{
rbfree(&conn->inbuf);

View file

@ -102,22 +102,27 @@ struct sock_conn *sock_conn_map_lookup_key(struct sock_conn_map *conn_map,
#define SOCK_ADDR_IN_PORT(sa)SOCK_ADDR_IN_PTR(sa)->sin_port
#define SOCK_ADDR_IN_ADDR(sa)SOCK_ADDR_IN_PTR(sa)->sin_addr
static int sock_compare_addr(struct sockaddr_in *addr1,
struct sockaddr_in *addr2)
{
if ((SOCK_ADDR_IN_ADDR(addr1).s_addr ==
SOCK_ADDR_IN_ADDR(addr2).s_addr) &&
(SOCK_ADDR_IN_PORT(addr1) == SOCK_ADDR_IN_PORT(addr2)))
return 1;
return 0;
}
uint16_t sock_conn_map_lookup(struct sock_conn_map *map,
struct sockaddr_in *addr)
{
int i;
struct sockaddr_in *entry;
fastlock_acquire(&map->lock);
for (i=0; i < map->used; i++) {
entry = (struct sockaddr_in *)&(map->table[i].addr);
if ((SOCK_ADDR_IN_ADDR(entry).s_addr ==
SOCK_ADDR_IN_ADDR(addr).s_addr) &&
(SOCK_ADDR_IN_PORT(entry) == SOCK_ADDR_IN_PORT(addr))) {
fastlock_release(&map->lock);
if (sock_compare_addr(entry, addr)) {
return i+1;
}
}
fastlock_release(&map->lock);
return 0;
}
@ -126,11 +131,8 @@ static int sock_conn_map_insert(struct sock_conn_map *map,
int conn_fd)
{
int index;
fastlock_acquire(&map->lock);
if (map->size == map->used) {
if (sock_conn_map_increase(map, map->size * 2)) {
fastlock_release(&map->lock);
return 0;
}
}
@ -140,7 +142,6 @@ static int sock_conn_map_insert(struct sock_conn_map *map,
map->table[index].sock_fd = conn_fd;
sock_comm_buffer_init(&map->table[index]);
map->used++;
fastlock_release(&map->lock);
return index + 1;
}
@ -172,6 +173,10 @@ uint16_t sock_conn_map_connect(struct sock_domain *dom,
flags = fcntl(conn_fd, F_GETFL, 0);
fcntl(conn_fd, F_SETFL, flags | O_NONBLOCK);
fastlock_acquire(&map->lock);
memcpy(&map->curr_addr, addr, sizeof(struct sockaddr_in));
fastlock_release(&map->lock);
if (connect(conn_fd, addr, sizeof *addr) < 0) {
if (errno == EINPROGRESS) {
/* timeout after 5 secs */
@ -224,12 +229,24 @@ uint16_t sock_conn_map_connect(struct sock_domain *dom,
reply = ntohs(reply);
SOCK_LOG_INFO("Connect response: %d\n", ntohs(reply));
if (reply == 0) {
fastlock_acquire(&map->lock);
ret = sock_conn_map_insert(map, addr, conn_fd);
fastlock_release(&map->lock);
} else {
ret = sock_conn_map_lookup(map, addr);
ret = 0;
close(conn_fd);
SOCK_LOG_INFO("waiting for an accept\n");
while (!ret) {
fastlock_acquire(&map->lock);
ret = sock_conn_map_lookup(map, addr);
fastlock_release(&map->lock);
}
SOCK_LOG_INFO("got accept\n");
}
return ret;
}
@ -238,8 +255,13 @@ uint16_t sock_conn_map_match_or_connect(struct sock_domain *dom,
struct sockaddr_in *addr)
{
uint16_t index;
return (index = sock_conn_map_lookup(map, addr)) ?
index : sock_conn_map_connect(dom, map, addr);
fastlock_acquire(&map->lock);
index = sock_conn_map_lookup(map, addr);
fastlock_release(&map->lock);
if (!index)
index = sock_conn_map_connect(dom, map, addr);
return index;
}
static void *_sock_conn_listen(void *arg)
@ -253,10 +275,9 @@ static void *_sock_conn_listen(void *arg)
struct sockaddr_in remote;
socklen_t addr_size;
struct pollfd poll_fds[2];
char service[NI_MAXSERV];
struct sockaddr_in addr;
char sa_ip[INET_ADDRSTRLEN];
unsigned short port;
unsigned short port, response;
uint16_t index;
memset(&hints, 0, sizeof(hints));
@ -264,14 +285,14 @@ static void *_sock_conn_listen(void *arg)
hints.ai_socktype = SOCK_STREAM;
hints.ai_flags = AI_PASSIVE;
sprintf(service, "%d", domain->service);
if(getaddrinfo(NULL, service, &hints, &s_res)) {
SOCK_LOG_ERROR("no available AF_INET address\n");
perror("no available AF_INET address");
ret = getaddrinfo(NULL, domain->service, &hints, &s_res);
if (ret) {
SOCK_LOG_ERROR("no available AF_INET address, service %s, %s\n",
domain->service, gai_strerror(ret));
return NULL;
}
SOCK_LOG_INFO("Binding listener thread to port: %d\n", domain->service);
SOCK_LOG_INFO("Binding listener thread to port: %s\n", domain->service);
for (p=s_res; p; p=p->ai_next) {
listen_fd = socket(p->ai_family, p->ai_socktype, p->ai_protocol);
if (listen_fd >= 0) {
@ -291,16 +312,17 @@ static void *_sock_conn_listen(void *arg)
freeaddrinfo(s_res);
if (listen_fd < 0) {
SOCK_LOG_ERROR("failed to listen to port: %d\n", domain->service);
SOCK_LOG_ERROR("failed to listen to port: %s\n", domain->service);
goto err;
}
if (domain->service == 0) {
if (atoi(domain->service) == 0) {
addr_size = sizeof(struct sockaddr_in);
if (getsockname(listen_fd, (struct sockaddr*)&addr, &addr_size))
goto err;
domain->service = ntohs(addr.sin_port);
SOCK_LOG_INFO("Bound to port: %d\n", domain->service);
snprintf(domain->service, sizeof domain->service, "%d",
ntohs(addr.sin_port));
SOCK_LOG_INFO("Bound to port: %s\n", domain->service);
}
if (listen(listen_fd, 0)) {
@ -309,7 +331,7 @@ static void *_sock_conn_listen(void *arg)
}
((struct sockaddr_in*)&(domain->src_addr))->sin_port =
htons(domain->service);
htons(atoi(domain->service));
domain->listening = 1;
poll_fds[0].fd = listen_fd;
@ -343,15 +365,34 @@ static void *_sock_conn_listen(void *arg)
remote.sin_port = port;
SOCK_LOG_INFO("Remote port: %d\n", ntohs(port));
index = sock_conn_map_lookup(map, &remote);
port = (index) ? 1 : 0;
ret = send(conn_fd, &port, sizeof(port), 0);
if (ret != sizeof(port))
SOCK_LOG_ERROR("Cannot exchange port\n");
if (index == 0)
fastlock_acquire(&map->lock);
index = sock_conn_map_lookup(map, &remote);
response = (index) ? 1 : 0;
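/* Simultaneous-connect tie-break: if we are currently connecting to this
 * same peer (curr_addr), the side with the larger source address (or, on a
 * tie, the larger service port) rejects the incoming connection and keeps
 * its own outgoing one. */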
if (response == 0) {
if (sock_compare_addr((struct sockaddr_in*)&map->curr_addr,
&remote)) {
ret = memcmp(&domain->src_addr, &remote,
sizeof(struct sockaddr_in));
if (ret > 0 ||
(ret == 0 && atoi(domain->service) > port)) {
response = 1;
SOCK_LOG_INFO("Rejecting accept\n");
}
}
}
fastlock_release(&map->lock);
ret = send(conn_fd, &response, sizeof(response), 0);
if (ret != sizeof(response))
SOCK_LOG_ERROR("Cannot exchange port\n");
if (!response) {
fastlock_acquire(&map->lock);
sock_conn_map_insert(map, &remote, conn_fd);
else
fastlock_release(&map->lock);
} else
close(conn_fd);
}

View file

@ -55,6 +55,10 @@ int sock_cq_progress(struct sock_cq *cq)
struct sock_rx_ctx *rx_ctx;
struct dlist_entry *entry;
if (cq->domain->progress_mode == FI_PROGRESS_AUTO &&
!sock_progress_thread_wait)
return 0;
for (entry = cq->tx_list.next; entry != &cq->tx_list;
entry = entry->next) {
tx_ctx = container_of(entry, struct sock_tx_ctx, cq_entry);
@ -111,13 +115,15 @@ static ssize_t _sock_cq_write(struct sock_cq *cq, fi_addr_t addr,
goto out;
}
rbfdwrite(&cq->cq_rbfd, buf, len);
rbfdcommit(&cq->cq_rbfd);
ret = len;
rbwrite(&cq->addr_rb, &addr, sizeof(fi_addr_t));
rbcommit(&cq->addr_rb);
rbfdwrite(&cq->cq_rbfd, buf, len);
rbfdcommit(&cq->cq_rbfd);
ret = len;
if (cq->signal)
sock_wait_signal(cq->waitset);
out:
@ -218,16 +224,31 @@ static void sock_cq_set_report_fn(struct sock_cq *sock_cq)
}
}
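/* Drain up to 'count' completions from the CQ ring buffer and pop the
 * matching source address for each one; callers hold cq->lock. */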
static inline ssize_t sock_cq_rbuf_read(struct sock_cq *cq, void *buf,
size_t count, fi_addr_t *src_addr,
size_t cq_entry_len)
{
ssize_t i;
fi_addr_t addr;
rbfdread(&cq->cq_rbfd, buf, cq_entry_len * count);
for(i = 0; i < count; i++) {
rbread(&cq->addr_rb, &addr, sizeof(fi_addr_t));
if (src_addr)
src_addr[i] = addr;
}
return count;
}
ssize_t sock_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count,
fi_addr_t *src_addr, const void *cond, int timeout)
{
int ret;
fi_addr_t addr;
int ret = 0;
int64_t threshold;
struct timeval now;
struct sock_cq *sock_cq;
double start_ms, end_ms;
ssize_t i, bytes_read, num_read, cq_entry_len;
ssize_t cq_entry_len, avail;
sock_cq = container_of(cq, struct sock_cq, cq_fid);
cq_entry_len = sock_cq->cq_entry_size;
@ -246,8 +267,9 @@ ssize_t sock_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count,
timeout -= (end_ms - start_ms);
timeout = timeout < 0 ? 0 : timeout;
}
}
} else
sock_cq_progress(sock_cq);
if (sock_cq->attr.wait_cond == FI_CQ_COND_THRESHOLD) {
threshold = MIN((int64_t)cond, count);
}else{
@ -255,23 +277,21 @@ ssize_t sock_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count,
}
fastlock_acquire(&sock_cq->lock);
bytes_read = rbfdsread(&sock_cq->cq_rbfd, buf,
cq_entry_len*threshold, timeout);
if (bytes_read == 0) {
ret = -FI_ETIMEDOUT;
goto out;
}
num_read = bytes_read/cq_entry_len;
for(i=0; i < num_read; i++) {
rbread(&sock_cq->addr_rb, &addr, sizeof(fi_addr_t));
if (src_addr)
src_addr[i] = addr;
}
ret = num_read;
out:
if ((avail = rbfdused(&sock_cq->cq_rbfd)))
ret = sock_cq_rbuf_read(sock_cq, buf,
MIN(threshold, avail / cq_entry_len),
src_addr, cq_entry_len);
fastlock_release(&sock_cq->lock);
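/* Nothing was ready: block on the CQ's ring-buffer fd for up to 'timeout'
 * ms, then attempt the read once more before returning. */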
if (ret == 0) {
ret = rbfdwait(&sock_cq->cq_rbfd, timeout);
fastlock_acquire(&sock_cq->lock);
if (ret != -FI_ETIMEDOUT && (avail = rbfdused(&sock_cq->cq_rbfd)))
ret = sock_cq_rbuf_read(sock_cq, buf,
MIN(threshold, avail / cq_entry_len),
src_addr, cq_entry_len);
fastlock_release(&sock_cq->lock);
}
return ret;
}

View file

@ -41,7 +41,7 @@
#include "sock_util.h"
struct sock_rx_ctx *sock_rx_ctx_alloc(struct fi_rx_attr *attr, void *context)
struct sock_rx_ctx *sock_rx_ctx_alloc(const struct fi_rx_attr *attr, void *context)
{
struct sock_rx_ctx *rx_ctx;
rx_ctx = calloc(1, sizeof(*rx_ctx));
@ -71,7 +71,7 @@ void sock_rx_ctx_free(struct sock_rx_ctx *rx_ctx)
free(rx_ctx);
}
static struct sock_tx_ctx *sock_tx_context_alloc(struct fi_tx_attr *attr,
static struct sock_tx_ctx *sock_tx_context_alloc(const struct fi_tx_attr *attr,
void *context, size_t fclass)
{
struct sock_tx_ctx *tx_ctx;
@ -80,7 +80,9 @@ static struct sock_tx_ctx *sock_tx_context_alloc(struct fi_tx_attr *attr,
if (!tx_ctx)
return NULL;
if (rbfdinit(&tx_ctx->rbfd, attr->size))
if (rbfdinit(&tx_ctx->rbfd,
(attr->size) ? attr->size :
SOCK_EP_TX_SZ * SOCK_EP_TX_ENTRY_SZ))
goto err;
dlist_init(&tx_ctx->cq_entry);
@ -99,7 +101,7 @@ static struct sock_tx_ctx *sock_tx_context_alloc(struct fi_tx_attr *attr,
tx_ctx->fid.ctx.fid.context = context;
break;
case FI_CLASS_STX_CTX:
tx_ctx->fid.stx.fid.fclass = FI_CLASS_TX_CTX;
tx_ctx->fid.stx.fid.fclass = FI_CLASS_STX_CTX;
tx_ctx->fid.stx.fid.context = context;
break;
default:
@ -114,12 +116,12 @@ err:
}
struct sock_tx_ctx *sock_tx_ctx_alloc(struct fi_tx_attr *attr, void *context)
struct sock_tx_ctx *sock_tx_ctx_alloc(const struct fi_tx_attr *attr, void *context)
{
return sock_tx_context_alloc(attr, context, FI_CLASS_TX_CTX);
}
struct sock_tx_ctx *sock_stx_ctx_alloc(struct fi_tx_attr *attr, void *context)
struct sock_tx_ctx *sock_stx_ctx_alloc(const struct fi_tx_attr *attr, void *context)
{
return sock_tx_context_alloc(attr, context, FI_CLASS_STX_CTX);
}
@ -151,6 +153,6 @@ void sock_tx_ctx_commit(struct sock_tx_ctx *tx_ctx)
void sock_tx_ctx_abort(struct sock_tx_ctx *tx_ctx)
{
rbfdabort(&tx_ctx->rbfd);
fastlock_release(&tx_ctx->rlock);
fastlock_release(&tx_ctx->wlock);
}

View file

@ -47,7 +47,7 @@ const struct fi_domain_attr sock_domain_attr = {
.threading = FI_THREAD_SAFE,
.control_progress = FI_PROGRESS_AUTO,
.data_progress = FI_PROGRESS_AUTO,
.mr_key_size = 0,
.mr_key_size = sizeof(uint16_t),
.cq_data_size = sizeof(uint64_t),
.ep_cnt = SOCK_EP_MAX_EP_CNT,
.tx_ctx_cnt = SOCK_EP_MAX_TX_CNT,
@ -72,6 +72,7 @@ int sock_verify_domain_attr(struct fi_domain_attr *attr)
case FI_THREAD_FID:
case FI_THREAD_DOMAIN:
case FI_THREAD_COMPLETION:
case FI_THREAD_ENDPOINT:
break;
default:
SOCK_LOG_INFO("Invalid threading model!\n");
@ -180,13 +181,15 @@ static int sock_mr_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
case FI_CLASS_CQ:
cq = container_of(bfid, struct sock_cq, cq_fid.fid);
assert(mr->domain == cq->domain);
mr->cq = cq;
if (flags & FI_REMOTE_WRITE)
mr->cq = cq;
break;
case FI_CLASS_CNTR:
cntr = container_of(bfid, struct sock_cntr, cntr_fid.fid);
assert(mr->domain == cntr->domain);
mr->cntr = cntr;
if (flags & FI_REMOTE_WRITE)
mr->cntr = cntr;
break;
default:
@ -361,7 +364,7 @@ int sock_endpoint(struct fid_domain *domain, struct fi_info *info,
}
int sock_scalable_ep(struct fid_domain *domain, struct fi_info *info,
struct fid_sep **sep, void *context)
struct fid_ep **sep, void *context)
{
switch (info->ep_type) {
case FI_EP_RDM:
@ -406,7 +409,6 @@ int sock_domain(struct fid_fabric *fabric, struct fi_info *info,
struct fid_domain **dom, void *context)
{
int ret, flags;
char service[NI_MAXSERV];
struct sock_domain *sock_domain;
if(info && info->domain_attr){
@ -424,11 +426,11 @@ int sock_domain(struct fid_fabric *fabric, struct fi_info *info,
if(info && info->src_addr) {
if (getnameinfo(info->src_addr, info->src_addrlen, NULL, 0,
service, sizeof(service), NI_NUMERICSERV)) {
sock_domain->service, sizeof(sock_domain->service),
NI_NUMERICSERV)) {
SOCK_LOG_ERROR("could not resolve src_addr\n");
goto err;
}
sock_domain->service = atoi(service);
sock_domain->info = *info;
memcpy(&sock_domain->src_addr, info->src_addr,
sizeof(struct sockaddr_in));
@ -455,6 +457,7 @@ int sock_domain(struct fid_fabric *fabric, struct fi_info *info,
goto err;
}
sock_domain->ep_count = AF_INET;
sock_domain->r_cmap.domain = sock_domain;
fastlock_init(&sock_domain->r_cmap.lock);
if(socketpair(AF_UNIX, SOCK_STREAM, 0, sock_domain->signal_fds) < 0)

View file

@ -55,20 +55,25 @@ extern const struct fi_fabric_attr sock_fabric_attr;
extern const char const sock_fab_name[];
extern const char const sock_dom_name[];
extern const char const sock_prov_name[];
static void sock_dequeue_tx_ctx(struct sock_tx_ctx *tx_ctx)
{
fastlock_acquire(&tx_ctx->domain->pe->lock);
dlist_remove(&tx_ctx->pe_entry);
fastlock_release(&tx_ctx->domain->pe->lock);
}
const struct fi_tx_attr sock_stx_attr = {
.caps = SOCK_EP_RDM_CAP,
.op_flags = SOCK_DEF_OPS,
.msg_order = SOCK_EP_MSG_ORDER,
.inject_size = SOCK_EP_MAX_INJECT_SZ,
.size = SOCK_EP_TX_SZ,
.iov_limit = SOCK_EP_MAX_IOV_LIMIT,
};
static void sock_dequeue_rx_ctx(struct sock_rx_ctx *rx_ctx)
{
fastlock_acquire(&rx_ctx->domain->pe->lock);
dlist_remove(&rx_ctx->pe_entry);
fastlock_release(&rx_ctx->domain->pe->lock);
}
const struct fi_rx_attr sock_srx_attr = {
.caps = SOCK_EP_RDM_CAP,
.op_flags = SOCK_DEF_OPS,
.msg_order = SOCK_EP_MSG_ORDER,
.total_buffered_recv = SOCK_EP_MAX_BUFF_RECV,
.size = SOCK_EP_MAX_MSG_SZ,
.iov_limit = SOCK_EP_MAX_IOV_LIMIT,
};
static int sock_ctx_close(struct fid *fid)
{
@ -78,7 +83,7 @@ static int sock_ctx_close(struct fid *fid)
switch (fid->fclass) {
case FI_CLASS_TX_CTX:
tx_ctx = container_of(fid, struct sock_tx_ctx, fid.ctx.fid);
sock_dequeue_tx_ctx(tx_ctx);
sock_pe_remove_tx_ctx(tx_ctx);
atomic_dec(&tx_ctx->ep->num_rx_ctx);
atomic_dec(&tx_ctx->domain->ref);
sock_tx_ctx_free(tx_ctx);
@ -86,7 +91,7 @@ static int sock_ctx_close(struct fid *fid)
case FI_CLASS_RX_CTX:
rx_ctx = container_of(fid, struct sock_rx_ctx, ctx.fid);
sock_dequeue_rx_ctx(rx_ctx);
sock_pe_remove_rx_ctx(rx_ctx);
atomic_dec(&rx_ctx->ep->num_rx_ctx);
atomic_dec(&rx_ctx->domain->ref);
sock_rx_ctx_free(rx_ctx);
@ -95,14 +100,14 @@ static int sock_ctx_close(struct fid *fid)
case FI_CLASS_STX_CTX:
tx_ctx = container_of(fid, struct sock_tx_ctx, fid.stx.fid);
atomic_dec(&tx_ctx->domain->ref);
sock_dequeue_tx_ctx(tx_ctx);
sock_pe_remove_tx_ctx(tx_ctx);
sock_tx_ctx_free(tx_ctx);
break;
case FI_CLASS_SRX_CTX:
rx_ctx = container_of(fid, struct sock_rx_ctx, ctx.fid);
atomic_dec(&rx_ctx->domain->ref);
sock_dequeue_rx_ctx(rx_ctx);
sock_pe_remove_rx_ctx(rx_ctx);
sock_rx_ctx_free(rx_ctx);
break;
@ -141,10 +146,6 @@ static int sock_ctx_bind_cq(struct fid *fid, struct fid *bfid, uint64_t flags)
tx_ctx->comp.write_cq_event = 1;
}
if (!tx_ctx->progress) {
tx_ctx->progress = 1;
sock_pe_add_tx_ctx(tx_ctx->domain->pe, tx_ctx);
}
dlist_insert_tail(&tx_ctx->cq_entry, &sock_cq->tx_list);
break;
@ -168,10 +169,6 @@ static int sock_ctx_bind_cq(struct fid *fid, struct fid *bfid, uint64_t flags)
rx_ctx->comp.rem_write_cq_event = 1;
}
if (!rx_ctx->progress) {
rx_ctx->progress = 1;
sock_pe_add_rx_ctx(rx_ctx->domain->pe, rx_ctx);
}
dlist_insert_tail(&rx_ctx->cq_entry, &sock_cq->rx_list);
break;
@ -195,10 +192,6 @@ static int sock_ctx_bind_cq(struct fid *fid, struct fid *bfid, uint64_t flags)
tx_ctx->comp.write_cq_event = 1;
}
if (!tx_ctx->progress) {
tx_ctx->progress = 1;
sock_pe_add_tx_ctx(tx_ctx->domain->pe, tx_ctx);
}
dlist_insert_tail(&tx_ctx->cq_entry, &sock_cq->tx_list);
break;
@ -228,10 +221,6 @@ static int sock_ctx_bind_cntr(struct fid *fid, struct fid *bfid, uint64_t flags)
if (flags & FI_WRITE)
tx_ctx->comp.write_cntr = cntr;
if (!tx_ctx->progress) {
tx_ctx->progress = 1;
sock_pe_add_tx_ctx(tx_ctx->domain->pe, tx_ctx);
}
dlist_insert_tail(&tx_ctx->cntr_entry, &cntr->tx_list);
break;
@ -247,10 +236,6 @@ static int sock_ctx_bind_cntr(struct fid *fid, struct fid *bfid, uint64_t flags)
if (flags & FI_REMOTE_WRITE)
rx_ctx->comp.rem_write_cntr = cntr;
if (!rx_ctx->progress) {
rx_ctx->progress = 1;
sock_pe_add_rx_ctx(rx_ctx->domain->pe, rx_ctx);
}
dlist_insert_tail(&rx_ctx->cntr_entry, &cntr->rx_list);
break;
@ -265,10 +250,6 @@ static int sock_ctx_bind_cntr(struct fid *fid, struct fid *bfid, uint64_t flags)
if (flags & FI_WRITE)
tx_ctx->comp.write_cntr = cntr;
if (!tx_ctx->progress) {
tx_ctx->progress = 1;
sock_pe_add_tx_ctx(tx_ctx->domain->pe, tx_ctx);
}
dlist_insert_tail(&tx_ctx->cntr_entry, &cntr->tx_list);
break;
@ -289,6 +270,9 @@ static int sock_ctx_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
case FI_CLASS_CNTR:
return sock_ctx_bind_cntr(fid, bfid, flags);
case FI_CLASS_MR:
return 0;
default:
SOCK_LOG_ERROR("Invalid bind()\n");
return -FI_EINVAL;
@ -367,11 +351,19 @@ static int sock_ctx_enable(struct fid_ep *ep)
case FI_CLASS_RX_CTX:
rx_ctx = container_of(ep, struct sock_rx_ctx, ctx.fid);
rx_ctx->enabled = 1;
if (!rx_ctx->progress) {
sock_pe_add_rx_ctx(rx_ctx->domain->pe, rx_ctx);
rx_ctx->progress = 1;
}
return 0;
case FI_CLASS_TX_CTX:
tx_ctx = container_of(ep, struct sock_tx_ctx, fid.ctx.fid);
tx_ctx->enabled = 1;
if (!tx_ctx->progress) {
sock_pe_add_tx_ctx(tx_ctx->domain->pe, tx_ctx);
tx_ctx->progress = 1;
}
return 0;
default:
@ -422,31 +414,12 @@ static int sock_ctx_setopt(fid_t fid, int level, int optname,
return 0;
}
static ssize_t sock_ep_cancel(fid_t fid, void *context)
static ssize_t sock_rx_ctx_cancel(struct sock_rx_ctx *rx_ctx, void *context)
{
int ret;
struct sock_rx_ctx *rx_ctx;
struct sock_rx_entry *rx_entry;
struct sock_ep *sock_ep;
struct dlist_entry *entry;
ssize_t ret = -FI_ENOENT;
struct sock_rx_entry *rx_entry;
switch (fid->fclass) {
case FI_CLASS_EP:
sock_ep = container_of(fid, struct sock_ep, fid.ep.fid);
rx_ctx = sock_ep->rx_ctx;
break;
case FI_CLASS_RX_CTX:
case FI_CLASS_SRX_CTX:
rx_ctx = container_of(fid, struct sock_rx_ctx, ctx.fid);
break;
default:
SOCK_LOG_ERROR("Invalid ep type\n");
return -FI_EINVAL;
}
ret = -FI_ENOENT;
fastlock_acquire(&rx_ctx->lock);
for (entry = rx_ctx->rx_entry_list.next;
entry != &rx_ctx->rx_entry_list; entry = entry->next) {
@ -454,7 +427,7 @@ static ssize_t sock_ep_cancel(fid_t fid, void *context)
rx_entry = container_of(entry, struct sock_rx_entry, entry);
if (rx_entry->is_busy || rx_entry->used)
continue;
if ((uint64_t)context == rx_entry->context) {
dlist_remove(&rx_entry->entry);
sock_rx_release_entry(rx_entry);
@ -466,6 +439,35 @@ static ssize_t sock_ep_cancel(fid_t fid, void *context)
return ret;
}
static ssize_t sock_ep_cancel(fid_t fid, void *context)
{
struct sock_rx_ctx *rx_ctx = NULL;
struct sock_ep *sock_ep;
switch (fid->fclass) {
case FI_CLASS_EP:
sock_ep = container_of(fid, struct sock_ep, ep.fid);
rx_ctx = sock_ep->rx_ctx;
break;
case FI_CLASS_RX_CTX:
case FI_CLASS_SRX_CTX:
rx_ctx = container_of(fid, struct sock_rx_ctx, ctx.fid);
sock_ep = rx_ctx->ep;
break;
default:
SOCK_LOG_ERROR("Invalid ep type\n");
return -FI_EINVAL;
}
if (!(sock_ep->info.caps & FI_CANCEL)) {
return -FI_EINVAL;
}
return sock_rx_ctx_cancel(rx_ctx, context);
}
struct fi_ops_ep sock_ctx_ep_ops = {
.size = sizeof(struct fi_ops_ep),
.enable = sock_ctx_enable,
@ -474,6 +476,8 @@ struct fi_ops_ep sock_ctx_ep_ops = {
.setopt = sock_ctx_setopt,
.tx_ctx = fi_no_tx_ctx,
.rx_ctx = fi_no_rx_ctx,
.rx_size_left = fi_no_rx_size_left,
.tx_size_left = fi_no_tx_size_left,
};
static int sock_ep_close(struct fid *fid)
@ -482,16 +486,13 @@ static int sock_ep_close(struct fid *fid)
switch(fid->fclass) {
case FI_CLASS_EP:
sock_ep = container_of(fid, struct sock_ep, fid.ep.fid);
sock_ep = container_of(fid, struct sock_ep, ep.fid);
break;
case FI_CLASS_SEP:
sock_ep = container_of(fid, struct sock_ep, fid.sep.fid);
sock_ep = container_of(fid, struct sock_ep, ep.fid);
break;
case FI_CLASS_PEP:
sock_ep = container_of(fid, struct sock_ep, fid.pep.fid);
break;
default:
return -FI_EINVAL;
}
@ -500,15 +501,13 @@ static int sock_ep_close(struct fid *fid)
atomic_get(&sock_ep->num_tx_ctx))
return -FI_EBUSY;
if (sock_ep->fclass != FI_CLASS_SEP &&
sock_ep->ep_attr.tx_ctx_cnt != FI_SHARED_CONTEXT) {
sock_dequeue_tx_ctx(sock_ep->tx_array[0]);
if (sock_ep->fclass != FI_CLASS_SEP && !sock_ep->tx_shared) {
sock_pe_remove_tx_ctx(sock_ep->tx_array[0]);
sock_tx_ctx_free(sock_ep->tx_array[0]);
}
if (sock_ep->fclass != FI_CLASS_SEP &&
sock_ep->ep_attr.rx_ctx_cnt != FI_SHARED_CONTEXT) {
sock_dequeue_rx_ctx(sock_ep->rx_array[0]);
if (sock_ep->fclass != FI_CLASS_SEP && !sock_ep->rx_shared) {
sock_pe_remove_rx_ctx(sock_ep->rx_array[0]);
sock_rx_ctx_free(sock_ep->rx_array[0]);
}
@ -538,16 +537,13 @@ static int sock_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
switch(fid->fclass) {
case FI_CLASS_EP:
ep = container_of(fid, struct sock_ep, fid.ep.fid);
ep = container_of(fid, struct sock_ep, ep.fid);
break;
case FI_CLASS_SEP:
ep = container_of(fid, struct sock_ep, fid.sep.fid);
ep = container_of(fid, struct sock_ep, ep.fid);
break;
case FI_CLASS_PEP:
ep = container_of(fid, struct sock_ep, fid.pep.fid);
break;
default:
return -FI_EINVAL;
}
@ -556,12 +552,10 @@ static int sock_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
case FI_CLASS_EQ:
eq = container_of(bfid, struct sock_eq, eq.fid);
ep->eq = eq;
if ((eq->attr.wait_obj == FI_WAIT_FD) && (eq->wait_fd < 0))
sock_eq_openwait(eq, (char *)&ep->domain->service);
break;
case FI_CLASS_MR:
return -FI_EINVAL;
return 0;
case FI_CLASS_CQ:
cq = container_of(bfid, struct sock_cq, cq_fid.fid);
@ -621,9 +615,32 @@ static int sock_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
for (i = 0; i < ep->ep_attr.rx_ctx_cnt; i++) {
rx_ctx = ep->rx_array[i];
if (!rx_ctx)
if (!rx_ctx)
continue;
if (rx_ctx->ctx.fid.fclass == FI_CLASS_SRX_CTX) {
if (flags & FI_RECV) {
ep->comp.recv_cq = cq;
if (flags & FI_COMPLETION)
ep->comp.recv_cq_event = 1;
}
if (flags & FI_REMOTE_READ) {
ep->comp.rem_read_cq = cq;
if (flags & FI_COMPLETION)
ep->comp.rem_read_cq_event = 1;
}
if (flags & FI_REMOTE_WRITE) {
ep->comp.rem_write_cq = cq;
if (flags & FI_COMPLETION)
ep->comp.rem_write_cq_event = 1;
}
dlist_insert_tail(&rx_ctx->cq_entry, &cq->rx_list);
continue;
}
if ((ret = sock_ctx_bind_cq(&rx_ctx->ctx.fid,
bfid, flags)))
return ret;
@ -673,6 +690,21 @@ static int sock_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
if (!rx_ctx)
continue;
if (rx_ctx->ctx.fid.fclass == FI_CLASS_SRX_CTX) {
if (flags & FI_RECV)
rx_ctx->comp.recv_cntr = cntr;
if (flags & FI_REMOTE_READ)
rx_ctx->comp.rem_read_cntr = cntr;
if (flags & FI_REMOTE_WRITE)
rx_ctx->comp.rem_write_cntr = cntr;
dlist_insert_tail(&rx_ctx->cntr_entry, &cntr->rx_list);
continue;
}
if ((ret = sock_ctx_bind_cntr(&rx_ctx->ctx.fid,
bfid, flags)))
@ -737,16 +769,13 @@ static int sock_ep_control(struct fid *fid, int command, void *arg)
switch(fid->fclass) {
case FI_CLASS_EP:
ep = container_of(fid, struct sock_ep, fid.ep.fid);
ep = container_of(fid, struct sock_ep, ep.fid);
break;
case FI_CLASS_SEP:
ep = container_of(fid, struct sock_ep, fid.sep.fid);
ep = container_of(fid, struct sock_ep, ep.fid);
break;
case FI_CLASS_PEP:
ep = container_of(fid, struct sock_ep, fid.pep.fid);
break;
default:
return -FI_EINVAL;
}
@ -759,7 +788,7 @@ static int sock_ep_control(struct fid *fid, int command, void *arg)
return -FI_ENOMEM;
*new_ep = *ep;
new_ep->op_flags = alias->flags;
*alias->fid = &new_ep->fid.ep.fid;
*alias->fid = &new_ep->ep.fid;
break;
case FI_GETOPSFLAG:
@ -785,29 +814,49 @@ struct fi_ops sock_ep_fi_ops = {
.ops_open = fi_no_ops_open,
};
static int sock_ep_enable(struct fid_ep *ep)
int sock_ep_enable(struct fid_ep *ep)
{
int i;
struct sock_ep *sock_ep;
sock_ep = container_of(ep, struct sock_ep, fid.ep);
sock_ep = container_of(ep, struct sock_ep, ep);
if (sock_ep->tx_ctx &&
sock_ep->tx_ctx->fid.ctx.fid.fclass == FI_CLASS_TX_CTX)
sock_ep->tx_ctx->fid.ctx.fid.fclass == FI_CLASS_TX_CTX) {
sock_ep->tx_ctx->enabled = 1;
if (!sock_ep->tx_ctx->progress) {
sock_pe_add_tx_ctx(sock_ep->domain->pe, sock_ep->tx_ctx);
sock_ep->tx_ctx->progress = 1;
}
}
if (sock_ep->rx_ctx &&
sock_ep->rx_ctx->ctx.fid.fclass == FI_CLASS_RX_CTX)
sock_ep->rx_ctx->ctx.fid.fclass == FI_CLASS_RX_CTX) {
sock_ep->rx_ctx->enabled = 1;
if (!sock_ep->rx_ctx->progress) {
sock_pe_add_rx_ctx(sock_ep->domain->pe, sock_ep->rx_ctx);
sock_ep->rx_ctx->progress = 1;
}
}
for (i = 0; i < sock_ep->ep_attr.tx_ctx_cnt; i++) {
if (sock_ep->tx_array[i])
if (sock_ep->tx_array[i]) {
sock_ep->tx_array[i]->enabled = 1;
if (!sock_ep->tx_array[i]->progress) {
sock_pe_add_tx_ctx(sock_ep->domain->pe, sock_ep->tx_array[i]);
sock_ep->tx_array[i]->progress = 1;
}
}
}
for (i = 0; i < sock_ep->ep_attr.rx_ctx_cnt; i++) {
if (sock_ep->rx_array[i])
if (sock_ep->rx_array[i]) {
sock_ep->rx_array[i]->enabled = 1;
if (!sock_ep->rx_array[i]->progress) {
sock_pe_add_rx_ctx(sock_ep->domain->pe, sock_ep->rx_array[i]);
sock_ep->rx_array[i]->progress = 1;
}
}
}
return 0;
}
@ -816,7 +865,7 @@ static int sock_ep_getopt(fid_t fid, int level, int optname,
void *optval, size_t *optlen)
{
struct sock_ep *sock_ep;
sock_ep = container_of(fid, struct sock_ep, fid.ep.fid);
sock_ep = container_of(fid, struct sock_ep, ep.fid);
if (level != FI_OPT_ENDPOINT)
return -ENOPROTOOPT;
@ -838,7 +887,7 @@ static int sock_ep_setopt(fid_t fid, int level, int optname,
{
int i;
struct sock_ep *sock_ep;
sock_ep = container_of(fid, struct sock_ep, fid.ep.fid);
sock_ep = container_of(fid, struct sock_ep, ep.fid);
if (level != FI_OPT_ENDPOINT)
return -ENOPROTOOPT;
@ -861,13 +910,13 @@ static int sock_ep_setopt(fid_t fid, int level, int optname,
return 0;
}
static int sock_ep_tx_ctx(struct fid_sep *ep, int index, struct fi_tx_attr *attr,
static int sock_ep_tx_ctx(struct fid_ep *ep, int index, struct fi_tx_attr *attr,
struct fid_ep **tx_ep, void *context)
{
struct sock_ep *sock_ep;
struct sock_tx_ctx *tx_ctx;
sock_ep = container_of(ep, struct sock_ep, fid.sep);
sock_ep = container_of(ep, struct sock_ep, ep);
if (index >= sock_ep->ep_attr.tx_ctx_cnt)
return -FI_EINVAL;
@ -894,13 +943,13 @@ static int sock_ep_tx_ctx(struct fid_sep *ep, int index, struct fi_tx_attr *attr
return 0;
}
static int sock_ep_rx_ctx(struct fid_sep *ep, int index, struct fi_rx_attr *attr,
static int sock_ep_rx_ctx(struct fid_ep *ep, int index, struct fi_rx_attr *attr,
struct fid_ep **rx_ep, void *context)
{
struct sock_ep *sock_ep;
struct sock_rx_ctx *rx_ctx;
sock_ep = container_of(ep, struct sock_ep, fid.sep);
sock_ep = container_of(ep, struct sock_ep, ep);
if (index >= sock_ep->ep_attr.rx_ctx_cnt)
return -FI_EINVAL;
@ -937,23 +986,45 @@ struct fi_ops_ep sock_ep_ops ={
.setopt = sock_ep_setopt,
.tx_ctx = sock_ep_tx_ctx,
.rx_ctx = sock_ep_rx_ctx,
.rx_size_left = fi_no_rx_size_left,
.tx_size_left = fi_no_tx_size_left,
};
static int sock_verify_tx_attr(const struct fi_tx_attr *attr)
{
if (!attr)
return 0;
if (attr->inject_size > SOCK_EP_MAX_INJECT_SZ)
return -FI_ENODATA;
if (attr->size > SOCK_EP_TX_SZ)
return -FI_ENODATA;
if (attr->iov_limit > SOCK_EP_MAX_IOV_LIMIT)
return -FI_ENODATA;
return 0;
}
int sock_stx_ctx(struct fid_domain *domain,
struct fi_tx_attr *attr, struct fid_stx **stx, void *context)
{
struct sock_domain *dom;
struct sock_tx_ctx *tx_ctx;
if (attr && sock_verify_tx_attr(attr))
return -FI_EINVAL;
dom = container_of(domain, struct sock_domain, dom_fid);
tx_ctx = sock_tx_ctx_alloc(attr, context);
tx_ctx = sock_tx_ctx_alloc(attr ? attr : &sock_stx_attr, context);
if (!tx_ctx)
return -FI_ENOMEM;
tx_ctx->domain = dom;
tx_ctx->fid.ctx.fid.fclass = FI_CLASS_STX_CTX;
tx_ctx->fid.stx.fid.fclass = FI_CLASS_STX_CTX;
tx_ctx->fid.stx.fid.ops = &sock_ctx_ops;
tx_ctx->fid.stx.ops = &sock_ep_ops;
atomic_inc(&dom->ref);
@ -962,14 +1033,34 @@ int sock_stx_ctx(struct fid_domain *domain,
return 0;
}
static int sock_verify_rx_attr(const struct fi_rx_attr *attr)
{
if (!attr)
return 0;
if (attr->total_buffered_recv > SOCK_EP_MAX_BUFF_RECV)
return -FI_ENODATA;
if (attr->size > SOCK_EP_TX_SZ)
return -FI_ENODATA;
if (attr->iov_limit > SOCK_EP_MAX_IOV_LIMIT)
return -FI_ENODATA;
return 0;
}
int sock_srx_ctx(struct fid_domain *domain,
struct fi_rx_attr *attr, struct fid_ep **srx, void *context)
{
struct sock_domain *dom;
struct sock_rx_ctx *rx_ctx;
if (attr && sock_verify_rx_attr(attr))
return -FI_EINVAL;
dom = container_of(domain, struct sock_domain, dom_fid);
rx_ctx = sock_rx_ctx_alloc(attr, context);
rx_ctx = sock_rx_ctx_alloc(attr ? attr : &sock_srx_attr, context);
if (!rx_ctx)
return -FI_ENOMEM;
@ -983,6 +1074,8 @@ int sock_srx_ctx(struct fid_domain *domain,
/* default config */
rx_ctx->min_multi_recv = SOCK_EP_MIN_MULTI_RECV;
rx_ctx->attr.total_buffered_recv = rx_ctx->attr.total_buffered_recv ?
rx_ctx->attr.total_buffered_recv : SOCK_EP_MAX_BUFF_RECV;
*srx = &rx_ctx->ctx;
atomic_inc(&dom->ref);
@ -1014,13 +1107,22 @@ struct fi_info *sock_fi_info(enum fi_ep_type ep_type,
if (hints->caps)
_info->caps = hints->caps;
if (hints->ep_attr)
*(_info->ep_attr) = *(hints->ep_attr);
if (hints->tx_attr)
*(_info->tx_attr) = *(hints->tx_attr);
if (hints->rx_attr)
*(_info->rx_attr) = *(hints->rx_attr);
*(_info->domain_attr) = sock_domain_attr;
*(_info->fabric_attr) = sock_fabric_attr;
_info->domain_attr->name = strdup(sock_dom_name);
_info->fabric_attr->name = strdup(sock_fab_name);
_info->fabric_attr->prov_name = strdup(sock_fab_name);
_info->fabric_attr->prov_name = strdup(sock_prov_name);
return _info;
}
@ -1053,36 +1155,27 @@ int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info,
switch (fclass) {
case FI_CLASS_EP:
sock_ep->fid.ep.fid.fclass = FI_CLASS_EP;
sock_ep->fid.ep.fid.context = context;
sock_ep->fid.ep.fid.ops = &sock_ep_fi_ops;
sock_ep->ep.fid.fclass = FI_CLASS_EP;
sock_ep->ep.fid.context = context;
sock_ep->ep.fid.ops = &sock_ep_fi_ops;
sock_ep->fid.ep.ops = &sock_ep_ops;
sock_ep->fid.ep.cm = &sock_ep_cm_ops;
sock_ep->fid.ep.msg = &sock_ep_msg_ops;
sock_ep->fid.ep.rma = &sock_ep_rma;
sock_ep->fid.ep.tagged = &sock_ep_tagged;
sock_ep->fid.ep.atomic = &sock_ep_atomic;
sock_ep->ep.ops = &sock_ep_ops;
sock_ep->ep.cm = &sock_ep_cm_ops;
sock_ep->ep.msg = &sock_ep_msg_ops;
sock_ep->ep.rma = &sock_ep_rma;
sock_ep->ep.tagged = &sock_ep_tagged;
sock_ep->ep.atomic = &sock_ep_atomic;
break;
case FI_CLASS_SEP:
sock_ep->fid.sep.fid.fclass = FI_CLASS_SEP;
sock_ep->fid.sep.fid.context = context;
sock_ep->fid.sep.fid.ops = &sock_ep_fi_ops;
sock_ep->ep.fid.fclass = FI_CLASS_SEP;
sock_ep->ep.fid.context = context;
sock_ep->ep.fid.ops = &sock_ep_fi_ops;
sock_ep->fid.sep.ops = &sock_ep_ops;
sock_ep->fid.sep.cm = &sock_ep_cm_ops;
sock_ep->ep.ops = &sock_ep_ops;
sock_ep->ep.cm = &sock_ep_cm_ops;
break;
case FI_CLASS_PEP:
sock_ep->fid.pep.fid.fclass = FI_CLASS_SEP;
sock_ep->fid.pep.fid.context = context;
sock_ep->fid.pep.fid.ops = &sock_ep_fi_ops;
sock_ep->fid.pep.ops = &sock_ep_ops;
sock_ep->fid.pep.cm = &sock_ep_cm_ops;
break;
default:
goto err;
}
@ -1090,6 +1183,10 @@ int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info,
sock_ep->fclass = fclass;
*ep = sock_ep;
fastlock_acquire(&sock_dom->lock);
sock_ep->ep_id = sock_dom->ep_count++;
fastlock_release(&sock_dom->lock);
if (info) {
sock_ep->ep_type = info->ep_type;
sock_ep->info.caps = info->caps;
@ -1100,7 +1197,9 @@ int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info,
memcpy(sock_ep->src_addr, info->src_addr,
sizeof(struct sockaddr_in));
((struct sockaddr_in*)sock_ep->src_addr)->sin_port =
htons(sock_dom->service);
htons(atoi(sock_dom->service));
((struct sockaddr_in*)sock_ep->src_addr)->sin_family =
sock_ep->ep_id;
}
if (info->dest_addr) {
@ -1121,7 +1220,8 @@ int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info,
sock_ep->tx_attr = *info->tx_attr;
sock_ep->op_flags = info->tx_attr->op_flags;
sock_ep->tx_attr.size = sock_ep->tx_attr.size ?
sock_ep->tx_attr.size : SOCK_EP_MAX_TX_CTX_SZ;
sock_ep->tx_attr.size :
(SOCK_EP_TX_SZ * SOCK_EP_TX_ENTRY_SZ);
}
if (info->rx_attr) {
@ -1132,12 +1232,18 @@ int sock_alloc_endpoint(struct fid_domain *domain, struct fi_info *info,
sock_ep->rx_attr.total_buffered_recv :
SOCK_EP_MAX_BUFF_RECV;
}
sock_ep->info.connreq = info->connreq;
}
atomic_init(&sock_ep->ref, 0);
atomic_init(&sock_ep->num_tx_ctx, 0);
atomic_init(&sock_ep->num_rx_ctx, 0);
if (sock_ep->ep_attr.tx_ctx_cnt == FI_SHARED_CONTEXT)
sock_ep->tx_shared = 1;
if (sock_ep->ep_attr.rx_ctx_cnt == FI_SHARED_CONTEXT)
sock_ep->rx_shared = 1;
if (sock_ep->fclass != FI_CLASS_SEP) {
sock_ep->ep_attr.tx_ctx_cnt = 1;
sock_ep->ep_attr.rx_ctx_cnt = 1;

View file

@ -71,16 +71,16 @@ const struct fi_ep_attr sock_dgram_ep_attr = {
const struct fi_tx_attr sock_dgram_tx_attr = {
.caps = SOCK_EP_DGRAM_CAP,
.op_flags = SOCK_DGRAM_DEF_OPS,
.op_flags = SOCK_DEF_OPS,
.msg_order = SOCK_EP_MSG_ORDER,
.inject_size = SOCK_EP_MAX_INJECT_SZ,
.size = SOCK_EP_MAX_TX_CTX_SZ,
.size = SOCK_EP_TX_SZ,
.iov_limit = SOCK_EP_MAX_IOV_LIMIT,
};
const struct fi_rx_attr sock_dgram_rx_attr = {
.caps = SOCK_EP_DGRAM_CAP,
.op_flags = SOCK_DGRAM_DEF_OPS,
.op_flags = SOCK_DEF_OPS,
.msg_order = SOCK_EP_MSG_ORDER,
.total_buffered_recv = SOCK_EP_MAX_BUFF_RECV,
.size = SOCK_EP_MAX_MSG_SZ,
@ -200,18 +200,12 @@ static struct fi_info *sock_dgram_fi_info(struct fi_info *hints,
if (!_info)
return NULL;
if (!hints->caps)
_info->caps = SOCK_EP_DGRAM_CAP;
if (!hints->tx_attr)
*(_info->tx_attr) = sock_dgram_tx_attr;
if (!hints->rx_attr)
*(_info->rx_attr) = sock_dgram_rx_attr;
if (!hints->ep_attr)
*(_info->ep_attr) = sock_dgram_ep_attr;
_info->caps = SOCK_EP_DGRAM_CAP;
*(_info->tx_attr) = sock_dgram_tx_attr;
*(_info->rx_attr) = sock_dgram_rx_attr;
*(_info->ep_attr) = sock_dgram_ep_attr;
_info->caps |= (_info->rx_attr->caps | _info->tx_attr->caps);
return _info;
}
@ -256,9 +250,6 @@ int sock_dgram_getinfo(uint32_t version, const char *node, const char *service,
return ret;
}
src_addr = calloc(1, sizeof(struct sockaddr_in));
dest_addr = calloc(1, sizeof(struct sockaddr_in));
memset(&sock_hints, 0, sizeof(struct addrinfo));
sock_hints.ai_family = AF_INET;
sock_hints.ai_socktype = SOCK_STREAM;
@ -293,6 +284,11 @@ int sock_dgram_getinfo(uint32_t version, const char *node, const char *service,
goto err;
}
src_addr = calloc(1, sizeof(struct sockaddr_in));
if (!src_addr) {
ret = -FI_ENOMEM;
goto err;
}
memcpy(src_addr, result->ai_addr, result->ai_addrlen);
freeaddrinfo(result);
} else if (node || service) {
@ -317,6 +313,11 @@ int sock_dgram_getinfo(uint32_t version, const char *node, const char *service,
goto err;
}
dest_addr = calloc(1, sizeof(struct sockaddr_in));
if (!dest_addr) {
ret = -FI_ENOMEM;
goto err;
}
memcpy(dest_addr, result->ai_addr, result->ai_addrlen);
udp_sock = socket(AF_INET, SOCK_DGRAM, 0);
@ -328,7 +329,12 @@ int sock_dgram_getinfo(uint32_t version, const char *node, const char *service,
goto err;
}
len = sizeof(struct sockaddr_in);
len = sizeof(struct sockaddr_in);
src_addr = calloc(1, sizeof(struct sockaddr_in));
if (!src_addr) {
ret = -FI_ENOMEM;
goto err;
}
ret = getsockname(udp_sock, (struct sockaddr*)src_addr, &len);
if (ret != 0) {
SOCK_LOG_ERROR("getsockname failed\n");
@ -341,11 +347,25 @@ int sock_dgram_getinfo(uint32_t version, const char *node, const char *service,
}
if (hints->src_addr) {
if (!src_addr) {
src_addr = calloc(1, sizeof(struct sockaddr_in));
if (!src_addr) {
ret = -FI_ENOMEM;
goto err;
}
}
assert(hints->src_addrlen == sizeof(struct sockaddr_in));
memcpy(src_addr, hints->src_addr, hints->src_addrlen);
}
if (hints->dest_addr) {
if (!dest_addr) {
dest_addr = calloc(1, sizeof(struct sockaddr_in));
if (!dest_addr) {
ret = -FI_ENOMEM;
goto err;
}
}
assert(hints->dest_addrlen == sizeof(struct sockaddr_in));
memcpy(dest_addr, hints->dest_addr, hints->dest_addrlen);
}
@ -368,14 +388,18 @@ int sock_dgram_getinfo(uint32_t version, const char *node, const char *service,
goto err;
}
if (src_addr)
free(src_addr);
if (dest_addr)
free(dest_addr);
*info = _info;
free(src_addr);
free(dest_addr);
return 0;
err:
free(src_addr);
free(dest_addr);
if (src_addr)
free(src_addr);
if (dest_addr)
free(dest_addr);
SOCK_LOG_ERROR("fi_getinfo failed\n");
return ret;
}
@ -433,12 +457,12 @@ int sock_dgram_ep(struct fid_domain *domain, struct fi_info *info,
if (ret)
return ret;
*ep = &endpoint->fid.ep;
*ep = &endpoint->ep;
return 0;
}
int sock_dgram_sep(struct fid_domain *domain, struct fi_info *info,
struct fid_sep **sep, void *context)
struct fid_ep **sep, void *context)
{
int ret;
struct sock_ep *endpoint;
@ -447,6 +471,6 @@ int sock_dgram_sep(struct fid_domain *domain, struct fi_info *info,
if (ret)
return ret;
*sep = &endpoint->fid.sep;
*sep = &endpoint->ep;
return 0;
}

View file

@ -75,7 +75,7 @@ const struct fi_tx_attr sock_msg_tx_attr = {
.op_flags = SOCK_DEF_OPS,
.msg_order = SOCK_EP_MSG_ORDER,
.inject_size = SOCK_EP_MAX_INJECT_SZ,
.size = SOCK_EP_MAX_TX_CTX_SZ,
.size = SOCK_EP_TX_SZ,
.iov_limit = SOCK_EP_MAX_IOV_LIMIT,
};
@ -201,18 +201,12 @@ static struct fi_info *sock_msg_fi_info(struct fi_info *hints,
if (!_info)
return NULL;
if (!hints->caps)
_info->caps = SOCK_EP_MSG_CAP;
if (!hints->tx_attr)
*(_info->tx_attr) = sock_msg_tx_attr;
if (!hints->rx_attr)
*(_info->rx_attr) = sock_msg_rx_attr;
if (!hints->ep_attr)
*(_info->ep_attr) = sock_msg_ep_attr;
_info->caps = SOCK_EP_MSG_CAP;
*(_info->tx_attr) = sock_msg_tx_attr;
*(_info->rx_attr) = sock_msg_rx_attr;
*(_info->ep_attr) = sock_msg_ep_attr;
_info->caps |= (_info->rx_attr->caps | _info->tx_attr->caps);
return _info;
}
@ -257,9 +251,6 @@ int sock_msg_getinfo(uint32_t version, const char *node, const char *service,
return ret;
}
src_addr = calloc(1, sizeof(struct sockaddr_in));
dest_addr = calloc(1, sizeof(struct sockaddr_in));
memset(&sock_hints, 0, sizeof(struct addrinfo));
sock_hints.ai_family = AF_INET;
sock_hints.ai_socktype = SOCK_STREAM;
@ -294,6 +285,11 @@ int sock_msg_getinfo(uint32_t version, const char *node, const char *service,
goto err;
}
src_addr = calloc(1, sizeof(struct sockaddr_in));
if (!src_addr) {
ret = -FI_ENOMEM;
goto err;
}
memcpy(src_addr, result->ai_addr, result->ai_addrlen);
freeaddrinfo(result);
} else if (node || service) {
@ -318,6 +314,11 @@ int sock_msg_getinfo(uint32_t version, const char *node, const char *service,
goto err;
}
dest_addr = calloc(1, sizeof(struct sockaddr_in));
if (!dest_addr) {
ret = -FI_ENOMEM;
goto err;
}
memcpy(dest_addr, result->ai_addr, result->ai_addrlen);
udp_sock = socket(AF_INET, SOCK_DGRAM, 0);
@ -329,7 +330,12 @@ int sock_msg_getinfo(uint32_t version, const char *node, const char *service,
goto err;
}
len = sizeof(struct sockaddr_in);
len = sizeof(struct sockaddr_in);
src_addr = calloc(1, sizeof(struct sockaddr_in));
if (!src_addr) {
ret = -FI_ENOMEM;
goto err;
}
ret = getsockname(udp_sock, (struct sockaddr*)src_addr, &len);
if (ret != 0) {
SOCK_LOG_ERROR("getsockname failed\n");
@ -347,17 +353,38 @@ int sock_msg_getinfo(uint32_t version, const char *node, const char *service,
}
if (hints->dest_addr) {
if (!dest_addr) {
dest_addr = calloc(1, sizeof(struct sockaddr_in));
if (!dest_addr) {
ret = -FI_ENOMEM;
goto err;
}
}
assert(hints->dest_addrlen == sizeof(struct sockaddr_in));
memcpy(dest_addr, hints->dest_addr, hints->dest_addrlen);
}
if (dest_addr) {
if (!dest_addr) {
dest_addr = calloc(1, sizeof(struct sockaddr_in));
if (!dest_addr) {
ret = -FI_ENOMEM;
goto err;
}
}
memcpy(sa_ip, inet_ntoa(dest_addr->sin_addr), INET_ADDRSTRLEN);
SOCK_LOG_INFO("dest_addr: family: %d, IP is %s\n",
((struct sockaddr_in*)dest_addr)->sin_family, sa_ip);
}
if (src_addr) {
if (!src_addr) {
src_addr = calloc(1, sizeof(struct sockaddr_in));
if (!src_addr) {
ret = -FI_ENOMEM;
goto err;
}
}
memcpy(sa_ip, inet_ntoa(src_addr->sin_addr), INET_ADDRSTRLEN);
SOCK_LOG_INFO("src_addr: family: %d, IP is %s\n",
((struct sockaddr_in*)src_addr)->sin_family, sa_ip);
@ -370,13 +397,17 @@ int sock_msg_getinfo(uint32_t version, const char *node, const char *service,
}
*info = _info;
free(src_addr);
free(dest_addr);
if (src_addr)
free(src_addr);
if (dest_addr)
free(dest_addr);
return 0;
err:
free(src_addr);
free(dest_addr);
if (src_addr)
free(src_addr);
if (dest_addr)
free(dest_addr);
SOCK_LOG_ERROR("fi_getinfo failed\n");
return ret;
}
@ -389,7 +420,7 @@ static int sock_ep_cm_getname(fid_t fid, void *addr, size_t *addrlen)
return -FI_ETOOSMALL;
}
sock_ep = container_of(fid, struct sock_ep, fid.ep.fid);
sock_ep = container_of(fid, struct sock_ep, ep.fid);
*addrlen = MIN(*addrlen, sizeof(struct sockaddr_in));
memcpy(addr, sock_ep->src_addr, *addrlen);
return 0;
@ -404,88 +435,312 @@ static int sock_ep_cm_getpeer(struct fid_ep *ep, void *addr, size_t *addrlen)
return -FI_ETOOSMALL;
}
sock_ep = container_of(ep, struct sock_ep, fid.ep);
sock_ep = container_of(ep, struct sock_ep, ep);
*addrlen = MIN(*addrlen, sizeof(struct sockaddr_in));
memcpy(addr, sock_ep->dest_addr, *addrlen);
return 0;
}
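/* Create the UDP socket used for the connection-management handshake;
 * returns 0 on failure, which callers treat as an error. */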
static int sock_ep_cm_create_socket()
{
int sock, optval;
sock = socket(AF_INET, SOCK_DGRAM, 0);
if (sock < 0)
return 0;
optval = 1;
setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
&optval, sizeof optval);
return sock;
}
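/* Send a CM message to the peer and wait for its one-byte ACK, retrying up
 * to SOCK_EP_MAX_RETRY times with a SOCK_CM_COMM_TIMEOUT poll between
 * attempts. Returns 0 on success, -1 if no ACK arrives. */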
static int sock_ep_cm_send_msg(int sock_fd,
const struct sockaddr_in *addr, void *msg, size_t len)
{
int ret, retry = 0;
unsigned char response;
struct sockaddr_in from_addr;
socklen_t addr_len;
char sa_ip[INET_ADDRSTRLEN] = {0};
memcpy(sa_ip, inet_ntoa(addr->sin_addr), INET_ADDRSTRLEN);
SOCK_LOG_INFO("Sending message to %s:%d\n",
sa_ip, ntohs(addr->sin_port));
while (retry < SOCK_EP_MAX_RETRY) {
ret = sendto(sock_fd, (char *)msg, len, 0, addr, sizeof *addr);
SOCK_LOG_INFO("Total Sent: %d\n", ret);
if (ret < 0)
return -1;
ret = fi_poll_fd(sock_fd, SOCK_CM_COMM_TIMEOUT);
retry++;
if (ret <= 0) {
continue;
}
addr_len = sizeof(struct sockaddr_in);
ret = recvfrom(sock_fd, &response, sizeof(response), 0,
&from_addr, &addr_len);
SOCK_LOG_INFO("Received ACK: %d\n", ret);
if (ret == sizeof(response))
return 0;
}
return -1;
}
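/* Acknowledge a received CM message with a single byte; retried up to
 * SOCK_EP_MAX_RETRY times. Returns 1 once the ACK went out, 0 otherwise. */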
static int sock_ep_cm_send_ack(int sock_fd, struct sockaddr_in *addr)
{
int ack_sent = 0, retry = 0, ret;
unsigned char response;
while(!ack_sent && retry < SOCK_EP_MAX_RETRY) {
ret = sendto(sock_fd, &response, sizeof(response), 0,
addr, sizeof *addr);
retry++;
SOCK_LOG_INFO("ack: %d\n", ret);
if (ret == sizeof(response)) {
ack_sent = 1;
break;
}
if (ret == EWOULDBLOCK || ret == EAGAIN)
usleep(SOCK_CM_COMM_TIMEOUT * 1000);
}
return ack_sent;
}
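/* Per-endpoint listener thread: waits on the EP's CM socket for
 * SOCK_CONN_ACCEPT or SOCK_CONN_REJECT responses, ACKs them, and reports
 * FI_CONNECTED or FI_ECONNREFUSED (with any user data) to the bound EQ. */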
static void *sock_msg_ep_listener_thread (void *data)
{
struct sock_ep *ep = (struct sock_ep *)data;
struct sock_conn_response *conn_response = NULL;
struct fi_eq_cm_entry cm_entry;
struct fi_eq_err_entry cm_err_entry;
struct sockaddr_in from_addr;
socklen_t addr_len;
int ret, user_data_sz;
struct fid_ep *fid_ep;
struct sock_ep *sock_ep;
SOCK_LOG_INFO("Starting listener thread for EP: %p\n", ep);
ep->do_listen = 1;
while((volatile int)ep->do_listen) {
ret = fi_poll_fd(ep->socket, -1);
if (ret <= 0)
continue;
if (conn_response == NULL) {
conn_response = (struct sock_conn_response*)
calloc(1, sizeof(*conn_response) +
SOCK_EP_MAX_CM_DATA_SZ);
if (!conn_response) {
SOCK_LOG_ERROR("cannot allocate\n");
return NULL;
}
}
addr_len = sizeof(struct sockaddr_in);
ret = recvfrom(ep->socket, (char*)conn_response,
sizeof(*conn_response) + SOCK_EP_MAX_CM_DATA_SZ,
0, &from_addr, &addr_len);
if (ret <= 0)
continue;
SOCK_LOG_INFO("Total received: %d\n", ret);
if (ret < sizeof(*conn_response) ||
!sock_ep_cm_send_ack(ep->socket, &from_addr))
continue;
user_data_sz = 0;
switch (conn_response->hdr.type) {
case SOCK_CONN_ACCEPT:
SOCK_LOG_INFO("Received SOCK_CONN_ACCEPT\n");
memset(&cm_entry, 0, sizeof(cm_entry));
cm_entry.fid = conn_response->hdr.c_fid;
if (ret > sizeof(struct sock_conn_response)) {
user_data_sz = ret -
sizeof(struct sock_conn_response);
memcpy(&cm_entry.data,
(char *)conn_response +
sizeof(struct sock_conn_response),
user_data_sz);
}
fid_ep = container_of(conn_response->hdr.c_fid,
struct fid_ep, fid);
sock_ep = container_of(fid_ep, struct sock_ep, ep);
sock_ep->connected = 1;
sock_ep_enable(&ep->ep);
if (sock_eq_report_event(ep->eq, FI_CONNECTED, &cm_entry,
sizeof(cm_entry) + user_data_sz, 0))
SOCK_LOG_ERROR("Error in writing to EQ\n");
break;
case SOCK_CONN_REJECT:
SOCK_LOG_INFO("Received SOCK_CONN_REJECT\n");
memset(&cm_err_entry, 0, sizeof(cm_err_entry));
cm_err_entry.fid = conn_response->hdr.c_fid;
cm_err_entry.context = NULL;
cm_err_entry.data = 0;
cm_err_entry.err = -FI_ECONNREFUSED;
cm_err_entry.prov_errno = 0;
cm_err_entry.err_data = NULL;
if (ret > sizeof(struct sock_conn_response)) {
user_data_sz = ret -
sizeof(struct sock_conn_response);
memcpy(&cm_entry.data,
(char *)conn_response +
sizeof(struct sock_conn_response),
user_data_sz);
}
if (sock_eq_report_event(ep->eq, FI_ECONNREFUSED,
&cm_err_entry,
sizeof (cm_err_entry) +
user_data_sz, 0))
SOCK_LOG_ERROR("Error in writing to EQ\n");
goto out;
default:
SOCK_LOG_ERROR("Invalid event\n");
break;
}
conn_response = NULL;
}
out:
if (conn_response)
free(conn_response);
close(ep->socket);
ep->socket = 0;
return NULL;
}
static int sock_ep_cm_connect(struct fid_ep *ep, const void *addr,
const void *param, size_t paramlen)
{
struct sock_conn_req req;
struct sock_conn_req *req;
struct sock_ep *_ep;
struct sock_eq *_eq;
_ep = container_of(ep, struct sock_ep, fid.ep);
_ep = container_of(ep, struct sock_ep, ep);
_eq = _ep->eq;
if (!_eq) {
SOCK_LOG_ERROR("no EQ bound with this ep\n");
if (!_eq || paramlen > SOCK_EP_MAX_CM_DATA_SZ)
return -FI_EINVAL;
req = (struct sock_conn_req*)calloc(1,
sizeof(*req) + paramlen);
if (!req)
return -FI_ENOMEM;
_ep->rem_ep_id = ((struct sockaddr *)addr)->sa_family;
((struct sockaddr *)addr)->sa_family = AF_INET;
req->hdr.type = SOCK_CONN_REQ;
req->ep_id = _ep->ep_id;
req->hdr.c_fid = &ep->fid;
req->hdr.s_fid = 0;
memcpy(&req->info, &_ep->info, sizeof(struct fi_info));
memcpy(&req->src_addr, _ep->info.src_addr, sizeof(struct sockaddr_in));
memcpy(&req->dest_addr, _ep->info.dest_addr, sizeof(struct sockaddr_in));
memcpy(&req->tx_attr, _ep->info.tx_attr, sizeof(struct fi_tx_attr));
memcpy(&req->rx_attr, _ep->info.rx_attr, sizeof(struct fi_rx_attr));
memcpy(&req->ep_attr, _ep->info.ep_attr, sizeof(struct fi_ep_attr));
memcpy(&req->domain_attr, _ep->info.domain_attr, sizeof(struct fi_domain_attr));
memcpy(&req->fabric_attr, _ep->info.fabric_attr, sizeof(struct fi_fabric_attr));
if (param && paramlen)
memcpy(&req->user_data, param, paramlen);
if (!_ep->socket) {
_ep->socket = sock_ep_cm_create_socket();
if (!_ep->socket) {
free (req);
return -FI_EIO;
}
}
if (sock_ep_cm_send_msg(_ep->socket, addr, req, sizeof (*req) + paramlen))
return -FI_EIO;
if (pthread_create(&_ep->listener_thread, NULL,
sock_msg_ep_listener_thread, (void *)_ep)) {
SOCK_LOG_ERROR("Couldn't create listener thread\n");
free (req);
return -FI_EINVAL;
}
if(((struct sockaddr *)addr)->sa_family != AF_INET) {
SOCK_LOG_ERROR("invalid address type to connect: only IPv4 supported\n");
return -FI_EINVAL;
}
req.type = SOCK_CONNREQ;
req.c_fid = &ep->fid;
req.s_fid = 0;
memcpy(&req.info, &_ep->info, sizeof(struct fi_info));
memcpy(&req.src_addr, _ep->info.src_addr, sizeof(struct sockaddr_in));
memcpy(&req.dest_addr, _ep->info.dest_addr, sizeof(struct sockaddr_in));
memcpy(&req.tx_attr, _ep->info.tx_attr, sizeof(struct fi_tx_attr));
memcpy(&req.rx_attr, _ep->info.rx_attr, sizeof(struct fi_rx_attr));
memcpy(&req.ep_attr, _ep->info.ep_attr, sizeof(struct fi_ep_attr));
memcpy(&req.domain_attr, _ep->info.domain_attr, sizeof(struct fi_domain_attr));
memcpy(&req.fabric_attr, _ep->info.fabric_attr, sizeof(struct fi_fabric_attr));
if (sock_util_sendto(_eq->wait_fd, &req, sizeof(struct sock_conn_req),
(struct sockaddr_in *)addr, sizeof(struct sockaddr_in), 0))
return -errno;
free (req);
return 0;
}
static int sock_ep_cm_accept(struct fid_ep *ep, const void *param, size_t paramlen)
{
struct sock_conn_req *req;
struct sock_domain *_dom;
struct fi_eq_cm_entry cm_entry;
struct sock_conn_response *response;
struct sockaddr_in *addr;
socklen_t addrlen;
struct sock_ep *_ep;
struct sock_eq *_eq;
int ret;
_ep = container_of(ep, struct sock_ep, fid.ep);
_ep = container_of(ep, struct sock_ep, ep);
_eq = _ep->eq;
if (!_eq) {
SOCK_LOG_ERROR("no EQ bound with this ep\n");
if (!_eq || paramlen > SOCK_EP_MAX_CM_DATA_SZ)
return -FI_EINVAL;
}
_dom = _ep->domain;
addr = _dom->info.dest_addr;
addrlen = _dom->info.dest_addrlen;
req = (struct sock_conn_req *)_dom->info.connreq;
response = (struct sock_conn_response*)calloc(1,
sizeof(*response) + paramlen);
if (!response)
return -FI_ENOMEM;
req = (struct sock_conn_req *)_ep->info.connreq;
if (!req) {
SOCK_LOG_ERROR("invalid connreq for cm_accept\n");
return -FI_EINVAL;
}
memcpy(&response->hdr, &req->hdr, sizeof(struct sock_conn_hdr));
if (param && paramlen)
memcpy(&response->user_data, param, paramlen);
if (((struct sockaddr *)addr)->sa_family != AF_INET) {
SOCK_LOG_ERROR("invalid address type to connect: only IPv4 supported\n");
return -FI_EINVAL;
addr = &req->from_addr;
_ep->rem_ep_id = req->ep_id;
response->hdr.type = SOCK_CONN_ACCEPT;
response->hdr.s_fid = &ep->fid;
_ep->socket = sock_ep_cm_create_socket();
if (!_ep->socket) {
ret = -FI_EIO;
goto out;
}
req->type = SOCK_ACCEPT;
req->s_fid = &ep->fid;
if (sock_util_sendto(_eq->wait_fd, req, sizeof(req->type) +
sizeof(req->c_fid) + sizeof(req->s_fid), addr, addrlen, 0))
return -errno;
if (sock_ep_cm_send_msg(_ep->socket, addr, response,
sizeof (*response) + paramlen)) {
close(_ep->socket);
ret = -FI_EIO;
goto out;
}
sock_ep_enable(ep);
memset(&cm_entry, 0, sizeof(cm_entry));
cm_entry.fid = &ep->fid;
_ep->connected = 1;
ret = sock_eq_report_event(_eq, FI_CONNECTED, &cm_entry,
sizeof(cm_entry), 0);
out:
free(req);
return 0;
free(response);
_ep->info.connreq = NULL;
return ret;
}
struct fi_ops_cm sock_ep_cm_ops = {
@ -552,7 +807,7 @@ int sock_msg_ep(struct fid_domain *domain, struct fi_info *info,
if (ret)
return ret;
*ep = &endpoint->fid.ep;
*ep = &endpoint->ep;
return 0;
}
@ -572,19 +827,19 @@ static int sock_pep_fi_bind(fid_t fid, struct fid *bfid, uint64_t flags)
return -FI_EINVAL;
}
pep->eq = eq;
if ((eq->attr.wait_obj == FI_WAIT_FD) && (eq->wait_fd < 0))
sock_eq_openwait(eq, (char *)&pep->service);
return 0;
}
static int sock_pep_fi_close(fid_t fid)
{
int c;
struct sock_pep *pep;
pep = container_of(fid, struct sock_pep, pep.fid);
pep->do_listen = 0;
write(pep->signal_fds[0], &c, 1);
pthread_join(pep->listener_thread, NULL);
free(pep);
return 0;
}
@ -596,9 +851,181 @@ static struct fi_ops sock_pep_fi_ops = {
.ops_open = fi_no_ops_open,
};
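/* Rebuild a self-contained fi_info from the wire-format connection request
 * so it can be verified and passed to the application with the FI_CONNREQ
 * event (src/dest addresses are swapped by sock_fi_info). */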
static struct fi_info * sock_ep_msg_process_info(struct sock_conn_req *req)
{
req->info.src_addr = &req->src_addr;
req->info.dest_addr = &req->dest_addr;
req->info.tx_attr = &req->tx_attr;
req->info.rx_attr = &req->rx_attr;
req->info.ep_attr = &req->ep_attr;
req->info.domain_attr = &req->domain_attr;
req->info.fabric_attr = &req->fabric_attr;
req->info.domain_attr->name = NULL;
req->info.fabric_attr->name = NULL;
req->info.fabric_attr->prov_name = NULL;
if (sock_verify_info(&req->info)) {
SOCK_LOG_INFO("incoming conn_req not supported\n");
errno = EINVAL;
return NULL;
}
return sock_fi_info(FI_EP_MSG, &req->info,
req->info.dest_addr, req->info.src_addr);
}
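/* Passive-endpoint listener thread: receives SOCK_CONN_REQ datagrams,
 * ACKs them, and reports an FI_CONNREQ event carrying the reconstructed
 * fi_info (plus any connect-time user data) to the PEP's EQ. */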
static void *sock_pep_listener_thread (void *data)
{
struct sock_pep *pep = (struct sock_pep *)data;
struct sock_conn_req *conn_req = NULL;
struct fi_eq_cm_entry cm_entry;
struct sockaddr_in from_addr;
struct pollfd poll_fds[2];
socklen_t addr_len;
int ret, user_data_sz, tmp;
SOCK_LOG_INFO("Starting listener thread for PEP: %p\n", pep);
poll_fds[0].fd = pep->socket;
poll_fds[1].fd = pep->signal_fds[1];
poll_fds[0].events = poll_fds[1].events = POLLIN;
while((volatile int)pep->do_listen) {
if (poll(poll_fds, 2, -1) > 0) {
if (poll_fds[1].revents & POLLIN) {
read(pep->signal_fds[1], &tmp, 1);
continue;
}
} else
return NULL;
if (conn_req == NULL) {
conn_req = (struct sock_conn_req*)calloc(1,
sizeof(*conn_req) +
SOCK_EP_MAX_CM_DATA_SZ);
if (!conn_req) {
SOCK_LOG_ERROR("cannot allocate\n");
return NULL;
}
}
addr_len = sizeof(struct sockaddr_in);
ret = recvfrom(pep->socket, (char*)conn_req,
sizeof(*conn_req) + SOCK_EP_MAX_CM_DATA_SZ, 0,
&from_addr, &addr_len);
if (ret <= 0)
continue;
memcpy(&conn_req->from_addr, &from_addr, sizeof(struct sockaddr_in));
SOCK_LOG_INFO("Msg received: %d\n", ret);
memset(&cm_entry, 0, sizeof(cm_entry));
user_data_sz = 0;
if (conn_req->hdr.type == SOCK_CONN_REQ) {
SOCK_LOG_INFO("Received SOCK_CONN_REQ\n");
if (ret < sizeof(*conn_req) ||
!sock_ep_cm_send_ack(pep->socket, &from_addr)) {
SOCK_LOG_ERROR("Invalid connection request\n");
break;
}
cm_entry.info = sock_ep_msg_process_info(conn_req);
cm_entry.info->connreq = (fi_connreq_t)conn_req;
if (ret > sizeof(struct sock_conn_req)) {
user_data_sz = ret - sizeof(struct sock_conn_req);
memcpy(&cm_entry.data,
(char *)conn_req + sizeof(struct sock_conn_req),
user_data_sz);
}
if (sock_eq_report_event(pep->eq, FI_CONNREQ, &cm_entry,
sizeof(cm_entry) + user_data_sz, 0))
SOCK_LOG_ERROR("Error in writing to EQ\n");
} else {
SOCK_LOG_ERROR("Invalid event\n");
}
conn_req = NULL;
}
if (conn_req)
free(conn_req);
close(pep->socket);
pep->socket = 0;
return NULL;
}
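/* Bind a UDP socket to the PEP's source address (falling back to an
 * ephemeral port when none was specified) and start the listener thread. */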
static int sock_pep_create_listener_thread(struct sock_pep *pep)
{
int optval, ret;
socklen_t addr_size;
struct sockaddr_in addr;
struct addrinfo *s_res = NULL, *p;
struct addrinfo hints;
char sa_ip[INET_ADDRSTRLEN] = {0};
char sa_port[NI_MAXSERV] = {0};
pep->do_listen = 1;
memset(&hints, 0, sizeof(hints));
hints.ai_family = AF_INET;
hints.ai_socktype = SOCK_DGRAM;
hints.ai_flags = AI_PASSIVE;
hints.ai_protocol = IPPROTO_UDP;
memcpy(sa_ip, inet_ntoa(pep->src_addr.sin_addr), INET_ADDRSTRLEN);
sprintf(sa_port, "%d", ntohs(pep->src_addr.sin_port));
ret = getaddrinfo(sa_ip, sa_port, &hints, &s_res);
if (ret) {
SOCK_LOG_ERROR("no available AF_INET address service:%s, %s\n",
sa_port, gai_strerror(ret));
return -FI_EINVAL;
}
for (p=s_res; p; p=p->ai_next) {
pep->socket = socket(p->ai_family, p->ai_socktype,
p->ai_protocol);
if (pep->socket >= 0) {
optval = 1;
setsockopt(pep->socket, SOL_SOCKET, SO_REUSEADDR, &optval,
sizeof optval);
if (!bind(pep->socket, s_res->ai_addr, s_res->ai_addrlen))
break;
close(pep->socket);
pep->socket = -1;
}
}
freeaddrinfo(s_res);
if (pep->socket < 0)
return -FI_EIO;
optval = 1;
setsockopt(pep->socket, SOL_SOCKET, SO_REUSEADDR, &optval,
sizeof optval);
if (pep->src_addr.sin_port == 0) {
addr_size = sizeof(addr);
if (getsockname(pep->socket, (struct sockaddr*)&addr, &addr_size))
return -FI_EINVAL;
pep->src_addr.sin_port = addr.sin_port;
}
SOCK_LOG_INFO("Listener thread bound to %s:%d\n",
sa_ip, ntohs(pep->src_addr.sin_port));
if (pthread_create(&pep->listener_thread, NULL,
sock_pep_listener_thread, (void *)pep)) {
SOCK_LOG_ERROR("Couldn't create listener thread\n");
return -FI_EINVAL;
}
return 0;
}
static int sock_pep_listen(struct fid_pep *pep)
{
return 0;
struct sock_pep *_pep;
_pep = container_of(pep, struct sock_pep, pep);
return sock_pep_create_listener_thread(_pep);
}
static int sock_pep_reject(struct fid_pep *pep, fi_connreq_t connreq,
@ -606,38 +1033,52 @@ static int sock_pep_reject(struct fid_pep *pep, fi_connreq_t connreq,
{
struct sock_conn_req *req;
struct sockaddr_in *addr;
socklen_t addrlen;
struct sock_pep *_pep;
struct sock_eq *_eq;
struct sock_conn_response *response;
int ret = 0;
_pep = container_of(pep, struct sock_pep, pep);
_eq = _pep->eq;
if (!_eq) {
SOCK_LOG_ERROR("no EQ bound with this pep\n");
if (!_eq || paramlen > SOCK_EP_MAX_CM_DATA_SZ)
return -FI_EINVAL;
}
req = (struct sock_conn_req *)connreq;
if (!req) {
SOCK_LOG_ERROR("invalid connreq for cm_accept\n");
return -FI_EINVAL;
if (!req)
return 0;
response = (struct sock_conn_response*)
calloc(1, sizeof(*response) + paramlen);
if (!response)
return -FI_ENOMEM;
memcpy(&response->hdr, &req->hdr, sizeof(struct sock_conn_hdr));
if (param && paramlen)
memcpy(&response->user_data, param, paramlen);
addr = &req->from_addr;
response->hdr.type = SOCK_CONN_REJECT;
response->hdr.s_fid = NULL;
if (!_pep->socket) {
_pep->socket = sock_ep_cm_create_socket();
if (!_pep->socket) {
ret = -FI_EIO;
goto out;
}
}
addr = &req->src_addr;
addrlen = sizeof(struct sockaddr_in);
if (((struct sockaddr *)addr)->sa_family != AF_INET) {
SOCK_LOG_ERROR("invalid address type to connect: only IPv4 supported\n");
return -FI_EINVAL;
if (sock_ep_cm_send_msg(_pep->socket, addr, req,
sizeof(struct sock_conn_response))) {
ret = -FI_EIO;
goto out;
}
ret = 0;
req->type = SOCK_REJECT;
req->s_fid = NULL;
if (sock_util_sendto(_eq->wait_fd, req, sizeof(req->type) +
sizeof(req->c_fid), addr, addrlen, 0))
return -errno;
out:
free(req);
return 0;
free(response);
return ret;
}
static struct fi_ops_cm sock_pep_cm_ops = {
@ -652,7 +1093,7 @@ static struct fi_ops_cm sock_pep_cm_ops = {
};
int sock_msg_sep(struct fid_domain *domain, struct fi_info *info,
struct fid_sep **sep, void *context)
struct fid_ep **sep, void *context)
{
int ret;
struct sock_ep *endpoint;
@ -661,29 +1102,18 @@ int sock_msg_sep(struct fid_domain *domain, struct fi_info *info,
if (ret)
return ret;
*sep = &endpoint->fid.sep;
return 0;
}
int sock_msg_pep(struct fid_fabric *fabric, struct fi_info *info,
struct fid_pep **pep, void *context)
{
int ret;
struct sock_ep *endpoint;
ret = sock_msg_endpoint(NULL, info, &endpoint, context, FI_CLASS_PEP);
if (ret)
return ret;
*pep = &endpoint->fid.pep;
*sep = &endpoint->ep;
return 0;
}
int sock_msg_passive_ep(struct fid_fabric *fabric, struct fi_info *info,
struct fid_pep **pep, void *context)
struct fid_pep **pep, void *context)
{
int ret, flags;
struct sock_pep *_pep;
int ret;
char hostname[HOST_NAME_MAX];
struct addrinfo sock_hints;
struct addrinfo *result = NULL;
if (info) {
ret = sock_verify_info(info);
@ -698,36 +1128,36 @@ int sock_msg_passive_ep(struct fid_fabric *fabric, struct fi_info *info,
return -FI_ENOMEM;
if(info) {
struct sockaddr *dest_addr = (struct sockaddr *)info->dest_addr;
struct sockaddr *src_addr = (struct sockaddr *)info->src_addr;
if (!dest_addr || !src_addr) {
SOCK_LOG_ERROR("invalid dest_addr or src_addr\n");
goto err;
}
if (!dest_addr->sa_family) {
if(getnameinfo(src_addr, sizeof(*src_addr), NULL, 0,
_pep->service,
sizeof(_pep->service),
NI_NUMERICSERV)) {
SOCK_LOG_ERROR("could not resolve src_addr\n");
goto err;
}
if (info->src_addr) {
memcpy(&_pep->src_addr, info->src_addr,
sizeof(struct sockaddr_in));
} else {
if(getnameinfo(dest_addr, sizeof(*dest_addr), NULL, 0,
_pep->service,
sizeof(_pep->service),
NI_NUMERICSERV)) {
SOCK_LOG_ERROR("could not resolve dest_addr\n");
gethostname(hostname, HOST_NAME_MAX);
memset(&sock_hints, 0, sizeof(struct addrinfo));
sock_hints.ai_family = AF_INET;
sock_hints.ai_socktype = SOCK_STREAM;
ret = getaddrinfo(hostname, NULL, &sock_hints, &result);
if (ret != 0) {
ret = FI_EINVAL;
SOCK_LOG_INFO("getaddrinfo failed!\n");
goto err;
}
memcpy(&_pep->src_addr, result->ai_addr, result->ai_addrlen);
}
_pep->info = *info;
} else {
SOCK_LOG_ERROR("invalid fi_info\n");
goto err;
}
if(socketpair(AF_UNIX, SOCK_STREAM, 0, _pep->signal_fds) < 0)
goto err;
flags = fcntl(_pep->signal_fds[1], F_GETFL, 0);
fcntl(_pep->signal_fds[1], F_SETFL, flags | O_NONBLOCK);
_pep->pep.fid.fclass = FI_CLASS_PEP;
_pep->pep.fid.context = context;
_pep->pep.fid.ops = &sock_pep_fi_ops;
@ -736,30 +1166,9 @@ int sock_msg_passive_ep(struct fid_fabric *fabric, struct fi_info *info,
_pep->sock_fab = container_of(fabric, struct sock_fabric, fab_fid);
*pep = &_pep->pep;
return 0;
err:
free(_pep);
return -errno;
return ret;
}
struct fi_info * sock_ep_msg_process_info(struct sock_conn_req *req)
{
req->info.src_addr = &req->src_addr;
req->info.dest_addr = &req->dest_addr;
req->info.tx_attr = &req->tx_attr;
req->info.rx_attr = &req->rx_attr;
req->info.ep_attr = &req->ep_attr;
req->info.domain_attr = &req->domain_attr;
req->info.fabric_attr = &req->fabric_attr;
if (sock_verify_info(&req->info)) {
SOCK_LOG_INFO("incoming conn_req not supported\n");
errno = EINVAL;
return NULL;
}
/* reverse src_addr and dest_addr */
return sock_fi_info(FI_EP_MSG, &req->info,
req->info.dest_addr, req->info.src_addr);
}

View file

@ -75,7 +75,7 @@ const struct fi_tx_attr sock_rdm_tx_attr = {
.op_flags = SOCK_DEF_OPS,
.msg_order = SOCK_EP_MSG_ORDER,
.inject_size = SOCK_EP_MAX_INJECT_SZ,
.size = SOCK_EP_MAX_TX_CTX_SZ,
.size = SOCK_EP_TX_SZ,
.iov_limit = SOCK_EP_MAX_IOV_LIMIT,
};
@ -202,13 +202,12 @@ static struct fi_info *sock_rdm_fi_info(struct fi_info *hints,
if (!_info)
return NULL;
if (!hints->caps)
_info->caps = SOCK_EP_RDM_CAP;
_info->caps = SOCK_EP_RDM_CAP;
*(_info->tx_attr) = sock_rdm_tx_attr;
*(_info->rx_attr) = sock_rdm_rx_attr;
*(_info->ep_attr) = sock_rdm_ep_attr;
_info->caps |= (_info->rx_attr->caps | _info->tx_attr->caps);
return _info;
}
@ -249,9 +248,6 @@ int sock_rdm_getinfo(uint32_t version, const char *node, const char *service,
return ret;
}
src_addr = calloc(1, sizeof(struct sockaddr_in));
dest_addr = calloc(1, sizeof(struct sockaddr_in));
memset(&sock_hints, 0, sizeof(struct addrinfo));
sock_hints.ai_family = AF_INET;
sock_hints.ai_socktype = SOCK_STREAM;
@ -286,6 +282,11 @@ int sock_rdm_getinfo(uint32_t version, const char *node, const char *service,
goto err;
}
src_addr = calloc(1, sizeof(struct sockaddr_in));
if (!src_addr) {
ret = -FI_ENOMEM;
goto err;
}
memcpy(src_addr, result->ai_addr, result->ai_addrlen);
freeaddrinfo(result);
} else if (node || service) {
@ -310,6 +311,11 @@ int sock_rdm_getinfo(uint32_t version, const char *node, const char *service,
goto err;
}
dest_addr = calloc(1, sizeof(struct sockaddr_in));
if (!dest_addr) {
ret = -FI_ENOMEM;
goto err;
}
memcpy(dest_addr, result->ai_addr, result->ai_addrlen);
udp_sock = socket(AF_INET, SOCK_DGRAM, 0);
@ -321,7 +327,12 @@ int sock_rdm_getinfo(uint32_t version, const char *node, const char *service,
goto err;
}
len = sizeof(struct sockaddr_in);
len = sizeof(struct sockaddr_in);
src_addr = calloc(1, sizeof(struct sockaddr_in));
if (!src_addr) {
ret = -FI_ENOMEM;
goto err;
}
ret = getsockname(udp_sock, (struct sockaddr*)src_addr, &len);
if (ret != 0) {
SOCK_LOG_ERROR("getsockname failed\n");
@ -334,11 +345,25 @@ int sock_rdm_getinfo(uint32_t version, const char *node, const char *service,
}
if (hints->src_addr) {
if (!src_addr) {
src_addr = calloc(1, sizeof(struct sockaddr_in));
if (!src_addr) {
ret = -FI_ENOMEM;
goto err;
}
}
assert(hints->src_addrlen == sizeof(struct sockaddr_in));
memcpy(src_addr, hints->src_addr, hints->src_addrlen);
}
if (hints->dest_addr) {
if (!dest_addr) {
dest_addr = calloc(1, sizeof(struct sockaddr_in));
if (!dest_addr) {
ret = -FI_ENOMEM;
goto err;
}
}
assert(hints->dest_addrlen == sizeof(struct sockaddr_in));
memcpy(dest_addr, hints->dest_addr, hints->dest_addrlen);
}
@ -362,13 +387,17 @@ int sock_rdm_getinfo(uint32_t version, const char *node, const char *service,
}
*info = _info;
free(src_addr);
free(dest_addr);
if (src_addr)
free(src_addr);
if (dest_addr)
free(dest_addr);
return 0;
err:
free(src_addr);
free(dest_addr);
if (src_addr)
free(src_addr);
if (dest_addr)
free(dest_addr);
SOCK_LOG_ERROR("fi_getinfo failed\n");
return ret;
}
@ -426,12 +455,12 @@ int sock_rdm_ep(struct fid_domain *domain, struct fi_info *info,
if (ret)
return ret;
*ep = &endpoint->fid.ep;
*ep = &endpoint->ep;
return 0;
}
int sock_rdm_sep(struct fid_domain *domain, struct fi_info *info,
struct fid_sep **sep, void *context)
struct fid_ep **sep, void *context)
{
int ret;
struct sock_ep *endpoint;
@ -440,7 +469,7 @@ int sock_rdm_sep(struct fid_domain *domain, struct fi_info *info,
if (ret)
return ret;
*sep = &endpoint->fid.sep;
*sep = &endpoint->ep;
return 0;
}

View file

@ -41,6 +41,7 @@
#include <fcntl.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <netdb.h>
#include <fi_list.h>
#include "sock.h"
@ -56,18 +57,17 @@ ssize_t sock_eq_sread(struct fid_eq *eq, uint32_t *event, void *buf, size_t len,
sock_eq = container_of(eq, struct sock_eq, eq);
fastlock_acquire(&sock_eq->lock);
if(!dlistfd_empty(&sock_eq->err_list)) {
ret = -FI_EAVAIL;
goto out;
return -FI_EAVAIL;
}
if(dlistfd_empty(&sock_eq->list)) {
ret = dlistfd_wait_avail(&sock_eq->list, timeout);
if(ret <= 0)
goto out;
return ret;
}
fastlock_acquire(&sock_eq->lock);
list = sock_eq->list.list.next;
entry = container_of(list, struct sock_eq_entry, entry);
@ -208,115 +208,6 @@ static struct fi_ops_eq sock_eq_ops = {
.strerror = sock_eq_strerror,
};
ssize_t sock_eq_fd_sread(struct fid_eq *eq, uint32_t *event, void *buf,
size_t len, int timeout, uint64_t flags)
{
struct sock_eq *sock_eq;
struct fid_ep *fid_ep;
struct sock_ep *sock_ep;
int ret;
struct sock_conn_req *req;
socklen_t addrlen;
struct sockaddr_in addr;
struct fi_eq_cm_entry *entry;
struct fi_eq_err_entry err;
req = (struct sock_conn_req *)calloc(1, sizeof(struct sock_conn_req));
if (!req) {
SOCK_LOG_ERROR("calloc for conn_req failed\n");
errno = ENOMEM;
return 0;
}
sock_eq = container_of(eq, struct sock_eq, eq);
addrlen = sizeof(struct sockaddr_in);
ret = sock_util_recvfrom(sock_eq->wait_fd, req, sizeof *req, &addr, &addrlen,
timeout);
entry = (struct fi_eq_cm_entry *)buf;
switch (req->type) {
case SOCK_ACCEPT:
SOCK_LOG_INFO("received SOCK_ACCEPT\n");
if (ret != sizeof req->type + sizeof req->c_fid + sizeof req->s_fid) {
SOCK_LOG_ERROR("recvfrom value invalid: %d\n", ret);
return 0;
}
*event = FI_CONNECTED;
entry->info = NULL;
entry->fid = req->c_fid;
fid_ep = container_of(req->c_fid, struct fid_ep, fid);
sock_ep = container_of(fid_ep, struct sock_ep, fid.ep);
sock_ep->connected = 1;
req->type = SOCK_CONNECTED;
if (sock_util_sendto(sock_eq->wait_fd, req, sizeof(req->type) +
sizeof(req->c_fid) + sizeof(req->s_fid), &addr, addrlen, 0))
return 0;
free(req);
break;
case SOCK_CONNREQ:
SOCK_LOG_INFO("received SOCK_CONNREQ\n");
if (ret != sizeof *req) {
SOCK_LOG_ERROR("recvfrom value invalid: %d\n", ret);
return 0;
}
*event = FI_CONNREQ;
entry->info = sock_ep_msg_process_info(req);
entry->info->connreq = (fi_connreq_t)req;
if (!entry->info) {
SOCK_LOG_ERROR("failed create new info\n");
return -errno;
}
break;
case SOCK_REJECT:
SOCK_LOG_INFO("received SOCK_REJECT\n");
if (ret != sizeof req->type + sizeof req->c_fid) {
SOCK_LOG_ERROR("recvfrom value invalid: %d\n", ret);
return 0;
}
err.fid = req->c_fid;
err.context = NULL;
err.data = 0;
err.err = -FI_ECONNREFUSED;
err.prov_errno = 0;
err.err_data = NULL;
sock_eq_report_event(sock_eq, 0, &err, sizeof err, 0);
free(req);
break;
case SOCK_CONNECTED:
SOCK_LOG_INFO("received SOCK_CONNECTED\n");
*event = FI_CONNECTED;
entry->info = NULL;
entry->fid = req->s_fid;
fid_ep = container_of(req->s_fid, struct fid_ep, fid);
sock_ep = container_of(fid_ep, struct sock_ep, fid.ep);
sock_ep->connected = 1;
free(req);
break;
case SOCK_SHUTDOWN:
SOCK_LOG_INFO("received SOCK_SHUTDOWN\n");
*event = FI_SHUTDOWN;
entry->info = NULL;
entry->fid = req->s_fid;
free(req);
break;
default:
SOCK_LOG_ERROR("unexpected req to EQ\n");
free(req);
return 0;
}
return sizeof *entry ;
}
static struct fi_ops_eq sock_eq_fd_ops = {
.size = sizeof(struct fi_ops_eq),
.read = sock_eq_read,
.readerr = sock_eq_readerr,
.write = sock_eq_write,
.sread = sock_eq_fd_sread,
.strerror = sock_eq_strerror,
};
int sock_eq_fi_close(struct fid *fid)
{
struct sock_eq *sock_eq;
@ -404,12 +295,12 @@ static struct fi_eq_attr _sock_eq_def_attr ={
.wait_set = NULL,
};
int sock_eq_openwait(struct sock_eq *eq, char *service)
int sock_eq_openwait(struct sock_eq *eq, const char *service)
{
SOCK_LOG_INFO("enter\n");
struct addrinfo *s_res = NULL, *p;
struct addrinfo hints;
int optval;
int optval, ret;
if (eq->wait_fd > 0 && !strncmp((char *)&eq->service, service, NI_MAXSERV))
{
@ -426,9 +317,10 @@ int sock_eq_openwait(struct sock_eq *eq, char *service)
hints.ai_flags = AI_PASSIVE;
hints.ai_protocol = IPPROTO_UDP;
if(getaddrinfo(NULL, service, &hints, &s_res)) {
SOCK_LOG_ERROR("no available AF_INET address\n");
perror("no available AF_INET address");
ret = getaddrinfo(NULL, service, &hints, &s_res);
if (ret) {
SOCK_LOG_ERROR("no available AF_INET address service:%s, %s\n",
service, gai_strerror(ret));
return -FI_EINVAL;
}
@ -506,7 +398,6 @@ int sock_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
break;
case FI_WAIT_FD:
sock_eq->signal = 0;
sock_eq->eq.ops = &sock_eq_fd_ops;
break;
case FI_WAIT_MUTEX_COND:

View file

@ -44,6 +44,7 @@
const char const sock_fab_name[] = "IP";
const char const sock_dom_name[] = "sockets";
const char const sock_prov_name[] = "sockets";
const struct fi_fabric_attr sock_fabric_attr = {
.fabric = NULL,
@ -237,7 +238,7 @@ static void fi_sockets_fini(void)
}
struct fi_provider sock_prov = {
.name = "IP",
.name = sock_prov_name,
.version = FI_VERSION(SOCK_MAJOR_VERSION, SOCK_MINOR_VERSION),
.fi_version = FI_VERSION(FI_MAJOR_VERSION, FI_MINOR_VERSION),
.getinfo = sock_getinfo,
@ -255,5 +256,10 @@ SOCKETS_INI
sock_log_level = SOCK_ERROR;
}
tmp = getenv("OFI_SOCK_PROGRESS_YIELD_TIME");
if (tmp) {
sock_progress_thread_wait = atoi(tmp);
}
return (&sock_prov);
}

View file

@ -67,7 +67,7 @@ static ssize_t sock_ep_recvmsg(struct fid_ep *ep, const struct fi_msg *msg,
switch (ep->fid.fclass) {
case FI_CLASS_EP:
sock_ep = container_of(ep, struct sock_ep, fid.ep);
sock_ep = container_of(ep, struct sock_ep, ep);
rx_ctx = sock_ep->rx_ctx;
break;
@ -157,7 +157,7 @@ static ssize_t sock_ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
switch (ep->fid.fclass) {
case FI_CLASS_EP:
sock_ep = container_of(ep, struct sock_ep, fid.ep);
sock_ep = container_of(ep, struct sock_ep, ep);
tx_ctx = sock_ep->tx_ctx;
break;
@ -177,7 +177,8 @@ static ssize_t sock_ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
} else {
conn = sock_av_lookup_addr(tx_ctx->av, msg->addr);
}
assert(conn);
if (!conn)
return -FI_EAGAIN;
SOCK_LOG_INFO("New sendmsg on TX: %p using conn: %p\n",
tx_ctx, conn);
@ -187,7 +188,7 @@ static ssize_t sock_ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
tx_op.op = SOCK_OP_SEND;
total_len = 0;
if (flags & FI_INJECT) {
if (SOCK_INJECT_OK(flags)) {
for (i=0; i< msg->iov_count; i++) {
total_len += msg->msg_iov[i].iov_len;
}
@ -221,7 +222,7 @@ static ssize_t sock_ep_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(uint64_t));
}
if (flags & FI_INJECT) {
if (SOCK_INJECT_OK(flags)) {
for (i=0; i< msg->iov_count; i++) {
sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].iov_base,
msg->msg_iov[i].iov_len);
@ -317,8 +318,6 @@ struct fi_ops_msg sock_ep_msg_ops = {
.sendmsg = sock_ep_sendmsg,
.inject = sock_ep_inject,
.senddata = sock_ep_senddata,
.rx_size_left = fi_no_msg_rx_size_left,
.tx_size_left = fi_no_msg_tx_size_left,
};
static ssize_t sock_ep_trecvmsg(struct fid_ep *ep,
@ -331,7 +330,7 @@ static ssize_t sock_ep_trecvmsg(struct fid_ep *ep,
switch (ep->fid.fclass) {
case FI_CLASS_EP:
sock_ep = container_of(ep, struct sock_ep, fid.ep);
sock_ep = container_of(ep, struct sock_ep, ep);
rx_ctx = sock_ep->rx_ctx;
break;
@ -423,7 +422,7 @@ static ssize_t sock_ep_tsendmsg(struct fid_ep *ep,
switch (ep->fid.fclass) {
case FI_CLASS_EP:
sock_ep = container_of(ep, struct sock_ep, fid.ep);
sock_ep = container_of(ep, struct sock_ep, ep);
tx_ctx = sock_ep->tx_ctx;
break;
@ -439,10 +438,11 @@ static ssize_t sock_ep_tsendmsg(struct fid_ep *ep,
assert(tx_ctx->enabled && msg->iov_count <= SOCK_EP_MAX_IOV_LIMIT);
conn = sock_av_lookup_addr(tx_ctx->av, msg->addr);
assert(conn);
if (!conn)
return -FI_EAGAIN;
total_len = 0;
if (flags & FI_INJECT) {
if (SOCK_INJECT_OK(flags)) {
for (i=0; i< msg->iov_count; i++) {
total_len += msg->msg_iov[i].iov_len;
}
@ -479,7 +479,7 @@ static ssize_t sock_ep_tsendmsg(struct fid_ep *ep,
}
sock_tx_ctx_write(tx_ctx, &msg->tag, sizeof(uint64_t));
if (flags & FI_INJECT) {
if (SOCK_INJECT_OK(flags)) {
for (i=0; i< msg->iov_count; i++) {
sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].iov_base,
msg->msg_iov[i].iov_len);
@ -579,7 +579,7 @@ static ssize_t sock_ep_tsearch(struct fid_ep *ep, uint64_t *tag, uint64_t ignore
switch (ep->fid.fclass) {
case FI_CLASS_EP:
sock_ep = container_of(ep, struct sock_ep, fid.ep);
sock_ep = container_of(ep, struct sock_ep, ep);
rx_ctx = sock_ep->rx_ctx;
break;

View file

@ -96,8 +96,7 @@ static int sock_poll_poll(struct fid_poll *pollset, void **context, int count)
switch (list_item->fid->fclass) {
case FI_CLASS_CQ:
cq = container_of(list_item->fid, struct sock_cq, cq_fid);
if (cq->domain->progress_mode == FI_PROGRESS_MANUAL)
sock_cq_progress(cq);
sock_cq_progress(cq);
fastlock_acquire(&cq->lock);
if (rbfdused(&cq->cq_rbfd)) {
*context++ = cq->cq_fid.fid.context;
@ -108,8 +107,7 @@ static int sock_poll_poll(struct fid_poll *pollset, void **context, int count)
case FI_CLASS_CNTR:
cntr = container_of(list_item->fid, struct sock_cntr, cntr_fid);
if (cntr->domain->progress_mode == FI_PROGRESS_MANUAL)
sock_cntr_progress(cntr);
sock_cntr_progress(cntr);
fastlock_acquire(&cntr->mut);
if (atomic_get(&cntr->value) >= atomic_get(&cntr->threshold)) {
*context++ = cntr->cntr_fid.fid.context;

View file

@ -111,6 +111,7 @@ static void sock_pe_release_entry(struct sock_pe *pe,
else
pe_entry->conn->rx_pe_entry = NULL;
pe->num_free_entries++;
pe_entry->conn = NULL;
memset(&pe_entry->pe.rx, 0, sizeof(pe_entry->pe.rx));
memset(&pe_entry->pe.tx, 0, sizeof(pe_entry->pe.tx));
@ -124,7 +125,7 @@ static void sock_pe_release_entry(struct sock_pe *pe,
pe_entry->buf = 0;
dlist_remove(&pe_entry->entry);
dlist_insert_tail(&pe_entry->entry, &pe->free_list);
dlist_insert_head(&pe_entry->entry, &pe->free_list);
SOCK_LOG_INFO("progress entry %p released\n", pe_entry);
}
@ -133,11 +134,16 @@ static struct sock_pe_entry *sock_pe_acquire_entry(struct sock_pe *pe)
struct dlist_entry *entry;
struct sock_pe_entry *pe_entry;
if (dlist_empty(&pe->free_list))
return NULL;
pe->num_free_entries--;
entry = pe->free_list.next;
pe_entry = container_of(entry, struct sock_pe_entry, entry);
dlist_remove(&pe_entry->entry);
dlist_insert_tail(&pe_entry->entry, &pe->busy_list);
SOCK_LOG_INFO("progress entry %p acquired \n", pe_entry);
SOCK_LOG_INFO("progress entry %p acquired : %lu\n", pe_entry,
PE_INDEX(pe, pe_entry));
return pe_entry;
}
@ -145,16 +151,17 @@ static void sock_pe_report_tx_completion(struct sock_pe_entry *pe_entry)
{
int ret1 = 0, ret2 = 0;
if (pe_entry->comp->send_cq &&
(!pe_entry->comp->send_cq_event ||
(pe_entry->comp->send_cq_event &&
(pe_entry->msg_hdr.flags & FI_COMPLETION))))
ret1 = pe_entry->comp->send_cq->report_completion(
pe_entry->comp->send_cq, pe_entry->addr, pe_entry);
if (!(pe_entry->flags & FI_INJECT)) {
if (pe_entry->comp->send_cq &&
(!pe_entry->comp->send_cq_event ||
(pe_entry->comp->send_cq_event &&
(pe_entry->msg_hdr.flags & FI_COMPLETION))))
ret1 = pe_entry->comp->send_cq->report_completion(
pe_entry->comp->send_cq, pe_entry->addr, pe_entry);
}
if (pe_entry->comp->send_cntr)
ret2 = sock_cntr_inc(pe_entry->comp->send_cntr);
if (ret1 < 0 || ret2 < 0) {
SOCK_LOG_ERROR("Failed to report completion %p\n",
@ -175,8 +182,8 @@ static void sock_pe_report_rx_completion(struct sock_pe_entry *pe_entry)
if (pe_entry->comp->recv_cq &&
(!pe_entry->comp->recv_cq_event ||
(pe_entry->comp->recv_cq_event &&
(pe_entry->msg_hdr.flags & FI_COMPLETION))))
(pe_entry->comp->recv_cq_event &&
(pe_entry->msg_hdr.flags & FI_COMPLETION))))
ret1 = pe_entry->comp->recv_cq->report_completion(
pe_entry->comp->recv_cq, pe_entry->addr,
pe_entry);
@ -197,7 +204,7 @@ static void sock_pe_report_rx_completion(struct sock_pe_entry *pe_entry)
}
}
void sock_pe_report_mr_completion(struct sock_domain *domain,
static void sock_pe_report_mr_completion(struct sock_domain *domain,
struct sock_pe_entry *pe_entry)
{
int i;
@ -219,7 +226,7 @@ void sock_pe_report_mr_completion(struct sock_domain *domain,
}
}
void sock_pe_report_remote_write(struct sock_rx_ctx *rx_ctx,
static void sock_pe_report_remote_write(struct sock_rx_ctx *rx_ctx,
struct sock_pe_entry *pe_entry)
{
pe_entry->buf = pe_entry->pe.rx.rx_iov[0].iov.addr;
@ -246,7 +253,26 @@ void sock_pe_report_remote_write(struct sock_rx_ctx *rx_ctx,
sock_cntr_inc(pe_entry->comp->rem_write_cntr);
}
void sock_pe_report_remote_read(struct sock_rx_ctx *rx_ctx,
static void sock_pe_report_write_completion(struct sock_pe_entry *pe_entry)
{
if (!(pe_entry->flags & FI_INJECT)) {
sock_pe_report_tx_completion(pe_entry);
if (pe_entry->comp->write_cq &&
(pe_entry->comp->send_cq != pe_entry->comp->write_cq) &&
(!pe_entry->comp->write_cq_event ||
(pe_entry->comp->write_cq_event &&
(pe_entry->msg_hdr.flags & FI_COMPLETION))))
pe_entry->comp->write_cq->report_completion(
pe_entry->comp->write_cq, pe_entry->addr, pe_entry);
}
if (pe_entry->comp->write_cntr &&
pe_entry->comp->write_cntr != pe_entry->comp->send_cntr)
sock_cntr_inc(pe_entry->comp->write_cntr);
}
static void sock_pe_report_remote_read(struct sock_rx_ctx *rx_ctx,
struct sock_pe_entry *pe_entry)
{
pe_entry->buf = pe_entry->pe.rx.rx_iov[0].iov.addr;
@ -273,6 +299,25 @@ void sock_pe_report_remote_read(struct sock_rx_ctx *rx_ctx,
sock_cntr_inc(pe_entry->comp->rem_read_cntr);
}
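/* Initiator-side completion for an RMA read: CQ entries are suppressed for
 * injected operations; the read counter is incremented when it differs
 * from the send counter. */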
static void sock_pe_report_read_completion(struct sock_pe_entry *pe_entry)
{
if (!(pe_entry->flags & FI_INJECT)) {
sock_pe_report_tx_completion(pe_entry);
if (pe_entry->comp->read_cq &&
(pe_entry->comp->read_cq != pe_entry->comp->send_cq) &&
(!pe_entry->comp->read_cq_event ||
(pe_entry->comp->read_cq_event &&
(pe_entry->msg_hdr.flags & FI_COMPLETION))))
pe_entry->comp->read_cq->report_completion(
pe_entry->comp->read_cq, pe_entry->addr, pe_entry);
}
if (pe_entry->comp->read_cntr &&
pe_entry->comp->read_cntr != pe_entry->comp->send_cntr)
sock_cntr_inc(pe_entry->comp->read_cntr);
}
static void sock_pe_report_error(struct sock_pe_entry *pe_entry, int rem)
{
if (pe_entry->comp->recv_cntr)
@ -288,7 +333,9 @@ static void sock_pe_progress_pending_ack(struct sock_pe *pe,
int len, data_len, i;
struct sock_conn *conn = pe_entry->conn;
assert(conn);
if (!conn)
return;
if (conn->tx_pe_entry != NULL && conn->tx_pe_entry != pe_entry) {
SOCK_LOG_INFO("Cannot progress %p as conn %p is being used by %p\n",
pe_entry, conn, conn->tx_pe_entry);
@ -341,6 +388,7 @@ static void sock_pe_progress_pending_ack(struct sock_pe *pe,
}
static void sock_pe_send_response(struct sock_pe *pe,
struct sock_rx_ctx *rx_ctx,
struct sock_pe_entry *pe_entry,
size_t data_len, uint8_t op_type)
{
@ -354,7 +402,9 @@ static void sock_pe_send_response(struct sock_pe *pe,
response->msg_hdr.version = SOCK_WIRE_PROTO_VERSION;
response->msg_hdr.op_type = op_type;
response->msg_hdr.msg_len = htonll(response->msg_hdr.msg_len);
response->msg_hdr.rx_id = htons(pe_entry->msg_hdr.rx_id);
response->msg_hdr.rx_id = pe_entry->msg_hdr.rx_id;
response->msg_hdr.ep_id = htons(sock_av_lookup_ep_id(rx_ctx->av,
pe_entry->addr));
pe->pe_atomic = NULL;
pe_entry->done_len = 0;
@ -397,8 +447,7 @@ static int sock_pe_handle_ack(struct sock_pe *pe, struct sock_pe_entry *pe_entry
waiting_entry, response->pe_entry_id);
assert(waiting_entry->type == SOCK_PE_TX);
if (!(waiting_entry->flags & FI_INJECT))
sock_pe_report_tx_completion(waiting_entry);
sock_pe_report_tx_completion(waiting_entry);
waiting_entry->is_complete = 1;
pe_entry->is_complete = 1;
return 0;
@ -432,8 +481,30 @@ static int sock_pe_handle_read_complete(struct sock_pe *pe,
return 0;
len += waiting_entry->pe.tx.data.tx_iov[i].dst.iov.len;
}
sock_pe_report_read_completion(waiting_entry);
waiting_entry->is_complete = 1;
pe_entry->is_complete = 1;
return 0;
}
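/* Handle a SOCK_OP_WRITE_COMPLETE response: locate the waiting TX entry by
 * its PE index and report the write completion on the initiator side. */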
static int sock_pe_handle_write_complete(struct sock_pe *pe,
struct sock_pe_entry *pe_entry)
{
struct sock_pe_entry *waiting_entry;
struct sock_msg_response *response;
if (sock_pe_read_response(pe_entry))
return 0;
response = &pe_entry->response;
assert(response->pe_entry_id <= SOCK_PE_MAX_ENTRIES);
waiting_entry = &pe->pe_table[response->pe_entry_id];
SOCK_LOG_INFO("Received ack for PE entry %p (index: %d)\n",
waiting_entry, response->pe_entry_id);
sock_pe_report_tx_completion(waiting_entry);
assert(waiting_entry->type == SOCK_PE_TX);
sock_pe_report_write_completion(waiting_entry);
waiting_entry->is_complete = 1;
pe_entry->is_complete = 1;
return 0;
@ -471,14 +542,16 @@ static int sock_pe_handle_atomic_complete(struct sock_pe *pe,
len += (waiting_entry->pe.tx.data.tx_iov[i].res.ioc.count * datatype_sz);
}
if (!(waiting_entry->flags & FI_INJECT))
sock_pe_report_tx_completion(waiting_entry);
if (waiting_entry->pe.rx.rx_op.atomic.res_iov_len)
sock_pe_report_read_completion(waiting_entry);
else
sock_pe_report_write_completion(waiting_entry);
waiting_entry->is_complete = 1;
pe_entry->is_complete = 1;
return 0;
}
static int sock_pe_process_rx_read(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx,
struct sock_pe_entry *pe_entry)
{
@ -507,7 +580,8 @@ static int sock_pe_process_rx_read(struct sock_pe *pe, struct sock_rx_ctx *rx_ct
(void*)pe_entry->pe.rx.rx_iov[i].iov.addr,
pe_entry->pe.rx.rx_iov[i].iov.len,
pe_entry->pe.rx.rx_iov[i].iov.key);
sock_pe_send_response(pe, pe_entry, 0, SOCK_OP_READ_ERROR);
sock_pe_send_response(pe, rx_ctx, pe_entry, 0,
SOCK_OP_READ_ERROR);
return -FI_EINVAL;
}
@ -524,8 +598,8 @@ static int sock_pe_process_rx_read(struct sock_pe *pe, struct sock_rx_ctx *rx_ct
}
sock_pe_report_remote_read(rx_ctx, pe_entry);
sock_pe_send_response(pe, pe_entry, data_len,
SOCK_OP_READ_COMPLETE);
sock_pe_send_response(pe, rx_ctx, pe_entry, data_len,
SOCK_OP_READ_COMPLETE);
return 0;
}
@ -563,7 +637,7 @@ static int sock_pe_process_rx_write(struct sock_pe *pe, struct sock_rx_ctx *rx_c
(void*)pe_entry->pe.rx.rx_iov[i].iov.addr,
pe_entry->pe.rx.rx_iov[i].iov.len,
pe_entry->pe.rx.rx_iov[i].iov.key);
sock_pe_send_response(pe, pe_entry, 0,
sock_pe_send_response(pe, rx_ctx, pe_entry, 0,
SOCK_OP_WRITE_ERROR);
break;
}
@ -598,7 +672,8 @@ static int sock_pe_process_rx_write(struct sock_pe *pe, struct sock_rx_ctx *rx_c
out:
sock_pe_report_remote_write(rx_ctx, pe_entry);
sock_pe_report_mr_completion(rx_ctx->domain, pe_entry);
sock_pe_send_response(pe, pe_entry, 0, SOCK_OP_WRITE_COMPLETE);
sock_pe_send_response(pe, rx_ctx, pe_entry, 0,
SOCK_OP_WRITE_COMPLETE);
return ret;
}
@ -898,6 +973,14 @@ static int sock_pe_update_atomic(void *cmp, void *dst, void *src,
break;
}
case FI_LONG_DOUBLE:
{
long double *_cmp, *_dst, *_src;
_cmp = cmp, _src = src, _dst = dst;
SOCK_ATOMIC_UPDATE_FLOAT(_cmp, _src, _dst);
break;
}
default:
SOCK_LOG_ERROR("Atomic datatype not supported\n");
break;
@ -968,7 +1051,8 @@ static int sock_pe_process_rx_atomic(struct sock_pe *pe, struct sock_rx_ctx *rx_
(void*)pe_entry->pe.rx.rx_iov[i].ioc.addr,
pe_entry->pe.rx.rx_iov[i].ioc.count * datatype_sz,
pe_entry->pe.rx.rx_iov[i].ioc.key);
sock_pe_send_response(pe, pe_entry, 0, SOCK_OP_ATOMIC_ERROR);
sock_pe_send_response(pe, rx_ctx, pe_entry, 0,
SOCK_OP_ATOMIC_ERROR);
goto err;
}
if (mr->flags & FI_MR_OFFSET)
@ -1003,7 +1087,7 @@ static int sock_pe_process_rx_atomic(struct sock_pe *pe, struct sock_rx_ctx *rx_
sock_pe_report_remote_write(rx_ctx, pe_entry);
sock_pe_report_mr_completion(rx_ctx->domain, pe_entry);
sock_pe_send_response(pe, pe_entry,
sock_pe_send_response(pe, rx_ctx, pe_entry,
pe_entry->pe.rx.rx_op.atomic.res_iov_len ?
entry_len : 0, SOCK_OP_ATOMIC_COMPLETE);
return ret;
@ -1019,24 +1103,26 @@ int sock_pe_progress_buffered_rx(struct sock_rx_ctx *rx_ctx)
struct dlist_entry *entry;
struct sock_pe_entry pe_entry;
struct sock_rx_entry *rx_buffered, *rx_posted;
int i, rem, offset, len, used_len, dst_offset;
int i, rem = 0, offset, len, used_len, dst_offset;
if (dlist_empty(&rx_ctx->rx_entry_list) ||
dlist_empty(&rx_ctx->rx_buffered_list))
goto out;
return 0;
for (entry = rx_ctx->rx_buffered_list.next;
entry != &rx_ctx->rx_buffered_list;) {
rx_buffered = container_of(entry, struct sock_rx_entry, entry);
entry = entry->next;
if (!rx_buffered->is_complete)
continue;
rx_posted = sock_rx_get_entry(rx_ctx, rx_buffered->addr,
rx_buffered->tag);
if (!rx_posted)
continue;
rx_ctx->buffered_len -= rem;
SOCK_LOG_INFO("Consuming buffered entry: %p, ctx: %p\n",
rx_buffered, rx_ctx);
SOCK_LOG_INFO("Consuming posted entry: %p, ctx: %p\n",
@ -1044,6 +1130,7 @@ int sock_pe_progress_buffered_rx(struct sock_rx_ctx *rx_ctx)
offset = 0;
rem = rx_buffered->iov[0].iov.len;
rx_ctx->buffered_len -= rem;
used_len = rx_posted->used;
for (i = 0; i < rx_posted->rx_op.dest_iov_len && rem > 0; i++) {
if (used_len >= rx_posted->rx_op.dest_iov_len) {
@ -1085,7 +1172,7 @@ int sock_pe_progress_buffered_rx(struct sock_rx_ctx *rx_ctx)
if (rem) {
SOCK_LOG_INFO("Not enough space in posted recv buffer\n");
sock_pe_report_error(&pe_entry, rem);
goto out;
return 0;
} else {
sock_pe_report_rx_completion(&pe_entry);
}
@ -1096,8 +1183,6 @@ int sock_pe_progress_buffered_rx(struct sock_rx_ctx *rx_ctx)
if (pe_entry.flags & FI_MULTI_RECV)
sock_rx_release_entry(rx_posted);
}
out:
return 0;
}
@ -1128,21 +1213,23 @@ static int sock_pe_process_rx_send(struct sock_pe *pe, struct sock_rx_ctx *rx_ct
if (pe_entry->done_len == len && !pe_entry->pe.rx.rx_entry) {
data_len = pe_entry->msg_hdr.msg_len - len;
fastlock_acquire(&rx_ctx->lock);
/* progress buffered recvs, if any */
fastlock_acquire(&rx_ctx->lock);
sock_pe_progress_buffered_rx(rx_ctx);
rx_entry = sock_rx_get_entry(rx_ctx, pe_entry->addr, pe_entry->tag);
SOCK_LOG_INFO("Consuming posted entry: %p\n", rx_entry);
SOCK_LOG_INFO("Consuming posted entry: %p\n", rx_entry);
if (!rx_entry) {
SOCK_LOG_INFO("%p: No matching recv, buffering recv (len=%llu)\n",
pe_entry, (long long unsigned int)data_len);
rx_entry = sock_rx_new_buffered_entry(rx_ctx, data_len);
if (!rx_entry)
if (!rx_entry) {
fastlock_release(&rx_ctx->lock);
return -FI_ENOMEM;
}
rx_entry->addr = pe_entry->addr;
rx_entry->tag = pe_entry->tag;
@ -1151,10 +1238,9 @@ static int sock_pe_process_rx_send(struct sock_pe *pe, struct sock_rx_ctx *rx_ct
rx_entry->comp = pe_entry->comp;
pe_entry->context = rx_entry->context;
}
fastlock_release(&rx_ctx->lock);
pe_entry->context = rx_entry->context;
pe_entry->pe.rx.rx_entry = rx_entry;
rx_entry->is_busy = 1;
fastlock_release(&rx_ctx->lock);
}
rx_entry = pe_entry->pe.rx.rx_entry;
@ -1203,6 +1289,7 @@ static int sock_pe_process_rx_send(struct sock_pe *pe, struct sock_rx_ctx *rx_ct
fastlock_release(&rx_ctx->lock);
pe_entry->is_complete = 1;
rx_entry->is_complete = 1;
rx_entry->is_busy = 0;
/* report error, if any */
@ -1214,12 +1301,13 @@ static int sock_pe_process_rx_send(struct sock_pe *pe, struct sock_rx_ctx *rx_ct
if (!rx_entry->is_buffered)
sock_pe_report_rx_completion(pe_entry);
}
if (pe_entry->msg_hdr.flags & FI_REMOTE_COMPLETE) {
sock_pe_send_response(pe, pe_entry, 0, SOCK_OP_SEND_COMPLETE);
}
out:
if (pe_entry->msg_hdr.flags & FI_REMOTE_COMPLETE) {
sock_pe_send_response(pe, rx_ctx, pe_entry, 0,
SOCK_OP_SEND_COMPLETE);
}
if (!rx_entry->is_buffered &&
(!(rx_entry->flags & FI_MULTI_RECV) ||
(pe_entry->flags & FI_MULTI_RECV)))
@ -1261,13 +1349,13 @@ static int sock_pe_process_recv(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx,
break;
case SOCK_OP_SEND_COMPLETE:
case SOCK_OP_WRITE_COMPLETE:
case SOCK_OP_WRITE_ERROR:
case SOCK_OP_READ_ERROR:
case SOCK_OP_ATOMIC_ERROR:
ret = sock_pe_handle_ack(pe, pe_entry);
break;
case SOCK_OP_WRITE_COMPLETE:
ret = sock_pe_handle_write_complete(pe, pe_entry);
break;
case SOCK_OP_READ_COMPLETE:
ret = sock_pe_handle_read_complete(pe, pe_entry);
break;
@ -1276,6 +1364,12 @@ static int sock_pe_process_recv(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx,
ret = sock_pe_handle_atomic_complete(pe, pe_entry);
break;
case SOCK_OP_WRITE_ERROR:
case SOCK_OP_READ_ERROR:
case SOCK_OP_ATOMIC_ERROR:
ret = sock_pe_handle_ack(pe, pe_entry);
break;
default:
ret = -FI_ENOSYS;
SOCK_LOG_ERROR("Operation not supported\n");
@ -1306,9 +1400,10 @@ static int sock_pe_peek_hdr(struct sock_pe *pe,
return -1;
msg_hdr->msg_len = ntohll(msg_hdr->msg_len);
msg_hdr->rx_id = ntohs(msg_hdr->rx_id);
msg_hdr->rx_id = msg_hdr->rx_id;
msg_hdr->flags = ntohll(msg_hdr->flags);
msg_hdr->pe_entry_id = ntohs(msg_hdr->pe_entry_id);
msg_hdr->ep_id = ntohs(msg_hdr->ep_id);
SOCK_LOG_INFO("PE RX (Hdr peek): MsgLen: %lu, TX-ID: %d, Type: %d\n",
msg_hdr->msg_len, msg_hdr->rx_id, msg_hdr->op_type);
@ -1318,6 +1413,9 @@ static int sock_pe_peek_hdr(struct sock_pe *pe,
static int sock_pe_read_hdr(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx,
struct sock_pe_entry *pe_entry)
{
int match;
struct sock_ep *ep;
struct dlist_entry *entry;
struct sock_msg_hdr *msg_hdr;
struct sock_conn *conn = pe_entry->conn;
@ -1334,14 +1432,35 @@ static int sock_pe_read_hdr(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx,
if (msg_hdr->rx_id != rx_ctx->rx_id)
return -1;
if (rx_ctx->ctx.fid.fclass == FI_CLASS_SRX_CTX) {
match = 0;
for (entry = rx_ctx->ep_list.next;
entry != &rx_ctx->ep_list; entry = entry->next) {
ep = container_of(entry, struct sock_ep, rx_ctx_entry);
if (ep->ep_id == msg_hdr->ep_id) {
match = 1;
break;
}
}
if (!match)
return -1;
} else {
if (msg_hdr->ep_id != rx_ctx->ep->ep_id) {
SOCK_LOG_INFO("Mismatch: %d:%d\n",
msg_hdr->ep_id,rx_ctx->ep->ep_id);
return -1;
}
}
sock_pe_recv_field(pe_entry, (void*)msg_hdr,
sizeof(struct sock_msg_hdr), 0);
msg_hdr->msg_len = ntohll(msg_hdr->msg_len);
msg_hdr->rx_id = ntohs(msg_hdr->rx_id);
msg_hdr->rx_id = msg_hdr->rx_id;
msg_hdr->flags = ntohll(msg_hdr->flags);
msg_hdr->pe_entry_id = ntohs(msg_hdr->pe_entry_id);
msg_hdr->ep_id = ntohs(msg_hdr->ep_id);
pe_entry->pe.rx.header_read = 1;
SOCK_LOG_INFO("PE RX (Hdr read): MsgLen: %lu, TX-ID: %d, Type: %d\n",
@ -1397,7 +1516,7 @@ static int sock_pe_progress_tx_atomic(struct sock_pe *pe,
}
/* data */
if (pe_entry->flags & FI_INJECT) {
if (SOCK_INJECT_OK(pe_entry->flags)) {
if (sock_pe_send_field(pe_entry,
&pe_entry->pe.tx.data.inject[0],
pe_entry->pe.tx.tx_op.src_iov_len, len))
@ -1454,7 +1573,7 @@ static int sock_pe_progress_tx_write(struct sock_pe *pe,
len += dest_iov_len;
/* data */
if (pe_entry->flags & FI_INJECT) {
if (SOCK_INJECT_OK(pe_entry->flags)) {
if (sock_pe_send_field(pe_entry, &pe_entry->pe.tx.data.inject[0],
pe_entry->pe.tx.tx_op.src_iov_len, len))
return 0;
@ -1541,7 +1660,7 @@ static int sock_pe_progress_tx_send(struct sock_pe *pe,
len += SOCK_CQ_DATA_SIZE;
}
if (pe_entry->flags & FI_INJECT) {
if (SOCK_INJECT_OK(pe_entry->flags)) {
if (sock_pe_send_field(pe_entry, pe_entry->pe.tx.data.inject,
pe_entry->pe.tx.tx_op.src_iov_len, len))
return 0;
@ -1567,8 +1686,7 @@ static int sock_pe_progress_tx_send(struct sock_pe *pe,
SOCK_LOG_INFO("Send complete\n");
if (!(pe_entry->flags & FI_REMOTE_COMPLETE)) {
if (!(pe_entry->flags & FI_INJECT))
sock_pe_report_tx_completion(pe_entry);
sock_pe_report_tx_completion(pe_entry);
pe_entry->is_complete = 1;
}
}
@ -1583,10 +1701,9 @@ static int sock_pe_progress_tx_entry(struct sock_pe *pe,
int ret;
struct sock_conn *conn = pe_entry->conn;
if (pe_entry->pe.tx.send_done)
if (!pe_entry->conn || pe_entry->pe.tx.send_done)
return 0;
assert(pe_entry->conn);
if (conn->tx_pe_entry != NULL && conn->tx_pe_entry != pe_entry) {
SOCK_LOG_INFO("Cannot progress %p as conn %p is being used by %p\n",
pe_entry, conn, conn->tx_pe_entry);
@ -1598,6 +1715,12 @@ static int sock_pe_progress_tx_entry(struct sock_pe *pe,
conn->tx_pe_entry = pe_entry;
}
if ((pe_entry->flags & FI_FENCE) &&
(tx_ctx->pe_entry_list.next != &pe_entry->ctx_entry)) {
SOCK_LOG_INFO("Waiting for FI_FENCE\n");
return 0;
}
if (!pe_entry->pe.tx.header_sent) {
if (sock_pe_send_field(pe_entry, &pe_entry->msg_hdr,
sizeof(struct sock_msg_hdr), 0))
@ -1633,15 +1756,51 @@ static int sock_pe_progress_tx_entry(struct sock_pe *pe,
return ret;
}
static int sock_pe_progress_rx_pe_entry(struct sock_pe *pe,
struct sock_pe_entry *pe_entry,
struct sock_rx_ctx *rx_ctx)
{
int ret;
if (pe_entry->pe.rx.pending_send) {
sock_pe_progress_pending_ack(pe, pe_entry);
if (pe_entry->is_complete) {
sock_pe_release_entry(pe, pe_entry);
SOCK_LOG_INFO("[%p] RX done\n", pe_entry);
}
return 0;
}
if (!pe_entry->pe.rx.header_read) {
if (sock_pe_read_hdr(pe, rx_ctx, pe_entry) == -1) {
sock_pe_release_entry(pe, pe_entry);
return 0;
}
}
if (pe_entry->pe.rx.header_read) {
ret = sock_pe_process_recv(pe, rx_ctx, pe_entry);
if (ret < 0)
return ret;
}
if (pe_entry->is_complete) {
sock_pe_release_entry(pe, pe_entry);
SOCK_LOG_INFO("[%p] RX done\n", pe_entry);
}
return 0;
}
static int sock_pe_new_rx_entry(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx,
struct sock_ep *ep, struct sock_conn *conn,
int key)
{
int ret;
struct sock_pe_entry *pe_entry;
pe_entry = sock_pe_acquire_entry(pe);
if (!pe_entry) {
SOCK_LOG_ERROR("Error in getting PE entry\n");
return -FI_EINVAL;
SOCK_LOG_INFO("Cannot get PE entry\n");
return 0;
}
memset(&pe_entry->pe.rx, 0, sizeof(struct sock_rx_pe_entry));
@ -1652,12 +1811,12 @@ static int sock_pe_new_rx_entry(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx,
pe_entry->is_complete = 0;
pe_entry->done_len = 0;
if (ep->ep_type == FI_EP_MSG)
if (ep->ep_type == FI_EP_MSG || !ep->av)
pe_entry->addr = FI_ADDR_NOTAVAIL;
else
pe_entry->addr = sock_av_lookup_key(ep->av, key);
if (ep->ep_attr.rx_ctx_cnt == FI_SHARED_CONTEXT)
if (rx_ctx->ctx.fid.fclass == FI_CLASS_SRX_CTX)
pe_entry->comp = &ep->comp;
else
pe_entry->comp = &rx_ctx->comp;
@ -1671,7 +1830,9 @@ static int sock_pe_new_rx_entry(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx,
/* link to tracking list in rx_ctx */
dlist_init(&pe_entry->ctx_entry);
dlist_insert_tail(&pe_entry->ctx_entry, &rx_ctx->pe_entry_list);
return 0;
ret = sock_pe_progress_rx_pe_entry(pe, pe_entry, rx_ctx);
return ret;
}
static int sock_pe_new_tx_entry(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx)
@ -1680,12 +1841,11 @@ static int sock_pe_new_tx_entry(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx)
struct sock_msg_hdr *msg_hdr;
struct sock_pe_entry *pe_entry;
struct sock_ep *ep;
uint16_t rx_id;
pe_entry = sock_pe_acquire_entry(pe);
if (!pe_entry) {
SOCK_LOG_ERROR("Failed to get free PE entry \n");
return -FI_EINVAL;
SOCK_LOG_INFO("Cannot get free PE entry \n");
return 0;
}
memset(&pe_entry->pe.tx, 0, sizeof(struct sock_tx_pe_entry));
@ -1698,6 +1858,7 @@ static int sock_pe_new_tx_entry(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx)
pe_entry->ep = tx_ctx->ep;
pe_entry->pe.tx.tx_ctx = tx_ctx;
dlist_init(&pe_entry->ctx_entry);
dlist_insert_tail(&pe_entry->ctx_entry, &tx_ctx->pe_entry_list);
/* fill in PE tx entry */
@ -1717,9 +1878,9 @@ static int sock_pe_new_tx_entry(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx)
rbfdread(&tx_ctx->rbfd, &pe_entry->buf, sizeof(uint64_t));
rbfdread(&tx_ctx->rbfd, &ep, sizeof(uint64_t));
if (ep && ep->ep_attr.tx_ctx_cnt == FI_SHARED_CONTEXT)
if (ep && tx_ctx->fid.stx.fid.fclass == FI_CLASS_STX_CTX)
pe_entry->comp = &ep->comp;
else
else
pe_entry->comp = &tx_ctx->comp;
if (pe_entry->flags & FI_REMOTE_CQ_DATA) {
@ -1738,7 +1899,7 @@ static int sock_pe_new_tx_entry(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx)
case SOCK_OP_SEND:
case SOCK_OP_TSEND:
if (pe_entry->flags & FI_INJECT) {
if (SOCK_INJECT_OK(pe_entry->flags)) {
rbfdread(&tx_ctx->rbfd, &pe_entry->pe.tx.data.inject[0],
pe_entry->pe.tx.tx_op.src_iov_len);
msg_hdr->msg_len += pe_entry->pe.tx.tx_op.src_iov_len;
@ -1754,7 +1915,7 @@ static int sock_pe_new_tx_entry(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx)
case SOCK_OP_WRITE:
if (pe_entry->flags & FI_INJECT) {
if (SOCK_INJECT_OK(pe_entry->flags)) {
rbfdread(&tx_ctx->rbfd, &pe_entry->pe.tx.data.inject[0],
pe_entry->pe.tx.tx_op.src_iov_len);
msg_hdr->msg_len += pe_entry->pe.tx.tx_op.src_iov_len;
@ -1796,7 +1957,7 @@ static int sock_pe_new_tx_entry(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx)
case SOCK_OP_ATOMIC:
msg_hdr->msg_len += sizeof(struct sock_op);
datatype_sz = fi_datatype_size(pe_entry->pe.tx.tx_op.atomic.datatype);
if (pe_entry->flags & FI_INJECT) {
if (SOCK_INJECT_OK(pe_entry->flags)) {
rbfdread(&tx_ctx->rbfd, &pe_entry->pe.tx.data.inject[0],
pe_entry->pe.tx.tx_op.src_iov_len);
msg_hdr->msg_len += pe_entry->pe.tx.tx_op.src_iov_len;
@ -1845,18 +2006,21 @@ static int sock_pe_new_tx_entry(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx)
msg_hdr->version = SOCK_WIRE_PROTO_VERSION;
if (tx_ctx->av) {
rx_id = (uint16_t)SOCK_GET_RX_ID(pe_entry->addr,
tx_ctx->av->rx_ctx_bits);
msg_hdr->rx_id = htons(rx_id);
msg_hdr->rx_id = (uint16_t)SOCK_GET_RX_ID(pe_entry->addr,
tx_ctx->av->rx_ctx_bits);
msg_hdr->ep_id = sock_av_lookup_ep_id(tx_ctx->av, pe_entry->addr);
} else {
msg_hdr->rx_id = 0;
msg_hdr->ep_id = ep->rem_ep_id;
}
msg_hdr->dest_iov_len = pe_entry->pe.tx.tx_op.src_iov_len;
msg_hdr->dest_iov_len = pe_entry->pe.tx.tx_op.dest_iov_len;
msg_hdr->flags = htonll(pe_entry->flags);
pe_entry->total_len = msg_hdr->msg_len;
msg_hdr->msg_len = htonll(msg_hdr->msg_len);
msg_hdr->pe_entry_id = htons(msg_hdr->pe_entry_id);
return 0;
msg_hdr->ep_id = htons(msg_hdr->ep_id);
return sock_pe_progress_tx_entry(pe, tx_ctx, pe_entry);
}
void sock_pe_add_tx_ctx(struct sock_pe *pe, struct sock_tx_ctx *ctx)
@ -1875,28 +2039,44 @@ void sock_pe_add_rx_ctx(struct sock_pe *pe, struct sock_rx_ctx *ctx)
SOCK_LOG_INFO("RX ctx added to PE\n");
}
void sock_pe_remove_tx_ctx(struct sock_tx_ctx *tx_ctx)
{
fastlock_acquire(&tx_ctx->domain->pe->lock);
dlist_remove(&tx_ctx->pe_entry);
fastlock_release(&tx_ctx->domain->pe->lock);
}
void sock_pe_remove_rx_ctx(struct sock_rx_ctx *rx_ctx)
{
fastlock_acquire(&rx_ctx->domain->pe->lock);
dlist_remove(&rx_ctx->pe_entry);
fastlock_release(&rx_ctx->domain->pe->lock);
}
int sock_pe_progress_rx_ep(struct sock_pe *pe, struct sock_ep *ep,
struct sock_rx_ctx *rx_ctx)
{
struct sock_conn *conn;
struct sock_conn_map *map;
int i, ret = 0, data_avail;
struct pollfd poll_fd;
map = &ep->domain->r_cmap;
assert(map != NULL);
poll_fd.events = POLLIN;
for (i=0; i<map->used; i++) {
conn = &map->table[i];
if (!conn)
continue;
if (rbused(&conn->outbuf))
sock_comm_flush(conn);
data_avail = 0;
if (rbused(&conn->inbuf) > 0) {
data_avail = 1;
} else {
poll_fd.fd = conn->sock_fd;
ret = poll(&poll_fd, 1, 0);
if (ret < 0) {
ret = fi_poll_fd(conn->sock_fd, 0);
if (ret < 0 && errno != EINTR) {
SOCK_LOG_INFO("Error polling fd: %d\n",
conn->sock_fd);
return ret;
@ -1904,7 +2084,8 @@ int sock_pe_progress_rx_ep(struct sock_pe *pe, struct sock_ep *ep,
data_avail = (ret == 1);
}
if (data_avail && conn->rx_pe_entry == NULL) {
if (data_avail && conn->rx_pe_entry == NULL &&
!dlist_empty(&pe->free_list)) {
/* new RX PE entry */
ret = sock_pe_new_rx_entry(pe, rx_ctx, ep, conn, i);
if (ret < 0)
@ -1931,15 +2112,18 @@ int sock_pe_progress_rx_ctx(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx)
/* check for incoming data */
if (rx_ctx->ctx.fid.fclass == FI_CLASS_SRX_CTX) {
for (entry = rx_ctx->ep_list.next;
entry != &rx_ctx->ep_list; entry = entry->next) {
entry != &rx_ctx->ep_list; ) {
ep = container_of(entry, struct sock_ep, rx_ctx_entry);
if ((ret = sock_pe_progress_rx_ep(pe, ep, rx_ctx)) < 0)
entry = entry->next;
ret = sock_pe_progress_rx_ep(pe, ep, rx_ctx);
if (ret < 0)
goto out;
}
} else {
ep = rx_ctx->ep;
if ((ret = sock_pe_progress_rx_ep(pe, ep, rx_ctx)) < 0)
ret = sock_pe_progress_rx_ep(pe, ep, rx_ctx);
if (ret < 0)
goto out;
}
@ -1949,34 +2133,9 @@ int sock_pe_progress_rx_ctx(struct sock_pe *pe, struct sock_rx_ctx *rx_ctx)
pe_entry = container_of(entry, struct sock_pe_entry, ctx_entry);
entry = entry->next;
if (pe_entry->pe.rx.pending_send) {
sock_pe_progress_pending_ack(pe, pe_entry);
if (pe_entry->is_complete) {
sock_pe_release_entry(pe, pe_entry);
SOCK_LOG_INFO("[%p] RX done\n", pe_entry);
}
continue;
}
if (!pe_entry->pe.rx.header_read) {
if (sock_pe_read_hdr(pe, rx_ctx, pe_entry) == -1) {
sock_pe_release_entry(pe, pe_entry);
continue;
}
}
if (pe_entry->pe.rx.header_read) {
ret = sock_pe_process_recv(pe, rx_ctx, pe_entry);
if (ret < 0)
goto out;
}
if (pe_entry->is_complete) {
sock_pe_release_entry(pe, pe_entry);
SOCK_LOG_INFO("[%p] RX done\n", pe_entry);
}
ret = sock_pe_progress_rx_pe_entry(pe, pe_entry, rx_ctx);
if (ret < 0)
goto out;
}
out:
@ -1996,8 +2155,8 @@ int sock_pe_progress_tx_ctx(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx)
/* check tx_ctx rbuf */
fastlock_acquire(&tx_ctx->rlock);
while (!rbfdempty(&tx_ctx->rbfd) &&
!dlist_empty(&pe->free_list)) {
if (!rbfdempty(&tx_ctx->rbfd) &&
pe->num_free_entries > SOCK_PE_MIN_ENTRIES) {
/* new TX PE entry */
ret = sock_pe_new_tx_entry(pe, tx_ctx);
if (ret < 0) {
@ -2019,7 +2178,7 @@ int sock_pe_progress_tx_ctx(struct sock_pe *pe, struct sock_tx_ctx *tx_ctx)
SOCK_LOG_ERROR("Error in progressing %p\n", pe_entry);
goto out;
}
if (pe_entry->is_complete) {
sock_pe_release_entry(pe, pe_entry);
SOCK_LOG_INFO("[%p] TX done\n", pe_entry);
@ -2044,6 +2203,11 @@ static void *sock_pe_progress_thread(void *data)
SOCK_LOG_INFO("Progress thread started\n");
while (pe->do_progress) {
if (sock_progress_thread_wait) {
pthread_yield();
usleep(sock_progress_thread_wait * 1000);
}
/* progress tx */
if (!dlistfd_empty(&pe->tx_list)) {
for (entry = pe->tx_list.list.next;
@ -2091,9 +2255,10 @@ static void sock_pe_init_table(
dlist_init(&pe->busy_list);
for (i=0; i<SOCK_PE_MAX_ENTRIES; i++) {
dlist_insert_tail(&pe->pe_table[i].entry, &pe->free_list);
dlist_insert_head(&pe->pe_table[i].entry, &pe->free_list);
}
pe->num_free_entries = SOCK_PE_MAX_ENTRIES;
SOCK_LOG_INFO("PE table init: OK\n");
}
@ -2104,7 +2269,6 @@ struct sock_pe *sock_pe_init(struct sock_domain *domain)
return NULL;
sock_pe_init_table(pe);
dlistfd_head_init(&pe->tx_list);
dlistfd_head_init(&pe->rx_list);
fastlock_init(&pe->lock);

View file

@ -71,7 +71,7 @@ static ssize_t sock_ep_rma_readmsg(struct fid_ep *ep,
switch (ep->fid.fclass) {
case FI_CLASS_EP:
sock_ep = container_of(ep, struct sock_ep, fid.ep);
sock_ep = container_of(ep, struct sock_ep, ep);
tx_ctx = sock_ep->tx_ctx;
break;
@ -90,7 +90,8 @@ static ssize_t sock_ep_rma_readmsg(struct fid_ep *ep,
msg->rma_iov_count <= SOCK_EP_MAX_IOV_LIMIT);
conn = sock_av_lookup_addr(tx_ctx->av, msg->addr);
assert(conn);
if (!conn)
return -FI_EAGAIN;
total_len = sizeof(struct sock_op_send) +
(msg->iov_count * sizeof(union sock_iov)) +
@ -214,7 +215,7 @@ static ssize_t sock_ep_rma_writemsg(struct fid_ep *ep,
switch (ep->fid.fclass) {
case FI_CLASS_EP:
sock_ep = container_of(ep, struct sock_ep, fid.ep);
sock_ep = container_of(ep, struct sock_ep, ep);
tx_ctx = sock_ep->tx_ctx;
break;
@ -236,7 +237,9 @@ static ssize_t sock_ep_rma_writemsg(struct fid_ep *ep,
} else {
conn = sock_av_lookup_addr(tx_ctx->av, msg->addr);
}
assert(conn);
if (!conn)
return -FI_EAGAIN;
flags |= tx_ctx->attr.op_flags;
memset(&tx_op, 0, sizeof(struct sock_op));
@ -244,7 +247,7 @@ static ssize_t sock_ep_rma_writemsg(struct fid_ep *ep,
tx_op.dest_iov_len = msg->rma_iov_count;
total_len = 0;
if (flags & FI_INJECT) {
if (SOCK_INJECT_OK(flags)) {
for (i=0; i< msg->iov_count; i++) {
total_len += msg->msg_iov[i].iov_len;
}
@ -277,11 +280,11 @@ static ssize_t sock_ep_rma_writemsg(struct fid_ep *ep,
}
src_len = 0;
if (flags & FI_INJECT) {
if (SOCK_INJECT_OK(flags)) {
for (i=0; i< msg->iov_count; i++) {
sock_tx_ctx_write(tx_ctx, msg->msg_iov[i].iov_base,
msg->msg_iov[i].iov_len);
src_len += tx_iov.iov.len;
src_len += msg->msg_iov[i].iov_len;
}
} else {
for (i = 0; i< msg->iov_count; i++) {
@ -349,16 +352,22 @@ static ssize_t sock_ep_rma_writev(struct fid_ep *ep,
size_t count, fi_addr_t dest_addr, uint64_t addr,
uint64_t key, void *context)
{
int i;
size_t len;
struct fi_msg_rma msg;
struct fi_rma_iov rma_iov;
msg.msg_iov = iov;
msg.desc = desc;
msg.iov_count = count;
msg.rma_iov_count = 1;
for (i = 0, len = 0; i < count; i++)
len += iov[i].iov_len;
rma_iov.addr = addr;
rma_iov.key = key;
rma_iov.len = 1;
rma_iov.len = len;
msg.rma_iov = &rma_iov;
msg.context = context;
@ -380,10 +389,11 @@ static ssize_t sock_ep_rma_writedata(struct fid_ep *ep, const void *buf,
msg_iov.iov_len = len;
msg.desc = &desc;
msg.iov_count = 1;
msg.rma_iov_count = 1;
rma_iov.addr = addr;
rma_iov.key = key;
rma_iov.len = 1;
rma_iov.len = len;
msg.rma_iov = &rma_iov;
msg.msg_iov = &msg_iov;
@ -407,10 +417,11 @@ static ssize_t sock_ep_rma_inject(struct fid_ep *ep, const void *buf,
msg_iov.iov_len = len;
msg.msg_iov = &msg_iov;
msg.iov_count = 1;
msg.rma_iov_count = 1;
rma_iov.addr = addr;
rma_iov.key = key;
rma_iov.len = 1;
rma_iov.len = len;
msg.rma_iov = &rma_iov;
msg.msg_iov = &msg_iov;
@ -431,10 +442,11 @@ static ssize_t sock_ep_rma_injectdata(struct fid_ep *ep, const void *buf,
msg_iov.iov_len = len;
msg.msg_iov = &msg_iov;
msg.iov_count = 1;
msg.rma_iov_count = 1;
rma_iov.addr = addr;
rma_iov.key = key;
rma_iov.len = 1;
rma_iov.len = len;
msg.rma_iov = &rma_iov;
msg.msg_iov = &msg_iov;

View file

@ -93,6 +93,7 @@ struct sock_rx_entry *sock_rx_new_buffered_entry(struct sock_rx_ctx *rx_ctx,
rx_ctx->buffered_len += len;
dlist_insert_tail(&rx_entry->entry, &rx_ctx->rx_buffered_list);
rx_entry->is_busy = 1;
return rx_entry;
}
@ -126,6 +127,7 @@ struct sock_rx_entry *sock_rx_get_entry(struct sock_rx_ctx *rx_ctx,
if (entry == &rx_ctx->rx_entry_list)
rx_entry = NULL;
else
rx_entry->is_busy = 1;
return rx_entry;
}

View file

@ -55,100 +55,4 @@
#include "sock_util.h"
int sock_log_level = SOCK_ERROR;
char host[128] = {0};
#define SOCK_SENDTO_TIMEOUT 5
int sock_util_sendto(int fd, void *buf, size_t len, struct sockaddr_in *addr,
socklen_t addrlen, int timeout)
{
struct timeval tv;
fd_set writefds;
socklen_t optlen;
int optval;
if (sendto(fd, buf, len, 0, addr, addrlen) < 0) {
SOCK_LOG_ERROR("sendto failed with error %d - %s\n", errno,
strerror(errno));
return -errno;
}
if (timeout) {
tv.tv_sec = 0;
tv.tv_usec = timeout;
} else {
tv.tv_sec = SOCK_SENDTO_TIMEOUT;
tv.tv_usec = 0;
}
FD_ZERO(&writefds);
FD_SET(fd, &writefds);
if (select(fd+1, NULL, &writefds, NULL, &tv) > 0) {
optlen = sizeof(int);
getsockopt(fd, SOL_SOCKET, SO_ERROR, &optval, &optlen);
if (optval) {
SOCK_LOG_ERROR("failed to sendto %d - %s\n", optval,
strerror(optval));
close(fd);
return -errno;
}
} else {
SOCK_LOG_ERROR("Timeout or error to sendto %d - %s\n", optval,
strerror(optval));
close(fd);
errno = ETIMEDOUT;
return -FI_ETIMEDOUT;
}
return 0;
}
int sock_util_recvfrom(int fd, void *buf, size_t len, struct sockaddr_in *addr,
socklen_t *addrlen, int timeout)
{
struct timeval tv;
struct timeval *tv_ptr;
fd_set readfds;
socklen_t optlen;
int optval;
int ret;
if (timeout < 0) {
/* negative timeout means an infinite timeout */
tv_ptr = NULL;
} else {
tv.tv_sec = 0;
tv.tv_usec = timeout;
tv_ptr = &tv;
}
FD_ZERO(&readfds);
FD_SET(fd, &readfds);
if (select(fd+1, &readfds, NULL, NULL, tv_ptr) > 0) {
optlen = sizeof(int);
getsockopt(fd, SOL_SOCKET, SO_ERROR, &optval, &optlen);
if (optval) {
SOCK_LOG_ERROR("failed to connect %d - %s\n", optval,
strerror(optval));
close(fd);
return 0;
}
} else {
SOCK_LOG_ERROR("Timeout or error to connect %d - %s\n", optval,
strerror(optval));
close(fd);
errno = ETIMEDOUT;
return 0;
}
/* read */
ret = recvfrom(fd, buf, len, 0, addr, addrlen);
if (ret < 0) {
SOCK_LOG_ERROR("error recvfrom for sread: %d - %s\n", errno,
strerror(errno));
return 0;
}
return ret;
}
useconds_t sock_progress_thread_wait = 0;

View file

@ -44,6 +44,7 @@
#define SOCK_INFO (3)
extern int sock_log_level;
extern useconds_t sock_progress_thread_wait;
#define SOCK_LOG_INFO(...) do { \
if (sock_log_level >= SOCK_INFO) { \
@ -68,7 +69,3 @@ extern int sock_log_level;
#endif
int sock_util_sendto(int fd, void *buf, size_t len, struct sockaddr_in *addr,
socklen_t addrlen, int timeout);
int sock_util_recvfrom(int fd, void *buf, size_t len, struct sockaddr_in *addr,
socklen_t *addrlen, int timeout);

View file

@ -127,15 +127,13 @@ static int sock_wait_wait(struct fid_wait *wait_fid, int timeout)
case FI_CLASS_CQ:
cq = container_of(list_item->fid,
struct sock_cq, cq_fid);
if (cq->domain->progress_mode == FI_PROGRESS_MANUAL)
sock_cq_progress(cq);
sock_cq_progress(cq);
break;
case FI_CLASS_CNTR:
cntr = container_of(list_item->fid,
struct sock_cntr, cntr_fid);
if (cntr->domain->progress_mode == FI_PROGRESS_MANUAL)
sock_cntr_progress(cntr);
sock_cntr_progress(cntr);
break;
}
}
@ -220,8 +218,9 @@ int sock_wait_close(fid_t fid)
wait = container_of(fid, struct sock_wait, wait_fid.fid);
head = &wait->fid_list;
for (p = head->next; p != head; p = p->next) {
for (p = head->next; p != head;) {
list_item = container_of(p, struct sock_fid_list, entry);
p = p->next;
free(list_item);
}
@ -284,7 +283,7 @@ int sock_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr,
free(wait);
return err;
}
wait->wait_fid.fid.fclass = FI_CLASS_WAIT;
wait->wait_fid.fid.context = 0;
wait->wait_fid.fid.ops = &sock_wait_fi_ops;
@ -292,6 +291,7 @@ int sock_wait_open(struct fid_fabric *fabric, struct fi_wait_attr *attr,
wait->fab = fab;
wait->type = wait_obj_type;
atomic_inc(&fab->ref);
dlist_init(&wait->fid_list);
*waitset = &wait->wait_fid;
return 0;
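The fix to sock_wait_close earlier in this file advances the list cursor before freeing the node that contains it; reading p->next after the free would be a use-after-free. The same pattern in isolation, as a minimal sketch with a made-up node type:

#include <stdlib.h>

struct node {
	struct node *next;
	/* payload fields would go here */
};

/* Free every node of a singly linked list: save 'next' first, because the
 * pointer lives inside the memory that free() is about to release. */
static void free_list(struct node *head)
{
	struct node *n = head;
	struct node *next;

	while (n != NULL) {
		next = n->next;
		free(n);
		n = next;
	}
}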

View file

@ -30,18 +30,20 @@
* SOFTWARE.
*/
#ifndef _FI_USNIC_H_
#define _FI_USNIC_H_
#ifndef _FI_EXT_USNIC_H_
#define _FI_EXT_USNIC_H_
#include <stdint.h>
#include <net/if.h>
#define FI_PROTO_RUDP 100
#define FI_USNIC_INFO_VERSION 1
/*
* usNIC specific info
*/
struct fi_usnic_info {
struct fi_usnic_info_v1 {
uint32_t ui_link_speed;
uint32_t ui_netmask_be;
char ui_ifname[IFNAMSIZ];
@ -51,6 +53,13 @@ struct fi_usnic_info {
uint32_t ui_cq_per_vf;
};
struct fi_usnic_info {
uint32_t ui_version;
union {
struct fi_usnic_info_v1 v1;
} ui;
};
/*
* usNIC-specific AV ops
*/
@ -69,4 +78,4 @@ struct fi_usnic_ops_av {
int (*get_distance)(struct fid_av *av, void *addr, int *metric);
};
#endif /* _FI_USNIC_H_ */
#endif /* _FI_EXT_USNIC_H_ */
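The renamed header now carries a version tag: fi_usnic_info records the layout in ui_version and wraps the original fields in a v1 union member. As a rough, illustrative sketch (not part of this change), a caller that has obtained a filled-in structure, for example through the provider's getinfo extension shown further below in usdf_fabric.c, would check the tag before touching any union member:

#include <stdio.h>
#include "fi_ext_usnic.h"	/* the header above */

/* Illustrative helper, not part of the provider. */
static void print_usnic_info(const struct fi_usnic_info *uip)
{
	/* Gate on the version tag before reading any union member. */
	if (uip->ui_version != FI_USNIC_INFO_VERSION) {
		fprintf(stderr, "unexpected fi_usnic_info version %u\n",
			uip->ui_version);
		return;
	}
	printf("ifname=%s link_speed=%u num_vf=%u qp_per_vf=%u cq_per_vf=%u\n",
	       uip->ui.v1.ui_ifname, uip->ui.v1.ui_link_speed,
	       uip->ui.v1.ui_num_vf, uip->ui.v1.ui_qp_per_vf,
	       uip->ui.v1.ui_cq_per_vf);
}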

View file

@ -63,8 +63,7 @@
#include "usdf_av.h"
#include "usdf_timer.h"
/* would like to move to include/rdma */
#include "fi_usnic.h"
#include "fi_ext_usnic.h"
static void
usdf_av_insert_async_complete(struct usdf_av_insert *insert)

View file

@ -82,7 +82,23 @@ usdf_cq_readerr(struct fid_cq *fcq, struct fi_cq_err_entry *entry,
entry->op_context = cq->cq_comp.uc_context;
entry->flags = 0;
entry->err = FI_EIO;
entry->prov_errno = cq->cq_comp.uc_status;
switch (cq->cq_comp.uc_status) {
case USD_COMPSTAT_SUCCESS:
entry->prov_errno = FI_SUCCESS;
break;
case USD_COMPSTAT_ERROR_CRC:
entry->prov_errno = FI_ECRC;
break;
case USD_COMPSTAT_ERROR_TRUNC:
entry->prov_errno = FI_ETRUNC;
break;
case USD_COMPSTAT_ERROR_TIMEOUT:
entry->prov_errno = FI_ETIMEDOUT;
break;
case USD_COMPSTAT_ERROR_INTERNAL:
entry->prov_errno = FI_EOTHER;
break;
}
cq->cq_comp.uc_status = 0;

View file

@ -142,9 +142,11 @@ usdf_ep_dgram_enable(struct fid_ep *fep)
fail:
if (ep->e.dg.ep_hdr_ptr != NULL) {
free(ep->e.dg.ep_hdr_ptr);
ep->e.dg.ep_hdr_ptr = NULL;
}
if (ep->e.dg.ep_qp != NULL) {
usd_destroy_qp(ep->e.dg.ep_qp);
ep->e.dg.ep_qp = NULL;
}
return ret;
}
@ -286,6 +288,20 @@ static struct fi_ops_ep usdf_base_dgram_ops = {
.setopt = fi_no_setopt,
.tx_ctx = fi_no_tx_ctx,
.rx_ctx = fi_no_rx_ctx,
.rx_size_left = usdf_dgram_rx_size_left,
.tx_size_left = usdf_dgram_tx_size_left,
};
static struct fi_ops_ep usdf_base_dgram_prefix_ops = {
.size = sizeof(struct fi_ops_ep),
.enable = usdf_ep_dgram_enable,
.cancel = fi_no_cancel,
.getopt = fi_no_getopt,
.setopt = fi_no_setopt,
.tx_ctx = fi_no_tx_ctx,
.rx_ctx = fi_no_rx_ctx,
.rx_size_left = usdf_dgram_prefix_rx_size_left,
.tx_size_left = usdf_dgram_prefix_tx_size_left,
};
static struct fi_ops_msg usdf_dgram_ops = {
@ -299,8 +315,6 @@ static struct fi_ops_msg usdf_dgram_ops = {
.inject = usdf_dgram_inject,
.senddata = usdf_dgram_senddata,
.injectdata = fi_no_msg_injectdata,
.rx_size_left = usdf_dgram_rx_size_left,
.tx_size_left = usdf_dgram_tx_size_left,
};
static struct fi_ops_msg usdf_dgram_prefix_ops = {
@ -314,8 +328,6 @@ static struct fi_ops_msg usdf_dgram_prefix_ops = {
.inject = usdf_dgram_inject,
.senddata = usdf_dgram_senddata,
.injectdata = fi_no_msg_injectdata,
.rx_size_left = usdf_dgram_prefix_rx_size_left,
.tx_size_left = usdf_dgram_prefix_tx_size_left,
};
static struct fi_ops_cm usdf_cm_dgram_ops = {
@ -369,7 +381,6 @@ usdf_ep_dgram_open(struct fid_domain *domain, struct fi_info *info,
ep->ep_fid.fid.fclass = FI_CLASS_EP;
ep->ep_fid.fid.context = context;
ep->ep_fid.fid.ops = &usdf_ep_dgram_ops;
ep->ep_fid.ops = &usdf_base_dgram_ops;
ep->ep_fid.cm = &usdf_cm_dgram_ops;
ep->ep_domain = udp;
ep->ep_caps = info->caps;
@ -393,9 +404,11 @@ usdf_ep_dgram_open(struct fid_domain *domain, struct fi_info *info,
goto fail;
}
ep->ep_fid.ops = &usdf_base_dgram_prefix_ops;
info->ep_attr->msg_prefix_size = USDF_HDR_BUF_ENTRY;
ep->ep_fid.msg = &usdf_dgram_prefix_ops;
} else {
ep->ep_fid.ops = &usdf_base_dgram_ops;
ep->ep_fid.msg = &usdf_dgram_ops;
}
atomic_init(&ep->ep_refcnt, 0);

View file

@ -581,6 +581,8 @@ static struct fi_ops_ep usdf_base_msg_ops = {
.setopt = usdf_ep_msg_setopt,
.tx_ctx = fi_no_tx_ctx,
.rx_ctx = fi_no_rx_ctx,
.rx_size_left = fi_no_rx_size_left,
.tx_size_left = fi_no_tx_size_left,
};
static struct fi_ops_cm usdf_cm_msg_ops = {

View file

@ -638,6 +638,8 @@ static struct fi_ops_ep usdf_base_rdm_ops = {
.setopt = usdf_ep_rdm_setopt,
.tx_ctx = fi_no_tx_ctx,
.rx_ctx = fi_no_rx_ctx,
.rx_size_left = fi_no_rx_size_left,
.tx_size_left = fi_no_tx_size_left,
};
static struct fi_ops_cm usdf_cm_rdm_ops = {

View file

@ -66,7 +66,7 @@
#include "libnl_utils.h"
#include "usdf.h"
#include "fi_usnic.h"
#include "fi_ext_usnic.h"
#include "usdf_progress.h"
#include "usdf_timer.h"
#include "usdf_dgram.h"
@ -780,12 +780,12 @@ usdf_usnic_getinfo(struct fid_fabric *fabric, struct fi_usnic_info *uip)
fp = fab_ftou(fabric);
dap = fp->fab_dev_attrs;
uip->ui_link_speed = dap->uda_bandwidth;
uip->ui_netmask_be = dap->uda_netmask_be;
strcpy(uip->ui_ifname, dap->uda_ifname);
uip->ui_num_vf = dap->uda_num_vf;
uip->ui_qp_per_vf = dap->uda_qp_per_vf;
uip->ui_cq_per_vf = dap->uda_cq_per_vf;
uip->ui.v1.ui_link_speed = dap->uda_bandwidth;
uip->ui.v1.ui_netmask_be = dap->uda_netmask_be;
strcpy(uip->ui.v1.ui_ifname, dap->uda_ifname);
uip->ui.v1.ui_num_vf = dap->uda_num_vf;
uip->ui.v1.ui_qp_per_vf = dap->uda_qp_per_vf;
uip->ui.v1.ui_cq_per_vf = dap->uda_cq_per_vf;
return 0;
}

View file

@ -142,11 +142,16 @@ _usdf_msg_post_recv(struct usdf_rx *rx, void *buf, size_t len)
rq->urq_post_index = (rq->urq_post_index + 1)
& rq->urq_post_index_mask;
desc = vnic_rq_next_desc(vrq);
desc = rq->urq_next_desc;
rq_enet_desc_enc(desc, (dma_addr_t) buf,
RQ_ENET_TYPE_ONLY_SOP, len);
wmb();
vnic_rq_post(vrq, buf, 0, (dma_addr_t) buf, len, 0);
iowrite32(rq->urq_post_index, &vrq->ctrl->posted_index);
rq->urq_next_desc = (struct rq_enet_desc *)
((uintptr_t)rq->urq_desc_ring
+ ((rq->urq_post_index)<<4));
rq->urq_recv_credits -= 1;
return 0;
}

View file

@ -58,7 +58,7 @@
#include "fi.h"
#include "fi_enosys.h"
#include "fi_usnic.h"
#include "fi_ext_usnic.h"
#include "usnic_direct.h"
#include "usd.h"
#include "usdf.h"

View file

@ -391,11 +391,16 @@ _usdf_rdm_post_recv(struct usdf_rx *rx, void *buf, size_t len)
rq->urq_post_index = (rq->urq_post_index + 1)
& rq->urq_post_index_mask;
desc = vnic_rq_next_desc(vrq);
desc = rq->urq_next_desc;
rq_enet_desc_enc(desc, (dma_addr_t) buf,
RQ_ENET_TYPE_ONLY_SOP, len);
wmb();
vnic_rq_post(vrq, buf, 0, (dma_addr_t) buf, len, 0);
iowrite32(rq->urq_post_index, &vrq->ctrl->posted_index);
rq->urq_next_desc = (struct rq_enet_desc *)
((uintptr_t)rq->urq_desc_ring
+ ((rq->urq_post_index)<<4));
rq->urq_recv_credits -= 1;
return 0;
}

View file

@ -201,12 +201,20 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys
return true;
}
#endif /*CONFIG_RFS_ACCEL*/
#if ((RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 5)))
#define skb_get_rxhash(skb) (skb)->rxhash
#endif /*RHEL_RELEASE_VERSION == 6.5*/
#endif /*LINUX >= 3.3.0*/
#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0))
#define skb_get_hash_raw(skb) (skb)->rxhash
#endif
#if !defined(__VMKLNX__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 24))
#define enic_wq_lock(wq_lock) spin_lock_irqsave(wq_lock, flags)
#define enic_wq_unlock(wq_lock) spin_unlock_irqrestore(wq_lock, flags)
#else
#define enic_wq_lock(wq_lock) spin_lock(wq_lock)
#define enic_wq_unlock(wq_lock) spin_unlock(wq_lock)
#endif /* ! vmklnx && kernel < 2.6.24 */
#ifdef CONFIG_RFS_ACCEL
#if ((RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 5) \
&& RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7, 0)))
@ -235,10 +243,26 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys
#define napi_hash_add(napi) do {} while(0)
#define skb_mark_napi_id(skb, napi) do {} while(0)
#endif /*CONFIG_NET_RX_BUSY_POLL*/
#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 9, 00))
#define __vlan_hwaccel_put_tag(a, b, c) __vlan_hwaccel_put_tag(a, c);
#endif /* KERNEL < 3.9.0 */
#if ((LINUX_VERSION_CODE <= KERNEL_VERSION(3, 4, 0)) && \
(!RHEL_RELEASE_CODE || RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6, 0)))
#define net_warn_ratelimited(fmt, ...) \
do { \
if (net_ratelimit()) \
pr_warn(fmt, ##__VA_ARGS__); \
} while (0)
#endif /* kernel <= 3.4 && rhel < 6.0 */
#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 26))
#define enic_pci_dma_mapping_error(pdev, dma) pci_dma_mapping_error(dma)
#else
#define enic_pci_dma_mapping_error(pdev, dma) pci_dma_mapping_error(pdev, dma)
#endif /* Kernel version <= 2.6.26 */
/* Kernel version-specific definitions */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 14))
static inline signed long schedule_timeout_uninterruptible(signed long timeout)
@ -454,6 +478,8 @@ struct napi_struct {
#undef pr_err
#define pr_err(fmt, ...) \
printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
#undef pr_warn
#define pr_warn pr_warning
#undef pr_warning
#define pr_warning(fmt, ...) \
printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)

View file

@ -201,6 +201,9 @@ struct usd_rq {
char *urq_rxbuf;
char **urq_post_addr;
uint32_t urq_recv_credits; /* number of available descriptors */
struct rq_enet_desc *urq_desc_ring;
struct rq_enet_desc *urq_next_desc;
uint32_t urq_post_index; /* next rxbuf to post */
uint32_t urq_post_index_mask;
uint32_t urq_last_comp;

View file

@ -99,7 +99,7 @@ usd_desc_to_rq_comp(
rcvbuf_len = 0;
do {
rq_enet_desc_dec( (struct rq_enet_desc *)
((uintptr_t)rq->urq_vnic_rq.ring.descs + (i<<4)),
((uintptr_t)rq->urq_desc_ring + (i<<4)),
&bus_addr, &type, &len);
rcvbuf_len += len;
i = (i - 1) & rq->urq_post_index_mask;
@ -127,7 +127,7 @@ usd_desc_to_rq_comp(
* reported as released until next RX
*/
credits = (q_index - rq->urq_last_comp) & rq->urq_post_index_mask;
rq->urq_vnic_rq.ring.desc_avail += credits;
rq->urq_recv_credits += credits;
rq->urq_last_comp = q_index;
return 0;

View file

@ -59,12 +59,10 @@ usd_get_recv_credits(
struct usd_qp *uqp)
{
struct usd_qp_impl *qp;
struct vnic_rq *vrq;
qp = to_qpi(uqp);
vrq = &qp->uq_rq.urq_vnic_rq;
return vrq->ring.desc_avail;
return qp->uq_rq.urq_recv_credits;
}
int
@ -77,45 +75,48 @@ usd_post_recv(
struct vnic_rq *vrq;
struct rq_enet_desc *desc;
struct iovec *iovp;
uint32_t index;
uint32_t count;
unsigned i;
qp = to_qpi(uqp);
rq = &qp->uq_rq;
vrq = &rq->urq_vnic_rq;
desc = rq->urq_next_desc;
index = rq->urq_post_index;
iovp = recv_list->urd_iov;
count = 0;
while (recv_list != NULL) {
iovp = recv_list->urd_iov;
/* XXX - this should be rewritten along the lines of post_send */
rq->urq_context[rq->urq_post_index] = recv_list->urd_context;
rq->urq_post_index = (rq->urq_post_index + 1)
& rq->urq_post_index_mask;
desc = vnic_rq_next_desc(vrq);
rq->urq_context[index] = recv_list->urd_context;
rq_enet_desc_enc(desc, (dma_addr_t) iovp[0].iov_base,
RQ_ENET_TYPE_ONLY_SOP, iovp[0].iov_len);
wmb();
vnic_rq_post(vrq, iovp[0].iov_base, 0,
(dma_addr_t) iovp[0].iov_base, iovp[0].iov_len, 0);
count++;
index = (index+1) & rq->urq_post_index_mask;
desc = (struct rq_enet_desc *) ((uintptr_t)rq->urq_desc_ring
+ (index<<4));
for (i = 1; i < recv_list->urd_iov_cnt; ++i) {
rq->urq_context[rq->urq_post_index] = recv_list->urd_context;
rq->urq_post_index = (rq->urq_post_index + 1)
& rq->urq_post_index_mask;
desc = vnic_rq_next_desc(vrq);
rq->urq_context[index] = recv_list->urd_context;
rq_enet_desc_enc(desc, (dma_addr_t) iovp[i].iov_base,
RQ_ENET_TYPE_NOT_SOP, iovp[i].iov_len);
wmb();
vnic_rq_post(vrq, iovp[i].iov_base, 0,
(dma_addr_t) iovp[i].iov_base, iovp[i].iov_len,
0);
count++;
index = (index+1) & rq->urq_post_index_mask;
desc = (struct rq_enet_desc *) ((uintptr_t)rq->urq_desc_ring
+ (index<<4));
}
recv_list = recv_list->urd_next;
}
wmb();
iowrite32(index, &vrq->ctrl->posted_index);
rq->urq_next_desc = desc;
rq->urq_post_index = index;
rq->urq_recv_credits -= count;
return 0;
}
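The descriptor bookkeeping above depends on the ring size being a power of two: urq_post_index_mask is num_entries - 1, so (index + 1) & mask wraps around without a branch, and each descriptor address is recomputed as the ring base plus index << 4, the 16-byte stride the code above uses for struct rq_enet_desc. A small standalone sketch of that indexing, with made-up sizes:

#include <stdint.h>
#include <stdio.h>

#define RING_ENTRIES 8			/* must be a power of two */

int main(void)
{
	uint32_t mask = RING_ENTRIES - 1;
	uint32_t index = 0;
	uintptr_t ring_base = 0x1000;	/* stand-in for urq_desc_ring */
	int post;

	for (post = 0; post < 10; post++) {
		uintptr_t desc = ring_base + ((uintptr_t)index << 4);
		printf("post %2d -> slot %u, desc 0x%lx\n",
		       post, index, (unsigned long)desc);
		index = (index + 1) & mask;	/* wraps back to 0 after slot 7 */
	}
	return 0;
}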

View file

@ -162,8 +162,8 @@ _usd_post_send_iov(
}
wq_enet_desc_enc(desc, (uintptr_t)(iov[i].iov_base),
iov[i].iov_len, mss, header_length, offload_mode,
1, cq_entry, fcoe_encap, vlan_tag_insert, vlan_tag, loopback);
iov[i].iov_len, mss, header_length, offload_mode,
1, cq_entry, fcoe_encap, vlan_tag_insert, vlan_tag, loopback);
wmb();

View file

@ -472,27 +472,76 @@ usd_create_wq(
return ret;
}
static int
usd_vnic_rq_init(
struct usd_rq *rq,
struct usd_vf *vf,
uint64_t desc_ring)
{
struct vnic_rq *vrq;
int ret;
vrq = &rq->urq_vnic_rq;
/* get address of control register */
vrq->ctrl = vnic_dev_get_res(vf->vf_vdev, RES_TYPE_RQ, rq->urq_index);
if (vrq->ctrl == NULL)
return -EINVAL;
ret = vnic_rq_disable(vrq);
if (ret != 0)
return ret;
writeq(desc_ring, &vrq->ctrl->ring_base);
iowrite32(rq->urq_num_entries, &vrq->ctrl->ring_size);
iowrite32(0, &vrq->ctrl->fetch_index);
iowrite32(0, &vrq->ctrl->posted_index);
iowrite32(rq->urq_cq->ucq_index, &vrq->ctrl->cq_index);
iowrite32(0, &vrq->ctrl->error_interrupt_enable);
iowrite32(0, &vrq->ctrl->error_interrupt_offset);
iowrite32(0, &vrq->ctrl->dropped_packet_count);
iowrite32(0, &vrq->ctrl->error_status);
rq->urq_state |= USD_QS_VNIC_INITIALIZED;
rq->urq_next_desc = rq->urq_desc_ring;
rq->urq_recv_credits = rq->urq_num_entries - 1;
return 0;
}
/*
* Allocate the resources for a previously created RQ
*/
static int
usd_create_vnic_rq(
struct usd_rq *rq,
struct usd_vf *vf)
usd_create_rq(struct usd_qp_impl *qp)
{
struct usd_rq *rq;
uint32_t ring_size;
int ret;
/* Allocate resources for RQ */
ret = vnic_rq_alloc(vf->vf_vdev, &rq->urq_vnic_rq,
rq->urq_index, rq->urq_num_entries, sizeof(struct rq_enet_desc));
if (ret != 0) {
return ret;
}
rq = &qp->uq_rq;
vnic_rq_init(&rq->urq_vnic_rq, rq->urq_cq->ucq_index, 0, 0);
rq->urq_state |= USD_QS_VNIC_INITIALIZED;
/* Allocate resources for RQ */
ring_size = sizeof(struct rq_enet_desc) * rq->urq_num_entries;
ret = usd_alloc_mr(qp->uq_dev, ring_size, (void **)&rq->urq_desc_ring);
if (ret != 0)
return ret;
ret = usd_vnic_rq_init(rq, qp->uq_vf, (uint64_t)rq->urq_desc_ring);
if (ret != 0)
goto out;
rq->urq_post_index_mask = (rq->urq_num_entries-1);
rq->urq_post_index = 0;
rq->urq_last_comp = (rq->urq_num_entries-1);
return 0;
out:
if (rq->urq_desc_ring != NULL) {
usd_free_mr(rq->urq_desc_ring);
rq->urq_desc_ring = NULL;
}
return ret;
}
static int
@ -952,12 +1001,6 @@ usd_create_qp_normal(
}
num_wq_entries = wq->uwq_num_entries;
copybuf_size = USD_SEND_MAX_COPY * num_wq_entries;
ret = usd_alloc_mr(dev, copybuf_size, (void **)&wq->uwq_copybuf);
if (ret != 0)
goto fail;
num_rq_entries = rq->urq_num_entries;
rq->urq_context = calloc(sizeof(void *), num_rq_entries);
@ -967,10 +1010,6 @@ usd_create_qp_normal(
goto fail;
}
rq->urq_post_index_mask = (rq->urq_num_entries-1);
rq->urq_post_index = 0;
rq->urq_last_comp = (rq->urq_num_entries-1);
/*
* Issue verbs command to create the QP. This does not actually
* instantiate the filter in the VIC yet, need to bring the
@ -986,6 +1025,16 @@ usd_create_qp_normal(
rq->urq_state |= USD_QS_VERBS_CREATED;
wq->uwq_state |= USD_QS_VERBS_CREATED;
/*
* Create/regmr for wq copybuf after verbs QP is created
* because QP number information may be needed to register
* mr under shared PD
*/
copybuf_size = USD_SEND_MAX_COPY * num_wq_entries;
ret = usd_alloc_mr(dev, copybuf_size, (void **)&wq->uwq_copybuf);
if (ret != 0)
goto fail;
ret = usd_map_vf(dev, &vf_info, &vf);
if (ret != 0) {
goto fail;
@ -1013,7 +1062,7 @@ usd_create_qp_normal(
if (ret != 0) {
goto fail;
}
ret = usd_create_vnic_rq(&qp->uq_rq, qp->uq_vf);
ret = usd_create_rq(qp);
if (ret != 0) {
goto fail;
}

View file

@ -110,6 +110,7 @@ struct vnic_dev {
struct vnic_intr_coal_timer_info intr_coal_timer_info;
struct devcmd2_controller *devcmd2;
int (*devcmd_rtn)(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, int wait);
struct vnic_gen_stats gen_stats;
};
#define VNIC_MAX_RES_HDR_SIZE \

View file

@ -46,7 +46,7 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/pci.h>
#if __KERNEL__
#ifdef __KERNEL__
#include <linux/delay.h>
#include <linux/slab.h>
#endif

View file

@ -86,6 +86,11 @@ struct vnic_rx_stats {
u64 rsvd[16];
};
/* Generic statistics */
struct vnic_gen_stats {
u64 dma_map_error;
};
struct vnic_stats {
struct vnic_tx_stats tx;
struct vnic_rx_stats rx;

View file

@ -102,11 +102,14 @@ static int vnic_wq_alloc_bufs(struct vnic_wq *wq)
wq->ring.desc_size * buf->index;
if (buf->index + 1 == count) {
buf->next = wq->bufs[0];
buf->next->prev = buf;
break;
} else if (j + 1 == VNIC_WQ_BUF_BLK_ENTRIES(count)) {
buf->next = wq->bufs[i + 1];
buf->next->prev = buf;
} else {
buf->next = buf + 1;
buf->next->prev = buf;
buf++;
}
}

View file

@ -88,6 +88,7 @@ struct vnic_wq_buf {
uint8_t cq_entry; /* Gets completion event from hw */
uint8_t desc_skip_cnt; /* Num descs to occupy */
uint8_t compressed_send; /* Both hdr and payload in one desc */
struct vnic_wq_buf *prev;
};
/* Break the vnic_wq_buf allocations into blocks of 32/64 entries */

View file

@ -689,6 +689,9 @@ static int fi_ibv_msg_ep_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
switch (bfid->fclass) {
case FI_CLASS_CQ:
if (!(flags & (FI_RECV|FI_SEND))) {
return -EINVAL;
}
if (flags & FI_RECV) {
if (ep->rcq)
return -EINVAL;
@ -908,8 +911,6 @@ static struct fi_ops_msg fi_ibv_msg_ep_msg_ops = {
.inject = fi_no_msg_inject,
.senddata = fi_ibv_msg_ep_senddata,
.injectdata = fi_no_msg_injectdata,
.rx_size_left = fi_no_msg_rx_size_left,
.tx_size_left = fi_no_msg_tx_size_left,
};
static ssize_t
@ -1678,7 +1679,7 @@ static int fi_ibv_msg_ep_shutdown(struct fid_ep *ep, uint64_t flags)
static struct fi_ops_cm fi_ibv_msg_ep_cm_ops = {
.size = sizeof(struct fi_ops_cm),
.getname = NULL, /* TODO */
.getname = fi_no_getname,
.getpeer = fi_no_getpeer,
.connect = fi_ibv_msg_ep_connect,
.listen = fi_no_listen,
@ -1720,6 +1721,8 @@ static int fi_ibv_msg_ep_enable(struct fid_ep *ep)
_ep = container_of(ep, struct fi_ibv_msg_ep, ep_fid);
if (!_ep->eq)
return -FI_ENOEQ;
if (!_ep->scq || !_ep->rcq)
return -FI_ENOCQ;
return fi_ibv_msg_ep_create_qp(_ep);
}
@ -1730,6 +1733,8 @@ static struct fi_ops_ep fi_ibv_msg_ep_base_ops = {
.cancel = fi_no_cancel,
.getopt = fi_ibv_msg_ep_getopt,
.setopt = fi_ibv_msg_ep_setopt,
.rx_size_left = fi_no_rx_size_left,
.tx_size_left = fi_no_tx_size_left,
};
static int fi_ibv_msg_ep_close(fid_t fid)
@ -1880,20 +1885,20 @@ fi_ibv_eq_cm_process_event(struct fi_ibv_eq *eq, struct rdma_cm_event *cma_event
case RDMA_CM_EVENT_UNREACHABLE:
eq->err.fid = fid;
eq->err.err = cma_event->status;
return -EIO;
return -FI_EAVAIL;
case RDMA_CM_EVENT_REJECTED:
eq->err.fid = fid;
eq->err.err = ECONNREFUSED;
eq->err.prov_errno = cma_event->status;
return -EIO;
return -FI_EAVAIL;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
eq->err.fid = fid;
eq->err.err = ENODEV;
return -EIO;
return -FI_EAVAIL;
case RDMA_CM_EVENT_ADDR_CHANGE:
eq->err.fid = fid;
eq->err.err = EADDRNOTAVAIL;
return -EIO;
return -FI_EAVAIL;
default:
return 0;
}
@ -1917,7 +1922,7 @@ fi_ibv_eq_read(struct fid_eq *eq, uint32_t *event,
_eq = container_of(eq, struct fi_ibv_eq, eq_fid.fid);
entry = (struct fi_eq_cm_entry *) buf;
if (_eq->err.err)
return -FI_EIO;
return -FI_EAVAIL;
ret = rdma_get_cm_event(_eq->channel, &cma_event);
if (ret)
@ -2025,6 +2030,7 @@ fi_ibv_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
_eq->fab = container_of(fabric, struct fi_ibv_fabric, fabric_fid);
switch (attr->wait_obj) {
case FI_WAIT_UNSPEC:
case FI_WAIT_FD:
_eq->channel = rdma_create_event_channel();
if (!_eq->channel) {
@ -2338,6 +2344,7 @@ fi_ibv_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
_cq->domain = container_of(domain, struct fi_ibv_domain, domain_fid);
switch (attr->wait_obj) {
case FI_WAIT_UNSPEC:
case FI_WAIT_FD:
_cq->channel = ibv_create_comp_channel(_cq->domain->verbs);
if (!_cq->channel) {
@ -2572,7 +2579,7 @@ static int fi_ibv_pep_listen(struct fid_pep *pep)
static struct fi_ops_cm fi_ibv_pep_cm_ops = {
.size = sizeof(struct fi_ops_cm),
.getname = NULL, /* TODO */
.getname = fi_no_getname,
.getpeer = fi_no_getpeer,
.connect = fi_no_connect,
.listen = fi_ibv_pep_listen,

View file

@ -155,3 +155,63 @@ size_t fi_datatype_size(enum fi_datatype datatype)
}
return fi_datatype_size_table[datatype];
}
int fi_send_allowed(uint64_t caps)
{
if (caps & FI_MSG ||
caps & FI_TAGGED) {
if (caps & FI_SEND)
return 1;
if (caps & FI_RECV)
return 0;
return 1;
}
return 0;
}
int fi_recv_allowed(uint64_t caps)
{
if (caps & FI_MSG ||
caps & FI_TAGGED) {
if (caps & FI_RECV)
return 1;
if (caps & FI_SEND)
return 0;
return 1;
}
return 0;
}
int fi_rma_initiate_allowed(uint64_t caps)
{
if (caps & FI_RMA ||
caps & FI_ATOMICS) {
if (caps & FI_WRITE ||
caps & FI_READ)
return 1;
if (caps & FI_REMOTE_WRITE ||
caps & FI_REMOTE_READ)
return 0;
return 1;
}
return 0;
}
int fi_rma_target_allowed(uint64_t caps)
{
if (caps & FI_RMA ||
caps & FI_ATOMICS) {
if (caps & FI_REMOTE_WRITE ||
caps & FI_REMOTE_READ)
return 1;
if (caps & FI_WRITE ||
caps & FI_READ)
return 0;
return 1;
}
return 0;
}
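The four helpers above encode a single rule: for message or tagged capabilities, explicitly requesting only the opposite direction disables this one, while requesting neither leaves both enabled (and likewise for RMA/atomic initiator versus target roles). A few concrete cases, written as a sketch against the two send/recv helpers above; their prototypes are assumed to be visible, for example via the tree's internal fi.h:

#include <assert.h>
#include <rdma/fabric.h>

int fi_send_allowed(uint64_t caps);
int fi_recv_allowed(uint64_t caps);

int main(void)
{
	assert(fi_send_allowed(FI_MSG));		/* direction unspecified: allowed */
	assert(fi_recv_allowed(FI_MSG));
	assert(fi_send_allowed(FI_TAGGED | FI_SEND));	/* explicitly requested */
	assert(!fi_send_allowed(FI_MSG | FI_RECV));	/* only the other direction asked for */
	assert(!fi_recv_allowed(FI_MSG | FI_SEND));
	assert(!fi_send_allowed(FI_RMA));		/* no msg/tagged capability at all */
	return 0;
}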

View file

@ -288,18 +288,26 @@ int fi_no_setopt(fid_t fid, int level, int optname,
{
return -FI_ENOSYS;
}
int fi_no_tx_ctx(struct fid_sep *sep, int index,
int fi_no_tx_ctx(struct fid_ep *sep, int index,
struct fi_tx_attr *attr, struct fid_ep **tx_ep,
void *context)
{
return -FI_ENOSYS;
}
int fi_no_rx_ctx(struct fid_sep *sep, int index,
int fi_no_rx_ctx(struct fid_ep *sep, int index,
struct fi_rx_attr *attr, struct fid_ep **rx_ep,
void *context)
{
return -FI_ENOSYS;
}
ssize_t fi_no_rx_size_left(struct fid_ep *ep)
{
return -FI_ENOSYS;
}
ssize_t fi_no_tx_size_left(struct fid_ep *ep)
{
return -FI_ENOSYS;
}
/*
* struct fi_ops_msg
@ -349,14 +357,6 @@ ssize_t fi_no_msg_injectdata(struct fid_ep *ep, const void *buf, size_t len,
{
return -FI_ENOSYS;
}
ssize_t fi_no_msg_rx_size_left(struct fid_ep *ep)
{
return -FI_ENOSYS;
}
ssize_t fi_no_msg_tx_size_left(struct fid_ep *ep)
{
return -FI_ENOSYS;
}
/*
* struct fi_ops_wait

View file

@ -44,17 +44,16 @@
#include <rdma/fi_errno.h>
#include "fi.h"
#include "prov.h"
#include "fi_log.h"
#ifdef HAVE_LIBDL
#include <dlfcn.h>
#endif
#define FI_WARN(fmt, ...) \
do { fprintf(stderr, "%s: " fmt, PACKAGE, ##__VA_ARGS__); } while (0)
struct fi_prov {
struct fi_prov *next;
struct fi_provider *provider;
void *dlhandle;
};
static struct fi_prov *fi_getprov(const char *prov_name);
@ -64,31 +63,62 @@ static volatile int init = 0;
static pthread_mutex_t ini_lock = PTHREAD_MUTEX_INITIALIZER;
static int fi_register_provider(struct fi_provider *provider)
static void cleanup_provider(struct fi_provider *provider, void *dlhandle)
{
if (provider && provider->cleanup)
provider->cleanup();
#ifdef HAVE_LIBDL
if (dlhandle)
dlclose(dlhandle);
#endif
}
static int fi_register_provider(struct fi_provider *provider, void *dlhandle)
{
struct fi_prov *prov;
int ret;
if (!provider)
return -FI_EINVAL;
if (!provider) {
ret = -FI_EINVAL;
goto cleanup;
}
FI_LOG(2, NULL, "registering provider: %s (%d.%d)\n", provider->name,
FI_MAJOR(provider->version), FI_MINOR(provider->version));
if (FI_MAJOR(provider->fi_version) != FI_MAJOR_VERSION ||
FI_MINOR(provider->fi_version) > FI_MINOR_VERSION) {
FI_LOG(2, NULL, "provider has unsupported FI version (provider %d.%d != libfabric %d.%d); ignoring\n",
FI_MAJOR(provider->fi_version),
FI_MINOR(provider->fi_version),
FI_MAJOR_VERSION, FI_MINOR_VERSION);
ret = -FI_ENOSYS;
goto cleanup;
}
prov = fi_getprov(provider->name);
if (prov) {
/* If we have two versions of the same provider,
* keep the most recent
/* If this provider is older than an already-loaded
* provider of the same name, then discard this one.
*/
if (FI_VERSION_GE(prov->provider->version, provider->version)) {
FI_LOG(2, NULL, "a newer %s provider was already loaded; ignoring this one\n",
provider->name);
ret = -FI_EALREADY;
goto cleanup;
}
prov->provider->cleanup();
/* This provider is newer than an already-loaded
* provider of the same name, so discard the
* already-loaded one.
*/
FI_LOG(2, NULL, "an older %s provider was already loaded; keeping this one and ignoring the older one\n",
provider->name);
cleanup_provider(prov->provider, prov->dlhandle);
prov->dlhandle = dlhandle;
prov->provider = provider;
return 0;
}
@ -99,6 +129,7 @@ static int fi_register_provider(struct fi_provider *provider)
goto cleanup;
}
prov->dlhandle = dlhandle;
prov->provider = provider;
if (prov_tail)
prov_tail->next = prov;
@ -108,7 +139,8 @@ static int fi_register_provider(struct fi_provider *provider)
return 0;
cleanup:
provider->cleanup();
cleanup_provider(provider, dlhandle);
return ret;
}
@ -136,6 +168,8 @@ static void fi_ini(void)
if (init)
goto unlock;
fi_log_init();
#ifdef HAVE_LIBDL
struct dirent **liblist;
int n;
@ -156,34 +190,35 @@ static void fi_ini(void)
while (n--) {
if (asprintf(&lib, "%s/%s", provdir, liblist[n]->d_name) < 0) {
FI_WARN("asprintf failed to allocate memory\n");
FI_WARN(NULL, "asprintf failed to allocate memory\n");
free(liblist[n]);
goto done;
}
FI_DEBUG(NULL, "opening provider lib %s\n", lib);
dlhandle = dlopen(lib, RTLD_NOW);
if (dlhandle == NULL)
FI_WARN("dlopen(%s): %s\n", lib, dlerror());
FI_WARN(NULL, "dlopen(%s): %s\n", lib, dlerror());
free(liblist[n]);
free(lib);
inif = dlsym(dlhandle, "fi_prov_ini");
if (inif == NULL)
FI_WARN("dlsym: %s\n", dlerror());
FI_WARN(NULL, "dlsym: %s\n", dlerror());
else
fi_register_provider((inif)());
fi_register_provider((inif)(), dlhandle);
}
free(liblist);
done:
#endif
fi_register_provider(PSM_INIT);
fi_register_provider(USNIC_INIT);
fi_register_provider(PSM_INIT, NULL);
fi_register_provider(USNIC_INIT, NULL);
fi_register_provider(VERBS_INIT);
fi_register_provider(SOCKETS_INIT);
fi_register_provider(VERBS_INIT, NULL);
fi_register_provider(SOCKETS_INIT, NULL);
init = 1;
unlock:
@ -193,7 +228,7 @@ unlock:
static void __attribute__((destructor)) fi_fini(void)
{
for (struct fi_prov *prov = prov_head; prov; prov = prov->next)
prov->provider->cleanup();
cleanup_provider(prov->provider, prov->dlhandle);
}
static struct fi_prov *fi_getprov(const char *prov_name)
@ -258,6 +293,8 @@ int fi_getinfo_(uint32_t version, const char *node, const char *service,
ret = prov->provider->getinfo(version, node, service, flags,
hints, &cur);
if (ret) {
FI_LOG(1, NULL, "fi_getinfo: provider %s returned -%d (%s)\n",
prov->provider->name, -ret, fi_strerror(-ret));
if (ret == -FI_ENODATA) {
continue;
} else {

View file

@ -487,6 +487,11 @@ static void fi_tostr_atomic_op(char *buf, enum fi_op op)
}
}
static void fi_tostr_version(char *buf)
{
strcatf(buf, VERSION);
}
__attribute__((visibility ("default")))
char *fi_tostr_(const void *data, enum fi_type datatype)
{
@ -560,6 +565,9 @@ char *fi_tostr_(const void *data, enum fi_type datatype)
case FI_TYPE_ATOMIC_OP:
fi_tostr_atomic_op(buf, enumval);
break;
case FI_TYPE_VERSION:
fi_tostr_version(buf);
break;
default:
strcatf(buf, "Unknown type");
break;

105 opal/mca/common/libfabric/libfabric/src/log.c Normal file
View file

@ -0,0 +1,105 @@
/*
* Copyright (c) 2015, Cisco Systems, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "fi.h"
#include "fi_log.h"
/* General implementation note: these functions currently use multiple fprintfs
* in a row, which can render in an ugly fashion for multithreaded code and for
* some mpirun implementations. If this bugs anyone enough then we can convert
* them to snprintf to build up the printout in a single buffer.
*/
int fi_log_level = INT_MIN;
void fi_log_init(void)
{
int ret;
if (getenv("FI_LOG_LEVEL") != NULL) {
errno = 0;
ret = strtol(getenv("FI_LOG_LEVEL"), NULL, 10);
if (errno != 0)
fprintf(stderr,
"%s: invalid value specified for FI_LOG_LEVEL (%s)\n",
PACKAGE, strerror(errno));
else
fi_log_level = (int)ret;
}
}
void fi_warn_impl(const char *prov, const char *fmt, ...)
{
va_list vargs;
if (prov != NULL)
fprintf(stderr, "%s:%s: ", PACKAGE, prov);
else
fprintf(stderr, "%s: ", PACKAGE);
va_start(vargs, fmt);
vfprintf(stderr, fmt, vargs);
va_end(vargs);
}
void fi_log_impl(int level, const char *prov, const char *fmt, ...)
{
va_list vargs;
if (prov != NULL)
fprintf(stderr, "%s:%s:<%d> ", PACKAGE, prov, level);
else
fprintf(stderr, "%s:<%d> ", PACKAGE, level);
va_start(vargs, fmt);
vfprintf(stderr, fmt, vargs);
va_end(vargs);
}
void fi_debug_impl(const char *prov, const char *fmt, ...)
{
va_list vargs;
if (prov != NULL)
fprintf(stderr, "%s:%s:<DBG> ", PACKAGE, prov);
else
fprintf(stderr, "%s:<DBG> ", PACKAGE);
va_start(vargs, fmt);
vfprintf(stderr, fmt, vargs);
va_end(vargs);
}
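As the implementation note at the top of this file says, the prefix and the message body go to stderr in separate fprintf calls, so concurrent threads can interleave them mid-line. A rough sketch of the single-buffer alternative it alludes to (illustrative only; it assumes PACKAGE is available from config.h, as in the functions above):

#include <stdarg.h>
#include <stdio.h>

#define LOG_BUF_SIZE 1024

/* Hypothetical variant of fi_log_impl: build the whole line with snprintf
 * and vsnprintf, then emit it with a single fputs so the prefix and the
 * message cannot be split up by another thread's output.
 * PACKAGE is assumed to come from config.h, as in the functions above. */
static void fi_log_onebuf(int level, const char *prov, const char *fmt, ...)
{
	char buf[LOG_BUF_SIZE];
	va_list vargs;
	int len;

	if (prov != NULL)
		len = snprintf(buf, sizeof(buf), "%s:%s:<%d> ", PACKAGE, prov, level);
	else
		len = snprintf(buf, sizeof(buf), "%s:<%d> ", PACKAGE, level);
	if (len < 0)
		return;
	if ((size_t)len >= sizeof(buf))
		len = sizeof(buf) - 1;

	va_start(vargs, fmt);
	vsnprintf(buf + len, sizeof(buf) - (size_t)len, fmt, vargs);
	va_end(vargs);

	fputs(buf, stderr);
}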