1
1

usnic: convert from verbs to libfabric (yay!)

This commit represents the conversion of the usnic BTL from verbs to
libfabric.

For the moment, libfabric is embedded in Open MPI (currently in the
usnic BTL).  This is because the libfabric API is still changing, and
also has not yet been released.  Ultimately, this embedded copy of
libfabric will likely disappear and the usnic BTL will rely on an
external installation of libfabric.

New configure options:

* --with-libfabric: will cause configure to fail if libfabric support
    cannot be built
* --without-libfabric: will prevent libfabric support from being built
* --with-libfabric=DIR: use an external libfabric installation
* --with-libfabric-libdir=LIBDIR: when paired with --with-libfabric=DIR,
    use LIBDIR for the libfabric installation library dir

The --with-libnl3[-libdir] arguments are now gone.
Этот коммит содержится в:
Jeff Squyres 2014-12-02 13:09:46 -08:00
родитель baf32fe480
Коммит 984982790a
329 изменённых файлов: 75818 добавлений и 3303 удалений

216
config/opal_check_libfabric.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,216 @@
# -*- shell-script -*-
#
# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# --------------------------------------------------------
# OPAL_CHECK_LIBFABRIC([prefix (1)],
#                      [action-if-found (2)],
#                      [action-if-not-found (3)])
# --------------------------------------------------------
# Check if libfabric support can be found.  Sets prefix_{CPPFLAGS,
# LDFLAGS, LIBS} as needed and runs action-if-found if there is
# support, otherwise executes action-if-not-found.
#
# If --without-libfabric was given, no libfabric setup is attempted
# and action-if-not-found is executed.  If --with-libfabric or
# --with-libfabric=DIR was given but support cannot be set up,
# configure aborts.
#
# The global CPPFLAGS, LDFLAGS and LIBS are left unchanged by this
# macro.
# --------------------------------------------------------
AC_DEFUN([OPAL_CHECK_LIBFABRIC],[
    # Setup the --with switches to allow users to specify where
    # libfabric stuff lives.
    AC_REQUIRE([_OPAL_CHECK_LIBFABRIC_DIR])

    # The helper macros below run tests that may modify the global
    # flags, so remember the values to put back afterwards.
    opal_check_libfabric_$1_save_CPPFLAGS=$CPPFLAGS
    opal_check_libfabric_$1_save_LDFLAGS=$LDFLAGS
    opal_check_libfabric_$1_save_LIBS=$LIBS

    # Honor --without-libfabric.  The sanitized directory variables
    # never hold the value no, so the request has to be detected
    # through opal_want_libfabric.
    AS_IF([test "$opal_want_libfabric" = "no"],
          [AC_MSG_NOTICE([libfabric support disabled via --without-libfabric])
           opal_check_libfabric_happy=no],
          [# usNIC: Use the internal or external libfabric?
           AS_IF([test -z "$opal_libfabric_dir" && test -z "$opal_libfabric_libdir"],
                 [_OPAL_USNIC_SETUP_LIBFABRIC_INTERNAL($1)],
                 [_OPAL_USNIC_SETUP_LIBFABRIC_EXTERNAL($1)])])

    # The Automake conditionals must be defined on every configure
    # path, so this is invoked unconditionally.
    _OPAL_USNIC_SETUP_LIBFABRIC_INTERNAL_CONDITIONALS

    CPPFLAGS=$opal_check_libfabric_$1_save_CPPFLAGS
    LDFLAGS=$opal_check_libfabric_$1_save_LDFLAGS
    LIBS=$opal_check_libfabric_$1_save_LIBS

    AS_IF([test "$opal_check_libfabric_happy" = "yes"],
          [$2],
          [AS_IF([test "$opal_want_libfabric" = "yes"],
                 [AC_MSG_WARN([Libfabric support requested (via --with-libfabric) but not found.])
                  AC_MSG_ERROR([Cannot continue])])
           $3])
])
# --------------------------------------------------------
# _OPAL_CHECK_LIBFABRIC_DIR (internal)
# --------------------------------------------------------
# Add --with-libfabric options, and if directories are specified,
# sanity check them.
#
# At the end of this macro:
#
# 1. $opal_want_libfabric will be set to:
#    "yes" if --with-libfabric or --with-libfabric=DIR was specified
#    "no" if --without-libfabric was specified
#    "optional" if neither --with-libfabric[=DIR] nor
#    --without-libfabric was specified
#
# 2. $opal_libfabric_dir holds the DIR given to --with-libfabric=DIR,
#    and $opal_libfabric_libdir holds the DIR given to
#    --with-libfabric-libdir=DIR.  Each one is empty if its option was
#    absent or was given as a bare "yes" or "no".  Note that only one
#    of the two may be set at this point; deriving the missing one is
#    left to the code that sets up an external libfabric.
#
# --------------------------------------------------------
AC_DEFUN([_OPAL_CHECK_LIBFABRIC_DIR],[
    # Add --with options
    AC_ARG_WITH([libfabric],
                [AS_HELP_STRING([--with-libfabric(=DIR)],
                                [Build libfabric support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])])
    AC_ARG_WITH([libfabric-libdir],
                [AS_HELP_STRING([--with-libfabric-libdir=DIR],
                                [Search for libfabric libraries in DIR])])

    # Sanity check the --with values
    OPAL_CHECK_WITHDIR([libfabric], [$with_libfabric],
                       [include/rdma/fabric.h])
    OPAL_CHECK_WITHDIR([libfabric-libdir], [$with_libfabric_libdir],
                       [libfabric.*])

    # Tri-state summary of what the user asked for.
    AS_IF([test -z "$with_libfabric"],
          [opal_want_libfabric=optional],
          [AS_IF([test "$with_libfabric" = "no"],
                 [opal_want_libfabric=no],
                 [opal_want_libfabric=yes])])

    # Set standardized shell variables for libfabric-using components.
    # Only real directory values are kept; bare yes and no are
    # filtered out.
    opal_libfabric_dir=
    AS_IF([test -n "$with_libfabric" && \
           test "$with_libfabric" != "yes" && \
           test "$with_libfabric" != "no"],
          [opal_libfabric_dir=$with_libfabric])

    opal_libfabric_libdir=
    AS_IF([test -n "$with_libfabric_libdir" && \
           test "$with_libfabric_libdir" != "yes" && \
           test "$with_libfabric_libdir" != "no"],
          [opal_libfabric_libdir=$with_libfabric_libdir])
])
# --------------------------------------------------------
# Internal helper macro to setup the embedded libfabric.
#
# The internal libfabric is *TEMPORARY* and only for convenience of
# development. Ultimately, the embedded libfabric will disappear and
# you will need to have libfabric installed.
# --------------------------------------------------------
# Define the Automake conditionals HAVE_LD_VERSION_SCRIPT and
# HAVE_DIRECT, both hard-wired to false.  The shell's own "false" is
# used rather than an absolute path, because /bin/false is not
# present on every platform.
AC_DEFUN([_OPAL_USNIC_SETUP_LIBFABRIC_INTERNAL_CONDITIONALS],[
    AM_CONDITIONAL([HAVE_LD_VERSION_SCRIPT], [false])
    AM_CONDITIONAL([HAVE_DIRECT], [false])
])
# Set up the embedded copy of libfabric.
#
# Argument 1 is the prefix of the _CPPFLAGS and _LIBS shell variables
# to fill in.  On success, BTL_USNIC_EMBEDDED_LIBFABRIC and the
# prefixed variables are set and opal_check_libfabric_happy is "yes".
# If any prerequisite header or library is missing,
# opal_check_libfabric_happy is "no" and the prefixed variables are
# left untouched.
AC_DEFUN([_OPAL_USNIC_SETUP_LIBFABRIC_INTERNAL],[
    AC_MSG_NOTICE([Setting up for INTERNAL libfabric])

    # Mostly replicate relevant parts from the libfabric configure.ac
    # script.  Make a lot of simplifying assumptions, just for the
    # sake of embedding here.
    AC_DEFINE([INCLUDE_VALGRIND], 0, [no valgrind])
    AC_DEFINE([STREAM_CLOEXEC], 0, [no streamcloexec])
    AC_DEFINE([HAVE_ATOMICS], 0, [no atomics])
    AC_DEFINE([HAVE_SYMVER_SUPPORT], 1, [assembler has .symver support])

    # Prerequisites of the embedded build; any failure clears the flag.
    usnic_happy=1
    AC_CHECK_HEADER([infiniband/verbs.h], [], [usnic_happy=0])
    AC_CHECK_HEADER([linux/netlink.h], [], [usnic_happy=0], [
#include <sys/types.h>
#include <net/if.h>
])
    AC_CHECK_LIB([nl], [nl_connect], [], [usnic_happy=0])

    # Report success only if every prerequisite was found; previously
    # success was reported unconditionally.
    AS_IF([test "$usnic_happy" -eq 1],
          [BTL_USNIC_EMBEDDED_LIBFABRIC=libusnic_fabric_embedded.la
           $1_CPPFLAGS="-I$OPAL_TOP_SRCDIR/opal/mca/common/libfabric/include"
           $1_LIBS="$OPAL_TOP_BUILDDIR/opal/mca/common/libusnic_fabric_embedded.la"
           opal_check_libfabric_happy=yes],
          [opal_check_libfabric_happy=no])
    AC_SUBST(BTL_USNIC_EMBEDDED_LIBFABRIC)
])
# --------------------------------------------------------
# Internal helper macro to setup for an external libfabric
#
# Argument 1 is the prefix handed to OPAL_CHECK_PACKAGE.  Whichever
# of $opal_libfabric_dir / $opal_libfabric_libdir the user did not
# supply is derived from the other one; configure aborts if that is
# not possible.  Sets opal_check_libfabric_happy to "yes" or "no".
# --------------------------------------------------------
AC_DEFUN([_OPAL_USNIC_SETUP_LIBFABRIC_EXTERNAL],[
    AC_MSG_NOTICE([Setting up for EXTERNAL libfabric])

    # If the top dir was specified but the libdir was not, look for
    # it.  Note that if the user needs a specific libdir (i.e., if our
    # heuristic ordering below is not sufficient), they need to
    # specify it.  Each probe is skipped once a libdir has been found.
    AS_IF([test -z "$opal_libfabric_libdir" && test -n "$opal_libfabric_dir"],
          [_OPAL_CHECK_LIBFABRIC_LIBDIR(["$opal_libfabric_dir/lib"])])
    AS_IF([test -z "$opal_libfabric_libdir" && test -n "$opal_libfabric_dir"],
          [_OPAL_CHECK_LIBFABRIC_LIBDIR(["$opal_libfabric_dir/lib64"])])
    AS_IF([test -z "$opal_libfabric_libdir" && test -n "$opal_libfabric_dir"],
          [_OPAL_CHECK_LIBFABRIC_LIBDIR(["$opal_libfabric_dir/lib32"])])
    AS_IF([test -z "$opal_libfabric_libdir" && test -n "$opal_libfabric_dir"],
          [AC_MSG_WARN([Could not find libfabric in the usual locations under $opal_libfabric_dir])
           AC_MSG_ERROR([Cannot continue])
          ])

    # If the libdir was specified, but the top dir was not, look for
    # it.  Note that if the user needs a specific top dir (i.e., if
    # our heuristic below is not sufficient), they need to specify it.
    # The test must be on opal_libfabric_dir so that a top dir given
    # by the user is never overwritten.
    AS_IF([test -z "$opal_libfabric_dir" && test -n "$opal_libfabric_libdir"],
          [_OPAL_CHECK_LIBFABRIC_INCDIR([`dirname "$opal_libfabric_libdir"`])])
    AS_IF([test -z "$opal_libfabric_dir" && test -n "$opal_libfabric_libdir"],
          [AC_MSG_WARN([Could not find include/rdma/fabric.h next to the libfabric libdir $opal_libfabric_libdir])
           AC_MSG_ERROR([Cannot continue])
          ])

    # Now actually check to ensure that the external libfabric works
    OPAL_CHECK_PACKAGE([$1],
                       [rdma/fabric.h],
                       [fabric],
                       [fi_getinfo],
                       [],
                       [$opal_libfabric_dir],
                       [$opal_libfabric_libdir],
                       [opal_check_libfabric_happy="yes"],
                       [opal_check_libfabric_happy="no"])
])
# --------------------------------------------------------
# Internal helper macro to probe a candidate libfabric libdir
#
# Argument 1 is the candidate directory.  If it is a directory that
# contains at least one libfabric.* entry, it is stored in
# $opal_libfabric_libdir; otherwise nothing is changed.
# --------------------------------------------------------
AC_DEFUN([_OPAL_CHECK_LIBFABRIC_LIBDIR],[
    AS_IF([test -d "$1" && \
           test "x`ls $1/libfabric.* 2> /dev/null`" != "x"],
          [opal_libfabric_libdir="$1"])
])
# --------------------------------------------------------
# Internal helper macro to probe a candidate libfabric top dir
#
# Argument 1 is the candidate directory.  If it is a directory and
# include/rdma/fabric.h exists beneath it, it is stored in
# $opal_libfabric_dir; otherwise nothing is changed.
# --------------------------------------------------------
AC_DEFUN([_OPAL_CHECK_LIBFABRIC_INCDIR],[
    AS_IF([test -d "$1" && test -f "$1/include/rdma/fabric.h"],
          [opal_libfabric_dir="$1"])
])

Просмотреть файл

@ -19,8 +19,7 @@
# $HEADER$
#
AM_CPPFLAGS = $(btl_usnic_CPPFLAGS)
AM_CFLAGS = $(btl_usnic_CFLAGS)
AM_CPPFLAGS = -DBTL_IN_OPAL=1 $(opal_common_libfabric_CPPFLAGS)
EXTRA_DIST = README.txt README.test
@ -31,19 +30,9 @@ test_sources = \
test/btl_usnic_component_test.h \
test/btl_usnic_graph_test.h
libnl_utils_sources = \
btl_usnic_libnl_utils.h
if OPAL_BTL_USNIC_BUILD_LIBNL1_UTILS
libnl_utils_sources += btl_usnic_libnl1_utils.c
endif OPAL_BTL_USNIC_BUILD_LIBNL1_UTILS
if OPAL_BTL_USNIC_BUILD_LIBNL3_UTILS
libnl_utils_sources += btl_usnic_libnl3_utils.c
endif OPAL_BTL_USNIC_BUILD_LIBNL3_UTILS
sources = \
btl_usnic_compat.h \
btl_usnic_compat.c \
btl_usnic_module.c \
btl_usnic_module.h \
btl_usnic.h \
@ -55,8 +44,6 @@ sources = \
btl_usnic_cagent.c \
btl_usnic_endpoint.c \
btl_usnic_endpoint.h \
btl_usnic_ext.h \
btl_usnic_ext.c \
btl_usnic_frag.c \
btl_usnic_frag.h \
btl_usnic_graph.h \
@ -76,8 +63,7 @@ sources = \
btl_usnic_util.h \
btl_usnic_test.c \
btl_usnic_test.h \
$(test_sources) \
$(libnl_utils_sources)
$(test_sources)
if OPAL_HAVE_HWLOC
sources += btl_usnic_hwloc.c
@ -102,17 +88,18 @@ endif
mcacomponentdir = $(opallibdir)
mcacomponent_LTLIBRARIES = $(component)
mca_btl_usnic_la_SOURCES = $(component_sources)
mca_btl_usnic_la_LDFLAGS = -module -avoid-version $(btl_usnic_LDFLAGS)
mca_btl_usnic_la_LIBADD = $(btl_usnic_LIBS) \
$(OPAL_TOP_BUILDDIR)/opal/mca/common/verbs/lib@OPAL_LIB_PREFIX@mca_common_verbs.la
mca_btl_usnic_la_LDFLAGS = -module -avoid-version
mca_btl_usnic_la_LIBADD = $(opal_common_libfabric_LIBADD)
noinst_LTLIBRARIES = $(lib)
libmca_btl_usnic_la_SOURCES = $(lib_sources)
libmca_btl_usnic_la_LDFLAGS= -module -avoid-version $(btl_usnic_LDFLAGS)
libmca_btl_usnic_la_LIBADD = $(btl_usnic_LIBS)
libmca_btl_usnic_la_LIBADD = $(opal_common_libfabric_LIBADD)
if OPAL_BTL_USNIC_BUILD_UNIT_TESTS
opal_btl_usnic_run_tests_SOURCES = test/opal_btl_usnic_run_tests.c
opal_btl_usnic_run_tests_LDADD = -ldl
bin_PROGRAMS = opal_btl_usnic_run_tests
usnic_btl_run_tests_CPPFLAGS = \
-DBTL_USNIC_RUN_TESTS_SYMBOL=\"opal_btl_usnic_run_tests\"
usnic_btl_run_tests_SOURCES = test/usnic_btl_run_tests.c
usnic_btl_run_tests_LDADD = -ldl
bin_PROGRAMS = usnic_btl_run_tests
endif OPAL_BTL_USNIC_BUILD_UNIT_TESTS

Просмотреть файл

@ -22,7 +22,7 @@ Goals
Anti-Goals
----------
* Testing the low level networking API (e.g., verbs).
* Testing the low level networking API (e.g., libfabric).
* Testing inter-process interaction, such as ORTE-related functionality.
Constraints
@ -40,7 +40,7 @@ Design Notes
- Rationale: keeps `X.c` clutter-free
* unit test infrastructure lives in `btl_usnic_test.c` and `btl_usnic_test.h`
* unit test functionality is built and enabled by passing
`--enable-ompi-btl-usnic-unit-tests` to configure
`--enable-opal-btl-usnic-unit-tests` to configure
- Rationale: default state disables all unit test logic, achieving our
"non-interference" goals
* The tests are run by a new executable that gets built when unit tests are

Просмотреть файл

@ -35,7 +35,7 @@ data queue is for standard data traffic
command queue should possibly be called "priority" queue
command queue is shorter and has a smaller MTU than the data queue
this makes the command queue a lot faster than the data queue, so we
this makes the command queue a lot faster than the data queue, so we
hijack it for sending very small fragments (<= tiny_mtu, currently 768 bytes)
command queue is used for ACKs and tiny fragments
@ -47,7 +47,7 @@ PML fragments marked priority should perhaps use command queue
sending
Normally, all send requests are simply enqueued and then actually posted
to the NIC by the routine opal_btl_usnic_module_progress_sends().
to the NIC by the routine opal_btl_usnic_module_progress_sends().
"fastpath" tiny sends are the exception.
Each module maintains a queue of endpoints that are ready to send.
@ -59,14 +59,14 @@ An endpoint is ready to send if all of the following are met:
Each module also maintains a list of segments that need to be retransmitted.
Note that the list of pending retrans is per-module, not per-endpoint.
send progression first posts any pending retransmissions, always using the
send progression first posts any pending retransmissions, always using the
data channel. (reason is that if we start getting heavy congestion and
there are lots of retransmits, it becomes more important than ever to
there are lots of retransmits, it becomes more important than ever to
prioritize ACKs, clogging command channel with retrans data makes things worse,
not better)
Next, progression loops sending segments to the endpoint at the top of
the "endpoints_with_sends" queue. When an endpoint exhausts its send
the "endpoints_with_sends" queue. When an endpoint exhausts its send
credits or fills its send window or runs out of segments to send, it removes
itself from the endpoint_with_sends list. Any pending ACKs will be
picked up and piggy-backed on these sends.
@ -79,11 +79,11 @@ The middle part of the progression loop handles both small (single-segment)
and large (multi-segment) sends.
For small fragments, the verbs descriptor within the embedded segment is
updated with length, BTL header is updated, then we call
updated with length, BTL header is updated, then we call
opal_btl_usnic_endpoint_send_segment() to send the segment.
After posting, we make a PML callback if needed.
For large fragments, a little more is needed. segments froma large
For large fragments, a little more is needed. segments froma large
fragment have a slightly larger BTL header which contains a fragment ID,
and offset, and a size. The fragment ID is allocated when the first chunk
the fragment is sent. A segment gets allocated, next blob of data is
@ -93,7 +93,7 @@ send queue.
[double-click opal_btl_usnic_endpoint_send_segment()]
This is common posting code for large or small segments. It assigns a
This is common posting code for large or small segments. It assigns a
sequence number to a segment, checks for an ACK to piggy-back,
posts the segment to the NIC, and then starts the retransmit timer
by checking the segment into hotel. Send credits are consumed here.
@ -117,7 +117,7 @@ usnic_put(desc)
usnic_alloc() currently asserts the length is "small", allocates and
fills in a small fragment. src pointer will point to start of
fills in a small fragment. src pointer will point to start of
associated registered mem + sizeof BTL header, and PML will put its
data there.
@ -138,7 +138,7 @@ fragment, the user data is copied into the associated registered memory at this
time and the SG list in the descriptor is collapsed to one entry.
After the checks above are done, the fragment is enqueued to be sent
via opal_btl_usnic_endpoint_enqueue_frag()
via opal_btl_usnic_endpoint_enqueue_frag()
usnic_put()
PML will have filled in destination address in descriptor. This is saved
@ -161,7 +161,7 @@ An ack packet is header only with a sequence number being ACKed.
Both frag and chunk packets go through some of the same processing.
Both may carry piggy-backed ACKs which may need to be processed.
Both have sequence numbers which must be processed and may result in
Both have sequence numbers which must be processed and may result in
dropping the packet and/or queueing an ACK to the sender.
frag packets may be either regular PML fragments or PUT segments.
@ -173,7 +173,7 @@ needed. Once the callback is complete, the receive buffer is recycled.
chunk packets are parts of a larger fragment. If an active fragment receive
for the matching fragment ID cannot be found, and new fragment info
descriptor is allocated. If this is not a PUT (put_addr == NULL), we
descriptor is allocated. If this is not a PUT (put_addr == NULL), we
malloc() data to reassemble the fragment into. Each subsequent chunk
is copied either into this reassembly buffer or directly into user memory.
When the last chunk of a fragment arrives, a PML callback is made for non-PUTs,
@ -247,3 +247,40 @@ of the large sends. smalls would have to be paced pretty precisely to
keep command queue empty enough and also beat out the large sends.
send credits limit how many larges can be queued on the sender, but there
could be many on the receiver
======================================
November 2014 / SC 2014
The usnic BTL code has been unified across master and the v1.8
branches. That is, you can copy the code from
v1.8:ompi/mca/btl/usnic/* to master:opal/mca/btl/usnic*, and then only
have to make 3 changes in the resulting code in master:
1. Edit Makefile.am: s/ompi/opal/gi
2. Edit configure.m4: s/ompi/opal/gi
3. Edit Makefile.am: change -DBTL_IN_OPAL=0 to -DBTL_IN_OPAL=1
*** Note: the BTL_IN_OPAL preprocessor macro is set in Makefile.am
rather than in btl_usnic_compat.h to avoid all kinds of include
file dependency issues (i.e., btl_usnic_compat.h would need to be
included first, but it requires some data structures to be
defined, which means it either can't be first or we have to
declare various structs first... just put BTL_IN_OPAL in
Makefile.am and be happy).
*** Note 2: CARE MUST BE TAKEN WHEN COPYING THE OTHER DIRECTION! It
is *not* as simple as a simple s/opal/ompi/gi in configure.m4 and
Makefile.am. It certainly can be done, but there's a few strings
that need to stay "opal" or "OPAL" (e.g., OPAL_HAVE_HWLOC).
Hence, the string replace will likely need to be done via manual
inspection.
Things still to do:
- VF/PF sanity checks in component.c:check_usnic_config() uses
usnic-specific fi_provider info. The exact mechanism might change
as provider-specific info is still being discussed upstream.
- component.c:usnic_handle_cq_error is using a USD_* constant from
usnic_direct. Need to get that value through libfabric somehow.

Просмотреть файл

@ -12,8 +12,6 @@
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -28,7 +26,6 @@
#include "opal_config.h"
#include <sys/types.h>
#include <infiniband/verbs.h>
#include "opal_stdint.h"
#include "opal/util/alfg.h"
@ -36,13 +33,20 @@
#include "opal/class/opal_hash_table.h"
#include "opal/mca/event/event.h"
#if BTL_IN_OPAL
#include "opal/class/opal_free_list.h"
#include "opal/mca/btl/btl.h"
#include "opal/mca/btl/base/btl_base_error.h"
#include "opal/mca/btl/base/base.h"
#include "opal/mca/mpool/grdma/mpool_grdma.h"
#else
#include "ompi/class/ompi_free_list.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "ompi/mca/btl/base/base.h"
#include "ompi/mca/mpool/grdma/mpool_grdma.h"
#endif
#include "btl_usnic_libnl_utils.h"
#include "btl_usnic_compat.h"
BEGIN_C_DECLS
@ -68,10 +72,8 @@ extern opal_rng_buff_t opal_btl_usnic_rand_buff;
(type *)( ((char *)(ptr)) - offsetof(type,member) ))
#endif
/* particularly old versions of verbs do not have this function, which will
* cause unnecessary build failures on other platforms */
#if !HAVE_DECL_IBV_EVENT_TYPE_STR
#define ibv_event_type_str(ev_type) "(ibv_event_type_str unavailable)"
#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif
/* MSGDEBUG2 prints 1 line at each BTL entry point */
@ -122,7 +124,7 @@ extern opal_rng_buff_t opal_btl_usnic_rand_buff;
/**
* Verbs UD BTL component.
* usnic BTL component
*/
typedef struct opal_btl_usnic_component_t {
/** base BTL component */
@ -132,13 +134,13 @@ typedef struct opal_btl_usnic_component_t {
* subsequent fastpath fields */
/** Maximum number of BTL modules */
uint32_t max_modules;
int max_modules;
/** Number of available/initialized BTL modules */
uint32_t num_modules;
int num_modules;
/* Cached hashed version of my RTE proc name (to stuff in
protocol headers) */
opal_process_name_t my_name;
uint64_t my_hashed_rte_name;
/** array of possible BTLs (>= num_modules elements) */
struct opal_btl_usnic_module_t* usnic_all_modules;
@ -148,9 +150,6 @@ typedef struct opal_btl_usnic_component_t {
/** convertor packing threshold */
int pack_lazy_threshold;
/** does the stack below us speak UDP or custom-L2? */
bool use_udp;
/* vvvvvvvvvv non-fastpath fields go below vvvvvvvvvv */
/** list of usnic proc structures */
@ -167,9 +166,6 @@ typedef struct opal_btl_usnic_component_t {
bool stats_relative;
int stats_frequency;
/** GID index to use */
int gid_index;
/** Whether we want to use NUMA distances to choose which usNIC
devices to use for short messages */
bool want_numa_device_assignment;
@ -190,7 +186,17 @@ typedef struct opal_btl_usnic_component_t {
/** retrans characteristics */
int retrans_timeout;
struct usnic_rtnl_sk *unlsk;
/** transport header length for all usNIC devices on this server
(it is guaranteed that all usNIC devices on a single server
will have the same underlying transport, and therefore the
same transport header length) */
int transport_header_len;
uint32_t transport_protocol;
/* what UDP port do we want to use? If 0, the system will pick.
If nonzero, it is used as the base -- the final number will be
(base+my_local_rank). */
int udp_port_base;
/** disable the "cannot find route" warnings (for network setups
where this is known/acceptable) */
@ -202,7 +208,7 @@ typedef struct opal_btl_usnic_component_t {
int connectivity_ack_timeout;
int connectivity_num_retries;
/* ibv_create_ah() (i.e., ARP) timeout */
/* remote address handle (i.e., ARP) timeout */
int arp_timeout;
/** how many short packets have to be received before outputting

Просмотреть файл

@ -131,8 +131,8 @@ opal_btl_usnic_handle_ack(
#if MSGDEBUG1
opal_output(0, " ACKED seg %p frag %p ack_bytes=%"PRIu32" left=%zd dst_seg[0].seg_addr=%p des_flags=0x%x\n",
(void*)sseg, (void*)frag, bytes_acked,
frag->sf_ack_bytes_left-bytes_acked,
frag->sf_base.uf_dst_seg[0].seg_addr.pval,
frag->sf_ack_bytes_left - bytes_acked,
frag->sf_base.uf_local_seg[0].seg_addr.pval,
frag->sf_base.uf_base.des_flags);
#endif
@ -200,25 +200,23 @@ opal_btl_usnic_ack_send(
ack->ss_base.us_btl_header->ack_seq =
endpoint->endpoint_next_contig_seq_to_recv - 1;
ack->ss_base.us_sg_entry[0].length =
sizeof(opal_btl_usnic_btl_header_t);
ack->ss_len = sizeof(opal_btl_usnic_btl_header_t);
#if MSGDEBUG1
{
uint8_t mac[6];
char src_mac[32];
char dest_mac[32];
char remote_ip[IPV4STRADDRLEN];
struct opal_btl_usnic_modex_t *modex =
&endpoint->endpoint_remote_modex;
opal_btl_usnic_snprintf_ipv4_addr(remote_ip, sizeof(remote_ip),
modex->ipv4_addr,
modex->netmask);
memset(src_mac, 0, sizeof(src_mac));
memset(dest_mac, 0, sizeof(dest_mac));
opal_btl_usnic_sprintf_mac(src_mac, module->if_mac);
opal_btl_usnic_gid_to_mac(&endpoint->endpoint_remote_addr.gid, mac);
opal_btl_usnic_sprintf_mac(dest_mac, mac);
opal_output(0, "--> Sending ACK, sg_entry length %d, seq %" UDSEQ " to %s, qp %u",
ack->ss_base.us_sg_entry[0].length,
ack->ss_base.us_btl_header->ack_seq, dest_mac,
endpoint->endpoint_remote_addr.qp_num[ack->ss_channel]);
opal_output(0, "--> Sending ACK, length %d, seq %" UDSEQ " to %s, port %u",
ack->ss_len,
ack->ss_base.us_btl_header->ack_seq,
remote_ip,
modex->ports[ack->ss_channel]);
}
#endif
@ -228,7 +226,7 @@ opal_btl_usnic_ack_send(
opal_btl_usnic_check_connectivity(module, endpoint);
/* send the ACK */
opal_btl_usnic_post_segment(module, endpoint, ack);
opal_btl_usnic_post_ack(module, endpoint, ack);
/* Stats */
++module->stats.num_ack_sends;
@ -244,7 +242,6 @@ opal_btl_usnic_ack_complete(opal_btl_usnic_module_t *module,
opal_btl_usnic_ack_segment_t *ack)
{
opal_btl_usnic_ack_segment_return(module, ack);
++module->mod_channels[ack->ss_channel].sd_wqe;
}
/*****************************************************************************/

Просмотреть файл

@ -57,13 +57,11 @@ typedef struct {
/* Data from the LISTEN command message */
uint32_t ipv4_addr;
uint32_t cidrmask;
uint32_t netmask;
char ipv4_addr_str[IPV4STRADDRLEN];
uint32_t mtu;
uint32_t max_msg_size;
char *nodename;
char *if_name;
char *usnic_name;
char mac_str[MACSTRLEN];
/* File descriptor, UDP port, buffer to receive messages, and event */
int fd;
@ -121,11 +119,10 @@ typedef struct {
uint32_t src_udp_port;
agent_udp_port_listener_t *listener;
uint32_t dest_ipv4_addr; /* in network byte order */
uint32_t dest_cidrmask;
uint32_t dest_netmask;
uint32_t dest_udp_port;
struct sockaddr_in dest_sockaddr;
char *dest_nodename;
uint8_t dest_mac[6];
/* The sizes and corresponding buffers of the PING messages that
we'll send, and whether each of those PING messages have been
@ -154,13 +151,11 @@ OBJ_CLASS_DECLARATION(agent_ping_t);
static void udp_port_listener_zero(agent_udp_port_listener_t *obj)
{
obj->ipv4_addr =
obj->cidrmask =
obj->mtu = 0;
obj->netmask =
obj->max_msg_size = 0;
obj->nodename =
obj->if_name =
obj->usnic_name = NULL;
memset(obj->ipv4_addr_str, 0, sizeof(obj->ipv4_addr_str));
memset(obj->mac_str, 0, sizeof(obj->mac_str));
obj->fd = -1;
obj->udp_port = -1;
@ -192,9 +187,6 @@ static void udp_port_listener_destructor(agent_udp_port_listener_t *obj)
if (NULL != obj->nodename) {
free(obj->nodename);
}
if (NULL != obj->if_name) {
free(obj->if_name);
}
if (NULL != obj->usnic_name) {
free(obj->usnic_name);
}
@ -365,7 +357,7 @@ static bool agent_thread_is_ping_expected(opal_btl_usnic_module_t *module,
opal_btl_usnic_endpoint_t *ep;
ep = container_of(item, opal_btl_usnic_endpoint_t,
endpoint_endpoint_li);
if (src_ipv4_addr == ep->endpoint_remote_addr.ipv4_addr) {
if (src_ipv4_addr == ep->endpoint_remote_modex.ipv4_addr) {
found = true;
break;
}
@ -503,7 +495,7 @@ static void agent_thread_receive_ping(int fd, short flags, void *context)
socklen_t addrlen = sizeof(src_addr);
while (1) {
numbytes = recvfrom(listener->fd, listener->buffer, listener->mtu, 0,
numbytes = recvfrom(listener->fd, listener->buffer, listener->max_msg_size, 0,
&src_addr, &addrlen);
if (numbytes > 0) {
break;
@ -622,21 +614,19 @@ static void agent_thread_cmd_listen(agent_ipc_listener_t *ipc_listener)
}
udp_listener->module = cmd.module;
udp_listener->mtu = cmd.mtu;
udp_listener->max_msg_size = cmd.max_msg_size;
udp_listener->ipv4_addr = cmd.ipv4_addr;
udp_listener->cidrmask = cmd.cidrmask;
udp_listener->if_name = strdup(cmd.if_name);
udp_listener->netmask = cmd.netmask;
udp_listener->usnic_name = strdup(cmd.usnic_name);
/* Fill in the ipv4_addr_str and mac_str. Since we don't have the
IPv4 address in sockaddr_in form, it's not worth using
/* Fill in the ipv4_addr_str. Since we don't have the IPv4
address in sockaddr_in form, it's not worth using
inet_ntop() */
opal_btl_usnic_snprintf_ipv4_addr(udp_listener->ipv4_addr_str,
sizeof(udp_listener->ipv4_addr_str),
cmd.ipv4_addr, cmd.cidrmask);
opal_btl_usnic_sprintf_mac(udp_listener->mac_str, cmd.mac);
cmd.ipv4_addr, cmd.netmask);
udp_listener->buffer = malloc(udp_listener->mtu);
udp_listener->buffer = malloc(udp_listener->max_msg_size);
if (NULL == udp_listener->buffer) {
OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
ABORT("Out of memory");
@ -676,10 +666,10 @@ static void agent_thread_cmd_listen(agent_ipc_listener_t *ipc_listener)
udp_listener->udp_port = ntohs(inaddr.sin_port);
opal_output_verbose(20, USNIC_OUT,
"usNIC connectivity agent listening on %s:%d, (%s/%s)",
"usNIC connectivity agent listening on %s:%d, (%s)",
udp_listener->ipv4_addr_str,
udp_listener->udp_port,
udp_listener->usnic_name, udp_listener->if_name);
udp_listener->usnic_name);
/* Set the "don't fragment" bit on outgoing frames because we
want MTU-sized messages to get through successfully to the
@ -696,7 +686,7 @@ static void agent_thread_cmd_listen(agent_ipc_listener_t *ipc_listener)
/* Set the send and receive buffer sizes to our MTU size */
int temp;
temp = (int) udp_listener->mtu;
temp = (int) udp_listener->max_msg_size;
if ((ret = setsockopt(udp_listener->fd, SOL_SOCKET, SO_RCVBUF,
&temp, sizeof(temp))) < 0 ||
(ret = setsockopt(udp_listener->fd, SOL_SOCKET, SO_SNDBUF,
@ -736,7 +726,7 @@ static void agent_thread_send_ping(int fd, short flags, void *context)
char dest_ipv4_addr_str[IPV4STRADDRLEN];
opal_btl_usnic_snprintf_ipv4_addr(dest_ipv4_addr_str,
sizeof(dest_ipv4_addr_str),
ap->dest_ipv4_addr, ap->dest_cidrmask);
ap->dest_ipv4_addr, ap->dest_netmask);
/* If we got all the ACKs for this ping, then move this ping from
the "pending" list to the "results" list. We can also free the
@ -778,20 +768,16 @@ static void agent_thread_send_ping(int fd, short flags, void *context)
topic = "connectivity error: small bad, large bad";
}
char mac_str[MACSTRLEN], ipv4_addr_str[IPV4STRADDRLEN];
char ipv4_addr_str[IPV4STRADDRLEN];
opal_btl_usnic_snprintf_ipv4_addr(ipv4_addr_str, sizeof(ipv4_addr_str),
ap->dest_ipv4_addr,
ap->dest_cidrmask);
opal_btl_usnic_sprintf_mac(mac_str, ap->dest_mac);
ap->dest_netmask);
opal_show_help("help-mpi-btl-usnic.txt", topic, true,
opal_process_info.nodename,
ap->listener->ipv4_addr_str,
ap->listener->usnic_name,
ap->listener->if_name,
ap->listener->mac_str,
ap->dest_nodename,
ipv4_addr_str,
mac_str,
ap->sizes[0],
ap->sizes[1]);
opal_btl_usnic_exit(NULL);
@ -800,12 +786,12 @@ static void agent_thread_send_ping(int fd, short flags, void *context)
time_t t = time(NULL);
opal_output_verbose(20, USNIC_OUT,
"usNIC connectivity pinging %s:%d (%s) from %s (%s/%s) at %s",
"usNIC connectivity pinging %s:%d (%s) from %s (%s) at %s",
dest_ipv4_addr_str,
ntohs(ap->dest_sockaddr.sin_port),
ap->dest_nodename,
ap->listener->ipv4_addr_str,
ap->listener->if_name, ap->listener->usnic_name,
ap->listener->usnic_name,
ctime(&t));
/* Send the ping messages to the peer */
@ -888,12 +874,11 @@ static void agent_thread_cmd_ping(agent_ipc_listener_t *ipc_listener)
ap->src_udp_port = cmd.src_udp_port;
ap->listener = udp_listener;
ap->dest_ipv4_addr = cmd.dest_ipv4_addr;
ap->dest_cidrmask = cmd.dest_cidrmask;
ap->dest_netmask = cmd.dest_netmask;
ap->dest_udp_port = cmd.dest_udp_port;
ap->dest_sockaddr.sin_family = AF_INET;
ap->dest_sockaddr.sin_addr.s_addr = cmd.dest_ipv4_addr;
ap->dest_sockaddr.sin_port = htons(cmd.dest_udp_port);
memcpy(ap->dest_mac, cmd.dest_mac, 6);
ap->dest_nodename = strdup(cmd.dest_nodename);
/* The first message we send will be "short" (a simple control
@ -906,8 +891,9 @@ static void agent_thread_cmd_ping(agent_ipc_listener_t *ipc_listener)
all IP options are enabled, which is 60 bytes), and then also
subtract off the UDP header (which is 8 bytes). So we need to
subtract off 68 bytes from the MTU, and that's the largest ping
payload we can send. */
ap->sizes[1] = cmd.mtu - 68;
payload we can send.
max_msg_size allows for minimal UDP header, be more conservative */
ap->sizes[1] = cmd.max_msg_size - (68 - 42);
/* Allocate a buffer for each size. Make sure the smallest size
is at least sizeof(agent_udp_message_t). */

Просмотреть файл

@ -22,7 +22,6 @@
#include "opal_stdint.h"
#include "opal/threads/mutex.h"
#include "opal/mca/event/event.h"
#include "opal/mca/dstore/dstore.h"
#include "opal/util/output.h"
#include "opal/util/fd.h"
@ -150,6 +149,7 @@ int opal_btl_usnic_connectivity_listen(opal_btl_usnic_module_t *module)
{
/* If connectivity checking is not enabled, do nothing */
if (!mca_btl_usnic_component.connectivity_enabled) {
module->local_modex.connectivity_udp_port = 0;
return OPAL_SUCCESS;
}
@ -164,9 +164,9 @@ int opal_btl_usnic_connectivity_listen(opal_btl_usnic_module_t *module)
/* Send the LISTEN command parameters */
opal_btl_usnic_connectivity_cmd_listen_t cmd = {
.module = NULL,
.ipv4_addr = module->local_addr.ipv4_addr,
.cidrmask = module->local_addr.cidrmask,
.mtu = module->local_addr.mtu
.ipv4_addr = module->local_modex.ipv4_addr,
.netmask = module->local_modex.netmask,
.max_msg_size = module->local_modex.max_msg_size
};
/* Only the MPI process who is also the agent will send the
pointer value (it doesn't make sense otherwise) */
@ -177,10 +177,8 @@ int opal_btl_usnic_connectivity_listen(opal_btl_usnic_module_t *module)
/* Ensure to NULL-terminate the passed strings */
strncpy(cmd.nodename, opal_process_info.nodename,
CONNECTIVITY_NODENAME_LEN - 1);
strncpy(cmd.if_name, module->if_name, CONNECTIVITY_IFNAME_LEN - 1);
strncpy(cmd.usnic_name, ibv_get_device_name(module->device),
strncpy(cmd.usnic_name, module->fabric_info->fabric_attr->name,
CONNECTIVITY_IFNAME_LEN - 1);
memcpy(cmd.mac, module->local_addr.mac, 6);
if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(cmd), &cmd)) {
OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
@ -199,7 +197,7 @@ int opal_btl_usnic_connectivity_listen(opal_btl_usnic_module_t *module)
/* Get the UDP port number that was received */
assert(CONNECTIVITY_AGENT_CMD_LISTEN == reply.cmd);
module->local_addr.connectivity_udp_port = reply.udp_port;
module->local_modex.connectivity_udp_port = reply.udp_port;
return OPAL_SUCCESS;
}
@ -207,9 +205,9 @@ int opal_btl_usnic_connectivity_listen(opal_btl_usnic_module_t *module)
int opal_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port,
uint32_t dest_ipv4_addr,
uint32_t dest_cidrmask, int dest_port,
uint8_t dest_mac[6], char *dest_nodename,
size_t mtu)
uint32_t dest_netmask, int dest_port,
char *dest_nodename,
size_t max_msg_size)
{
/* If connectivity checking is not enabled, do nothing */
if (!mca_btl_usnic_component.connectivity_enabled) {
@ -229,13 +227,12 @@ int opal_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port,
.src_ipv4_addr = src_ipv4_addr,
.src_udp_port = src_port,
.dest_ipv4_addr = dest_ipv4_addr,
.dest_cidrmask = dest_cidrmask,
.dest_netmask = dest_netmask,
.dest_udp_port = dest_port,
.mtu = mtu
.max_msg_size = max_msg_size
};
/* Ensure to NULL-terminate the passed string */
strncpy(cmd.dest_nodename, dest_nodename, CONNECTIVITY_NODENAME_LEN - 1);
memcpy(cmd.dest_mac, dest_mac, 6);
if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(cmd), &cmd)) {
OPAL_ERROR_LOG(OPAL_ERR_IN_ERRNO);
@ -272,7 +269,7 @@ int opal_btl_usnic_connectivity_unlisten(opal_btl_usnic_module_t *module)
/* Send the UNLISTEN command parameters */
opal_btl_usnic_connectivity_cmd_unlisten_t cmd = {
.ipv4_addr = module->local_addr.ipv4_addr,
.ipv4_addr = module->local_modex.ipv4_addr,
};
if (OPAL_SUCCESS != opal_fd_write(agent_fd, sizeof(cmd), &cmd)) {

117
opal/mca/btl/usnic/btl_usnic_compat.c Обычный файл
Просмотреть файл

@ -0,0 +1,117 @@
/*
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#if BTL_IN_OPAL
#include "opal_config.h"
#else
#include "ompi_config.h"
#endif
#include "opal/mca/mca.h"
#include "opal_stdint.h"
#include "btl_usnic_compat.h"
#include "btl_usnic_endpoint.h"
/************************************************************************/
/* v1.9 and beyond */
#if (OPAL_MAJOR_VERSION == 1 && OPAL_MINOR_VERSION >= 9) || \
(OPAL_MAJOR_VERSION >= 2)
#include "opal/util/proc.h"
/**************************
* usNIC BTL-specific functions to hide differences between master and
* v1.8
**************************/
/*
 * Modex "send" shim for v1.9 and beyond.
 *
 * Expands OPAL_MODEX_SEND with PMIX_SYNC_REQD and PMIX_REMOTE,
 * forwarding component, modexes and size unchanged.  *rc is handed to
 * the macro as its first argument (presumably the status output --
 * confirm against the macro definition).
 */
void usnic_compat_modex_send(int *rc,
mca_base_component_t *component,
opal_btl_usnic_modex_t *modexes,
size_t size)
{
OPAL_MODEX_SEND(*rc, PMIX_SYNC_REQD, PMIX_REMOTE, component,
modexes, size);
}
/*
 * Modex "receive" shim for v1.9 and beyond.
 *
 * Expands OPAL_MODEX_RECV for the given component and proc.  The
 * modexes out-pointer is cast to (uint8_t **) because that is the
 * type the macro is given here; size is forwarded unchanged.  *rc is
 * handed to the macro as its first argument (presumably the status
 * output -- confirm against the macro definition).
 */
void usnic_compat_modex_recv(int *rc,
mca_base_component_t *component,
opal_proc_t *proc,
opal_btl_usnic_modex_t **modexes,
size_t *size)
{
OPAL_MODEX_RECV(*rc, component, proc, (uint8_t**) modexes, size);
}
/*
 * Fold a process name into one 64-bit value: the jobid shifted left
 * by 32 bits, plus the vpid.
 */
uint64_t usnic_compat_rte_hash_name(opal_process_name_t *pname)
{
    const uint64_t job_bits = ((uint64_t) pname->jobid) << 32;

    return job_bits + pname->vpid;
}
/*
 * Return the printable form of *pname as produced by OPAL_NAME_PRINT.
 * Note that the name is passed to the macro by value (dereferenced).
 * NOTE(review): the lifetime of the returned string is whatever
 * OPAL_NAME_PRINT provides -- confirm before holding on to it.
 */
const char *usnic_compat_proc_name_print(opal_process_name_t *pname)
{
return OPAL_NAME_PRINT(*pname);
}
/************************************************************************/
/* v1.7 and v1.8 */
#elif (OPAL_MAJOR_VERSION == 1 && OPAL_MINOR_VERSION >= 7)
#include "ompi/proc/proc.h"
#include "ompi/mca/rte/rte.h"
#include "ompi/mca/rte/base/base.h"
#include "ompi/runtime/ompi_module_exchange.h"
/**************************
* Replicate functions that exist on master
**************************/
/*
 * v1.7/v1.8 stand-in for the function of the same name on master.
 * Returns the proc's proc_hostname member directly; no copy is made,
 * so the caller must not free the result.
 */
char* opal_get_proc_hostname(opal_proc_t *proc)
{
return proc->proc_hostname;
}
/**************************
* usNIC BTL-specific functions to hide differences between master and
* v1.8
**************************/
/*
 * Modex "send" shim for v1.7 and v1.8.
 *
 * Forwards component, modexes and size to ompi_modex_send() and
 * stores its return value in *rc.
 */
void usnic_compat_modex_send(int *rc,
mca_base_component_t *component,
struct opal_btl_usnic_modex_t *modexes,
size_t size)
{
*rc = ompi_modex_send(component, modexes, size);
}
/*
 * Modex "receive" shim for v1.7 and v1.8.
 *
 * Forwards to ompi_modex_recv() and stores its return value in *rc.
 * The modexes out-pointer is passed through a (void *) cast; size is
 * forwarded unchanged.
 */
void usnic_compat_modex_recv(int *rc,
mca_base_component_t *component,
opal_proc_t *proc,
struct opal_btl_usnic_modex_t **modexes,
size_t *size)
{
*rc = ompi_modex_recv(component, proc, (void*) modexes, size);
}
/*
 * v1.7/v1.8: the 64-bit hash of a process name is whatever
 * ompi_rte_hash_name() returns for it.
 */
uint64_t usnic_compat_rte_hash_name(opal_process_name_t *pname)
{
return ompi_rte_hash_name(pname);
}
/*
 * v1.7/v1.8: return the printable form of the name as produced by
 * OMPI_NAME_PRINT, which is given the pointer itself here.
 * NOTE(review): the lifetime of the returned string is whatever
 * OMPI_NAME_PRINT provides -- confirm before holding on to it.
 */
const char *usnic_compat_proc_name_print(opal_process_name_t *pname)
{
return OMPI_NAME_PRINT(pname);
}
#endif

Просмотреть файл

@ -23,15 +23,47 @@
/* OMPI_ERROR_LOG and friends */
# include "opal/util/error.h"
/* PMIX / modex stuff */
# include "opal/mca/pmix/pmix.h"
/* Proc stuff */
# include "opal/util/proc.h"
# define USNIC_OUT opal_btl_base_framework.framework_output
/* JMS Really want to be able to get the job size somehow... But for
now, so that we can compile, just set it to a constant :-( */
# define USNIC_MCW_SIZE 16
# define USNIC_MCW_SIZE 2
#if OPAL_HAVE_HWLOC
# define proc_bound() (NULL != opal_process_info.cpuset ? 1 : 0)
#else
# define proc_bound() 0
#endif
# define USNIC_BTL_DEFAULT_VERSION(name) MCA_BTL_DEFAULT_VERSION(name)
# define USNIC_SEND_LOCAL des_local
# define USNIC_SEND_LOCAL_COUNT des_local_count
# define USNIC_SEND_REMOTE des_remote
# define USNIC_SEND_REMOTE_COUNT des_remote_count
# define USNIC_RECV_LOCAL des_local
# define USNIC_RECV_LOCAL_COUNT des_local_count
# define USNIC_RECV_REMOTE des_remote
# define USNIC_RECV_REMOTE_COUNT des_remote_count
# define USNIC_PUT_LOCAL des_local
# define USNIC_PUT_LOCAL_COUNT des_local_count
# define USNIC_PUT_REMOTE des_remote
# define USNIC_PUT_REMOTE_COUNT des_remote_count
/*
 * Performance critical; needs to be inline
 *
 * Yields 1 when both the jobid and the vpid of a and b match, and 0
 * otherwise (the bool result is widened to the int return type).
 *
 * NOTE(review): the v1.7/v1.8 variant of this function returns the
 * result of ompi_rte_compare_name_fields() instead.  If that routine
 * uses a "0 means equal" convention, the two variants have opposite
 * polarity -- confirm against the callers.
 */
static inline int
usnic_compat_proc_name_compare(opal_process_name_t a,
opal_process_name_t b)
{
return (bool) (a.jobid == b.jobid && a.vpid == b.vpid);
}
/************************************************************************/
@ -42,9 +74,96 @@
/* OMPI_ERROR_LOG and friends */
# include "ompi/mca/rte/rte.h"
/* Proc stuff */
# include "ompi/proc/proc.h"
# define USNIC_OUT ompi_btl_base_framework.framework_output
# define USNIC_MCW_SIZE ompi_process_info.num_procs
# define proc_bound() (ompi_rte_proc_is_bound)
# define opal_proc_local_get() ompi_proc_local()
# define opal_process_info orte_process_info
# define opal_proc_t ompi_proc_t
# define opal_process_name_t ompi_process_name_t
# define opal_btl_usnic_modex_t ompi_btl_usnic_modex_t
# define opal_btl_usnic_component_t ompi_btl_usnic_component_t
# define opal_btl_usnic_module_t ompi_btl_usnic_module_t
# define opal_btl_usnic_endpoint_t ompi_btl_usnic_endpoint_t
# define opal_btl_usnic_endpoint_t_class ompi_btl_usnic_endpoint_t_class
# define opal_btl_usnic_frag_t ompi_btl_usnic_frag_t
# define opal_btl_usnic_frag_t_class ompi_btl_usnic_frag_t_class
# define opal_btl_usnic_send_frag_t ompi_btl_usnic_send_frag_t
# define opal_btl_usnic_send_frag_t_class ompi_btl_usnic_send_frag_t_class
# define opal_btl_usnic_large_send_frag_t ompi_btl_usnic_large_send_frag_t
# define opal_btl_usnic_large_send_frag_t_class ompi_btl_usnic_large_send_frag_t_class
# define opal_btl_usnic_small_send_frag_t ompi_btl_usnic_small_send_frag_t
# define opal_btl_usnic_small_send_frag_t_class ompi_btl_usnic_small_send_frag_t_class
# define opal_btl_usnic_put_dest_frag_t ompi_btl_usnic_put_dest_frag_t
# define opal_btl_usnic_put_dest_frag_t_class ompi_btl_usnic_put_dest_frag_t_class
# define opal_btl_usnic_rx_buf_t ompi_btl_usnic_rx_buf_t
# define opal_btl_usnic_rx_buf_t_class ompi_btl_usnic_rx_buf_t_class
# define opal_btl_usnic_segment_t ompi_btl_usnic_segment_t
# define opal_btl_usnic_segment_t_class ompi_btl_usnic_segment_t_class
# define opal_btl_usnic_frag_segment_t ompi_btl_usnic_frag_segment_t
# define opal_btl_usnic_frag_segment_t_class ompi_btl_usnic_frag_segment_t_class
# define opal_btl_usnic_chunk_segment_t ompi_btl_usnic_chunk_segment_t
# define opal_btl_usnic_chunk_segment_t_class ompi_btl_usnic_chunk_segment_t_class
# define opal_btl_usnic_recv_segment_t ompi_btl_usnic_recv_segment_t
# define opal_btl_usnic_recv_segment_t_class ompi_btl_usnic_recv_segment_t_class
# define opal_btl_usnic_ack_segment_t ompi_btl_usnic_ack_segment_t
# define opal_btl_usnic_ack_segment_t_class ompi_btl_usnic_ack_segment_t_class
# define opal_btl_usnic_graph_t ompi_btl_usnic_graph_t
# define opal_btl_usnic_run_tests ompi_btl_usnic_run_tests
# define USNIC_SEND_LOCAL des_src
# define USNIC_SEND_LOCAL_COUNT des_src_cnt
# define USNIC_SEND_REMOTE des_dst
# define USNIC_SEND_REMOTE_COUNT des_dst_cnt
# define USNIC_RECV_LOCAL des_dst
# define USNIC_RECV_LOCAL_COUNT des_dst_cnt
# define USNIC_RECV_REMOTE des_src
# define USNIC_RECV_REMOTE_COUNT des_src_cnt
# define USNIC_PUT_LOCAL des_dst
# define USNIC_PUT_LOCAL_COUNT des_dst_cnt
# define USNIC_PUT_REMOTE des_dst
# define USNIC_PUT_REMOTE_COUNT des_dst_cnt
# define USNIC_COMPAT_BASE_VERSION \
MCA_BTL_BASE_VERSION_2_0_0, \
.mca_type_name = "btl", \
.mca_type_major_version = OMPI_MAJOR_VERSION, \
.mca_type_minor_version = OMPI_MINOR_VERSION, \
.mca_type_release_version = OMPI_RELEASE_VERSION
# define USNIC_BTL_DEFAULT_VERSION(name) \
USNIC_COMPAT_BASE_VERSION, \
.mca_component_name = name, \
.mca_component_major_version = OPAL_MAJOR_VERSION, \
.mca_component_minor_version = OPAL_MINOR_VERSION, \
.mca_component_release_version = OPAL_RELEASE_VERSION
#define OPAL_BTL_USNIC_UNIT_TESTS OMPI_BTL_USNIC_UNIT_TESTS
/*
 * Performance critical; needs to be inline
 *
 * Returns whatever ompi_rte_compare_name_fields() yields when asked
 * to compare all fields (OMPI_RTE_CMP_ALL) of a and b.
 *
 * NOTE(review): the master variant of this function returns 1 when
 * the names are equal and 0 otherwise.  If
 * ompi_rte_compare_name_fields() uses a "0 means equal" convention,
 * the two variants have opposite polarity -- confirm against the
 * callers.
 */
static inline int
usnic_compat_proc_name_compare(opal_process_name_t a,
opal_process_name_t b)
{
return ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, &a, &b);
}
/*
* Replicate functions that exist on master
*/
char* opal_get_proc_hostname(opal_proc_t *proc);
/************************************************************************/
@ -67,4 +186,25 @@
OPAL_FREE_LIST_RETURN(list_, item_)
#endif
/************************************************************************
* Common to all versions
************************************************************************/
/* Forward declare to avoid #include ordering complications */
struct opal_btl_usnic_modex_t;
void usnic_compat_modex_send(int *rc,
mca_base_component_t *component,
struct opal_btl_usnic_modex_t *modexes,
size_t size);
void usnic_compat_modex_recv(int *rc,
mca_base_component_t *component,
opal_proc_t *proc,
struct opal_btl_usnic_modex_t **modexes,
size_t *size);
uint64_t usnic_compat_rte_hash_name(opal_process_name_t *pname);
const char *usnic_compat_proc_name_print(opal_process_name_t *pname);
#endif /* BTL_USNIC_COMPAT_H */

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -14,10 +14,17 @@
#include "opal_config.h"
#include "opal/util/show_help.h"
#include "opal/util/proc.h"
#include "btl_usnic_util.h"
#if BTL_IN_OPAL
#include "opal/util/proc.h"
#else
#include "ompi/proc/proc.h"
#endif
#include "btl_usnic_compat.h"
#include "btl_usnic_module.h"
#include "btl_usnic_proc.h"
#include "btl_usnic_util.h"
/**
@ -120,12 +127,10 @@ enum {
typedef struct {
void *module;
uint32_t ipv4_addr;
uint32_t cidrmask;
uint32_t mtu;
uint32_t netmask;
uint32_t max_msg_size;
char nodename[CONNECTIVITY_NODENAME_LEN];
char if_name[CONNECTIVITY_IFNAME_LEN];
char usnic_name[CONNECTIVITY_IFNAME_LEN];
uint8_t mac[6];
} opal_btl_usnic_connectivity_cmd_listen_t;
/*
@ -154,11 +159,10 @@ typedef struct {
uint32_t src_ipv4_addr;
uint32_t src_udp_port;
uint32_t dest_ipv4_addr;
uint32_t dest_cidrmask;
uint32_t dest_netmask;
uint32_t dest_udp_port;
uint32_t mtu;
uint32_t max_msg_size;
char dest_nodename[CONNECTIVITY_NODENAME_LEN];
uint8_t dest_mac[6];
} opal_btl_usnic_connectivity_cmd_ping_t;
/**
@ -185,7 +189,7 @@ int opal_btl_usnic_connectivity_client_init(void);
* This routine will request the new listen from the agent, and wait
* for the agent to reply with the UDP port that is being used/was
* created. The UDP listening port will then be stuffed in
* module->local_addr.connectivity_udp_port (i.e., data that will be
* module->local_modex.connectivity_udp_port (i.e., data that will be
* sent in the modex).
*
* It is safe to call this function even if the connectivity check is
@ -200,11 +204,10 @@ int opal_btl_usnic_connectivity_listen(struct opal_btl_usnic_module_t *module);
* @param[in] src_ipv4_addr The source module IPv4 address
* @param[in] src_port The source module listening UDP port
* @param[in] dest_ipv4_addr The destination IPv4 address
* @param[in] dest_cidrmask The destination CIDR mask
* @param[in] dest_netmask The destination netmask
* @param[in] dest_port The destination UDP port
* @param[in] dest_mac The destination MAC address
* @param[in] dest_nodename The destination server name
* @param[in] mtu The max ping message size to send
* @param[in] max_msg_size The max ping message size to send
*
* @returns OPAL_SUCCESS or an OPAL error code.
*
@ -220,9 +223,9 @@ int opal_btl_usnic_connectivity_listen(struct opal_btl_usnic_module_t *module);
*/
int opal_btl_usnic_connectivity_ping(uint32_t src_ipv4_addr, int src_port,
uint32_t dest_ipv4_addr,
uint32_t dest_cidrmask, int dest_port,
uint8_t *dest_mac, char *dest_nodename,
size_t mtu);
uint32_t dest_netmask, int dest_port,
char *dest_nodename,
size_t max_msg_size);
/**
* Tell the agent to stop listening on the given IP address.
@ -280,14 +283,13 @@ opal_btl_usnic_check_connectivity(opal_btl_usnic_module_t *module,
{
if (OPAL_LIKELY(mca_btl_usnic_component.connectivity_enabled) &&
OPAL_UNLIKELY(!endpoint->endpoint_connectivity_checked)) {
opal_btl_usnic_connectivity_ping(module->local_addr.ipv4_addr,
module->local_addr.connectivity_udp_port,
endpoint->endpoint_remote_addr.ipv4_addr,
endpoint->endpoint_remote_addr.cidrmask,
endpoint->endpoint_remote_addr.connectivity_udp_port,
endpoint->endpoint_remote_addr.mac,
opal_btl_usnic_connectivity_ping(module->local_modex.ipv4_addr,
module->local_modex.connectivity_udp_port,
endpoint->endpoint_remote_modex.ipv4_addr,
endpoint->endpoint_remote_modex.netmask,
endpoint->endpoint_remote_modex.connectivity_udp_port,
opal_get_proc_hostname(endpoint->endpoint_proc->proc_opal),
endpoint->endpoint_remote_addr.mtu);
endpoint->endpoint_remote_modex.max_msg_size);
endpoint->endpoint_connectivity_checked = true;
}
}

Просмотреть файл

@ -55,12 +55,10 @@ static void endpoint_construct(mca_btl_base_endpoint_t* endpoint)
endpoint->endpoint_connectivity_checked = false;
endpoint->endpoint_on_all_endpoints = false;
for (i=0; i<USNIC_NUM_CHANNELS; ++i) {
endpoint->endpoint_remote_addr.qp_num[i] = 0;
for (i = 0; i < USNIC_NUM_CHANNELS; ++i) {
endpoint->endpoint_remote_modex.ports[i] = 0;
endpoint->endpoint_remote_addrs[i] = FI_ADDR_NOTAVAIL;
}
endpoint->endpoint_remote_addr.gid.global.subnet_prefix = 0;
endpoint->endpoint_remote_addr.gid.global.interface_id = 0;
endpoint->endpoint_remote_ah = NULL;
endpoint->endpoint_send_credits = 8;
@ -76,9 +74,9 @@ static void endpoint_construct(mca_btl_base_endpoint_t* endpoint)
/* clear sent/received sequence number array */
memset(endpoint->endpoint_sent_segs, 0,
sizeof(endpoint->endpoint_sent_segs));
sizeof(endpoint->endpoint_sent_segs));
memset(endpoint->endpoint_rcvd_segs, 0,
sizeof(endpoint->endpoint_rcvd_segs));
sizeof(endpoint->endpoint_rcvd_segs));
/*
* Make a new OPAL hotel for this module
@ -110,7 +108,6 @@ static void endpoint_construct(mca_btl_base_endpoint_t* endpoint)
static void endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
{
int rc;
opal_btl_usnic_proc_t *proc;
if (endpoint->endpoint_ack_needed) {
@ -143,14 +140,6 @@ static void endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
}
free(endpoint->endpoint_rx_frag_info);
if (NULL != endpoint->endpoint_remote_ah) {
rc = ibv_destroy_ah(endpoint->endpoint_remote_ah);
if (rc) {
BTL_ERROR(("failed to ibv_destroy_ah, err=%d (%s)",
rc, strerror(rc)));
}
}
}
OBJ_CLASS_INSTANCE(opal_btl_usnic_endpoint_t,

Просмотреть файл

@ -22,7 +22,7 @@
#ifndef OPAL_BTL_USNIC_ENDPOINT_H
#define OPAL_BTL_USNIC_ENDPOINT_H
#include <infiniband/verbs.h>
#include <rdma/fabric.h>
#include "opal/class/opal_list.h"
#include "opal/class/opal_hotel.h"
@ -64,18 +64,19 @@ typedef enum opal_btl_usnic_channel_id_t {
USNIC_NUM_CHANNELS
} opal_btl_usnic_channel_id_t;
typedef struct opal_btl_usnic_addr_t {
union ibv_gid gid;
uint32_t qp_num[USNIC_NUM_CHANNELS];
typedef struct opal_btl_usnic_modex_t {
/* Stored in network order */
uint32_t ipv4_addr;
uint32_t cidrmask;
/* Stored in host order */
uint32_t ports[USNIC_NUM_CHANNELS];
uint32_t netmask;
/* Stored in host order */
uint32_t connectivity_udp_port;
uint32_t link_speed_mbps;
uint16_t mtu;
uint16_t max_msg_size;
opal_btl_usnic_seq_t isn;
uint8_t mac[6];
uint8_t use_udp;
} opal_btl_usnic_addr_t;
uint32_t protocol;
} opal_btl_usnic_modex_t;
struct opal_btl_usnic_send_segment_t;
struct opal_btl_usnic_proc_t;
@ -114,7 +115,7 @@ typedef struct opal_btl_usnic_rx_frag_info_t {
* connectionless, so no connection is ever established.
*/
typedef struct mca_btl_base_endpoint_t {
opal_list_item_t super;
opal_list_item_t super;
/** BTL module that created this connection */
struct opal_btl_usnic_module_t *endpoint_module;
@ -133,53 +134,62 @@ typedef struct mca_btl_base_endpoint_t {
opal_list_item_t endpoint_ack_li;
/** Remote address information */
opal_btl_usnic_addr_t endpoint_remote_addr;
opal_btl_usnic_modex_t endpoint_remote_modex;
/** Remote address handle */
struct ibv_ah* endpoint_remote_ah;
/** Remote address handle. Need one for each
channel because each remote channel has different dest port */
fi_addr_t endpoint_remote_addrs[USNIC_NUM_CHANNELS];
/** Send-related data */
bool endpoint_ready_to_send;
opal_list_t endpoint_frag_send_queue;
int32_t endpoint_send_credits;
uint32_t endpoint_next_frag_id;
bool endpoint_ready_to_send;
opal_list_t endpoint_frag_send_queue;
int32_t endpoint_send_credits;
uint32_t endpoint_next_frag_id;
/** Receive-related data */
struct opal_btl_usnic_rx_frag_info_t *endpoint_rx_frag_info;
/** OPAL hotel to track outstanding sends */
opal_hotel_t endpoint_hotel;
opal_hotel_t endpoint_hotel;
/** Sliding window parameters for this peer */
/* Values for the current proc to send to this endpoint on the
peer proc */
opal_btl_usnic_seq_t endpoint_next_seq_to_send; /* n_t */
opal_btl_usnic_seq_t endpoint_ack_seq_rcvd; /* n_a */
opal_btl_usnic_seq_t endpoint_next_seq_to_send; /* n_t */
opal_btl_usnic_seq_t endpoint_ack_seq_rcvd; /* n_a */
struct opal_btl_usnic_send_segment_t *endpoint_sent_segs[WINDOW_SIZE];
struct opal_btl_usnic_send_segment_t *endpoint_sent_segs[WINDOW_SIZE];
/* Values for the current proc to receive from this endpoint on
the peer proc */
bool endpoint_ack_needed;
bool endpoint_ack_needed;
/* When we receive a packet that needs an ACK, set this
* to delay the ACK to allow for piggybacking
*/
uint64_t endpoint_acktime;
uint64_t endpoint_acktime;
opal_btl_usnic_seq_t endpoint_next_contig_seq_to_recv; /* n_r */
opal_btl_usnic_seq_t endpoint_highest_seq_rcvd; /* n_s */
opal_btl_usnic_seq_t endpoint_next_contig_seq_to_recv; /* n_r */
opal_btl_usnic_seq_t endpoint_highest_seq_rcvd; /* n_s */
bool endpoint_rcvd_segs[WINDOW_SIZE];
uint32_t endpoint_rfstart;
bool endpoint_rcvd_segs[WINDOW_SIZE];
uint32_t endpoint_rfstart;
bool endpoint_connectivity_checked;
bool endpoint_on_all_endpoints;
bool endpoint_connectivity_checked;
bool endpoint_on_all_endpoints;
} mca_btl_base_endpoint_t;
typedef mca_btl_base_endpoint_t opal_btl_usnic_endpoint_t;
OBJ_CLASS_DECLARATION(opal_btl_usnic_endpoint_t);
/*
* Helper struct for the asynchronous creation of fi_addr array
*/
typedef struct {
opal_btl_usnic_endpoint_t *endpoint;
opal_btl_usnic_channel_id_t channel_id;
} opal_btl_usnic_addr_context_t;
/*
* Flush all pending sends and resends from an endpoint
*/

Просмотреть файл

@ -1,78 +0,0 @@
/*
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/util/output.h"
#include "opal/mca/btl/base/base.h"
#include "btl_usnic_compat.h"
#include "btl_usnic_ext.h"
/*
* Global variable of usnic extension function pointers
*/
opal_btl_usnic_ext_fns_t opal_btl_usnic_ext;
/*
 * Initialize opal_btl_usnic_ext
 *
 * Probes the given verbs context for the usnic extensions and, when
 * they are present and speak a lookup ABI we understand, fills in the
 * global opal_btl_usnic_ext function-pointer table.  On every failure
 * path the table is left entirely zeroed.
 *
 * @param[in] context  verbs device context to probe
 */
void opal_btl_usnic_ext_init(struct ibv_context *context)
{
    int rc;
    struct ibv_port_attr attr;

    /* The extension table is overlaid on the port attributes that the
       probe returns, so it must fit inside them. */
    assert(sizeof(opal_btl_usnic_ext) <= sizeof(attr));

    memset(&opal_btl_usnic_ext, 0, sizeof(opal_btl_usnic_ext));

    /* Start from a known-zero attr: if the plugin reports success
       without actually filling the struct in (see below), the magic
       check must look at zeros, not at indeterminate stack contents. */
    memset(&attr, 0, sizeof(attr));

    /* See if this context supports the usnic extensions.  Do the
       magic query port on port number 42 (which is THE ANSWER).  If
       it works, we'll get rc==0 and the magic number in the struct
       will be set.  Note, however, that due to a bug in early
       versions of libusnic_verbs, we *may* get rc==0 even if it
       doesn't work, which is why we also must check for the magic
       value, too. */
    rc = ibv_query_port(context, 42, &attr);
    if (0 == rc) {
        /* Only trust the returned bytes when the query succeeded */
        memcpy(&opal_btl_usnic_ext, &attr, sizeof(opal_btl_usnic_ext));
    }
    if (0 != rc || USNIC_PORT_QUERY_MAGIC != opal_btl_usnic_ext.qpt.magic) {
        /* If the probe fails, we must re-memset() the function
           pointer block */
        memset(&opal_btl_usnic_ext, 0, sizeof(opal_btl_usnic_ext));
        opal_output_verbose(5, USNIC_OUT,
                            "btl:usnic: verbs plugin does not support extensions");
        return;
    }

    /* If the libusnic_verbs plugin under the verbs API supporting
       this context supports the usnic extensions, it'll return 0==rc
       and give us a function that we can use to look up other usnic
       verb extension function pointers.  If the lookup_version is one
       that we understand, use it to look up the extensions we care
       about. */
    opal_output_verbose(5, USNIC_OUT,
                        "btl:usnic: verbs plugin has extension lookup ABI version %d",
                        opal_btl_usnic_ext.qpt.lookup_version);
    if (1 != opal_btl_usnic_ext.qpt.lookup_version) {
        /* If the probe fails, we must re-memset() the function
           pointer block, because it may/will return junk in the qpt */
        memset(&opal_btl_usnic_ext, 0, sizeof(opal_btl_usnic_ext));
        opal_output_verbose(5, USNIC_OUT,
                            "btl:usnic: unrecognized lookup ABI version"
                            " (I only recognize version 1) "
                            " -- extensions ignored");
        return;
    }
    opal_output_verbose(5, USNIC_OUT,
                        "btl:usnic: BTL recognizes this lookup ABI -- yay!");

    /* Resolve each extension by name.  The casts through (void **)
       store the object pointer returned by lookup() into
       function-pointer members. */
    *(void **) (&opal_btl_usnic_ext.enable_udp) =
        opal_btl_usnic_ext.qpt.lookup("enable_udp");
    *(void **) (&opal_btl_usnic_ext.get_ud_header_len) =
        opal_btl_usnic_ext.qpt.lookup("get_ud_header_len");
}

Просмотреть файл

@ -1,62 +0,0 @@
/*
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OPAL_BTL_USNIC_EXT_H
#define OPAL_BTL_USNIC_EXT_H
#include "opal_config.h"
#include <infiniband/verbs.h>
#include "opal_stdint.h"
typedef void *(*opal_btl_usnic_dlsym_fn_t)(const char *name);
/*
 * Leading portion of the extension block that the magic port query
 * fills in.
 */
typedef struct {
/* ABI version of the lookup function */
int lookup_version;
/* Expected to equal USNIC_PORT_QUERY_MAGIC when the probe worked */
uint64_t magic;
/* Resolves an extension function by its string name */
opal_btl_usnic_dlsym_fn_t lookup;
} opal_btl_usnic_query_port_table_t;
#define USNIC_PORT_QUERY_MAGIC (0x43494e7375534355ULL)
/*
* Tells libusnic_verbs to enable UDP support.
*/
typedef int (*opal_btl_usnic_enable_udp_fn_t)(struct ibv_context *context);
/*
* Find out what the UD header length is
*/
typedef int (*opal_btl_usnic_get_ud_header_len_fn_t)(struct ibv_context *context,
uint8_t port_num);
/*
 * Struct usnic extension function pointers
 */
typedef struct {
/* Probe header (lookup ABI version, magic number, lookup function) */
opal_btl_usnic_query_port_table_t qpt;
/* Tells libusnic_verbs to enable UDP support */
opal_btl_usnic_enable_udp_fn_t enable_udp;
/* Reports the UD header length for a given port */
opal_btl_usnic_get_ud_header_len_fn_t get_ud_header_len;
} opal_btl_usnic_ext_fns_t;
/*
* Global variable of usnic extension function pointers
*/
extern opal_btl_usnic_ext_fns_t opal_btl_usnic_ext;
/*
* Function to initialize the global variable of usnic extension
* function pointers
*/
void opal_btl_usnic_ext_init(struct ibv_context *ctx);
#endif /* OPAL_BTL_USNIC_EXT_H */

Просмотреть файл

@ -12,8 +12,6 @@
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -33,29 +31,22 @@
static void
common_send_seg_helper(
opal_btl_usnic_send_segment_t *seg)
opal_btl_usnic_send_segment_t *seg,
int offset)
{
opal_btl_usnic_segment_t *bseg;
bseg = &seg->ss_base;
bseg->us_btl_header = (opal_btl_usnic_btl_header_t *)bseg->us_list.ptr;
bseg->us_btl_header->sender = mca_btl_usnic_component.my_name;
/* build verbs work request descriptor */
seg->ss_send_desc.wr_id = (unsigned long) seg;
seg->ss_send_desc.sg_list = bseg->us_sg_entry;
seg->ss_send_desc.num_sge = 1;
seg->ss_send_desc.opcode = IBV_WR_SEND;
seg->ss_send_desc.next = NULL;
seg->ss_send_desc.send_flags = IBV_SEND_SIGNALED;
bseg->us_btl_header = (opal_btl_usnic_btl_header_t *)
(((char*) bseg->us_list.ptr) + offset);
bseg->us_btl_header->sender = mca_btl_usnic_component.my_hashed_rte_name;
seg->ss_send_posted = 0;
seg->ss_ack_pending = false;
/* verbs SG entry, len will be filled in just before send */
bseg->us_sg_entry[0].addr = (unsigned long) bseg->us_btl_header;
/* send ptr, len will be filled in just before send */
seg->ss_ptr = (uint8_t *)bseg->us_btl_header;
}
static void
@ -68,7 +59,7 @@ chunk_seg_constructor(
bseg->us_type = OPAL_BTL_USNIC_SEG_CHUNK;
/* some more common initialization */
common_send_seg_helper(seg);
common_send_seg_helper(seg, mca_btl_usnic_component.transport_header_len);
/* payload starts next byte beyond BTL chunk header */
bseg->us_payload.raw = (uint8_t *)(bseg->us_btl_chunk_header + 1);
@ -86,7 +77,7 @@ frag_seg_constructor(
bseg->us_type = OPAL_BTL_USNIC_SEG_FRAG;
/* some more common initialization */
common_send_seg_helper(seg);
common_send_seg_helper(seg, mca_btl_usnic_component.transport_header_len);
/* payload starts next byte beyond BTL header */
bseg->us_payload.raw = (uint8_t *)(bseg->us_btl_header + 1);
@ -104,14 +95,14 @@ ack_seg_constructor(
bseg->us_type = OPAL_BTL_USNIC_SEG_ACK;
/* some more common initialization */
common_send_seg_helper(ack);
common_send_seg_helper(ack, mca_btl_usnic_component.transport_header_len);
/* ACK value embedded in BTL header */
bseg->us_btl_header->payload_type = OPAL_BTL_USNIC_PAYLOAD_TYPE_ACK;
bseg->us_btl_header->payload_len = 0;
bseg->us_btl_header->ack_present = 1;
bseg->us_sg_entry[0].length = sizeof(bseg->us_btl_header);
ack->ss_len = sizeof(bseg->us_btl_header);
}
@ -127,22 +118,14 @@ recv_seg_constructor(
/* on receive, BTL header starts after protocol header */
seg->rs_protocol_header = bseg->us_list.ptr;
bseg->us_btl_header = (opal_btl_usnic_btl_header_t *)(
((char *)seg->rs_protocol_header) +
OPAL_BTL_USNIC_PROTO_HDR_SZ);
((char *) seg->rs_protocol_header) +
mca_btl_usnic_component.transport_header_len);
/* initialize verbs work request */
seg->rs_recv_desc.wr_id = (unsigned long) seg;
seg->rs_recv_desc.sg_list = bseg->us_sg_entry;
seg->rs_recv_desc.num_sge = 1;
/* verbs SG entry, len filled in by caller b/c we don't have value */
bseg->us_sg_entry[0].addr = (unsigned long) seg->rs_protocol_header;
/* initialize mca descriptor */
seg->rs_desc.des_local = &seg->rs_segment;
seg->rs_desc.des_local_count = 1;
seg->rs_desc.des_remote = NULL;
seg->rs_desc.des_remote_count = 0;
/* initialize descriptor */
seg->rs_desc.USNIC_RECV_LOCAL = &seg->rs_segment;
seg->rs_desc.USNIC_RECV_LOCAL_COUNT = 1;
seg->rs_desc.USNIC_RECV_REMOTE = NULL;
seg->rs_desc.USNIC_RECV_REMOTE_COUNT = 0;
/*
* This pointer is only correct for incoming segments of type
@ -161,12 +144,12 @@ send_frag_constructor(opal_btl_usnic_send_frag_t *frag)
/* Fill in source descriptor */
desc = &frag->sf_base.uf_base;
desc->des_local = frag->sf_base.uf_local_seg;
desc->USNIC_SEND_LOCAL = frag->sf_base.uf_local_seg;
frag->sf_base.uf_local_seg[0].seg_len = 0;
frag->sf_base.uf_local_seg[1].seg_len = 0;
desc->des_local_count = 2;
desc->des_remote = frag->sf_base.uf_remote_seg;
desc->des_remote_count = 0;
desc->USNIC_SEND_LOCAL_COUNT = 2;
desc->USNIC_SEND_REMOTE = frag->sf_base.uf_remote_seg;
desc->USNIC_SEND_REMOTE_COUNT = 0;
desc->order = MCA_BTL_NO_ORDER;
desc->des_flags = 0;
@ -182,8 +165,9 @@ send_frag_destructor(opal_btl_usnic_send_frag_t *frag)
/* make sure nobody twiddled these values after the constructor */
desc = &frag->sf_base.uf_base;
assert(desc->des_local == frag->sf_base.uf_local_seg);
assert(desc->USNIC_SEND_LOCAL == frag->sf_base.uf_local_seg);
assert(0 == frag->sf_base.uf_local_seg[0].seg_len);
/* PML may change desc->des_remote to point elsewhere, cannot assert that it
* still points to our embedded segment */
@ -205,7 +189,6 @@ small_send_frag_constructor(opal_btl_usnic_small_send_frag_t *frag)
fseg->ss_parent_frag = (struct opal_btl_usnic_send_frag_t *)frag;
frag->ssf_base.sf_base.uf_type = OPAL_BTL_USNIC_FRAG_SMALL_SEND;
frag->ssf_segment.ss_send_desc.send_flags = IBV_SEND_SIGNALED;
/* save data pointer for PML */
frag->ssf_base.sf_base.uf_local_seg[0].seg_addr.pval =
@ -245,15 +228,15 @@ put_dest_frag_constructor(opal_btl_usnic_put_dest_frag_t *pfrag)
pfrag->uf_type = OPAL_BTL_USNIC_FRAG_PUT_DEST;
/* point dest to our utility segment */
pfrag->uf_base.des_local = pfrag->uf_remote_seg;
pfrag->uf_base.des_local_count = 1;
pfrag->uf_base.USNIC_PUT_LOCAL = pfrag->uf_remote_seg;
pfrag->uf_base.USNIC_PUT_LOCAL_COUNT = 1;
}
static void
put_dest_frag_destructor(opal_btl_usnic_put_dest_frag_t *pfrag)
{
assert(pfrag->uf_base.des_local == pfrag->uf_remote_seg);
assert(1 == pfrag->uf_base.des_local_count);
assert(pfrag->uf_base.USNIC_PUT_LOCAL == pfrag->uf_remote_seg);
assert(1 == pfrag->uf_base.USNIC_PUT_LOCAL_COUNT);
}
OBJ_CLASS_INSTANCE(opal_btl_usnic_segment_t,

Просмотреть файл

@ -12,8 +12,6 @@
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -26,12 +24,9 @@
#define OPAL_BTL_USNIC_FRAG_ALIGN (8)
#include <infiniband/verbs.h>
#include "btl_usnic.h"
#include "btl_usnic_module.h"
BEGIN_C_DECLS
/*
@ -78,32 +73,24 @@ typedef enum {
} opal_btl_usnic_seg_type_t;
static inline const char *
usnic_seg_type(opal_btl_usnic_seg_type_t t)
usnic_seg_type_str(opal_btl_usnic_seg_type_t t)
{
switch (t) {
case OPAL_BTL_USNIC_SEG_ACK: return "ACK";
case OPAL_BTL_USNIC_SEG_FRAG: return "FRAG";
case OPAL_BTL_USNIC_SEG_ACK: return "ACK";
case OPAL_BTL_USNIC_SEG_FRAG: return "FRAG";
case OPAL_BTL_USNIC_SEG_CHUNK: return "CHUNK";
case OPAL_BTL_USNIC_SEG_RECV: return "RECV";
default: return "unknown";
case OPAL_BTL_USNIC_SEG_RECV: return "RECV";
default: return "unknown";
}
}
typedef struct opal_btl_usnic_reg_t {
mca_mpool_base_registration_t base;
struct ibv_mr* mr;
struct fid_mr *mr;
} opal_btl_usnic_reg_t;
/* UDP headers are always 42 bytes long */
#define OPAL_BTL_USNIC_UDP_HDR_SZ (42)
#define OPAL_BTL_USNIC_PROTO_HDR_SZ \
(mca_btl_usnic_component.use_udp ? \
OPAL_BTL_USNIC_UDP_HDR_SZ : \
sizeof(struct ibv_grh))
/**
* usnic header type
*/
@ -120,8 +107,8 @@ typedef enum {
*/
typedef struct {
/* process name of the sender */
opal_process_name_t sender;
/* Hashed RTE process name of the sender */
uint64_t sender;
/* Sliding window sequence number (echoed back in an ACK). */
opal_btl_usnic_seq_t pkt_seq;
@ -165,9 +152,6 @@ typedef struct opal_btl_usnic_segment_t {
opal_btl_usnic_seg_type_t us_type;
/* allow for 2 SG entries */
struct ibv_sge us_sg_entry[2];
/* header for chunked frag is different */
union {
opal_btl_usnic_btl_header_t *uus_btl_header;
@ -195,12 +179,12 @@ typedef struct opal_btl_usnic_recv_segment_t {
/* receive segments have protocol header prepended */
uint8_t *rs_protocol_header;
size_t rs_len;
struct opal_btl_usnic_recv_segment_t *rs_next;
opal_btl_usnic_endpoint_t *rs_endpoint;
/* verbs recv desc */
struct ibv_recv_wr rs_recv_desc;
} opal_btl_usnic_recv_segment_t;
/**
@ -210,8 +194,8 @@ typedef struct opal_btl_usnic_recv_segment_t {
typedef struct opal_btl_usnic_send_segment_t {
opal_btl_usnic_segment_t ss_base;
/* verbs send desc */
struct ibv_send_wr ss_send_desc;
uint8_t *ss_ptr;
size_t ss_len;
/* channel upon which send was posted */
opal_btl_usnic_channel_id_t ss_channel;
@ -297,7 +281,6 @@ typedef struct opal_btl_usnic_large_send_frag_t {
/* Shortcut member macros. Access uf_src_seg array instead of the descriptor's
* des_src ptr to save a deref. */
#define lsf_des_src lsf_base.sf_base.uf_local_seg
#define lsf_des_local_cnt lsf_base.sf_base.uf_base.des_local_count
/**
* small send fragment
@ -471,14 +454,11 @@ opal_btl_usnic_frag_return(
}
lfrag->lsf_pack_on_the_fly = false;
if (2 == lfrag->lsf_des_local_cnt &&
/* JMS This should never happen any more, right? */
if (2 == lfrag->lsf_base.sf_base.uf_base.USNIC_SEND_LOCAL_COUNT &&
NULL == lfrag->lsf_des_src[1].seg_addr.pval) {
opal_convertor_cleanup(&lfrag->lsf_base.sf_convertor);
}
} else if (frag->uf_type == OPAL_BTL_USNIC_FRAG_SMALL_SEND) {
opal_btl_usnic_small_send_frag_t *sfrag;
sfrag = (opal_btl_usnic_small_send_frag_t *)frag;
sfrag->ssf_segment.ss_send_desc.send_flags &= ~IBV_SEND_INLINE;
}
OMPI_FREE_LIST_RETURN_MT(frag->uf_freelist, &(frag->uf_base.super));
@ -533,7 +513,6 @@ opal_btl_usnic_chunk_segment_alloc(
seg = (opal_btl_usnic_send_segment_t*) item;
seg->ss_channel = USNIC_DATA_CHANNEL;
seg->ss_send_desc.send_flags = IBV_SEND_SIGNALED;
assert(seg);
assert(OPAL_BTL_USNIC_SEG_CHUNK == seg->ss_base.us_type);
@ -568,7 +547,6 @@ opal_btl_usnic_ack_segment_alloc(opal_btl_usnic_module_t *module)
ack = (opal_btl_usnic_ack_segment_t*) item;
ack->ss_channel = USNIC_PRIORITY_CHANNEL;
ack->ss_send_desc.send_flags = IBV_SEND_SIGNALED;
assert(ack);
assert(OPAL_BTL_USNIC_SEG_ACK == ack->ss_base.us_type);
@ -590,36 +568,6 @@ opal_btl_usnic_ack_segment_return(
OMPI_FREE_LIST_RETURN_MT(&(module->ack_segs), &(ack->ss_base.us_list));
}
/*
 * Compute how many bytes a FRAG receive segment is expected to occupy on
 * the wire: protocol header + BTL header + the payload length recorded in
 * the BTL header.
 */
static inline uint32_t
opal_btl_usnic_frag_seg_proto_size(opal_btl_usnic_recv_segment_t *rseg)
{
    opal_btl_usnic_segment_t *seg = &rseg->rs_base;
    size_t total;

    MSGDEBUG1_OUT("us_type=%d\n", seg->us_type);

    /* Only meaningful for segments whose header is marked as a FRAG */
    assert(OPAL_BTL_USNIC_PAYLOAD_TYPE_FRAG == seg->us_btl_header->payload_type);

    total = OPAL_BTL_USNIC_PROTO_HDR_SZ +
            sizeof(*seg->us_btl_header) +
            seg->us_btl_header->payload_len;

    return total;
}
/*
 * Compute how many bytes a CHUNK receive segment is expected to occupy on
 * the wire: protocol header + chunk header + the payload length recorded
 * in the chunk header.
 */
static inline uint32_t
opal_btl_usnic_chunk_seg_proto_size(opal_btl_usnic_recv_segment_t *rseg)
{
    opal_btl_usnic_segment_t *seg = &rseg->rs_base;
    size_t total;

    /* Only meaningful for segments whose header is marked as a CHUNK */
    assert(OPAL_BTL_USNIC_PAYLOAD_TYPE_CHUNK ==
           seg->us_btl_chunk_header->ch_hdr.payload_type);

    total = OPAL_BTL_USNIC_PROTO_HDR_SZ +
            sizeof(*seg->us_btl_chunk_header) +
            seg->us_btl_chunk_header->ch_hdr.payload_len;

    return total;
}
END_C_DECLS
#endif

Просмотреть файл

@ -16,9 +16,15 @@
#include "opal/constants.h"
/* mainly for BTL_ERROR */
#if BTL_IN_OPAL
#include "opal/mca/btl/btl.h"
#include "opal/mca/btl/base/base.h"
#include "opal/mca/btl/base/btl_base_error.h"
#else
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#endif
#include "btl_usnic.h"
#include "btl_usnic_graph.h"

Просмотреть файл

@ -14,8 +14,6 @@
#include "opal_config.h"
#include <infiniband/verbs.h>
/* Define this before including hwloc.h so that we also get the hwloc
verbs helper header file, too. We have to do this level of
indirection because the hwloc subsystem is a component -- we don't
@ -25,8 +23,12 @@
#include "opal/mca/hwloc/hwloc.h"
#include "opal/constants.h"
#if BTL_IN_OPAL
#include "opal/mca/btl/base/base.h"
#include "opal/mca/common/verbs/common_verbs.h"
#else
#include "ompi/mca/btl/base/base.h"
#endif
#include "btl_usnic_hwloc.h"
@ -136,27 +138,46 @@ static int find_my_numa_node(void)
*/
static hwloc_obj_t find_device_numa(opal_btl_usnic_module_t *module)
{
struct fi_usnic_info *uip;
hwloc_obj_t obj;
hwloc_bitmap_t cpuset;
/* Bozo checks */
assert(NULL != matrix);
assert(NULL != my_numa_node);
/* Find the NUMA node for the device */
cpuset = hwloc_bitmap_alloc();
if (NULL == cpuset) {
return NULL;
uip = &module->usnic_info;
/* Look for the IP device name in the hwloc topology (the usnic
device is simply an alternate API to reach the same device, so
if we find the IP device name, we've found the usNIC device) */
obj = NULL;
while (NULL != (obj = hwloc_get_next_osdev(opal_hwloc_topology, obj))) {
assert(HWLOC_OBJ_OS_DEVICE == obj->type);
if (0 == strcmp(obj->name, uip->ui_ifname)) {
break;
}
}
if (0 != hwloc_ibv_get_device_cpuset(opal_hwloc_topology,
module->device,
cpuset)) {
hwloc_bitmap_free(cpuset);
/* Did not find it */
if (NULL == obj) {
return NULL;
}
/* Search upwards to find the device's NUMA node */
/* Go upwards until we hit the NUMA node or run out of parents */
while (obj->type > HWLOC_OBJ_NODE &&
NULL != obj->parent) {
obj = obj->parent;
}
/* Make sure we ended up on the NUMA node */
if (obj->type != HWLOC_OBJ_NODE) {
opal_output_verbose(5, USNIC_OUT,
"btl:usnic:filter_numa: could not find NUMA node for %s; filtering by NUMA distance not possible",
module->fabric_info->fabric_attr->name);
return NULL;
}
obj = find_numa_node(cpuset);
hwloc_bitmap_free(cpuset);
return obj;
}
@ -209,7 +230,7 @@ int opal_btl_usnic_hwloc_distance(opal_btl_usnic_module_t *module)
opal_output_verbose(5, USNIC_OUT,
"btl:usnic:filter_numa: %s is distance %d from me",
ibv_get_device_name(module->device),
module->fabric_info->fabric_attr->name,
module->numa_distance);
}

Просмотреть файл

@ -1,321 +0,0 @@
/*
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/* This code is derived from similar code in libusnic_verbs. Consider porting
* any future bug fixes from that code to this code. The original version of
* this code was written by Xuyang Wang @ Cisco.
*/
#include "opal_config.h"
#include <errno.h>
#include <arpa/inet.h>
#include <time.h>
#include <net/if.h>
#include <netlink/netlink.h>
#include <netlink/addr.h>
#include <netlink/route/rtnl.h>
#include <netlink/route/link.h>
#include <netlink/route/addr.h>
#include <netlink/route/neighbour.h>
#include <netlink/route/neightbl.h>
#include <netlink/route/route.h>
#include "btl_usnic.h"
#include "btl_usnic_compat.h"
#include "btl_usnic_libnl_utils.h"
/* minimize divergence from the libusnic_verbs version of this code */
#define WANT_DEBUG_MSGS 0
#define usnic_err(...) opal_output_verbose(20, USNIC_OUT, __VA_ARGS__)
#define usnic_rtnl_sk_alloc opal_btl_usnic_rtnl_sk_alloc
#define usnic_rtnl_sk_free opal_btl_usnic_rtnl_sk_free
/* Routing-netlink socket wrapper (libnl v1 flavor): the libnl handle plus
 * the sequence number used to match replies to requests. */
struct usnic_rtnl_sk {
/* libnl handle; allocated and connected in usnic_rtnl_sk_alloc() */
struct nl_handle *nlh;
/* seeded from time(NULL) at allocation, pre-incremented for each request
 * sent, and compared against nlmsg_seq of every reply */
uint32_t seq;
};
/* State shared between the route lookup function and rtnl_raw_parse_cb() */
struct nl_lookup_arg {
/* value of the RTA_GATEWAY attribute of a matching route; 0 otherwise */
uint32_t nh_addr;
/* interface index the route is expected to go out of */
int oif;
/* 1 when the reply's RTA_OIF equals oif, 0 otherwise */
int found;
/* 1 once a message with the expected pid and sequence number was seen */
int replied;
/* number of messages handed to the callback so far; used by the lookup
 * loop to detect that a receive call delivered nothing new */
int msg_count;
/* only written by the callback when a matching route carries RTA_METRICS */
int metric;
/* socket wrapper, needed by the callback to validate pid/seq */
struct usnic_rtnl_sk *unlsk;
};
static struct nla_policy route_policy[RTA_MAX+1];

/* Populate the attribute validation table handed to nlmsg_parse().  This is
 * done with run-time assignments rather than designated initializers
 * because OMPI v1.6 has to build as C89. */
static void init_route_policy(struct nla_policy *tbl)
{
    /* interface name: bounded string */
    tbl[RTA_IIF].type = NLA_STRING;
    tbl[RTA_IIF].maxlen = IFNAMSIZ;

    /* plain 32-bit attributes */
    tbl[RTA_OIF].type = NLA_U32;
    tbl[RTA_PRIORITY].type = NLA_U32;
    tbl[RTA_FLOW].type = NLA_U32;
    tbl[RTA_MP_ALGO].type = NLA_U32;

    /* nested attribute containers */
    tbl[RTA_METRICS].type = NLA_NESTED;
    tbl[RTA_MULTIPATH].type = NLA_NESTED;

    /* fixed-size structure: only a minimum length is enforced */
    tbl[RTA_CACHEINFO].minlen = sizeof(struct rta_cacheinfo);
}
/*
 * Callback installed for NL_CB_MSG_IN by the route lookup function.
 *
 * msg: one netlink message received on the socket
 * arg: the struct nl_lookup_arg of the lookup in progress
 *
 * Resets nh_addr/found/replied for every message, counts the message, and
 * then fills the lookup result in from an RTM_NEWROUTE reply.
 *
 * Returns NL_SKIP when the message does not carry the expected pid and
 * sequence number, NL_STOP in every other case (error or parsed reply).
 */
static int rtnl_raw_parse_cb(struct nl_msg *msg, void *arg)
{
struct nl_lookup_arg *lookup_arg = (struct nl_lookup_arg *)arg;
struct usnic_rtnl_sk *unlsk = lookup_arg->unlsk;
struct nlmsghdr *nlm_hdr = nlmsg_hdr(msg);
struct rtmsg *rtm;
struct nlattr *tb[RTA_MAX + 1];
int found = 0;
int err;
#if WANT_DEBUG_MSGS
nl_msg_dump(msg, stderr);
#endif /* WANT_DEBUG_MSGS */
/* start from a clean result; msg_count lets the caller see that a message
 * arrived even if it turns out not to be the reply */
lookup_arg->nh_addr = 0;
lookup_arg->found = 0;
lookup_arg->replied = 0;
lookup_arg->msg_count++;
/* ignore anything that is not the answer to the request we sent */
if (nlm_hdr->nlmsg_pid != nl_socket_get_local_port(unlsk->nlh)
|| nlm_hdr->nlmsg_seq != unlsk->seq) {
usnic_err("Not an expected reply msg pid: %u local pid: %u "
"msg seq: %u expected seq: %u\n",
nlm_hdr->nlmsg_pid, nl_socket_get_local_port(unlsk->nlh),
nlm_hdr->nlmsg_seq, unlsk->seq);
return NL_SKIP;
}
/* from here on the message counts as "the reply", even if it is an error */
lookup_arg->replied = 1;
if (nlm_hdr->nlmsg_type == NLMSG_ERROR) {
struct nlmsgerr *e = (struct nlmsgerr *)nlmsg_data(nlm_hdr);
/* only dereference the error payload if the message is long enough */
if (nlm_hdr->nlmsg_len >= (__u32)nlmsg_msg_size(sizeof(*e))) {
usnic_err("Received a netlink error message %d\n",
e->error);
}
else {
usnic_err("Received a truncated netlink error message\n");
}
return NL_STOP;
}
if (nlm_hdr->nlmsg_type != RTM_NEWROUTE) {
usnic_err("Received an invalid route request reply message\n");
return NL_STOP;
}
rtm = nlmsg_data(nlm_hdr);
if (rtm->rtm_family != AF_INET) {
usnic_err("RTM message contains invalid AF family\n");
return NL_STOP;
}
/* the policy table is (re)filled on every call before it is used */
init_route_policy(route_policy);
err = nlmsg_parse(nlm_hdr, sizeof(struct rtmsg), tb, RTA_MAX,
route_policy);
if (err < 0) {
usnic_err("nlmsg parse error %d\n", err);
return NL_STOP;
}
/* the route only counts if it leaves through the expected interface */
if (tb[RTA_OIF]) {
if (nla_get_u32(tb[RTA_OIF]) == (uint32_t)lookup_arg->oif)
found = 1;
else
usnic_err("Retrieved route has a different outgoing interface %d (expected %d)\n",
nla_get_u32(tb[RTA_OIF]),
lookup_arg->oif);
}
/* NOTE(review): init_route_policy() declares RTA_METRICS as NLA_NESTED,
 * yet it is read here as a plain u32 -- confirm this yields the intended
 * metric.  When the attribute is absent, lookup_arg->metric is not
 * written at all. */
if (found && tb[RTA_METRICS]) {
lookup_arg->metric = (int)nla_get_u32(tb[RTA_METRICS]);
}
if (found && tb[RTA_GATEWAY])
lookup_arg->nh_addr = nla_get_u32(tb[RTA_GATEWAY]);
lookup_arg->found = found;
return NL_STOP;
}
/* Stamp the request header with our port id and the next sequence number,
 * mark it as a NETLINK_ROUTE request, and send it.  Returns whatever
 * nl_send() returns. */
static int rtnl_send_ack_disable(struct usnic_rtnl_sk *unlsk, struct nl_msg *msg)
{
    struct nlmsghdr *hdr = nlmsg_hdr(msg);

    /* advance the sequence number first; the reply is matched against it */
    unlsk->seq += 1;
    hdr->nlmsg_seq = unlsk->seq;
    hdr->nlmsg_pid = nl_socket_get_local_port(unlsk->nlh);
    hdr->nlmsg_flags |= NLM_F_REQUEST;

    nlmsg_set_proto(msg, NETLINK_ROUTE);

    return nl_send(unlsk->nlh, msg);
}
/*
 * Put a one second receive timeout (SO_RCVTIMEO) on the netlink socket
 * behind the given handle.
 *
 * Returns the setsockopt() result: 0 on success, negative on failure.  On
 * failure the errno value is logged (the setsockopt() return value itself
 * carries no information about the cause).
 */
static int nl_set_recv_timeout(struct nl_handle *handle)
{
    struct timeval timeout;
    int saved_errno;
    int err;

    timeout.tv_sec = 1;
    timeout.tv_usec = 0;

    err = setsockopt(nl_socket_get_fd(handle), SOL_SOCKET, SO_RCVTIMEO,
                     (char *)&timeout, sizeof(timeout));
    if (err < 0) {
        /* capture errno before the logging call has a chance to change it */
        saved_errno = errno;
        usnic_err("Failed to set SO_RCVTIMEO socket option for nl socket, err %d\n",
                  saved_errno);
    }

    return err;
}
/*
 * Ask the kernel (RTM_GETROUTE) for the IPv4 route from src_addr to
 * dst_addr and check that it leaves through the interface src_ifname.
 *
 * unlsk:      routing-netlink socket from opal_btl_usnic_rtnl_sk_alloc()
 * src_ifname: name of the expected outgoing interface
 * src_addr:   source IPv4 address
 * dst_addr:   destination IPv4 address
 * metric:     if non-NULL, receives the metric reported for the route
 *             (0 when the reply carries no metric attribute)
 *
 * Returns 0 when a route through src_ifname was found.  Otherwise returns
 * non-zero: the errno value if the interface name cannot be resolved,
 * -ENOMEM if the request cannot be built, a negative libnl result if
 * sending/receiving fails, or -1 if no matching route was reported.
 */
int opal_btl_usnic_nl_ip_rt_lookup(struct usnic_rtnl_sk *unlsk,
                                   const char *src_ifname,
                                   uint32_t src_addr,
                                   uint32_t dst_addr, int *metric)
{
    struct nl_msg *nlm;
    struct rtmsg rmsg;
    struct nl_lookup_arg arg;
    int msg_cnt;
    int err;
    int oif;

    oif = if_nametoindex(src_ifname);
    if (0 == oif) {
        return errno;
    }

    /* Zero the whole argument block: the callback only writes arg.metric
     * when the reply carries RTA_METRICS, so it must not start out
     * indeterminate. */
    memset(&arg, 0, sizeof(arg));
    arg.oif = oif;
    arg.unlsk = unlsk;
    msg_cnt = 0;

    memset(&rmsg, 0, sizeof(rmsg));
    rmsg.rtm_family = AF_INET;
    rmsg.rtm_dst_len = sizeof(dst_addr)*8;
    rmsg.rtm_src_len = sizeof(src_addr)*8;

    nlm = nlmsg_alloc_simple(RTM_GETROUTE, 0);
    if (NULL == nlm) {
        usnic_err("Failed to allocate nl route message\n");
        return -ENOMEM;
    }
    if (nlmsg_append(nlm, &rmsg, sizeof(rmsg), NLMSG_ALIGNTO) < 0 ||
        nla_put_u32(nlm, RTA_DST, dst_addr) < 0 ||
        nla_put_u32(nlm, RTA_SRC, src_addr) < 0) {
        usnic_err("Failed to build nl route message\n");
        nlmsg_free(nlm);
        return -ENOMEM;
    }

    err = rtnl_send_ack_disable(unlsk, nlm);
    nlmsg_free(nlm);
    if (err < 0) {
        usnic_err("Failed to send nl route message to kernel, "
                  "error %s\n", nl_geterror());
        return err;
    }

    err = nl_socket_modify_cb(unlsk->nlh, NL_CB_MSG_IN, NL_CB_CUSTOM,
                              rtnl_raw_parse_cb, &arg);
    if (err != 0) {
        usnic_err("Failed to setup callback function, error %s\n", nl_geterror());
        return err;
    }

    while (!arg.replied) {
        err = nl_recvmsgs_default(unlsk->nlh);
        if (err < 0) {
            usnic_err("Failed to receive nl route message from "
                      "kernel, error %s\n", nl_geterror());
            return err;
        }

        /*
         * The return value of nl_recvmsgs_default does not tell whether
         * it returned because of a successful read or a socket timeout.
         * So we compare the message count before and after the call to
         * decide whether a new message arrived.  If none did, stop here
         * so the caller does not block forever.  NL_CB_MSG_IN traps
         * every received message, so there should be no premature exit.
         */
        if (msg_cnt != arg.msg_count)
            msg_cnt = arg.msg_count;
        else
            break;
    }

    if (arg.found) {
        if (metric != NULL) {
            *metric = arg.metric;
        }
        return 0;
    }
    else {
        return -1;
    }
}
/*
 * Create a routing-netlink socket wrapper: allocate it, connect a libnl
 * handle to NETLINK_ROUTE, turn off libnl's sequence checking, and apply
 * the receive timeout.
 *
 * On success stores the new wrapper in *p_sk and returns 0.  On failure
 * everything acquired so far is released, *p_sk is not written, and a
 * negative value is returned.
 */
int usnic_rtnl_sk_alloc(struct usnic_rtnl_sk **p_sk)
{
    struct usnic_rtnl_sk *sk;
    struct nl_handle *handle;
    int rc;

    sk = calloc(1, sizeof(*sk));
    if (NULL == sk) {
        usnic_err("Failed to allocate usnic_rtnl_sk struct\n");
        return -ENOMEM;
    }

    handle = nl_handle_alloc();
    if (NULL == handle) {
        usnic_err("Failed to allocate nl handle\n");
        free(sk);
        return -ENOMEM;
    }

    rc = nl_connect(handle, NETLINK_ROUTE);
    if (rc < 0) {
        usnic_err("Failed to connnect netlink route socket\n");
        nl_handle_destroy(handle);
        free(sk);
        return rc;
    }

    nl_disable_sequence_check(handle);

    rc = nl_set_recv_timeout(handle);
    if (rc < 0) {
        /* connected by now, so close before destroying */
        nl_close(handle);
        nl_handle_destroy(handle);
        free(sk);
        return rc;
    }

    sk->nlh = handle;
    sk->seq = time(NULL);
    *p_sk = sk;

    return 0;
}
/* Close and destroy the libnl handle, then release the wrapper itself.
 * Passing NULL is a no-op. */
void usnic_rtnl_sk_free(struct usnic_rtnl_sk* u_nlsk)
{
    if (NULL == u_nlsk) {
        return;
    }

    nl_close(u_nlsk->nlh);
    nl_handle_destroy(u_nlsk->nlh);
    free(u_nlsk);
}

Просмотреть файл

@ -1,305 +0,0 @@
/*
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/* This code is derived from similar code in libusnic_verbs. Consider porting
* any future bug fixes from that code to this code. The original version of
* this code was written by Xuyang Wang @ Cisco.
*/
#include "opal_config.h"
#include <errno.h>
#include <arpa/inet.h>
#include <net/if.h>
/* the netlink headers have many C99-isms that cause warnings on the v1.6
* branch when --enable-picky is passed to configure :( */
#include <netlink/netlink.h>
#include <netlink/utils.h>
#include <netlink/addr.h>
#include <netlink/route/rtnl.h>
#include <netlink/route/nexthop.h>
#include <netlink/route/route.h>
#include "btl_usnic.h"
#include "btl_usnic_compat.h"
#include "btl_usnic_libnl_utils.h"
/* minimize divergence from the libusnic_verbs version of this code */
#define WANT_DEBUG_MSGS 0
#define usnic_err(...) opal_output_verbose(20, USNIC_OUT, __VA_ARGS__)
#define usnic_rtnl_sk_alloc opal_btl_usnic_rtnl_sk_alloc
#define usnic_rtnl_sk_free opal_btl_usnic_rtnl_sk_free
/* Routing-netlink socket wrapper (libnl-3 flavor): the libnl socket plus
 * the sequence number used to match replies to requests. */
struct usnic_rtnl_sk {
/* libnl-3 socket; allocated and connected in opal_btl_usnic_rtnl_sk_alloc() */
struct nl_sock *sock;
/* seeded from time(NULL) at allocation, pre-incremented for each request
 * sent, and compared against nlmsg_seq of every reply */
uint32_t seq;
};
/* State shared between the route lookup function and rtnl_raw_parse_cb() */
struct nl_lookup_arg {
/* value of the RTA_GATEWAY attribute of a matching route; 0 otherwise */
uint32_t nh_addr;
/* interface index the route is expected to go out of */
int oif;
/* 1 when the reply's RTA_OIF equals oif, 0 otherwise */
int found;
/* 1 once a message with the expected pid and sequence number was seen */
int replied;
/* zeroed by the lookup function; the callback in this file never
 * updates it */
int msg_count;
/* only written by the callback when a matching route carries RTA_METRICS */
int metric;
/* socket wrapper, needed by the callback to validate pid/seq */
struct usnic_rtnl_sk *unlsk;
};
static struct nla_policy route_policy[RTA_MAX+1];

/* Populate the attribute validation table handed to nlmsg_parse().  This is
 * done with run-time assignments rather than designated initializers
 * because OMPI v1.6 has to build as C89. */
static void init_route_policy(struct nla_policy *tbl)
{
    /* interface name: bounded string */
    tbl[RTA_IIF].type = NLA_STRING;
    tbl[RTA_IIF].maxlen = IFNAMSIZ;

    /* plain 32-bit attributes */
    tbl[RTA_OIF].type = NLA_U32;
    tbl[RTA_PRIORITY].type = NLA_U32;
    tbl[RTA_FLOW].type = NLA_U32;
    tbl[RTA_MP_ALGO].type = NLA_U32;

    /* nested attribute containers */
    tbl[RTA_METRICS].type = NLA_NESTED;
    tbl[RTA_MULTIPATH].type = NLA_NESTED;

    /* fixed-size structure: only a minimum length is enforced */
    tbl[RTA_CACHEINFO].minlen = sizeof(struct rta_cacheinfo);
}
/*
 * Callback installed for NL_CB_MSG_IN by the route lookup function.
 *
 * msg: one netlink message received on the socket
 * arg: the struct nl_lookup_arg of the lookup in progress
 *
 * Resets nh_addr/found/replied for every message and then fills the
 * lookup result in from an RTM_NEWROUTE reply.
 *
 * Returns NL_SKIP when the message does not carry the expected pid and
 * sequence number, NL_STOP in every other case (error or parsed reply).
 */
static int rtnl_raw_parse_cb(struct nl_msg *msg, void *arg)
{
struct nl_lookup_arg *lookup_arg = (struct nl_lookup_arg *)arg;
struct usnic_rtnl_sk *unlsk = lookup_arg->unlsk;
struct nlmsghdr *nlm_hdr = nlmsg_hdr(msg);
struct rtmsg *rtm;
struct nlattr *tb[RTA_MAX + 1];
int found = 0;
int err;
#if WANT_DEBUG_MSGS
nl_msg_dump(msg, stderr);
#endif /* WANT_DEBUG_MSGS */
/* start from a clean result for every message */
lookup_arg->nh_addr = 0;
lookup_arg->found = 0;
lookup_arg->replied = 0;
/* ignore anything that is not the answer to the request we sent */
if (nlm_hdr->nlmsg_pid != nl_socket_get_local_port(unlsk->sock)
|| nlm_hdr->nlmsg_seq != unlsk->seq) {
usnic_err("Not an expected reply msg pid: %u local pid: %u "
"msg seq: %u expected seq: %u\n",
nlm_hdr->nlmsg_pid, nl_socket_get_local_port(unlsk->sock),
nlm_hdr->nlmsg_seq, unlsk->seq);
return NL_SKIP;
}
/* from here on the message counts as "the reply", even if it is an error */
lookup_arg->replied = 1;
if (nlm_hdr->nlmsg_type == NLMSG_ERROR) {
struct nlmsgerr *e = (struct nlmsgerr *)nlmsg_data(nlm_hdr);
/* only dereference the error payload if the message is long enough */
if (nlm_hdr->nlmsg_len >= (__u32)nlmsg_size(sizeof(*e))) {
usnic_err("Received a netlink error message %d\n",
e->error);
}
else {
usnic_err("Received a truncated netlink error message\n");
}
return NL_STOP;
}
if (nlm_hdr->nlmsg_type != RTM_NEWROUTE) {
usnic_err("Received an invalid route request reply message\n");
return NL_STOP;
}
rtm = nlmsg_data(nlm_hdr);
if (rtm->rtm_family != AF_INET) {
usnic_err("RTM message contains invalid AF family\n");
return NL_STOP;
}
/* the policy table is (re)filled on every call before it is used */
init_route_policy(route_policy);
err = nlmsg_parse(nlm_hdr, sizeof(struct rtmsg), tb, RTA_MAX,
route_policy);
if (err < 0) {
usnic_err("nlmsg parse error %d\n", err);
return NL_STOP;
}
/* the route only counts if it leaves through the expected interface */
if (tb[RTA_OIF]) {
if (nla_get_u32(tb[RTA_OIF]) == (uint32_t)lookup_arg->oif)
found = 1;
else
usnic_err("Retrieved route has a different outgoing interface %d (expected %d)\n",
nla_get_u32(tb[RTA_OIF]),
lookup_arg->oif);
}
/* NOTE(review): init_route_policy() declares RTA_METRICS as NLA_NESTED,
 * yet it is read here as a plain u32 -- confirm this yields the intended
 * metric.  When the attribute is absent, lookup_arg->metric is not
 * written at all. */
if (found && tb[RTA_METRICS]) {
lookup_arg->metric = (int)nla_get_u32(tb[RTA_METRICS]);
}
if (found && tb[RTA_GATEWAY])
lookup_arg->nh_addr = nla_get_u32(tb[RTA_GATEWAY]);
lookup_arg->found = found;
return NL_STOP;
}
/* Stamp the request header with our port id and the next sequence number,
 * mark it as a NETLINK_ROUTE request, and send it.  Returns whatever
 * nl_send() returns. */
static int rtnl_send_ack_disable(struct usnic_rtnl_sk *unlsk, struct nl_msg *msg)
{
    struct nlmsghdr *hdr = nlmsg_hdr(msg);

    /* advance the sequence number first; the reply is matched against it */
    unlsk->seq += 1;
    hdr->nlmsg_seq = unlsk->seq;
    hdr->nlmsg_pid = nl_socket_get_local_port(unlsk->sock);
    hdr->nlmsg_flags |= NLM_F_REQUEST;

    nlmsg_set_proto(msg, NETLINK_ROUTE);

    return nl_send(unlsk->sock, msg);
}
/*
 * Put a one second receive timeout (SO_RCVTIMEO) on the given netlink
 * socket.
 *
 * Returns the setsockopt() result: 0 on success, negative on failure.  On
 * failure the errno value is logged (the setsockopt() return value itself
 * carries no information about the cause).
 */
static int nl_set_recv_timeout(struct nl_sock *sock)
{
    struct timeval timeout;
    int saved_errno;
    int err;

    timeout.tv_sec = 1;
    timeout.tv_usec = 0;

    err = setsockopt(nl_socket_get_fd(sock), SOL_SOCKET, SO_RCVTIMEO,
                     (char *)&timeout, sizeof(timeout));
    if (err < 0) {
        /* capture errno before the logging call has a chance to change it */
        saved_errno = errno;
        usnic_err("Failed to set SO_RCVTIMEO socket option for nl socket, err %d\n",
                  saved_errno);
    }

    return err;
}
int ompi_btl_usnic_nl_ip_rt_lookup(struct usnic_rtnl_sk *unlsk,
const char *src_ifname,
uint32_t src_addr,
uint32_t dst_addr, int *metric)
{
struct nl_msg *nlm;
struct rtmsg rmsg;
struct nl_lookup_arg arg;
int msg_cnt;
int err;
int oif;
oif = if_nametoindex(src_ifname);
if (0 == oif) {
return errno;
}
arg.nh_addr = 0;
arg.oif = oif;
arg.found = 0;
arg.replied = 0;
arg.unlsk = unlsk;
arg.msg_count = msg_cnt = 0;
memset(&rmsg, 0, sizeof(rmsg));
rmsg.rtm_family = AF_INET;
rmsg.rtm_dst_len = sizeof(dst_addr)*8;
rmsg.rtm_src_len = sizeof(src_addr)*8;
nlm = nlmsg_alloc_simple(RTM_GETROUTE, 0);
nlmsg_append(nlm, &rmsg, sizeof(rmsg), NLMSG_ALIGNTO);
nla_put_u32(nlm, RTA_DST, dst_addr);
nla_put_u32(nlm, RTA_SRC, src_addr);
err = rtnl_send_ack_disable(unlsk, nlm);
nlmsg_free(nlm);
if (err < 0) {
usnic_err("Failed to send rtnl query %s\n", nl_geterror(err));
return err;
}
err = nl_socket_modify_cb(unlsk->sock, NL_CB_MSG_IN, NL_CB_CUSTOM,
rtnl_raw_parse_cb, &arg);
if (err != 0) {
usnic_err("Failed to setup callback function, error %s\n",
nl_geterror(err));
return err;
}
while (!arg.replied) {
err = nl_recvmsgs_default(unlsk->sock);
if (err < 0) {
/* err will be returned as -NLE_AGAIN if the socket times out */
usnic_err("Failed to receive rtnl query results %s\n",
nl_geterror(err));
return err;
}
}
if (arg.found) {
if (metric != NULL) {
*metric = arg.metric;
}
return 0;
}
else {
return -1;
}
}
/*
 * Create a routing-netlink socket wrapper: allocate it, connect a libnl-3
 * socket to NETLINK_ROUTE, turn off libnl's sequence checking, and apply
 * the receive timeout.
 *
 * On success stores the new wrapper in *p_sk and returns 0.  On failure
 * everything acquired so far is released, *p_sk is not written, and a
 * negative value is returned.
 */
int opal_btl_usnic_rtnl_sk_alloc(struct usnic_rtnl_sk **p_sk)
{
    struct usnic_rtnl_sk *sk;
    struct nl_sock *nlsock;
    int rc;

    sk = calloc(1, sizeof(*sk));
    if (NULL == sk) {
        usnic_err("Failed to allocate usnic_rtnl_sk struct\n");
        return -ENOMEM;
    }

    nlsock = nl_socket_alloc();
    if (NULL == nlsock) {
        usnic_err("Failed to allocate nl socket\n");
        free(sk);
        return -ENOMEM;
    }

    rc = nl_connect(nlsock, NETLINK_ROUTE);
    if (rc < 0) {
        usnic_err("Failed to connnect netlink route socket\n");
        nl_socket_free(nlsock);
        free(sk);
        return rc;
    }

    nl_socket_disable_seq_check(nlsock);

    rc = nl_set_recv_timeout(nlsock);
    if (rc < 0) {
        /* connected by now, so close before freeing */
        nl_close(nlsock);
        nl_socket_free(nlsock);
        free(sk);
        return rc;
    }

    sk->sock = nlsock;
    sk->seq = time(NULL);
    *p_sk = sk;

    return 0;
}
/* Close and free the libnl-3 socket, then release the wrapper itself.
 * Passing NULL is a no-op. */
void opal_btl_usnic_rtnl_sk_free(struct usnic_rtnl_sk *unlsk)
{
    if (NULL == unlsk) {
        return;
    }

    nl_close(unlsk->sock);
    nl_socket_free(unlsk->sock);
    free(unlsk);
}

Просмотреть файл

@ -1,38 +0,0 @@
/*
* Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef LIBNL_UTILS_H
#define LIBNL_UTILS_H
/* This header file and btl_usnic_libnl1_utils.c/btl_usnic_libnl3_utils.c are
* here to support OS routing lookups. They use the Linux "routing netlink"
* communication subsystem (see "man 7 rtnetlink") via the "libnl" helper
* library. Unfortunately, libnl comes in two major versions: libnl (v1) and
* libnl-3 with significant API differences between them.
*
* Quick glossary to some of the abbreviations here:
* rtnl -- rtnetlink (routing netlink)
* sk -- socket
*/
struct usnic_rtnl_sk;
/* Allocates a routing-netlink socket wrapper and stores it in *p_sk.
 * Returns zero on success or a negative value on failure (-ENOMEM for
 * allocation failures, otherwise the negative result of the failing
 * libnl/socket call). */
int opal_btl_usnic_rtnl_sk_alloc(struct usnic_rtnl_sk **p_sk);
/* Closes and releases a wrapper obtained from opal_btl_usnic_rtnl_sk_alloc();
 * a NULL argument is ignored. */
void opal_btl_usnic_rtnl_sk_free(struct usnic_rtnl_sk* u_nlsk);
/* src_addr and dst_addr are IPv4 addresses in network byte order.  If
 * metric is non-NULL it receives the metric of the route that was found.
 * Returns zero on successful route lookup and non-zero otherwise: the
 * errno value when src_ifname cannot be resolved to an interface index,
 * the negative libnl result when sending or receiving the query fails, or
 * -1 when no route through src_ifname was reported. */
int opal_btl_usnic_nl_ip_rt_lookup(struct usnic_rtnl_sk *unlsk,
const char *src_ifname,
uint32_t src_addr,
uint32_t dst_addr, int *metric);
#endif /* LIBNL_UTILS_H */

Просмотреть файл

@ -1,8 +1,6 @@
/*
* Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -17,22 +15,23 @@
#include <unistd.h>
#include "opal/util/show_help.h"
#include "opal/util/proc.h"
#include "btl_usnic_compat.h"
#include "btl_usnic.h"
#include "btl_usnic_module.h"
#include "btl_usnic_util.h"
#include "btl_usnic_proc.h"
/*
* qsort helper: compare modules by IBV device name
* qsort helper: compare modules by fabric name
*/
static int map_compare_modules(const void *aa, const void *bb)
{
opal_btl_usnic_module_t *a = *((opal_btl_usnic_module_t**) aa);
opal_btl_usnic_module_t *b = *((opal_btl_usnic_module_t**) bb);
return strcmp(ibv_get_device_name(a->device),
ibv_get_device_name(b->device));
return strcmp(a->fabric_info->fabric_attr->name,
b->fabric_info->fabric_attr->name);
}
/*
@ -40,11 +39,13 @@ static int map_compare_modules(const void *aa, const void *bb)
*/
static void map_output_modules(FILE *fp)
{
size_t i;
int i;
size_t size;
opal_btl_usnic_module_t **modules;
struct fi_usnic_info *uip;
char ipv4[IPV4STRADDRLEN];
char mac[MACSTRLEN];
struct sockaddr_in *sin;
int prefix_len;
fprintf(fp, "# Devices possibly used by this process:\n");
@ -62,16 +63,20 @@ static void map_output_modules(FILE *fp)
qsort(modules, mca_btl_usnic_component.num_modules,
sizeof(opal_btl_usnic_module_t*), map_compare_modules);
/* Loop over and print the sorted module device information */
for (i = 0; i < mca_btl_usnic_component.num_modules; ++i) {
opal_btl_usnic_snprintf_ipv4_addr(ipv4, IPV4STRADDRLEN,
modules[i]->if_ipv4_addr,
modules[i]->if_cidrmask);
opal_btl_usnic_sprintf_mac(mac, modules[i]->if_mac);
uip = &modules[i]->usnic_info;
sin = modules[i]->fabric_info->src_addr;
prefix_len = usnic_netmask_to_cidrlen(uip->ui_netmask_be);
fprintf(fp, "device=%s,interface=%s,ip=%s,mac=%s,mtu=%d\n",
ibv_get_device_name(modules[i]->device),
modules[i]->if_name, ipv4, mac, modules[i]->if_mtu);
opal_btl_usnic_snprintf_ipv4_addr(ipv4, IPV4STRADDRLEN,
sin->sin_addr.s_addr,
prefix_len);
fprintf(fp, "device=%s,ip=%s,mss=%" PRIsize_t "\n",
modules[i]->fabric_info->fabric_attr->name,
ipv4, modules[i]->fabric_info->ep_attr->max_msg_size);
}
/* Free the temp array */
@ -81,7 +86,7 @@ static void map_output_modules(FILE *fp)
/************************************************************************/
/*
* qsort helper: compare endpoints by IBV device name
* qsort helper: compare endpoints by fabric name
*/
static int map_compare_endpoints(const void *aa, const void *bb)
{
@ -96,8 +101,8 @@ static int map_compare_endpoints(const void *aa, const void *bb)
return -1;
}
return strcmp(ibv_get_device_name(a->endpoint_module->device),
ibv_get_device_name(b->endpoint_module->device));
return strcmp(a->endpoint_module->fabric_info->fabric_attr->name,
b->endpoint_module->fabric_info->fabric_attr->name);
}
/*
@ -110,7 +115,6 @@ static void map_output_endpoints(FILE *fp, opal_btl_usnic_proc_t *proc)
size_t size;
opal_btl_usnic_endpoint_t **eps;
char ipv4[IPV4STRADDRLEN];
char mac[MACSTRLEN];
/* First, we must sort the endpoints on this proc by MCW rank so
that they're always output in a repeatable order. There may
@ -140,13 +144,12 @@ static void map_output_endpoints(FILE *fp, opal_btl_usnic_proc_t *proc)
}
opal_btl_usnic_snprintf_ipv4_addr(ipv4, IPV4STRADDRLEN,
eps[i]->endpoint_remote_addr.ipv4_addr,
eps[i]->endpoint_remote_addr.cidrmask);
opal_btl_usnic_sprintf_mac(mac, eps[i]->endpoint_remote_addr.mac);
eps[i]->endpoint_remote_modex.ipv4_addr,
eps[i]->endpoint_remote_modex.netmask);
fprintf(fp, "device=%s@peer_ip=%s@peer_mac=%s",
ibv_get_device_name(eps[i]->endpoint_module->device),
ipv4, mac);
fprintf(fp, "device=%s@peer_ip=%s",
eps[i]->endpoint_module->fabric_info->fabric_attr->name,
ipv4);
++num_output;
}
fprintf(fp, "\n");
@ -208,7 +211,7 @@ static void map_output_procs(FILE *fp)
/* Loop over and print the sorted module device information */
for (i = 0; i < num_procs; ++i) {
fprintf(fp, "peer=%" PRIu32 ",", procs[i]->proc_opal->proc_name.vpid);
fprintf(fp, "peer=%d,", procs[i]->proc_opal->proc_name.vpid);
fprintf(fp, "hostname=%s,", opal_get_proc_hostname(procs[i]->proc_opal));
map_output_endpoints(fp, procs[i]);
}

Просмотреть файл

@ -27,15 +27,19 @@
#include <string.h>
#endif
#include <errno.h>
#include <infiniband/verbs.h>
#include "opal/mca/base/mca_base_var.h"
#include "opal/util/argv.h"
#include "opal/constants.h"
#if BTL_IN_OPAL
#include "opal/mca/btl/btl.h"
#include "opal/mca/btl/base/base.h"
#include "opal/mca/common/verbs/common_verbs.h"
#else
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
#endif
#include "btl_usnic.h"
#include "btl_usnic_frag.h"
@ -159,7 +163,8 @@ int opal_btl_usnic_component_register(void)
static int prio_sd_num;
static int prio_rd_num;
static int cq_num;
static int max_tiny_payload;
static int udp_port_base;
static int max_tiny_msg_size;
static int eager_limit;
static int rndv_eager_limit;
static int pack_lazy_threshold;
@ -177,24 +182,24 @@ int opal_btl_usnic_component_register(void)
mca_btl_usnic_component.max_modules = (size_t) max_modules;
CHECK(reg_string("if_include",
"Comma-delimited list of devices/networks to be used (e.g. \"usnic_0,10.10.0.0/16\"; empty value means to use all available usNICs). Mutually exclusive with btl_usnic_if_exclude.",
"Comma-delimited list of usNIC devices/networks to be used (e.g. \"eth3,usnic_0,10.10.0.0/16\"; empty value means to use all available usNICs). Mutually exclusive with btl_usnic_if_exclude.",
NULL, &mca_btl_usnic_component.if_include,
REGSTR_EMPTY_OK, OPAL_INFO_LVL_1));
CHECK(reg_string("if_exclude",
"Comma-delimited list of devices/networks to be excluded (empty value means to not exclude any usNICs). Mutually exclusive with btl_usnic_if_include.",
"Comma-delimited list of usNIC devices/networks to be excluded (empty value means to not exclude any usNICs). Mutually exclusive with btl_usnic_if_include.",
NULL, &mca_btl_usnic_component.if_exclude,
REGSTR_EMPTY_OK, OPAL_INFO_LVL_1));
CHECK(reg_int("stats",
"A non-negative integer specifying the frequency at which each USNIC BTL will output statistics (default: 0 seconds, meaning that statistics are disabled)",
"A non-negative integer specifying the frequency at which each usnic BTL will output statistics (default: 0 seconds, meaning that statistics are disabled)",
0, &mca_btl_usnic_component.stats_frequency, 0,
OPAL_INFO_LVL_4));
mca_btl_usnic_component.stats_enabled =
(bool) (mca_btl_usnic_component.stats_frequency > 0);
CHECK(reg_int("stats_relative",
"If stats are enabled, output relative stats between the timestemps (vs. cumulative stats since the beginning of the job) (default: 0 -- i.e., absolute)",
"If stats are enabled, output relative stats between the timestamps (vs. cumulative stats since the beginning of the job) (default: 0 -- i.e., absolute)",
0, &stats_relative, 0, OPAL_INFO_LVL_4));
mca_btl_usnic_component.stats_relative = (bool) stats_relative;
@ -231,15 +236,19 @@ int opal_btl_usnic_component_register(void)
-1, &cq_num, REGINT_NEG_ONE_OK, OPAL_INFO_LVL_5));
mca_btl_usnic_component.cq_num = (int32_t) cq_num;
CHECK(reg_int("base_udp_port", "Base UDP port to use for usNIC communications. If 0, system will pick the port number. If non-zero, it will be added to each process' local rank to obtain the final port number (default: 0)",
0, &udp_port_base, REGINT_GE_ZERO, OPAL_INFO_LVL_5));
mca_btl_usnic_component.udp_port_base = (int) udp_port_base;
CHECK(reg_int("retrans_timeout", "Number of microseconds before retransmitting a frame",
1000, &mca_btl_usnic_component.retrans_timeout,
REGINT_GE_ONE, OPAL_INFO_LVL_5));
CHECK(reg_int("priority_limit", "Max size of \"priority\" messages (0 = use pre-set defaults; depends on number and type of devices available)",
0, &max_tiny_payload,
0, &max_tiny_msg_size,
REGINT_GE_ZERO, OPAL_INFO_LVL_5));
opal_btl_usnic_module_template.max_tiny_payload =
(size_t) max_tiny_payload;
opal_btl_usnic_module_template.max_tiny_msg_size =
(size_t) max_tiny_msg_size;
CHECK(reg_int("eager_limit", "Eager send limit (0 = use pre-set defaults; depends on number and type of devices available)",
0, &eager_limit, REGINT_GE_ZERO, OPAL_INFO_LVL_5));
@ -254,7 +263,7 @@ int opal_btl_usnic_component_register(void)
USNIC_DFLT_PACK_LAZY_THRESHOLD, &pack_lazy_threshold, REGINT_NEG_ONE_OK, OPAL_INFO_LVL_5));
mca_btl_usnic_component.pack_lazy_threshold = pack_lazy_threshold;
CHECK(reg_int("arp_timeout", "Timeout, in seconds, for the maximum delay between ARP replies when using the usNIC/UDP transport (ignored when using the usNIC/L2 transport, must be >=1)",
CHECK(reg_int("arp_timeout", "Timeout, in seconds, for the maximum delay between ARP replies (must be >=1)",
10, &mca_btl_usnic_component.arp_timeout,
REGINT_GE_ONE, OPAL_INFO_LVL_6));
@ -265,7 +274,7 @@ int opal_btl_usnic_component_register(void)
/* Default to bandwidth auto-detection */
opal_btl_usnic_module_template.super.btl_bandwidth = 0;
opal_btl_usnic_module_template.super.btl_latency = 4;
opal_btl_usnic_module_template.super.btl_latency = 2;
/* Show "cannot find route" warnings? */
mca_btl_usnic_component.show_route_failures = true;
@ -285,7 +294,7 @@ int opal_btl_usnic_component_register(void)
mca_btl_usnic_component.connectivity_ack_timeout = 250;
CHECK(reg_int("connectivity_ack_timeout",
"Timeout, in milliseconds, while waiting for an ACK while verification connectivity between usNIC devices. If 0, the connectivity check is disabled (must be >=0).",
"Timeout, in milliseconds, while waiting for an ACK while verification connectivity between usNIC interfaces. If 0, the connectivity check is disabled (must be >=0).",
mca_btl_usnic_component.connectivity_ack_timeout,
&mca_btl_usnic_component.connectivity_ack_timeout,
REGINT_GE_ZERO, OPAL_INFO_LVL_3));
@ -299,13 +308,10 @@ int opal_btl_usnic_component_register(void)
mca_btl_usnic_component.connectivity_map_prefix = NULL;
CHECK(reg_string("connectivity_map",
"Display the usNIC connectivity map. If this parameter is specified, it is the filename prefix emitted by each MPI process. The full filename emitted by each process is of the form: <prefix>-<hostname>.<pid>.<jobid>.<MCW rank>.txt.",
"Write a per-process file containing the usNIC connectivity map. If this parameter is specified, it is the filename prefix emitted by each MPI process. The full filename emitted by each process is of the form: <prefix>-<hostname>.<pid>.<jobid>.<MCW rank>.txt.",
mca_btl_usnic_component.connectivity_map_prefix,
&mca_btl_usnic_component.connectivity_map_prefix,
REGSTR_EMPTY_OK, OPAL_INFO_LVL_3));
/* Register some synonyms to the ompi common verbs component */
opal_common_verbs_mca_register(&mca_btl_usnic_component.super.btl_version);
return ret;
}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -12,8 +12,6 @@
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -26,12 +24,19 @@
#ifndef OPAL_BTL_USNIC_MODULE_H
#define OPAL_BTL_USNIC_MODULE_H
#include "opal/class/opal_pointer_array.h"
#include <rdma/fabric.h>
#include <rdma/fi_eq.h>
#include <rdma/fi_endpoint.h>
#include <rdma/fi_errno.h>
#include "opal/mca/common/verbs/common_verbs.h"
#include "opal/class/opal_pointer_array.h"
#include "btl_usnic_endpoint.h"
#include "btl_usnic_stats.h"
#include "btl_usnic_util.h"
/* In libfabric prov/usnic/src */
#include "fi_usnic.h"
/*
* Default limits.
@ -53,30 +58,30 @@ struct opal_btl_usnic_send_segment_t;
struct opal_btl_usnic_recv_segment_t;
/*
* Abstraction of a set of IB queues
* Abstraction of a set of endpoints
*/
typedef struct opal_btl_usnic_channel_t {
int chan_index;
struct ibv_cq *cq;
struct fid_cq *cq;
int chan_mtu;
int chan_max_msg_size;
int chan_rd_num;
int chan_sd_num;
/** available send WQ entries */
int32_t sd_wqe;
int credits; /* RFXXX until libfab credits fixed */
/* fastsend enabled if sd_wqe >= fastsend_wqe_thresh */
int fastsend_wqe_thresh;
/* fastsend enabled if num_credits_available >= fastsend_wqe_thresh */
unsigned fastsend_wqe_thresh;
/* pointer to receive segment whose bookkeeping has been deferred */
/** pointer to receive segment whose bookkeeping has been deferred */
struct opal_btl_usnic_recv_segment_t *chan_deferred_recv;
/** queue pair */
struct ibv_qp* qp;
/** queue pair and attributes */
struct fi_info *info;
struct fid_ep *ep;
struct ibv_recv_wr *repost_recv_head;
struct opal_btl_usnic_recv_segment_t *repost_recv_head;
/** receive segments & buffers */
ompi_free_list_t recv_segs;
@ -88,33 +93,33 @@ typedef struct opal_btl_usnic_channel_t {
} opal_btl_usnic_channel_t;
/**
* usNIC verbs BTL interface
* usnic BTL module
*/
typedef struct opal_btl_usnic_module_t {
mca_btl_base_module_t super;
/* Cache for use during component_init to associate a module with
the opal_common_verbs_port_item_t that it came from. */
opal_common_verbs_port_item_t *port;
the libfabric device that it came from. */
struct fid_fabric *fabric;
struct fid_domain *domain;
struct fi_info *fabric_info;
struct fi_usnic_ops_fabric *usnic_fabric_ops;
struct fi_usnic_ops_av *usnic_av_ops;
struct fi_usnic_info usnic_info;
struct fid_eq *dom_eq;
struct fid_eq *av_eq;
struct fid_av *av;
mca_btl_base_module_error_cb_fn_t pml_error_callback;
/* Information about the usNIC verbs device */
uint8_t port_num;
struct ibv_device *device;
struct ibv_context *device_context;
/* Information about the events */
struct event device_async_event;
bool device_async_event_active;
struct ibv_pd *pd;
int numa_distance; /* hwloc NUMA distance from this process */
/* Information about the IP interface corresponding to this USNIC
interface */
char if_name[64];
uint32_t if_ipv4_addr; /* in network byte order */
uint32_t if_cidrmask; /* X in "/X" CIDR addr fmt, host byte order */
uint8_t if_mac[6];
int if_mtu;
/** local address information */
struct opal_btl_usnic_modex_t local_modex;
char if_ipv4_addr_str[IPV4STRADDRLEN];
/** desired send, receive, and completion queue entries (from MCA
params; cached here on the component because the MCA param
@ -131,16 +136,13 @@ typedef struct opal_btl_usnic_module_t {
* segment is slightly less than what can be held in frag segment due
* to fragment reassembly info.
*/
size_t tiny_mtu;
size_t max_tiny_msg_size;
size_t max_frag_payload; /* most that fits in a frag segment */
size_t max_chunk_payload; /* most that can fit in chunk segment */
size_t max_tiny_payload; /* threshold for using inline send */
/** Hash table to keep track of senders */
opal_proc_table_t senders;
/** local address information */
struct opal_btl_usnic_addr_t local_addr;
opal_hash_table_t senders;
/** list of all endpoints. Note that the main application thread
reads and writes to this list, and the connectivity agent
@ -186,7 +188,8 @@ typedef struct opal_btl_usnic_module_t {
/* abstract queue-pairs into channels */
opal_btl_usnic_channel_t mod_channels[USNIC_NUM_CHANNELS];
uint32_t qp_max_inline;
/* Number of short/erroneous packets we've received on this
interface */
uint32_t num_short_packets;
/* Performance / debugging statistics */

Просмотреть файл

@ -13,26 +13,22 @@
* reserved.
* Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include <netinet/in.h>
#include "opal_config.h"
#include <infiniband/verbs.h>
#include "opal_stdint.h"
#include "opal/util/arch.h"
#include "opal/util/show_help.h"
#include "opal/constants.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/util/proc.h"
#include "btl_usnic_compat.h"
#include "btl_usnic.h"
#include "btl_usnic_proc.h"
#include "btl_usnic_endpoint.h"
@ -133,14 +129,14 @@ opal_btl_usnic_proc_lookup_ompi(opal_proc_t* opal_proc)
*/
opal_btl_usnic_endpoint_t *
opal_btl_usnic_proc_lookup_endpoint(opal_btl_usnic_module_t *receiver,
opal_process_name_t sender_proc_name)
uint64_t sender_proc_name)
{
opal_btl_usnic_proc_t *proc;
opal_btl_usnic_endpoint_t *endpoint;
opal_list_item_t *item;
MSGDEBUG1_OUT("lookup_endpoint: recvmodule=%p sendhash=0x%" PRIx64,
(void *)receiver, sender_hashed_rte_name);
(void *)receiver, sender_proc_name);
opal_mutex_lock(&receiver->all_endpoints_lock);
for (item = opal_list_get_first(&receiver->all_endpoints);
@ -154,7 +150,8 @@ opal_btl_usnic_proc_lookup_endpoint(opal_btl_usnic_module_t *receiver,
working to give handles instead of proc names, and then
have a function pointer to perform comparisons. This would
be bad here in the critical path, though... */
if (0 == opal_compare_proc(proc->proc_opal->proc_name, sender_proc_name)) {
if (usnic_compat_rte_hash_name(&(proc->proc_opal->proc_name)) ==
sender_proc_name) {
MSGDEBUG1_OUT("lookup_endpoint: matched endpoint=%p",
(void *)endpoint);
opal_mutex_unlock(&receiver->all_endpoints_lock);
@ -167,7 +164,6 @@ opal_btl_usnic_proc_lookup_endpoint(opal_btl_usnic_module_t *receiver,
return NULL;
}
/*
* Create an opal_btl_usnic_proc_t and initialize it with modex info
* and an empty array of endpoints.
@ -195,8 +191,8 @@ static int create_proc(opal_proc_t *opal_proc,
proc->proc_opal = opal_proc;
/* query for the peer address info */
OPAL_MODEX_RECV(rc, &mca_btl_usnic_component.super.btl_version,
opal_proc, (uint8_t**)&proc->proc_modex, &size);
usnic_compat_modex_recv(&rc, &mca_btl_usnic_component.super.btl_version,
opal_proc, &proc->proc_modex, &size);
/* If this proc simply doesn't have this key, then they're not
running the usnic BTL -- just ignore them. Otherwise, show an
@ -216,13 +212,13 @@ static int create_proc(opal_proc_t *opal_proc,
return OPAL_ERROR;
}
if ((size % sizeof(opal_btl_usnic_addr_t)) != 0) {
if ((size % sizeof(opal_btl_usnic_modex_t)) != 0) {
char msg[1024];
snprintf(msg, sizeof(msg),
"sizeof(modex for peer %s data) == %d, expected multiple of %d",
OPAL_NAME_PRINT(opal_proc->proc_name),
(int) size, (int) sizeof(opal_btl_usnic_addr_t));
usnic_compat_proc_name_print(&opal_proc->proc_name),
(int) size, (int) sizeof(opal_btl_usnic_modex_t));
opal_show_help("help-mpi-btl-usnic.txt", "internal error during init",
true,
opal_process_info.nodename,
@ -234,26 +230,33 @@ static int create_proc(opal_proc_t *opal_proc,
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
}
proc->proc_modex_count = size / sizeof(opal_btl_usnic_addr_t);
if (0 == proc->proc_modex_count) {
proc->proc_endpoints = NULL;
OBJ_RELEASE(proc);
return OPAL_ERR_UNREACH;
}
/* Sanity check: ensure that the remote proc agrees with this proc
on whether we're doing UDP or not. Note that all endpoints on
the remote proc will have the same "use_udp" value, so we only
need to check one of them. */
if (proc->proc_modex[0].use_udp !=
mca_btl_usnic_component.use_udp) {
/* See if the peer has the same underlying wire protocol as me.
If not, then print an error and ignore this peer. */
// RFXXX - things are weird when i force this to fail
if (mca_btl_usnic_component.transport_protocol !=
proc->proc_modex->protocol) {
uint64_t proto;
char protostr[32];
proto = mca_btl_usnic_component.transport_protocol;
strcpy(protostr, fi_tostr(&proto, FI_TYPE_PROTOCOL));
proto = proc->proc_modex->protocol;
opal_show_help("help-mpi-btl-usnic.txt",
"transport mismatch",
true,
opal_process_info.nodename,
opal_get_proc_hostname(proc->proc_opal));
protostr,
"peer",
fi_tostr(&proto, FI_TYPE_PROTOCOL));
OBJ_RELEASE(proc);
return OPAL_ERR_BAD_PARAM;
return OPAL_ERR_UNREACH;
}
proc->proc_modex_count = size / sizeof(opal_btl_usnic_modex_t);
if (0 == proc->proc_modex_count) {
proc->proc_endpoints = NULL;
OBJ_RELEASE(proc);
return OPAL_ERR_UNREACH;
}
proc->proc_modex_claimed = (bool*)
@ -281,48 +284,50 @@ static int create_proc(opal_proc_t *opal_proc,
* (higher values indicate more desirable connections). */
static uint64_t compute_weight(
opal_btl_usnic_module_t *module,
opal_btl_usnic_addr_t *proc_modex_addr)
opal_btl_usnic_modex_t *proc_modex_addr)
{
char my_ip_string[INET_ADDRSTRLEN], peer_ip_string[INET_ADDRSTRLEN];
struct sockaddr_in sin;
struct sockaddr_in *sinp;
struct fi_usnic_info *uip;
uint32_t mynet, peernet;
int err, metric;
int err;
int metric;
uint32_t min_link_speed_gbps;
inet_ntop(AF_INET, &module->if_ipv4_addr,
uip = &module->usnic_info;
sinp = module->fabric_info->src_addr;
inet_ntop(AF_INET, &sinp->sin_addr,
my_ip_string, sizeof(my_ip_string));
inet_ntop(AF_INET, &proc_modex_addr->ipv4_addr,
peer_ip_string, sizeof(peer_ip_string));
/* Just compare the CIDR-masked IP address to see if they're on
the same network. If so, we're good. */
mynet = opal_btl_usnic_get_ipv4_subnet(module->if_ipv4_addr,
module->if_cidrmask);
peernet = opal_btl_usnic_get_ipv4_subnet(proc_modex_addr->ipv4_addr,
proc_modex_addr->cidrmask);
mynet = sinp->sin_addr.s_addr & uip->ui_netmask_be;
peernet = proc_modex_addr->ipv4_addr & proc_modex_addr->netmask;
opal_output_verbose(5, USNIC_OUT,
"btl:usnic:%s: checking my IP address/subnet (%s/%d) vs. peer (%s/%d): %s",
__func__, my_ip_string, module->if_cidrmask,
peer_ip_string, proc_modex_addr->cidrmask,
__func__, my_ip_string,
usnic_netmask_to_cidrlen(uip->ui_netmask_be),
peer_ip_string,
usnic_netmask_to_cidrlen(proc_modex_addr->netmask),
(mynet == peernet ? "match" : "DO NOT match"));
if (!mca_btl_usnic_component.use_udp) {
if (mynet != peernet) {
return WEIGHT_UNREACHABLE;
} else {
return 1; /* any positive weight is fine */
}
}
min_link_speed_gbps = MIN(module->super.btl_bandwidth,
proc_modex_addr->link_speed_mbps) / 1000;
/* Returned metric is:
* 0 - same VLAN
* 1..MAXINT - relative distance metric
* -1 - unreachable
*/
metric = 0;
err = opal_btl_usnic_nl_ip_rt_lookup(mca_btl_usnic_component.unlsk,
module->if_name,
module->if_ipv4_addr,
proc_modex_addr->ipv4_addr,
&metric);
if (0 != err) {
memset(&sin, 0, sizeof(sin));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = proc_modex_addr->ipv4_addr;
err = module->usnic_av_ops->get_distance(module->av, &sin, &metric);
if (0 != err || (0 == err && -1 == metric)) {
return 0; /* no connectivity */
}
else {
@ -578,7 +583,14 @@ static int match_modex(opal_btl_usnic_module_t *module,
* sides are always setting up the exact same graph by always putting
* the process with the lower (jobid,vpid) on the "left".
*/
proc_is_left = opal_compare_proc(proc->proc_opal->proc_name, opal_proc_local_get()->proc_name) < 0;
#if 0
proc_is_left = (proc->proc_opal->proc_name <
opal_proc_local_get()->proc_name);
#else
proc_is_left =
usnic_compat_proc_name_compare(proc->proc_opal->proc_name,
opal_proc_local_get()->proc_name);
#endif
err = create_proc_module_graph(proc, proc_is_left, &g);
if (OPAL_SUCCESS != err) {
@ -604,7 +616,8 @@ static int match_modex(opal_btl_usnic_module_t *module,
if (!proc->proc_match_exists) {
opal_output_verbose(5, USNIC_OUT, "btl:usnic:%s: unable to find any valid interface pairs for proc %s",
__func__, OPAL_NAME_PRINT(proc->proc_opal->proc_name));
__func__,
usnic_compat_proc_name_print(&proc->proc_opal->proc_name));
return OPAL_ERR_NOT_FOUND;
}
@ -623,15 +636,16 @@ static int match_modex(opal_btl_usnic_module_t *module,
* the min of the two MTUs? Another choice is to disqualify this pairing
* before running the matching algorithm on it. */
if (*index_out >= 0 &&
proc->proc_modex[*index_out].mtu != (uint16_t) module->if_mtu) {
proc->proc_modex[*index_out].max_msg_size !=
(uint16_t) module->fabric_info->ep_attr->max_msg_size) {
opal_show_help("help-mpi-btl-usnic.txt", "MTU mismatch",
true,
opal_process_info.nodename,
ibv_get_device_name(module->device),
module->if_name,
module->if_mtu,
opal_get_proc_hostname(proc->proc_opal),
proc->proc_modex[*index_out].mtu);
true,
opal_process_info.nodename,
module->fabric_info->fabric_attr->name,
module->fabric_info->ep_attr->max_msg_size,
(NULL == proc->proc_opal->proc_hostname) ?
"unknown" : proc->proc_opal->proc_hostname,
proc->proc_modex[*index_out].max_msg_size);
*index_out = -1;
return OPAL_ERR_UNREACH;
}
@ -647,6 +661,54 @@ out_free_table:
return err;
}
/*
 * Initiate insertion of a peer's address, for one channel, into the
 * module's libfabric address vector (AV).  The insert is only started
 * here; it will be polled for completion later.
 *
 * module   - local usnic module whose AV receives the address
 * endpoint - remote endpoint; its modex supplies the IPv4 address and
 *            the per-channel port, and the resulting fi_addr is stored
 *            in endpoint->endpoint_remote_addrs[channel]
 * channel  - index of the channel whose remote port is being inserted
 *
 * Returns OPAL_SUCCESS if the insert was started,
 * OPAL_ERR_OUT_OF_RESOURCE if the per-insert context could not be
 * allocated, or OPAL_ERROR (after showing a help message) if
 * fi_av_insert() did not accept the address.
 */
static int start_av_insert(opal_btl_usnic_module_t *module,
                           opal_btl_usnic_endpoint_t *endpoint,
                           int channel)
{
    int ret;
    opal_btl_usnic_modex_t *modex = &endpoint->endpoint_remote_modex;
    opal_btl_usnic_addr_context_t *context;
    struct sockaddr_in sin;
    char str[IPV4STRADDRLEN];

    /* The context is handed to fi_av_insert() so that the later
       completion can be tied back to this endpoint/channel pair.
       Bail out cleanly rather than dereferencing NULL if we are out
       of memory. */
    context = calloc(1, sizeof(*context));
    if (NULL == context) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    context->endpoint = endpoint;
    context->channel_id = channel;

    opal_btl_usnic_snprintf_ipv4_addr(str, sizeof(str), modex->ipv4_addr,
                                      modex->netmask);
    opal_output_verbose(5, USNIC_OUT,
                        "btl:usnic:start_av_insert: to channel %d at %s:%d",
                        channel, str, modex->ports[channel]);

    /* build remote address; the modex port is converted with htons(),
       while ipv4_addr is stored into s_addr unchanged */
    memset(&sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    sin.sin_port = htons(modex->ports[channel]);
    sin.sin_addr.s_addr = modex->ipv4_addr;

    ret = fi_av_insert(module->av, &sin, 1,
                       &endpoint->endpoint_remote_addrs[channel], 0, context);

    /* Did an error occur?  Exactly one address was submitted, so
       anything other than 1 is treated as failure. */
    if (1 != ret) {
        opal_show_help("help-mpi-btl-usnic.txt", "libfabric API failed",
                       true,
                       opal_process_info.nodename,
                       module->fabric_info->fabric_attr->name,
                       "fi_av_insert()", __FILE__, __LINE__,
                       ret,
                       "Failed to initiate AV insert");
        free(context);
        return OPAL_ERROR;
    }

    /* NOTE(review): on success the context is not freed here;
       presumably whoever reaps the AV completion releases it --
       confirm against the completion-handling code. */
    return OPAL_SUCCESS;
}
/*
* Create an endpoint and claim the matched modex slot
*/
@ -655,17 +717,17 @@ opal_btl_usnic_create_endpoint(opal_btl_usnic_module_t *module,
opal_btl_usnic_proc_t *proc,
opal_btl_usnic_endpoint_t **endpoint_o)
{
int err;
int rc;
int modex_index;
opal_btl_usnic_endpoint_t *endpoint;
/* look for matching modex info */
err = match_modex(module, proc, &modex_index);
if (OPAL_SUCCESS != err) {
rc = match_modex(module, proc, &modex_index);
if (OPAL_SUCCESS != rc) {
opal_output_verbose(5, USNIC_OUT,
"btl:usnic:create_endpoint: did not match usnic modex info for peer %s",
OPAL_NAME_PRINT(proc->proc_opal->proc_name));
return err;
usnic_compat_proc_name_print(&proc->proc_opal->proc_name));
return rc;
}
endpoint = OBJ_NEW(opal_btl_usnic_endpoint_t);
@ -676,22 +738,27 @@ opal_btl_usnic_create_endpoint(opal_btl_usnic_module_t *module,
/* Initialize the endpoint */
endpoint->endpoint_module = module;
assert(modex_index >= 0 && modex_index < (int)proc->proc_modex_count);
endpoint->endpoint_remote_addr = proc->proc_modex[modex_index];
endpoint->endpoint_remote_modex = proc->proc_modex[modex_index];
/* Start creating destinations; one for each channel. These
progress in the background. */
for (int i = 0; i < USNIC_NUM_CHANNELS; ++i) {
rc = start_av_insert(module, endpoint, i);
if (OPAL_SUCCESS != rc) {
OBJ_RELEASE(endpoint);
return rc;
}
}
/* Initialize endpoint sequence number info */
endpoint->endpoint_next_seq_to_send = module->local_addr.isn;
endpoint->endpoint_next_seq_to_send = module->local_modex.isn;
endpoint->endpoint_ack_seq_rcvd = endpoint->endpoint_next_seq_to_send - 1;
endpoint->endpoint_next_contig_seq_to_recv =
endpoint->endpoint_remote_addr.isn;
endpoint->endpoint_remote_modex.isn;
endpoint->endpoint_highest_seq_rcvd =
endpoint->endpoint_next_contig_seq_to_recv - 1;
endpoint->endpoint_rfstart = WINDOW_SIZE_MOD(endpoint->endpoint_next_contig_seq_to_recv);
/* Defer creating the ibv_ah. Since calling ibv_create_ah() may
trigger ARP resolution, it's better to batch all the endpoints'
calls to ibv_create_ah() together to get some parallelism. */
endpoint->endpoint_remote_ah = NULL;
/* Now claim that modex slot */
proc->proc_modex_claimed[modex_index] = true;
MSGDEBUG1_OUT("create_endpoint: module=%p claimed endpoint=%p on proc=%p (hash=0x%" PRIx64 ")\n",

Просмотреть файл

@ -12,8 +12,6 @@
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -47,7 +45,7 @@ typedef struct opal_btl_usnic_proc_t {
opal_proc_t *proc_opal;
/** Addresses received via modex for this remote proc */
opal_btl_usnic_addr_t* proc_modex;
opal_btl_usnic_modex_t* proc_modex;
/** Number of entries in the proc_modex array */
size_t proc_modex_count;
/** Whether the modex entry is "claimed" by a module or not */
@ -87,7 +85,7 @@ struct opal_btl_usnic_module_t;
opal_btl_usnic_endpoint_t *
opal_btl_usnic_proc_lookup_endpoint(struct opal_btl_usnic_module_t *receiver,
opal_process_name_t name);
uint64_t sender_hashed_rte_name);
int opal_btl_usnic_proc_match(opal_proc_t* opal_proc,
struct opal_btl_usnic_module_t *module,

Просмотреть файл

@ -23,16 +23,19 @@
#include "opal_config.h"
#include <infiniband/verbs.h>
#include <unistd.h>
#include "opal_stdint.h"
#include "opal/mca/memchecker/base/base.h"
#include "opal/constants.h"
#if BTL_IN_OPAL
#include "opal/mca/btl/btl.h"
#include "opal/mca/btl/base/base.h"
#include "opal/mca/common/verbs/common_verbs.h"
#else
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
#endif
#include "btl_usnic.h"
#include "btl_usnic_frag.h"
@ -50,8 +53,7 @@
*/
void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module,
opal_btl_usnic_recv_segment_t *seg,
opal_btl_usnic_channel_t *channel,
uint32_t l2_bytes_rcvd)
opal_btl_usnic_channel_t *channel)
{
opal_btl_usnic_segment_t *bseg;
mca_btl_active_message_callback_t* reg;
@ -61,8 +63,8 @@ void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module,
uint32_t window_index;
int rc;
#if MSGDEBUG1
char src_mac[32];
char dest_mac[32];
char local_ip[IPV4STRADDRLEN];
char remote_ip[IPV4STRADDRLEN];
#endif
bseg = &seg->rs_base;
@ -70,23 +72,34 @@ void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module,
++module->stats.num_total_recvs;
/* Valgrind help */
opal_memchecker_base_mem_defined((void*)(seg->rs_recv_desc.sg_list[0].addr),
seg->rs_recv_desc.sg_list[0].length);
opal_memchecker_base_mem_defined((void*)(seg->rs_protocol_header),
seg->rs_len);
/* Find out who sent this segment */
endpoint = seg->rs_endpoint;
if (FAKE_RECV_FRAG_DROP || OPAL_UNLIKELY(NULL == endpoint)) {
/* No idea who this was from, so drop it */
#if MSGDEBUG1
opal_output(0, "=== Unknown sender; dropped: from MAC %s to MAC %s, seq %" UDSEQ,
src_mac,
dest_mac,
opal_output(0, "=== Unknown sender; dropped: seq %" UDSEQ,
bseg->us_btl_header->pkt_seq);
#endif
++module->stats.num_unk_recvs;
goto repost_no_endpoint;
}
#if MSGDEBUG1
struct opal_btl_usnic_modex_t *modex;
modex = &module->local_modex;
opal_btl_usnic_snprintf_ipv4_addr(local_ip, sizeof(local_ip),
modex->ipv4_addr,
modex->netmask);
modex = &endpoint->endpoint_remote_modex;
opal_btl_usnic_snprintf_ipv4_addr(remote_ip, sizeof(remote_ip),
modex->ipv4_addr,
modex->netmask);
#endif
/***********************************************************************/
/* Segment is an incoming frag */
if (OPAL_BTL_USNIC_PAYLOAD_TYPE_FRAG == bseg->us_btl_header->payload_type) {
@ -107,7 +120,7 @@ void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module,
opal_output(0, "<-- Received FRAG ep %p, seq %" UDSEQ " from %s to %s: GOOD! (rel seq %d, lowest seq %" UDSEQ ", highest seq: %" UDSEQ ", rwstart %d) seg %p, module %p\n",
(void*) endpoint,
seg->rs_base.us_btl_header->pkt_seq,
src_mac, dest_mac,
remote_ip, local_ip,
window_index,
endpoint->endpoint_next_contig_seq_to_recv,
endpoint->endpoint_highest_seq_rcvd,
@ -120,14 +133,6 @@ void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module,
#endif
#endif
if (OPAL_UNLIKELY(opal_btl_usnic_frag_seg_proto_size(seg) !=
l2_bytes_rcvd)) {
BTL_ERROR(("L2 packet size and segment payload len do not agree!"
" l2_bytes_rcvd=%" PRIu32 " expected=%" PRIu32,
l2_bytes_rcvd, opal_btl_usnic_frag_seg_proto_size(seg)));
abort();
}
/* If this is not a PUT, pass this segment up to the PML.
* Be sure to get the payload length from the BTL header because
* the L2 layer may artificially inflate (or otherwise change)
@ -168,14 +173,6 @@ void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module,
int frag_index;
opal_btl_usnic_rx_frag_info_t *fip;
if (OPAL_UNLIKELY(opal_btl_usnic_chunk_seg_proto_size(seg) !=
l2_bytes_rcvd)) {
BTL_ERROR(("L2 packet size and segment payload len do not agree!"
" l2_bytes_rcvd=%" PRIu32 " expected=%" PRIu32,
l2_bytes_rcvd, opal_btl_usnic_chunk_seg_proto_size(seg)));
abort();
}
/* Is incoming sequence # ok? */
if (OPAL_UNLIKELY(opal_btl_usnic_check_rx_seq(endpoint, seg,
&window_index) != 0)) {
@ -187,7 +184,7 @@ void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module,
seg->rs_base.us_btl_chunk_header->ch_frag_id,
(void*) endpoint,
seg->rs_base.us_btl_chunk_header->ch_hdr.pkt_seq,
src_mac, dest_mac,
remote_ip, local_ip,
window_index,
endpoint->endpoint_next_contig_seq_to_recv,
endpoint->endpoint_highest_seq_rcvd,
@ -284,8 +281,8 @@ void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module,
segment.seg_addr.pval = fip->rfi_data;
segment.seg_len = fip->rfi_frag_size;
desc.des_local = &segment;
desc.des_local_count = 1;
desc.USNIC_RECV_LOCAL = &segment;
desc.USNIC_RECV_LOCAL_COUNT = 1;
/* only up to PML if this was not a put */
if (chunk_hdr->ch_hdr.put_addr == NULL) {
@ -293,8 +290,8 @@ void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module,
/* Pass this segment up to the PML */
#if MSGDEBUG2
opal_output(0, "large recv complete, pass up %p, %u bytes, tag=%d\n",
desc.des_local->seg_addr.pval,
(unsigned)desc.des_local->seg_len,
desc.USNIC_RECV_LOCAL->seg_addr.pval,
(unsigned)desc.USNIC_RECV_LOCAL->seg_len,
(int)chunk_hdr->ch_hdr.tag);
#endif
reg = mca_btl_base_active_message_trigger +
@ -342,7 +339,7 @@ void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module,
#if MSGDEBUG1
opal_output(0, " Received ACK for sequence number %" UDSEQ " from %s to %s\n",
bseg->us_btl_header->ack_seq, src_mac, dest_mac);
bseg->us_btl_header->ack_seq, remote_ip, local_ip);
#endif
opal_btl_usnic_handle_ack(endpoint, ack_seq);
@ -353,7 +350,11 @@ void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module,
/* Have no idea what the frag is; drop it */
else {
++module->stats.num_unk_recvs;
opal_output(0, "==========================unknown 2");
if (module->stats.num_unk_recvs < 10) {
opal_output(0, "unrecognized payload type %d", bseg->us_btl_header->payload_type);
opal_output(0, "base = %p, proto = %p, hdr = %p", bseg->us_list.ptr, seg->rs_protocol_header, (void*) bseg->us_btl_header);
opal_btl_usnic_dump_hex(bseg->us_list.ptr, 96+sizeof(*bseg->us_btl_header));
}
goto repost;
}
@ -368,6 +369,6 @@ void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module,
++module->stats.num_recv_reposts;
/* Add recv to linked list for reposting */
seg->rs_recv_desc.next = channel->repost_recv_head;
channel->repost_recv_head = &seg->rs_recv_desc;
seg->rs_next = channel->repost_recv_head;
channel->repost_recv_head = seg;
}

Просмотреть файл

@ -1,7 +1,5 @@
/*
* Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -12,8 +10,6 @@
#ifndef BTL_USNIC_RECV_H
#define BTL_USNIC_RECV_H
#include <infiniband/verbs.h>
#include "btl_usnic.h"
#include "btl_usnic_util.h"
#include "btl_usnic_frag.h"
@ -22,8 +18,25 @@
void opal_btl_usnic_recv_call(opal_btl_usnic_module_t *module,
opal_btl_usnic_recv_segment_t *rseg,
opal_btl_usnic_channel_t *channel,
uint32_t l2_bytes_rcvd);
opal_btl_usnic_channel_t *channel);
/*
 * Post every receive segment queued on the channel's repost list back
 * to the channel's libfabric endpoint.
 *
 * Each segment is posted with fi_recv() using its protocol-header
 * buffer and length, with the segment itself as the operation context.
 *
 * Returns 0 when the whole list was posted (the list is then empty),
 * or the first non-zero fi_recv() return code.  On failure the list
 * head is advanced to the segment that failed, so segments that were
 * already handed to libfabric are not posted a second time by a
 * later call.
 */
static inline int
opal_btl_usnic_post_recv_list(opal_btl_usnic_channel_t *channel)
{
    opal_btl_usnic_recv_segment_t *rseg;
    int rc;

    for (rseg = channel->repost_recv_head; NULL != rseg; rseg = rseg->rs_next) {
        rc = fi_recv(channel->ep, rseg->rs_protocol_header,
                     rseg->rs_len, NULL, rseg);
        if (0 != rc) {
            /* Keep only the not-yet-posted tail (starting with the
               segment that just failed) on the repost list. */
            channel->repost_recv_head = rseg;
            return rc;
        }
    }
    channel->repost_recv_head = NULL;

    return 0;
}
/*
* Given an incoming segment, lookup the endpoint that sent it
@ -43,9 +56,9 @@ lookup_sender(opal_btl_usnic_module_t *module, opal_btl_usnic_segment_t *seg)
btl_header->sender, echo back the ptr to the sender's
ompi_proc. There was limited speedup with this scheme; more
investigation is required. */
ret = opal_proc_table_get_value(&module->senders,
seg->us_btl_header->sender,
(void**) &sender);
ret = opal_hash_table_get_value_uint64(&module->senders,
seg->us_btl_header->sender,
(void**) &sender);
if (OPAL_LIKELY(OPAL_SUCCESS == ret)) {
return sender;
}
@ -55,8 +68,8 @@ lookup_sender(opal_btl_usnic_module_t *module, opal_btl_usnic_segment_t *seg)
sender = opal_btl_usnic_proc_lookup_endpoint(module,
seg->us_btl_header->sender);
if (NULL != sender) {
opal_proc_table_set_value(&module->senders,
seg->us_btl_header->sender, sender);
opal_hash_table_set_value_uint64(&module->senders,
seg->us_btl_header->sender, sender);
return sender;
}
@ -245,8 +258,7 @@ dup_needs_ack:
static inline void
opal_btl_usnic_recv_fast(opal_btl_usnic_module_t *module,
opal_btl_usnic_recv_segment_t *seg,
opal_btl_usnic_channel_t *channel,
uint32_t l2_bytes_rcvd)
opal_btl_usnic_channel_t *channel)
{
opal_btl_usnic_segment_t *bseg;
mca_btl_active_message_callback_t* reg;
@ -261,6 +273,10 @@ opal_btl_usnic_recv_fast(opal_btl_usnic_module_t *module,
endpoint = lookup_sender(module, bseg);
seg->rs_endpoint = endpoint;
#if 0
opal_output(0, "fast recv %d bytes:\n", bseg->us_btl_header->payload_len + sizeof(opal_btl_usnic_btl_header_t));
opal_btl_usnic_dump_hex(bseg->us_btl_header, bseg->us_btl_header->payload_len + sizeof(opal_btl_usnic_btl_header_t));
#endif
if (endpoint != NULL && !endpoint->endpoint_exiting &&
(OPAL_BTL_USNIC_PAYLOAD_TYPE_FRAG ==
bseg->us_btl_header->payload_type) &&
@ -268,8 +284,7 @@ opal_btl_usnic_recv_fast(opal_btl_usnic_module_t *module,
/* Valgrind help */
opal_memchecker_base_mem_defined(
(void*)(seg->rs_recv_desc.sg_list[0].addr),
seg->rs_recv_desc.sg_list[0].length);
(void*)(seg->rs_rs_protocol_header), seg->rs_len);
seq = seg->rs_base.us_btl_header->pkt_seq;
delta = SEQ_DIFF(seq, endpoint->endpoint_next_contig_seq_to_recv);
@ -298,7 +313,7 @@ drop:
channel->chan_deferred_recv = seg;
} else {
opal_btl_usnic_recv_call(module, seg, channel, l2_bytes_rcvd);
opal_btl_usnic_recv_call(module, seg, channel);
}
}
@ -318,8 +333,7 @@ opal_btl_usnic_recv_frag_bookkeeping(
/* Valgrind help */
opal_memchecker_base_mem_defined(
(void*)(seg->rs_recv_desc.sg_list[0].addr),
seg->rs_recv_desc.sg_list[0].length);
(void*)(seg->rs_rs_protocol_header), seg->rs_len);
++module->stats.num_total_recvs;
@ -342,8 +356,8 @@ repost:
++module->stats.num_recv_reposts;
/* Add recv to linked list for reposting */
seg->rs_recv_desc.next = channel->repost_recv_head;
channel->repost_recv_head = &seg->rs_recv_desc;
seg->rs_next = channel->repost_recv_head;
channel->repost_recv_head = seg;
return rc;
}
@ -355,8 +369,7 @@ repost:
static inline void
opal_btl_usnic_recv(opal_btl_usnic_module_t *module,
opal_btl_usnic_recv_segment_t *seg,
opal_btl_usnic_channel_t *channel,
uint32_t l2_bytes_rcvd)
opal_btl_usnic_channel_t *channel)
{
opal_btl_usnic_segment_t *bseg;
mca_btl_active_message_callback_t* reg;
@ -378,14 +391,6 @@ opal_btl_usnic_recv(opal_btl_usnic_module_t *module,
(void*) endpoint, bseg->us_btl_header->pkt_seq,
bseg->us_btl_header->payload_len);
if (OPAL_UNLIKELY(opal_btl_usnic_frag_seg_proto_size(seg) !=
l2_bytes_rcvd)) {
BTL_ERROR(("L2 packet size and segment payload len do not agree!"
" l2_bytes_rcvd=%" PRIu32 " expected=%" PRIu32,
l2_bytes_rcvd, opal_btl_usnic_frag_seg_proto_size(seg)));
abort();
}
/* do the receive bookkeeping */
rc = opal_btl_usnic_recv_frag_bookkeeping(module, seg, channel);
if (rc != 0) {
@ -404,7 +409,7 @@ opal_btl_usnic_recv(opal_btl_usnic_module_t *module,
&seg->rs_desc, reg->cbdata);
} else {
opal_btl_usnic_recv_call(module, seg, channel, l2_bytes_rcvd);
opal_btl_usnic_recv_call(module, seg, channel);
}
}

Просмотреть файл

@ -23,14 +23,17 @@
#include "opal_config.h"
#include <infiniband/verbs.h>
#include "opal_stdint.h"
#include "opal/constants.h"
#if BTL_IN_OPAL
#include "opal/mca/btl/btl.h"
#include "opal/mca/btl/base/base.h"
#include "opal/mca/common/verbs/common_verbs.h"
#else
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
#endif
#include "btl_usnic.h"
#include "btl_usnic_frag.h"
@ -60,7 +63,6 @@ opal_btl_usnic_frag_send_complete(opal_btl_usnic_module_t *module,
/* do bookkeeping */
++frag->sf_endpoint->endpoint_send_credits;
++module->mod_channels[sseg->ss_channel].sd_wqe;
/* see if this endpoint needs to be made ready-to-send */
opal_btl_usnic_check_rts(frag->sf_endpoint);
@ -92,7 +94,6 @@ opal_btl_usnic_chunk_send_complete(opal_btl_usnic_module_t *module,
/* do bookkeeping */
++frag->sf_endpoint->endpoint_send_credits;
++module->mod_channels[sseg->ss_channel].sd_wqe;
/* see if this endpoint needs to be made ready-to-send */
opal_btl_usnic_check_rts(frag->sf_endpoint);
@ -131,7 +132,7 @@ opal_btl_usnic_finish_put_or_send(
* We already packed via the convertor if necessary, so we only need to
* handle the simple memcpy case here.
*/
if (frag->sf_base.uf_base.des_local_count > 1) {
if (frag->sf_base.uf_base.USNIC_SEND_LOCAL_COUNT > 1) {
/* no convertor */
assert(NULL != frag->sf_base.uf_local_seg[1].seg_addr.pval);
@ -141,21 +142,16 @@ opal_btl_usnic_finish_put_or_send(
frag->sf_base.uf_local_seg[1].seg_len);
/* update 1st segment length */
frag->sf_base.uf_base.des_local_count = 1;
frag->sf_base.uf_base.USNIC_SEND_LOCAL_COUNT = 1;
frag->sf_base.uf_local_seg[0].seg_len +=
frag->sf_base.uf_local_seg[1].seg_len;
}
sseg->ss_base.us_sg_entry[0].length =
sizeof(opal_btl_usnic_btl_header_t) + frag->sf_size;
sseg->ss_len = sizeof(opal_btl_usnic_btl_header_t) + frag->sf_size;
/* use standard channel */
sseg->ss_channel = USNIC_DATA_CHANNEL;
sseg->ss_base.us_btl_header->tag = tag;
if (frag->sf_base.uf_local_seg[0].seg_len < module->tiny_mtu) {
sseg->ss_send_desc.send_flags |= IBV_SEND_INLINE;
}
} else {
opal_btl_usnic_large_send_frag_t *lfrag;

Просмотреть файл

@ -10,8 +10,6 @@
#ifndef BTL_USNIC_SEND_H
#define BTL_USNIC_SEND_H
#include <infiniband/verbs.h>
#include "btl_usnic.h"
#include "btl_usnic_frag.h"
#include "btl_usnic_ack.h"
@ -53,23 +51,8 @@ opal_btl_usnic_check_rts(
}
}
#if MSGDEBUG2
/*
 * Debug-only helper: sum the length fields of all scatter/gather
 * entries attached to the given send work request.
 */
static inline
int sge_total(struct ibv_send_wr *wr)
{
    int total = 0;
    int idx = 0;

    while (idx < wr->num_sge) {
        total += wr->sg_list[idx].length;
        ++idx;
    }

    return total;
}
#endif
/*
* Common point for posting a segment to VERBS
* Common point for posting a segment
*/
static inline void
opal_btl_usnic_post_segment(
@ -77,33 +60,34 @@ opal_btl_usnic_post_segment(
opal_btl_usnic_endpoint_t *endpoint,
opal_btl_usnic_send_segment_t *sseg)
{
struct ibv_send_wr *bad_wr;
opal_btl_usnic_channel_t *channel;
struct ibv_send_wr *wr;
int ret;
/* get channel and remote channel */
opal_btl_usnic_channel_id_t channel_id = sseg->ss_channel;
opal_btl_usnic_channel_t *channel = &module->mod_channels[channel_id];
#if MSGDEBUG1
opal_output(0, "post_send: type=%s, addr=%p, len=%d, payload=%d\n",
usnic_seg_type(sseg->ss_base.us_type),
(void*) sseg->ss_send_desc.sg_list->addr,
sge_total(&sseg->ss_send_desc),
sseg->ss_base.us_btl_header->payload_len);
/*opal_btl_usnic_dump_hex((void *)(sseg->ss_send_desc.sg_list->addr + sizeof(opal_btl_usnic_btl_header_t)), 16); */
opal_output(0, "post_send: type=%s, ep=%p, remote_addr=%p, addr=%p, len=%"
PRIsize_t,
usnic_seg_type_str(sseg->ss_base.us_type),
(void*) channel->ep,
(void*) endpoint->endpoint_remote_addrs[channel_id],
(void*) sseg->ss_ptr,
sseg->ss_len);
#endif
/* set target address */
wr = &sseg->ss_send_desc;
wr->wr.ud.ah = endpoint->endpoint_remote_ah;
/* get channel and remote QPN */
channel = &module->mod_channels[sseg->ss_channel];
wr->wr.ud.remote_qpn =
endpoint->endpoint_remote_addr.qp_num[sseg->ss_channel];
/* Post segment to the NIC */
ret = ibv_post_send(channel->qp, &sseg->ss_send_desc, &bad_wr);
/* Send the segment */
#if 0
opal_output(0, "send len=%d ret=%d\n", sseg->ss_len, ret);
opal_btl_usnic_dump_hex(sseg->ss_ptr, sseg->ss_len);
#endif
ret = fi_sendto(channel->ep,
sseg->ss_ptr,
sseg->ss_len,
NULL,
endpoint->endpoint_remote_addrs[channel_id],
sseg);
if (OPAL_UNLIKELY(0 != ret)) {
opal_btl_usnic_util_abort("ibv_post_send() failed",
opal_btl_usnic_util_abort("fi_sendto() failed",
__FILE__, __LINE__);
/* Never returns */
}
@ -114,15 +98,61 @@ opal_btl_usnic_post_segment(
++sseg->ss_parent_frag->sf_seg_post_cnt;
}
/* consume a WQE */
--channel->sd_wqe;
/* Stats */
++module->stats.num_total_sends;
++channel->num_channel_sends;
--channel->credits;
}
/*
 * Post an ACK segment.
 *
 * The segment's buffer (ss_ptr / ss_len) is passed to fi_sendto() on the
 * endpoint of the channel selected by sseg->ss_channel, together with
 * the peer's remote address entry for that same channel; sseg itself is
 * the last argument (presumably the operation context -- confirm against
 * the libfabric version in use).  Any non-zero return aborts the
 * process.  Afterwards the send statistics are incremented and the
 * channel's credit count is decremented.
 */
static inline void
opal_btl_usnic_post_ack(
    opal_btl_usnic_module_t *module,
    opal_btl_usnic_endpoint_t *endpoint,
    opal_btl_usnic_send_segment_t *sseg)
{
    /* look up the channel this segment is sent on */
    opal_btl_usnic_channel_id_t chan_idx = sseg->ss_channel;
    opal_btl_usnic_channel_t *chan = &module->mod_channels[chan_idx];
    int rc;

#if MSGDEBUG1
    opal_output(0, "post_send ACK: type=%s, ep=%p, remote_addr=%p, addr=%p, len=%"
                PRIsize_t,
                usnic_seg_type_str(sseg->ss_base.us_type),
                (void*) chan->ep,
                (void*) endpoint->endpoint_remote_addrs[chan_idx],
                (void*) sseg->ss_ptr,
                sseg->ss_len);
#endif

#if 0
    opal_output(0, "ACK send:\n");
    opal_btl_usnic_dump_hex(sseg->ss_ptr, sseg->ss_len);
#endif

    rc = fi_sendto(chan->ep, sseg->ss_ptr, sseg->ss_len, NULL,
                   endpoint->endpoint_remote_addrs[chan_idx], sseg);
    if (OPAL_UNLIKELY(0 != rc)) {
        opal_btl_usnic_util_abort("fi_sendto() failed",
                                  __FILE__, __LINE__);
        /* Never returns */
    }

    /* Stats */
    ++module->stats.num_total_sends;
    ++chan->num_channel_sends;
    --chan->credits;
}
/*
* Post a send to the verbs work queue
* Post a send to the work queue
*/
static inline void
opal_btl_usnic_endpoint_send_segment(
@ -174,23 +204,29 @@ opal_btl_usnic_endpoint_send_segment(
#if MSGDEBUG1
{
uint8_t mac[6];
char mac_str1[128];
char mac_str2[128];
opal_btl_usnic_sprintf_mac(mac_str1, module->local_addr.mac);
opal_btl_usnic_gid_to_mac(&endpoint->endpoint_remote_addr.gid, mac);
opal_btl_usnic_sprintf_mac(mac_str2, mac);
char local_ip[32];
char remote_ip[32];
opal_output(0, "--> Sending %s: seq: %" UDSEQ ", sender: 0x%016lx from device %s MAC %s, qp %u, seg %p, room %d, wc len %u, remote MAC %s, qp %u",
opal_btl_usnic_snprintf_ipv4_addr(local_ip, sizeof(local_ip),
module->local_modex.ipv4_addr,
module->local_modex.netmask);
opal_btl_usnic_snprintf_ipv4_addr(remote_ip, sizeof(remote_ip),
endpoint->endpoint_remote_modex.ipv4_addr,
endpoint->endpoint_remote_modex.netmask);
opal_output(0, "--> Sending %s: seq: %" UDSEQ ", sender: 0x%016lx from device %s, IP %s, port %u, seg %p, room %d, wc len %u, remote IP %s, port %u",
(sseg->ss_parent_frag->sf_base.uf_type == OPAL_BTL_USNIC_FRAG_LARGE_SEND)?
"CHUNK" : "FRAG",
sseg->ss_base.us_btl_header->pkt_seq,
sseg->ss_base.us_btl_header->sender,
endpoint->endpoint_module->device->name,
mac_str1, module->local_addr.qp_num[sseg->ss_channel],
(void*)sseg, sseg->ss_hotel_room,
sseg->ss_base.us_sg_entry[0].length,
mac_str2, endpoint->endpoint_remote_addr.qp_num[sseg->ss_channel]);
"CHUNK" : "FRAG",
sseg->ss_base.us_btl_header->pkt_seq,
sseg->ss_base.us_btl_header->sender,
endpoint->endpoint_module->fabric_info->fabric_attr->name,
local_ip,
module->local_modex.ports[sseg->ss_channel],
(void*)sseg,
sseg->ss_hotel_room,
sseg->ss_ptr,
remote_ip,
endpoint->endpoint_remote_modex.ports[sseg->ss_channel]);
}
#endif

Просмотреть файл

@ -1,7 +1,5 @@
/*
* Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -18,11 +16,11 @@
#include "opal/mca/base/mca_base_var.h"
#include "opal/mca/base/mca_base_pvar.h"
#include "opal/util/proc.h"
#include "btl_usnic_compat.h"
#include "btl_usnic.h"
#include "btl_usnic_module.h"
#include "btl_usnic_stats.h"
#include "btl_usnic_util.h"
/*
* Local variables
@ -266,11 +264,10 @@ static int usnic_pvar_notify(struct mca_base_pvar_t *pvar,
static int usnic_pvar_read(const struct mca_base_pvar_t *pvar,
void *value, void *bound_obj)
{
size_t i;
size_t offset = (size_t) pvar->ctx;
uint64_t *array = (uint64_t*) value;
for (i = 0; i < mca_btl_usnic_component.num_modules; ++i) {
for (int i = 0; i < mca_btl_usnic_component.num_modules; ++i) {
char *base = (char*) &(mca_btl_usnic_component.usnic_active_modules[i]->stats);
array[i] = *((uint64_t*) (base + offset));
}
@ -312,10 +309,9 @@ static void register_pvar_highwater(char *name, char *desc, size_t offset)
static int usnic_pvar_enum_read(const struct mca_base_pvar_t *pvar,
void *value, void *bound_obj)
{
size_t i;
int *array = (int *) value;
for (i = 0; i < mca_btl_usnic_component.num_modules; ++i) {
for (int i = 0; i < mca_btl_usnic_component.num_modules; ++i) {
array[i] = i;
}
@ -378,13 +374,13 @@ static bool setup_mpit_pvar_type(void)
*/
static void setup_mpit_pvars_enum(void)
{
size_t i;
int i;
int rc __opal_attribute_unused__;
mca_base_var_enum_value_t *devices;
static mca_base_var_enum_t *devices_enum;
struct ibv_device *device;
opal_btl_usnic_module_t *m;
unsigned char *c;
struct sockaddr_in *sin;
devices = calloc(mca_btl_usnic_component.num_modules + 1,
sizeof(*devices));
@ -394,15 +390,14 @@ static void setup_mpit_pvars_enum(void)
char *str;
m = mca_btl_usnic_component.usnic_active_modules[i];
c = (unsigned char*) &m->if_ipv4_addr;
sin = m->fabric_info->src_addr;
c = (unsigned char*) &sin->sin_addr.s_addr;
device = m->device;
devices[i].value = i;
rc = asprintf(&str, "%s,%s,%hhu.%hhu.%hhu.%hhu/%" PRIu32,
ibv_get_device_name(device),
m->if_name,
c[0], c[1], c[2], c[3],
m->if_cidrmask);
rc = asprintf(&str, "%s,%hhu.%hhu.%hhu.%hhu/%" PRIu32,
m->fabric_info->fabric_attr->name,
c[0], c[1], c[2], c[3],
usnic_netmask_to_cidrlen(sin->sin_addr.s_addr));
assert(rc > 0);
devices[i].string = str;
}
@ -429,7 +424,7 @@ static void setup_mpit_pvars_enum(void)
/* Free the strings (mca_base_var_enum_create() strdup()'ed them
into private storage, so we don't need them any more) */
for (i = 0; i < mca_btl_usnic_component.num_modules; ++i) {
for (int i = 0; i < mca_btl_usnic_component.num_modules; ++i) {
free((char*) devices[i].string);
}

Просмотреть файл

@ -11,12 +11,12 @@
#include <stdio.h>
#include <unistd.h>
#include <infiniband/verbs.h>
#include "opal/util/show_help.h"
#include "opal/constants.h"
#include "opal/util/if.h"
#include "btl_usnic_module.h"
#include "btl_usnic_util.h"
@ -24,7 +24,7 @@ void opal_btl_usnic_exit(opal_btl_usnic_module_t *module)
{
if (NULL == module) {
/* Find the first module with an error callback */
for (uint32_t i = 0; i < mca_btl_usnic_component.num_modules; ++i) {
for (int i = 0; i < mca_btl_usnic_component.num_modules; ++i) {
if (NULL != mca_btl_usnic_component.usnic_active_modules[i]->pml_error_callback) {
module = mca_btl_usnic_component.usnic_active_modules[i];
break;
@ -54,15 +54,33 @@ void opal_btl_usnic_exit(opal_btl_usnic_module_t *module)
}
/*
 * Print the "internal error after init" help message and then exit via
 * opal_btl_usnic_exit().  This lives in a .c file mainly so that inline
 * functions in .h files don't need to include the show_help header file.
 *
 * msg:  description of what failed
 * file: source file of the failure site (callers pass __FILE__)
 * line: source line of the failure site (callers pass __LINE__)
 */
void opal_btl_usnic_util_abort(const char *msg, const char *file, int line)
{
    /* The help message ends with "Line: %d" followed by "Error: %s", so
       the integer line number must precede the message string;
       otherwise a pointer is consumed by %d and an int by %s. */
    opal_show_help("help-mpi-btl-usnic.txt", "internal error after init",
                   true,
                   opal_process_info.nodename,
                   file, line, msg);

    opal_btl_usnic_exit(NULL);
    /* Never returns */
}
void
opal_btl_usnic_dump_hex(uint8_t *addr, int len)
opal_btl_usnic_dump_hex(void *vaddr, int len)
{
char buf[128];
size_t bufspace;
int i, ret;
char *p;
uint32_t sum=0;
uint8_t *addr;
addr = vaddr;
p = buf;
memset(buf, 0, sizeof(buf));
bufspace = sizeof(buf) - 1;
@ -95,16 +113,18 @@ opal_btl_usnic_dump_hex(uint8_t *addr, int len)
* using inet_ntop()).
*/
void opal_btl_usnic_snprintf_ipv4_addr(char *out, size_t maxlen,
uint32_t addr, uint32_t cidrmask)
uint32_t addr, uint32_t netmask)
{
int prefixlen;
uint8_t *p = (uint8_t*) &addr;
if (cidrmask > 0) {
if (netmask != 0) {
prefixlen = 33 - ffs(netmask);
snprintf(out, maxlen, "%u.%u.%u.%u/%u",
p[0],
p[1],
p[2],
p[3],
cidrmask);
prefixlen);
} else {
snprintf(out, maxlen, "%u.%u.%u.%u",
p[0],
@ -115,28 +135,10 @@ void opal_btl_usnic_snprintf_ipv4_addr(char *out, size_t maxlen,
}
/*
 * Format a 6-byte MAC address into "out" as lowercase, colon-separated
 * hex ("xx:xx:xx:xx:xx:xx").  At most 32 bytes, including the
 * terminating NUL, are written.
 */
void opal_btl_usnic_sprintf_mac(char *out, const uint8_t mac[6])
{
    const uint8_t *octet = mac;

    snprintf(out, 32, "%02x:%02x:%02x:%02x:%02x:%02x",
             octet[0], octet[1], octet[2],
             octet[3], octet[4], octet[5]);
}
/* Convert the given GID to a MAC address with
   opal_btl_usnic_gid_to_mac() and print that MAC into "out" with
   opal_btl_usnic_sprintf_mac(). */
void opal_btl_usnic_sprintf_gid_mac(char *out, union ibv_gid *gid)
{
uint8_t mac[6];
opal_btl_usnic_gid_to_mac(gid, mac);
opal_btl_usnic_sprintf_mac(out, mac);
}
/* Pretty-print the given boolean array as a hexadecimal string. slen should
* include space for any null terminator. */
void opal_btl_usnic_snprintf_bool_array(char *s, size_t slen, bool a[], size_t alen)
void opal_btl_usnic_snprintf_bool_array(char *s, size_t slen, bool a[],
size_t alen)
{
size_t i = 0;
size_t j = 0;
@ -165,112 +167,6 @@ void opal_btl_usnic_snprintf_bool_array(char *s, size_t slen, bool a[], size_t a
assert(j <= slen);
}
/* Find the IP interface whose MAC address equals "mac" and record its
   name, IPv4 address, mask, MAC and MTU on the module (both in the
   if_* fields and in local_addr).  Returns OPAL_SUCCESS when a matching
   interface was found and all of its attributes could be read;
   otherwise returns OPAL_ERR_NOT_FOUND. */
int opal_btl_usnic_find_ip(opal_btl_usnic_module_t *module, uint8_t mac[6])
{
int i;
uint8_t localmac[6];
char addr_string[32], mac_string[32];
struct sockaddr sa;
struct sockaddr_in *sai;
/* Loop through all IP interfaces looking for the one with the
right MAC */
for (i = opal_ifbegin(); i != -1; i = opal_ifnext(i)) {
if (OPAL_SUCCESS == opal_ifindextomac(i, localmac)) {
/* Is this the MAC I'm looking for? */
if (0 != memcmp(mac, localmac, 6)) {
continue;
}
/* Yes, it is!  Read the interface's attributes; if any lookup
fails, move on to the next interface.  Note that the lookups write
directly into the module, so earlier fields may already have been
overwritten when a later lookup fails. */
if (OPAL_SUCCESS != opal_ifindextoname(i, module->if_name,
sizeof(module->if_name)) ||
OPAL_SUCCESS != opal_ifindextoaddr(i, &sa, sizeof(sa)) ||
OPAL_SUCCESS != opal_ifindextomask(i, &module->if_cidrmask,
sizeof(module->if_cidrmask)) ||
OPAL_SUCCESS != opal_ifindextomac(i, module->if_mac) ||
OPAL_SUCCESS != opal_ifindextomtu(i, &module->if_mtu)) {
continue;
}
/* Copy the 4 address bytes out of the sockaddr unchanged */
sai = (struct sockaddr_in *) &sa;
memcpy(&module->if_ipv4_addr, &sai->sin_addr, 4);
/* Save this information to my local address field on the
module so that it gets sent in the modex */
module->local_addr.ipv4_addr = module->if_ipv4_addr;
module->local_addr.cidrmask = module->if_cidrmask;
/* Since verbs doesn't offer a way to get standard
Ethernet MTUs (as of libibverbs 1.1.5, the MTUs are
enums, and don't include values for 1500 or 9000), look
up the MTU in the corresponding enic interface. */
module->local_addr.mtu = module->if_mtu;
/* Build printable forms of the address and MAC for the verbose
message below */
inet_ntop(AF_INET, &(module->if_ipv4_addr),
addr_string, sizeof(addr_string));
opal_btl_usnic_sprintf_mac(mac_string, mac);
opal_output_verbose(5, USNIC_OUT,
"btl:usnic: found usNIC device corresponds to IP device %s, %s/%d, MAC %s",
module->if_name, addr_string, module->if_cidrmask,
mac_string);
return OPAL_SUCCESS;
}
}
return OPAL_ERR_NOT_FOUND;
}
/*
 * Recover a MAC address from a GID.  This undoes the encoding performed
 * by usnic_main.c:usnic_mac_to_gid() in the usnic.ko kernel code.
 *
 * The scheme comes from Mellanox RoCE (Emulex did the same thing), so
 * the usNIC driver followed that convention:
 * http://www.mellanox.com/related-docs/prod_software/RoCE_with_Priority_Flow_Control_Application_Guide.pdf
 */
void opal_btl_usnic_gid_to_mac(union ibv_gid *gid, uint8_t mac[6])
{
    /* raw[] index that supplies MAC octets 1..5 (raw[11] and raw[12]
       are not used) */
    static const int src_idx[6] = { 8, 9, 10, 13, 14, 15 };
    int k;

    /* octet 0 comes from raw[8] with bit 0x02 toggled */
    mac[0] = gid->raw[src_idx[0]] ^ 2;

    for (k = 1; k < 6; ++k) {
        mac[k] = gid->raw[src_idx[k]];
    }
}
/* Takes an IPv4 address in network byte order and a CIDR prefix length
 * (the "X" in "a.b.c.d/X") and returns the subnet in network byte order.
 *
 * addrn:    IPv4 address, network byte order
 * cidr_len: prefix length, expected to be in [0, 32]
 *
 * A prefix length of 0 yields subnet 0; a prefix length of 32 (or
 * larger, when assertions are compiled out) yields the address itself. */
uint32_t opal_btl_usnic_get_ipv4_subnet(uint32_t addrn, uint32_t cidr_len)
{
    uint32_t mask;

    assert(cidr_len <= 32);

    /* Build the mask in host byte order so that the shift is meaningful.
     * Shifting a 32-bit value by 32 bits (cidr_len == 0) and shifting a
     * negative signed int are both undefined in C, so use an unsigned
     * operand and handle the boundary lengths explicitly. */
    if (0 == cidr_len) {
        mask = 0;
    } else if (cidr_len >= 32) {
        mask = ~(uint32_t) 0;
    } else {
        mask = (~(uint32_t) 0) << (32 - cidr_len);
    }

    return htonl(ntohl(addrn) & mask);
}
/*
 * Print the "internal error after init" help message and then exit via
 * opal_btl_usnic_exit().  This lives in a .c file mainly so that inline
 * functions in .h files don't need to include RTE header files.
 *
 * msg:  description of what failed
 * file: source file of the failure site
 * line: source line of the failure site
 */
void opal_btl_usnic_util_abort(const char *msg, const char *file, int line)
{
    /* The help message ends with "Line: %d" followed by "Error: %s", so
       the integer line number must precede the message string;
       otherwise a pointer is consumed by %d and an int by %s. */
    opal_show_help("help-mpi-btl-usnic.txt", "internal error after init",
                   true,
                   opal_process_info.nodename,
                   file, line, msg);

    opal_btl_usnic_exit(NULL);
    /* Never returns */
}
/* Return the largest size data size that can be packed into max_len using the
* given convertor. For example, a 1000 byte max_len buffer may only be able
* to hold 998 bytes if an indivisible convertor element straddles the 1000

Просмотреть файл

@ -13,7 +13,6 @@
#include "opal/datatype/opal_convertor.h"
#include "btl_usnic.h"
#include "btl_usnic_module.h"
#ifndef MIN
# define MIN(a,b) ((a) < (b) ? (a) : (b))
@ -75,11 +74,34 @@ usnic_convertor_pack_simple(
}
}
/*
 * Convert an IPv4 netmask in network byte order to its CIDR prefix
 * length (e.g., 255.255.255.0 -> 24).
 *
 * The result is derived from the position of the lowest set bit, so the
 * mask is assumed to be contiguous.  An all-zero mask returns 0.
 */
static inline int
usnic_netmask_to_cidrlen(
    uint32_t netmask_be)
{
    uint32_t netmask = ntohl(netmask_be);

    /* ffs(0) is 0, which would otherwise produce the invalid length 33 */
    if (0 == netmask) {
        return 0;
    }

    return 33 - ffs(netmask);
}
/*
 * Convert a CIDR prefix length to an IPv4 netmask in network byte order
 * (e.g., 24 -> 255.255.255.0).
 *
 * Lengths <= 0 yield an all-zero mask and lengths >= 32 yield an
 * all-ones mask.
 */
static inline uint32_t
usnic_cidrlen_to_netmask(
    int cidrlen)
{
    uint32_t mask;

    /* Shifting by the full width of the type (cidrlen == 0) and shifting
     * a negative signed value (~0) are both undefined in C, so use an
     * unsigned operand and handle the boundary lengths explicitly. */
    if (cidrlen <= 0) {
        mask = 0;
    } else if (cidrlen >= 32) {
        mask = ~(uint32_t) 0;
    } else {
        mask = (~(uint32_t) 0) << (32 - cidrlen);
    }

    return htonl(mask);
}
/*
* Safely (but abnormally) exit this process without abort()'ing (and
* leaving a corefile).
*/
void opal_btl_usnic_exit(opal_btl_usnic_module_t *module);
struct opal_btl_usnic_module_t;
void opal_btl_usnic_exit(struct opal_btl_usnic_module_t *module);
/*
* Print a show_help message and then call opal_btl_usnic_exit().
*/
void opal_btl_usnic_util_abort(const char *msg, const char *file, int line);
/*
* Long enough to hold "xxx.xxx.xxx.xxx/xx"
@ -87,32 +109,15 @@ void opal_btl_usnic_exit(opal_btl_usnic_module_t *module);
#define IPV4STRADDRLEN 20
/*
* Long enough to hold "xx:xx:xx:xx:xx:xx"
*/
#define MACSTRLEN 18
/*
* If cidrmask==0, it is not included in the output string. addr is
* If netmask==0, it is not included in the output string. addr is
* expected to be in network byte order.
*/
void opal_btl_usnic_snprintf_ipv4_addr(char *out, size_t maxlen,
uint32_t addr, uint32_t cidrmask);
void opal_btl_usnic_sprintf_mac(char *out, const uint8_t mac[6]);
void opal_btl_usnic_sprintf_gid_mac(char *out, union ibv_gid *gid);
uint32_t addr, uint32_t netmask);
void opal_btl_usnic_snprintf_bool_array(char *s, size_t slen, bool a[], size_t alen);
int opal_btl_usnic_find_ip(opal_btl_usnic_module_t *module, uint8_t mac[6]);
void opal_btl_usnic_gid_to_mac(union ibv_gid *gid, uint8_t mac[6]);
void opal_btl_usnic_dump_hex(uint8_t *addr, int len);
uint32_t opal_btl_usnic_get_ipv4_subnet(uint32_t addrn, uint32_t cidr_len);
void opal_btl_usnic_util_abort(const char *msg, const char *file, int line);
void opal_btl_usnic_dump_hex(void *vaddr, int len);
size_t opal_btl_usnic_convertor_pack_peek(const opal_convertor_t *conv,
size_t max_len);

Просмотреть файл

@ -20,95 +20,15 @@
# $HEADER$
#
# OPAL_CHECK_LIBNL3(prefix, [action-if-found], [action-if-not-found])
# --------------------------------------------------------
# Check if libnl3 support can be found.  Sets prefix_{CPPFLAGS,
# LDFLAGS, LIBS} as needed and runs action-if-found if there is
# support, otherwise executes action-if-not-found.  On failure the
# prefix_* variables are restored to the values they had on entry.
#
# libnl3 changed its default header location as of v3.2 (released ca. September
# 2011).  It was previously "${prefix}/include/netlink/...".  It now is
# "${prefix}/include/libnl3/netlink/...".  The logic below only supports
# >=v3.2, under the assumption that older versions are not widely deployed.
AC_DEFUN([OPAL_CHECK_LIBNL3],[
    AC_ARG_WITH([libnl3],
                [AS_HELP_STRING([--with-libnl3(=DIR)],
                                [Build libnl3 support])])
    OPAL_CHECK_WITHDIR([libnl3], [$with_libnl3], [include/libnl3/netlink/netlink.h])
    AC_ARG_WITH([libnl3-libdir],
                [AS_HELP_STRING([--with-libnl3-libdir=DIR],
                                [Search for libnl3 libraries in DIR])])
    OPAL_CHECK_WITHDIR([libnl3-libdir], [$with_libnl3_libdir], [libnl-3.*])

    # Remember the global flags (restored unconditionally below) and the
    # prefixed flags (restored only if libnl3 cannot be used)
    ompi_check_libnl3_$1_save_CPPFLAGS="$CPPFLAGS"
    ompi_check_libnl3_$1_save_LDFLAGS="$LDFLAGS"
    ompi_check_libnl3_$1_save_LIBS="$LIBS"

    ompi_check_libnl3_$1_orig_CPPFLAGS="$$1_CPPFLAGS"
    ompi_check_libnl3_$1_orig_LDFLAGS="$$1_LDFLAGS"
    ompi_check_libnl3_$1_orig_LIBS="$$1_LIBS"

    AS_IF([test "$with_libnl3" != "no"],
          [AS_IF([test ! -z "$with_libnl3" -a "$with_libnl3" != "yes"],
                 [ompi_check_libnl3_dir="$with_libnl3"])
           AS_IF([test ! -z "$with_libnl3_libdir" -a "$with_libnl3_libdir" != "yes"],
                 [ompi_check_libnl3_libdir="$with_libnl3_libdir"])

           # OPAL_CHECK_PACKAGE unfortunately can't handle this weird include
           # dir layout
           AS_IF([test -n "$ompi_check_libnl3_dir"],
                 [ompi_check_libnl3_includedir="$ompi_check_libnl3_dir/include/libnl3"],
                 [ompi_check_libnl3_includedir="/usr/include/libnl3"])
           $1_CPPFLAGS="$$1_CPPFLAGS -I$ompi_check_libnl3_includedir"
           CPPFLAGS="$CPPFLAGS -I$ompi_check_libnl3_includedir"

           AC_CHECK_HEADER([netlink/netlink.h],
                           [# nl_recvmsgs_report appears to be a symbol which
                            # is present in libnl-3 but not libnl (v1)
                            _OPAL_CHECK_PACKAGE_LIB([$1],
                                                    [nl-3],
                                                    [nl_recvmsgs_report],
                                                    [],
                                                    [$ompi_check_libnl3_dir],
                                                    [$ompi_check_libnl3_libdir],
                                                    [ompi_check_libnl3_happy="yes"],
                                                    [ompi_check_libnl3_happy="no"])],
                           [ompi_check_libnl3_happy=no])

           # make sure that we don't pollute the cache with the results of a
           # test performed under different CPPFLAGS
           AS_UNSET([ac_cv_header_netlink_netlink_h])],
          [ompi_check_libnl3_happy="no"])

    # restore global flags
    CPPFLAGS="$ompi_check_libnl3_$1_save_CPPFLAGS"
    LDFLAGS="$ompi_check_libnl3_$1_save_LDFLAGS"
    LIBS="$ompi_check_libnl3_$1_save_LIBS"

    AS_IF([test "$ompi_check_libnl3_happy" = "yes"],
          [$2],
          [AS_IF([test ! -z "$with_libnl3" -a "$with_libnl3" != "no"],
                 [AC_MSG_ERROR([libnl3 support requested but not found.  Aborting])])
           # restore prefixed flags on failure (from the variables that
           # were actually saved above)
           $1_CPPFLAGS="$ompi_check_libnl3_$1_orig_CPPFLAGS"
           $1_LDFLAGS="$ompi_check_libnl3_$1_orig_LDFLAGS"
           $1_LIBS="$ompi_check_libnl3_$1_orig_LIBS"
           $3])
])
# MCA_opal_btl_usnic_POST_CONFIG([should_build])
# ------------------------------------------
# Defines the Automake conditionals for the optional pieces of the usnic
# BTL.  Each conditional is true only when should_build is 1 and the
# corresponding enable_opal_btl_usnic_* shell variable is "yes".
AC_DEFUN([MCA_opal_btl_usnic_POST_CONFIG], [
AM_CONDITIONAL([OPAL_BTL_USNIC_BUILD_UNIT_TESTS],
[test "$1" -eq 1 && test "X$enable_opal_btl_usnic_unit_tests" = "Xyes"])
AM_CONDITIONAL([OPAL_BTL_USNIC_BUILD_LIBNL1_UTILS],
[test "$1" -eq 1 && test "X$enable_opal_btl_usnic_libnl1_utils" = "Xyes"])
AM_CONDITIONAL([OPAL_BTL_USNIC_BUILD_LIBNL3_UTILS],
[test "$1" -eq 1 && test "X$enable_opal_btl_usnic_libnl3_utils" = "Xyes"])
])
# MCA_btl_usnic_CONFIG([action-if-can-compile],
# [action-if-cant-compile])
# MCA_opal_btl_usnic_CONFIG([action-if-can-compile],
# [action-if-cant-compile])
# ------------------------------------------------
AC_DEFUN([MCA_opal_btl_usnic_CONFIG],[
AC_CONFIG_FILES([opal/mca/btl/usnic/Makefile])
@ -123,13 +43,15 @@ AC_DEFUN([MCA_opal_btl_usnic_CONFIG],[
AS_IF([test "x$with_usnic" = "xno"],
[AC_MSG_WARN([--without-usnic specified; skipping usnic BTL])
$2],
[OMPI_BTL_USNIC_DO_CONFIG($1, $2)])
[_OPAL_BTL_USNIC_DO_CONFIG($1, $2)])
])
AC_DEFUN([OMPI_BTL_USNIC_DO_CONFIG],[
AC_DEFUN([_OPAL_BTL_USNIC_DO_CONFIG],[
OPAL_VAR_SCOPE_PUSH([unit_tests])
# see README.test for information about this scheme
AC_ARG_ENABLE([ompi-btl-usnic-unit-tests],
[AS_HELP_STRING([--enable-ompi-btl-usnic-unit-tests],
AC_ARG_ENABLE([opal-btl-usnic-unit-tests],
[AS_HELP_STRING([--enable-opal-btl-usnic-unit-tests],
[build unit tests for the usnic BTL,
including the test runner program,
opal_btl_usnic_run_tests])])
@ -141,22 +63,18 @@ AC_DEFUN([OMPI_BTL_USNIC_DO_CONFIG],[
[define to 1 if usnic BTL unit tests are enabled, 0 otherwise])
unset unit_tests
OPAL_CHECK_OPENFABRICS([btl_usnic],
[btl_usnic_happy="yes"],
[btl_usnic_happy="no"])
# The current logic in btl_usnic_compat.h checks the OPAL version as a
# proxy for the top-level OMPI version. Unfortunately this does the wrong
# proxy for the top-level OMPI version. Unfortunately this does the wrong
# thing for other top-level projects that might use the usnic BTL, such as
# ORCM. ORCM's versioning is totally unrelated to OMPI's. As a short term
# ORCM. ORCM's versioning is totally unrelated to OMPI's. As a short term
# workaround, just disqualify ourselves if the OPAL version seems too old.
# In the longer term we should be doing something else, like versioning
# OPAL and OMPI separately.
AS_IF([test "$btl_usnic_happy" = "yes"],
[AS_IF([test "$OPAL_MAJOR_VERSION" -eq "1" && \
test "$OPAL_MINOR_VERSION" -lt "7"],
[AC_MSG_NOTICE([OPAL version appears to be too old, disabling the usnic BTL])
btl_usnic_happy=no])])
# OPAL and OMPI separately.
btl_usnic_happy=yes
AS_IF([test "$OPAL_MAJOR_VERSION" -eq "1" && \
test "$OPAL_MINOR_VERSION" -lt "7"],
[AC_MSG_NOTICE([OPAL version appears to be too old, disabling the usnic BTL])
btl_usnic_happy=no])
# We only want to build on 64 bit Linux.
AS_IF([test "$btl_usnic_happy" = "yes"],
@ -165,7 +83,7 @@ AC_DEFUN([OMPI_BTL_USNIC_DO_CONFIG],[
case $host_os in
*linux*)
AS_IF([test $ac_cv_sizeof_void_p -eq 8],
[btl_usnic_happy=yes],
[],
[btl_usnic_happy=no])
;;
*)
@ -173,62 +91,28 @@ AC_DEFUN([OMPI_BTL_USNIC_DO_CONFIG],[
;;
esac
AC_MSG_RESULT([$btl_usnic_happy])
]
)
])
# The usnic BTL requires libfabric support. libfabric is in the
# opal/mca/common tree, so it should have been configured already
# (the common/ configury is guaranteed to come first). So we can
# simply check to see if libfabric setup was happy.
AS_IF([test "$btl_usnic_happy" = "yes"],
[AC_MSG_CHECKING([if building embedded libfabric])
AS_IF([test $opal_common_libfabric_happy -eq 1 && \
test $opal_common_libfabric_build_embedded -eq 1],
[AC_MSG_RESULT([yes])],
[AC_MSG_RESULT([no])
btl_usnic_happy=no])
])
AS_IF([test "$btl_usnic_happy" = "yes"],
[AC_CHECK_DECLS([IBV_EVENT_GID_CHANGE, ibv_event_type_str], [], [],
[#include <infiniband/verbs.h>
])
]
)
# Search for libnl so we can query routing information. We need to
# distinguish between v1 and v3.
enable_opal_btl_usnic_libnl1_utils=no
enable_opal_btl_usnic_libnl3_utils=no
AS_IF([test "$btl_usnic_happy" = "yes"],
[OPAL_CHECK_LIBNL3([btl_usnic_libnl],
[enable_opal_btl_usnic_libnl3_utils=yes],
[enable_opal_btl_usnic_libnl3_utils=no])
# fall back to libnl1 if libnl3 could not be found
AS_IF([test "X$enable_opal_btl_usnic_libnl3_utils" = "Xno"],
[OPAL_CHECK_PACKAGE([btl_usnic_libnl],
[netlink/netlink.h],
[nl],
[nl_recvmsgs_default],
[],
[],
[],
[enable_opal_btl_usnic_libnl1_utils=yes],
[enable_opal_btl_usnic_libnl1_utils=no])])
AS_IF([test "X$enable_opal_btl_usnic_libnl3_utils" = "Xno" &&
test "X$enable_opal_btl_usnic_libnl1_utils" = "Xno"],
[AC_MSG_NOTICE([could not find a libnl or libnl-3, disabling the usnic BTL])
btl_usnic_happy="no"])
btl_usnic_CPPFLAGS="$btl_usnic_CPPFLAGS $btl_usnic_libnl_CPPFLAGS"
btl_usnic_CFLAGS="$btl_usnic_CFLAGS $btl_usnic_libnl_CFLAGS"
btl_usnic_LDFLAGS="$btl_usnic_LDFLAGS $btl_usnic_libnl_LDFLAGS"
btl_usnic_LIBS="$btl_usnic_libnl_LIBS $btl_usnic_LIBS"
])
AS_IF([test "$btl_usnic_happy" = "yes"],
[btl_usnic_WRAPPER_EXTRA_LDFLAGS="$btl_usnic_LDFLAGS"
btl_usnic_WRAPPER_EXTRA_LIBS="$btl_usnic_LIBS"
$1],
[$1],
[AS_IF([test "$with_usnic" = "yes"],
[AC_MSG_WARN([--with-usnic specified, but usNIC support cannot be built])
[AC_MSG_WARN([--with-usnic was specified, but Cisco usNIC support cannot be built])
AC_MSG_ERROR([Cannot continue])],
[$2])
])
# Substitute in the things needed to build USNIC
AC_SUBST([btl_usnic_CPPFLAGS])
AC_SUBST([btl_usnic_CFLAGS])
AC_SUBST([btl_usnic_LDFLAGS])
AC_SUBST([btl_usnic_LIBS])
OPAL_VAR_SCOPE_POP
])dnl

Просмотреть файл

@ -10,26 +10,6 @@
#
# This is the US/English help file for the Open MPI usnic BTL.
#
[ibv API failed]
Open MPI failed a basic verbs operation on a Cisco usNIC interface.
This is highly unusual and shouldn't happen. It suggests that there
may be something wrong with the usNIC or OpenFabrics configuration on
this server.
In addition to any suggestions listed below, you might want to check
the Linux "memlock" limits on your system (they should probably be
"unlimited"). See this FAQ entry for details:
http://www.open-mpi.org/faq/?category=openfabrics#ib-locked-pages
Open MPI will skip this usNIC interface in the usnic BTL, which may
result in either lower performance or your job aborting.
Server: %s
usNIC interface: %s (which is %s)
Failed function: %s (%s:%d)
Description: %s
#
[not enough usnic resources]
There are not enough usNIC resources on a VIC for all the MPI
processes on this server.
@ -48,24 +28,24 @@ result in either lower performance or your job aborting.
usNIC interface: %s
Description: %s
#
[create ibv resource failed]
Open MPI failed to allocate a usNIC-related resource on a VIC. This
usually means one of two things:
[libfabric API failed]
Open MPI failed a basic API operation on a Cisco usNIC interface.
This is highly unusual and shouldn't happen. It suggests that there
may be something wrong with the usNIC configuration on this server.
1. You are running something other than this MPI job on this server
that is consuming usNIC resources.
2. You have run out of locked Linux memory. You should probably set
the Linux "memlock" limits to "unlimited". See this FAQ entry for
details:
In addition to any suggestions listed below, you might want to check
the Linux "memlock" limits on your system (they should probably be
"unlimited"). See this FAQ entry for details:
http://www.open-mpi.org/faq/?category=openfabrics#ib-locked-pages
http://www.open-mpi.org/faq/?category=openfabrics#ib-locked-pages
This Open MPI job will skip this usNIC interface in the usnic BTL,
which may result in either lower performance or the job aborting.
Open MPI will skip this usNIC interface in the usnic BTL, which may
result in either lower performance or your job aborting.
Server: %s
usNIC interface: %s (which is %s)
usNIC interface: %s
Failed function: %s (%s:%d)
Return status: %d
Description: %s
#
[async event]
@ -73,7 +53,7 @@ Open MPI detected a fatal error on a usNIC interface. Your MPI job
will now abort; sorry.
Server: %s
usNIC interface: %s (which is %s)
usNIC interface: %s
Async event code: %s (%d)
#
[internal error during init]
@ -86,8 +66,9 @@ Open MPI will skip this usNIC interface in the usnic BTL, which may
result in either lower performance or your job aborting.
Server: %s
usNIC interface: %s (which is %s)
Failure: %s (%s:%d)
usNIC interface: %s
Failure at: %s (%s:%d)
Error: %d (%s)
#
[internal error after init]
An internal error has occurred in the Open MPI usNIC BTL. This is
@ -101,18 +82,6 @@ server.
Line: %d
Error: %s
#
[verbs_port_bw failed]
Open MPI failed to query the supported bandwidth of a usNIC interface.
This is unusual and shouldn't happen. It suggests that there may be
something wrong with the usNIC or OpenFabrics configuration on this
server.
Open MPI will skip this usNIC interface in the usnic BTL, which may
result in either lower performance or your job aborting.
Server: %s
usNIC interface: %s (which is %s)
#
[check_reg_mem_basics fail]
The usNIC BTL failed to initialize while trying to register some
memory. This typically can indicate that the "memlock" limits are set
@ -136,6 +105,16 @@ value will be ignored.
Value: %s
Message: %s
#
[device present but not up]
Open MPI has found a usNIC device that is present / listed in Linux,
but in a "down" state. It will not be used by this MPI job.
You may wish to check this device, especially if it is unexpectedly
down.
Local server: %s
Device name: %s
#
[MTU mismatch]
The MTU does not match on local and remote hosts. All interfaces on
all hosts participating in an MPI job must be configured with the same
@ -143,17 +122,11 @@ MTU. The usNIC interface listed below will not be used to communicate
with this remote host.
Local host: %s
usNIC interface: %s (which is %s)
usNIC interface: %s
Local MTU: %d
Remote host: %s
Remote MTU: %d
#
[rtnetlink init fail]
The usnic BTL failed to initialize the rtnetlink query subsystem.
Server: %s
Error message: %s
#
[connectivity error: small ok, large bad]
The Open MPI usNIC BTL was unable to establish full connectivity
between at least one pair of interfaces on servers in the MPI job.
@ -164,11 +137,9 @@ Your MPI job is going to abort now.
Source:
Hostname / IP: %s (%s)
Host interfaces: %s / %s
MAC address: %s
Host interface: %s
Destination:
Hostname / IP: %s (%s)
MAC address: %s
Small message size: %u
Large message size: %u
@ -197,11 +168,9 @@ Your MPI job is going to abort now.
Source:
Hostname / IP: %s (%s)
Host interfaces: %s / %s
MAC address: %s
Host interface: %s
Destination:
Hostname / IP: %s (%s)
MAC address: %s
Small message size: %u
Large message size: %u
@ -235,11 +204,9 @@ Your MPI job is going to abort now.
Source:
Hostname / IP: %s (%s)
Host interfaces: %s / %s
MAC address: %s
Host interface: %s
Destination:
Hostname / IP: %s (%s)
MAC address: %s
Small message size: %u
Large message size: %u
@ -258,43 +225,23 @@ Check the resulting "mymap*" files to see the exact pairing of IP
interfaces. Inconsistent results may be indicative of underlying
network misconfigurations.
#
[ibv_create_ah timeout]
[usd_create_dest timeout]
The usnic BTL failed to create addresses for remote peers within the
specified timeout. When using the usNIC/UDP transport, this usually
means that ARP requests failed to resolve in time. You may be able to
solve the problem by increasing the usnic BTL's ARP timeout. If that
doesn't work, you should diagnose why ARP replies are apparently not
being delivered in a timely manner.
specified timeout. This usually means that ARP requests failed to
resolve in time. You may be able to solve the problem by increasing
the usnic BTL's ARP timeout. If that doesn't work, you should
diagnose why ARP replies are apparently not being delivered in a
timely manner.
The usNIC interface listed below will be ignored. Your MPI
application will likely either run with degraded performance and/or
abort.
Server: %s
usNIC interface: %s (which is %s)
usNIC interface: %s
Current ARP timeout: %d (btl_usnic_arp_timeout MCA param)
#
[transport mismatch]
The underlying transports used by the usNIC driver stack on multiple
servers do not match. This configuration is unsupported and is almost
certainly not what you want.
This error indicates that the VIC firmware, Linux usNIC kernel driver,
and/or Linux usNIC userspace drivers are not compatible between at
least the following two servers:
Local server: %s
Remote server: %s
The usnic MPI transport will be deactivated in at least the one local
MPI process that reported the problem. This may lead to performance
degradation, and may also result in aborting the overall MPI job.
It is usually easiest to have the same VIC firmware, Linux usNIC
kernel driver, and Linux usNIC userspace driver installed on all
servers.
#
[create_ah failed]
[unreachable peer IP]
WARNING: Open MPI failed to find a route to a peer IP address via a
specific usNIC interface. This usually indicates a problem in the IP
routing between these peers.
@ -308,7 +255,7 @@ Note that this error message defaults to only printing for the *first*
pair of MPI process peers that exhibit this problem; this same problem
may exist for other peer pairs, too.
Local interface: %s:%s (which is %s and %s)
Local interface: %s:%s (which is %s)
Peer: %s:%s
NOTE: You can set the MCA param btl_usnic_show_route_failures to 0 to
@ -336,7 +283,7 @@ caused by higher-than-usual retransmission rates (to compensate for
the corrupted received packets).
Local server: %s
usNIC interface: %s (which is %s)
usNIC interface: %s
# of short packets
received so far: %d
@ -355,12 +302,29 @@ Your MPI job will continue, but you should monitor the job and ensure
that it behaves correctly.
Local server: %s
usNIC interface: %s (which is %s)
usNIC interface: %s
Channel index: %d
Completion status: %d
Completion status: %s (%d)
Work request ID: %p
Opcode: %d
Vendor error: %d
Opcode: %s (%d)
If this error keeps happening, you should contact Cisco technical
support.
#
[device present but not up]
Open MPI has found a usNIC device that is present / listed in Linux,
but in a "down" state. It will not be used by this MPI job.
You may wish to check this device, especially if it is unexpectedly
down.
Local server: %s
Device name: %s
#
[transport mismatch]
Open MPI has found two servers with different underlying usNIC
transports. This is an unsupported configuration; all usNIC devices
must have the same underlying transport in order to use the usNIC BTL.
Local server / transport: %s / %s
Remote server / transport: %s / %s

Просмотреть файл

@ -42,8 +42,8 @@ static int test_parse_ifex_str(void *ctx)
check(f->elts[0].is_netmask == false);
check_str_eq(f->elts[0].if_name, "usnic_1");
check(f->elts[1].is_netmask == true);
check(f->elts[1].addr == htonl(0x01020300));
check(f->elts[1].prefixlen == 24);
check(f->elts[1].addr_be == htonl(0x01020300));
check(f->elts[1].netmask_be == 24);
free_filter(f);
return 0;

Просмотреть файл

@ -10,11 +10,12 @@
#ifndef BTL_USNIC_GRAPH_TEST_H
#define BTL_USNIC_GRAPH_TEST_H
#include <stdlib.h>
#include "btl_usnic_test.h"
#if OPAL_BTL_USNIC_UNIT_TESTS
#include <stdlib.h>
#include <sys/time.h>
#include "btl_usnic_test.h"
#define check_graph_is_consistent(g) \
do { \
check(NUM_VERTICES(g) <= opal_pointer_array_get_size(&g->vertices)); \
@ -89,6 +90,19 @@ static int cmp_int_pair(const void *a, const void *b)
}
}
/* Minimal wall-clock helper returning seconds (with a fractional
   microsecond part) as a double.  It exists so that this test does
   not have to deal with the complexity of finding mpi.h just to call
   MPI_Wtime. */
static double gettime(void)
{
    struct timeval now;

    gettimeofday(&now, NULL);

    /* whole seconds plus the microsecond remainder scaled to seconds */
    return (double)now.tv_sec + (double)now.tv_usec / 1000000.0;
}
static int test_graph_create(void *ctx)
{
opal_btl_usnic_graph_t *g;
@ -591,7 +605,7 @@ static int test_graph_assignment_solver(void *ctx)
* 2 --> 4
*/
#define NUM_ITER (10000)
start = MPI_Wtime();
start = gettime();
for (iter = 0; iter < NUM_ITER; ++iter) {
err = opal_btl_usnic_gr_create(NULL, NULL, &g);
check_err_code(err, OPAL_SUCCESS);
@ -627,7 +641,7 @@ static int test_graph_assignment_solver(void *ctx)
err = opal_btl_usnic_gr_free(g);
check_err_code(err, OPAL_SUCCESS);
}
end = MPI_Wtime();
end = gettime();
/* ensure that this operation on a 1000 node cluster will take less than one second */
check(((end - start) / NUM_ITER) < 0.001);
#if 0

Просмотреть файл

@ -18,7 +18,6 @@
#include <stdlib.h>
#include <string.h>
#include <libgen.h> /* for dirname() */
#include <mpi.h>
#include <dlfcn.h>
@ -85,9 +84,10 @@ int main(int argc, char **argv)
free(path);
/* casting awfulness needed for GCC's "-pedantic" option :( */
*(void **)(&run_tests) = dlsym(usnic_handle, "opal_btl_usnic_run_tests");
*(void **)(&run_tests) = dlsym(usnic_handle, BTL_USNIC_RUN_TESTS_SYMBOL);
if (run_tests == NULL) {
fprintf(stderr, "run_tests=%p dlerror()=%s\n", *(void **)(&run_tests), dlerror());
fprintf(stderr, "run_tests=%p dlerror()=%s\n",
*(void **)(&run_tests), dlerror());
abort();
}
run_tests();

200
opal/mca/common/libfabric/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,200 @@
# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# See opal/mca/common/sm/Makefile.am for an explanation of the
# different library targets in this file.
#
# Embedded libfabric
#
lib_LTLIBRARIES =
noinst_LTLIBRARIES =
# Only build this embedded copy if we're not using an external
# libfabric.
if OPAL_COMMON_LIBFABRIC_BUILD_EMBEDDED
# Libtool library names for this component: comp_inst is the
# installable library, comp_noinst is the non-installed variant.
comp_inst = lib@OPAL_LIB_PREFIX@mca_common_libfabric.la
comp_noinst = lib@OPAL_LIB_PREFIX@mca_common_libfabric_noinst.la
# Exactly one of the two is added to a build list: the installable
# library for a DSO build, the noinst variant otherwise.
if MCA_BUILD_opal_common_libfabric_DSO
lib_LTLIBRARIES += $(comp_inst)
else
noinst_LTLIBRARIES += $(comp_noinst)
endif
# We have to override the CFLAGS that come in from above to remove
# -pedantic (see configure.m4 for details).
CFLAGS = $(opal_common_libfabric_embedded_CFLAGS)
cppflags = \
$(opal_common_libfabric_embedded_CPPFLAGS) \
-D_GNU_SOURCE \
-DSYSCONFDIR=\"$(sysconfdir)\" \
-DRDMADIR=\"/tmp\" \
-DEXTDIR=\"$(pkglibdir)\" \
-D__LIBUSNIC__
libadd = $(opal_common_libfabric_embedded_LIBADD)
libfabric_core_headers = \
libfabric/include/fi.h \
libfabric/include/fi_enosys.h \
libfabric/include/fi_indexer.h \
libfabric/include/fi_list.h \
libfabric/include/fi_rbuf.h \
libfabric/include/rdma/fabric.h \
libfabric/include/rdma/fi_atomic.h \
libfabric/include/rdma/fi_cm.h \
libfabric/include/rdma/fi_domain.h \
libfabric/include/rdma/fi_endpoint.h \
libfabric/include/rdma/fi_eq.h \
libfabric/include/rdma/fi_errno.h \
libfabric/include/rdma/fi_prov.h \
libfabric/include/rdma/fi_rma.h \
libfabric/include/rdma/fi_tagged.h \
libfabric/include/rdma/fi_trigger.h
libfabric_core_sources = \
libfabric/src/fabric.c \
libfabric/src/fi_tostr.c \
libfabric/src/common.c \
libfabric/src/enosys.c
libfabric_usnic_headers = \
libfabric/prov/usnic/src/fi_usnic.h \
libfabric/prov/usnic/src/usdf.h \
libfabric/prov/usnic/src/usdf_av.h \
libfabric/prov/usnic/src/usdf_cm.h \
libfabric/prov/usnic/src/usdf_dgram.h \
libfabric/prov/usnic/src/usdf_endpoint.h \
libfabric/prov/usnic/src/usdf_msg.h \
libfabric/prov/usnic/src/usdf_progress.h \
libfabric/prov/usnic/src/usdf_timer.h \
libfabric/prov/usnic/src/usnic_direct/cq_desc.h \
libfabric/prov/usnic/src/usnic_direct/cq_enet_desc.h \
libfabric/prov/usnic/src/usnic_direct/kcompat.h \
libfabric/prov/usnic/src/usnic_direct/kcompat_priv.h \
libfabric/prov/usnic/src/usnic_direct/libnl1_utils.h \
libfabric/prov/usnic/src/usnic_direct/libnl3_utils.h \
libfabric/prov/usnic/src/usnic_direct/libnl_utils.h \
libfabric/prov/usnic/src/usnic_direct/linux/delay.h \
libfabric/prov/usnic/src/usnic_direct/linux/slab.h \
libfabric/prov/usnic/src/usnic_direct/linux_types.h \
libfabric/prov/usnic/src/usnic_direct/rq_enet_desc.h \
libfabric/prov/usnic/src/usnic_direct/usd.h \
libfabric/prov/usnic/src/usnic_direct/usd_caps.h \
libfabric/prov/usnic/src/usnic_direct/usd_dest.h \
libfabric/prov/usnic/src/usnic_direct/usd_device.h \
libfabric/prov/usnic/src/usnic_direct/usd_ib_cmd.h \
libfabric/prov/usnic/src/usnic_direct/usd_ib_sysfs.h \
libfabric/prov/usnic/src/usnic_direct/usd_post.h \
libfabric/prov/usnic/src/usnic_direct/usd_queue.h \
libfabric/prov/usnic/src/usnic_direct/usd_socket.h \
libfabric/prov/usnic/src/usnic_direct/usd_time.h \
libfabric/prov/usnic/src/usnic_direct/usd_util.h \
libfabric/prov/usnic/src/usnic_direct/usd_vnic.h \
libfabric/prov/usnic/src/usnic_direct/usnic_abi.h \
libfabric/prov/usnic/src/usnic_direct/usnic_direct.h \
libfabric/prov/usnic/src/usnic_direct/usnic_ib_abi.h \
libfabric/prov/usnic/src/usnic_direct/usnic_ip_utils.h \
libfabric/prov/usnic/src/usnic_direct/usnic_user_utils.h \
libfabric/prov/usnic/src/usnic_direct/vnic_cq.h \
libfabric/prov/usnic/src/usnic_direct/vnic_devcmd.h \
libfabric/prov/usnic/src/usnic_direct/vnic_dev.h \
libfabric/prov/usnic/src/usnic_direct/vnic_enet.h \
libfabric/prov/usnic/src/usnic_direct/vnic_resource.h \
libfabric/prov/usnic/src/usnic_direct/vnic_rq.h \
libfabric/prov/usnic/src/usnic_direct/vnic_stats.h \
libfabric/prov/usnic/src/usnic_direct/vnic_wq.h \
libfabric/prov/usnic/src/usnic_direct/wq_enet_desc.h
libfabric_usnic_sources = \
libfabric/prov/usnic/src/usdf_av.c \
libfabric/prov/usnic/src/usdf_cm.c \
libfabric/prov/usnic/src/usdf_cq.c \
libfabric/prov/usnic/src/usdf_dgram.c \
libfabric/prov/usnic/src/usdf_domain.c \
libfabric/prov/usnic/src/usdf_endpoint.c \
libfabric/prov/usnic/src/usdf_ep_dgram.c \
libfabric/prov/usnic/src/usdf_ep_msg.c \
libfabric/prov/usnic/src/usdf_eq.c \
libfabric/prov/usnic/src/usdf_fabric.c \
libfabric/prov/usnic/src/usdf_mem.c \
libfabric/prov/usnic/src/usdf_msg.c \
libfabric/prov/usnic/src/usdf_pep.c \
libfabric/prov/usnic/src/usdf_progress.c \
libfabric/prov/usnic/src/usdf_timer.c \
libfabric/prov/usnic/src/usnic_direct/libnl_utils_common.c \
libfabric/prov/usnic/src/usnic_direct/usd_caps.c \
libfabric/prov/usnic/src/usnic_direct/usd_dest.c \
libfabric/prov/usnic/src/usnic_direct/usd_device.c \
libfabric/prov/usnic/src/usnic_direct/usd_event.c \
libfabric/prov/usnic/src/usnic_direct/usd_ib_cmd.c \
libfabric/prov/usnic/src/usnic_direct/usd_ib_sysfs.c \
libfabric/prov/usnic/src/usnic_direct/usd_mem.c \
libfabric/prov/usnic/src/usnic_direct/usd_poll.c \
libfabric/prov/usnic/src/usnic_direct/usd_post.c \
libfabric/prov/usnic/src/usnic_direct/usd_post_raw_normal.c \
libfabric/prov/usnic/src/usnic_direct/usd_post_udp_normal.c \
libfabric/prov/usnic/src/usnic_direct/usd_post_udp_pio.c \
libfabric/prov/usnic/src/usnic_direct/usd_queues.c \
libfabric/prov/usnic/src/usnic_direct/usd_socket.c \
libfabric/prov/usnic/src/usnic_direct/usd_vnic.c \
libfabric/prov/usnic/src/usnic_direct/usnic_ip_utils.c \
libfabric/prov/usnic/src/usnic_direct/vnic_cq.c \
libfabric/prov/usnic/src/usnic_direct/vnic_dev.c \
libfabric/prov/usnic/src/usnic_direct/vnic_rq.c \
libfabric/prov/usnic/src/usnic_direct/vnic_wq.c
sources = \
$(libfabric_core_sources) \
$(libfabric_usnic_sources)
headers = \
$(libfabric_core_headers) \
$(libfabric_usnic_headers)
lib@OPAL_LIB_PREFIX@mca_common_libfabric_la_SOURCES = $(headers) $(sources)
lib@OPAL_LIB_PREFIX@mca_common_libfabric_la_CPPFLAGS = $(cppflags)
lib@OPAL_LIB_PREFIX@mca_common_libfabric_la_LDFLAGS = -version-info 0:0:0
lib@OPAL_LIB_PREFIX@mca_common_libfabric_la_LIBADD = $(libadd)
lib@OPAL_LIB_PREFIX@mca_common_libfabric_noinst_la_SOURCES = $(headers) $(sources)
lib@OPAL_LIB_PREFIX@mca_common_libfabric_noinst_la_CPPFLAGS = $(cppflags)
lib@OPAL_LIB_PREFIX@mca_common_libfabric_noinst_la_LIBADD = $(libadd)
# Conditionally install the header files
if WANT_INSTALL_HEADERS
opaldir = $(opalincludedir)/$(subdir)
opal_HEADERS = $(headers)
endif
# These two rules will sym link the "noinst" libtool library filename
# to the installable libtool library filename in the case where we are
# compiling this component statically (case 2, described above).

# See Makefile.ompi-rules for an explanation of the "V" macros, below.
# Note that a multi-character make variable must be referenced as
# $(NAME); a bare $AM_DEFAULT_VERBOSITY would be read by make as
# $(A) followed by the literal text "M_DEFAULT_VERBOSITY".
V=0
OMPI_V_LN_SCOMP = $(ompi__v_LN_SCOMP_$V)
ompi__v_LN_SCOMP_ = $(ompi__v_LN_SCOMP_$(AM_DEFAULT_VERBOSITY))
ompi__v_LN_SCOMP_0 = @echo " LN_S " `basename $(comp_inst)`;

# When nothing is listed in lib_LTLIBRARIES (i.e. only the noinst
# library is built), make the installable name a symlink to the noinst
# library.
all-local:
	$(OMPI_V_LN_SCOMP) if test -z "$(lib_LTLIBRARIES)"; then \
	  rm -f "$(comp_inst)"; \
	  $(LN_S) "$(comp_noinst)" "$(comp_inst)"; \
	fi

# Remove the symlink created by all-local.
clean-local:
	if test -z "$(lib_LTLIBRARIES)"; then \
	  rm -f "$(comp_inst)"; \
	fi
endif OPAL_COMMON_LIBFABRIC_BUILD_EMBEDDED

255
opal/mca/common/libfabric/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,255 @@
# -*- shell-script -*-
#
# Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_opal_common_libfabric_CONFIG([action-if-can-compile],
#                                  [action-if-cant-compile])
#
# Decide whether libfabric support can be built, using either the
# embedded copy or an external installation, and run the matching
# action.
#
# Will also set $opal_common_libfabric_happy to 0 or 1 (0 = no
# libfabric support, 1 = libfabric support).  Will also set
# $opal_common_libfabric_build_embedded to 0 or 1 (1 = building
# embedded libfabric, 0 = not building embedded libfabric).
#
# If libfabric was explicitly requested (--with-libfabric[=DIR]) but
# cannot be built, configure aborts.
# ------------------------------------------------
AC_DEFUN([MCA_opal_common_libfabric_CONFIG],[
    AC_CONFIG_FILES([opal/mca/common/libfabric/Makefile])

    # Initially state that we're unhappy
    opal_common_libfabric_happy=0
    opal_common_libfabric_build_embedded=0

    # Setup the --with switches to allow users to specify where
    # libfabric stuff lives.
    AC_REQUIRE([_OPAL_COMMON_LIBFABRIC_WITH_FLAGS])

    AS_IF([test "$opal_want_libfabric" = "no"],
          [$2],
          [opal_check_libfabric_save_CPPFLAGS=$CPPFLAGS
           opal_check_libfabric_save_LDFLAGS=$LDFLAGS
           opal_check_libfabric_save_LIBS=$LIBS

           # Use the internal or external libfabric?
           AS_IF([test -z "$opal_libfabric_dir" && \
                  test -z "$opal_libfabric_libdir"],
                 [_OPAL_COMMON_LIBFABRIC_SETUP_LIBFABRIC_EMBEDDED],
                 [_OPAL_COMMON_LIBFABRIC_SETUP_LIBFABRIC_EXTERNAL])

           CPPFLAGS=$opal_check_libfabric_save_CPPFLAGS
           LDFLAGS=$opal_check_libfabric_save_LDFLAGS
           LIBS=$opal_check_libfabric_save_LIBS

           # Abort only when libfabric was explicitly requested;
           # otherwise just run the not-found action.
           AS_IF([test $opal_common_libfabric_happy -eq 1],
                 [$1],
                 [AS_IF([test "$opal_want_libfabric" = "yes"],
                        [AC_MSG_WARN([Libfabric support requested (via --with-libfabric) but not found.])
                         AC_MSG_ERROR([Cannot continue])])
                  $2])
          ])

    # Automake conditionals have to be defined on every configure
    # run, so they are set up outside of the AS_IF above.
    _OPAL_COMMON_LIBFABRIC_SETUP_LIBFABRIC_EMBEDDED_CONDITIONALS
    AM_CONDITIONAL([OPAL_COMMON_LIBFABRIC_BUILD_EMBEDDED],
                   [test $opal_common_libfabric_build_embedded -eq 1])

    # This is for components that build with libfabric support
    AC_SUBST(opal_common_libfabric_CPPFLAGS)
    AC_SUBST(opal_common_libfabric_LIBADD)

    # This is for building the libfabric component itself
    opal_common_libfabric_embedded_CPPFLAGS=$opal_common_libfabric_CPPFLAGS
    AC_SUBST(opal_common_libfabric_embedded_CPPFLAGS)
    AC_SUBST(opal_common_libfabric_embedded_CFLAGS)
    AC_SUBST(opal_common_libfabric_embedded_LIBADD)
])
# --------------------------------------------------------
# _OPAL_COMMON_LIBFABRIC_WITH_FLAGS (internal)
# --------------------------------------------------------
# Register the --with-libfabric and --with-libfabric-libdir options
# and, if directories were given, sanity check them.
#
# On exit from this macro:
#
# 1. $opal_want_libfabric is one of:
#    "yes"       --with-libfabric or --with-libfabric=DIR was given
#    "no"        --without-libfabric was given
#    "optional"  neither --with-libfabric nor --without-libfabric
#                was given
#
# 2. $opal_libfabric_dir holds the DIR from --with-libfabric=DIR and
#    $opal_libfabric_libdir holds the DIR from
#    --with-libfabric-libdir=DIR.  Each one is empty when no
#    directory was given for it.
#
# --------------------------------------------------------
AC_DEFUN([_OPAL_COMMON_LIBFABRIC_WITH_FLAGS],[

    # Declare the command line options
    AC_ARG_WITH([libfabric],
                [AC_HELP_STRING([--with-libfabric(=DIR)],
                                [Build libfabric support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])])
    AC_ARG_WITH([libfabric-libdir],
                [AC_HELP_STRING([--with-libfabric-libdir=DIR],
                                [Search for libfabric libraries in DIR])])

    # Make sure that any directory given looks plausible
    OPAL_CHECK_WITHDIR([libfabric], [$with_libfabric],
                       [include/rdma/fabric.h])
    OPAL_CHECK_WITHDIR([libfabric-libdir], [$with_libfabric_libdir],
                       [libfabric.*])

    # Translate the raw --with values into the standardized shell
    # variables that libfabric lovin' components use.
    AS_CASE(["$with_libfabric"],
            [""], [opal_want_libfabric=optional],
            [no], [opal_want_libfabric=no],
            [opal_want_libfabric=yes])

    # Only a real directory value (not empty, "yes" or "no") is kept
    opal_libfabric_dir=
    AS_CASE(["$with_libfabric"],
            [""|yes|no], [],
            [opal_libfabric_dir=$with_libfabric])

    opal_libfabric_libdir=
    AS_CASE(["$with_libfabric_libdir"],
            [""|yes|no], [],
            [opal_libfabric_libdir=$with_libfabric_libdir])
])
# --------------------------------------------------------
# Internal helper macros to setup the embedded libfabric.
#
# The internal libfabric is *TEMPORARY* and only for convenience of
# development.  Ultimately, the embedded libfabric will disappear and
# you will need to have libfabric installed.
# --------------------------------------------------------

# Define the HAVE_LD_VERSION_SCRIPT and HAVE_DIRECT Automake
# conditionals as always-false.  This macro is meant to be invoked
# unconditionally, since Automake conditionals must be defined on
# every configure run.  The portable "false" utility is used rather
# than a hard-coded /bin/false path, which does not exist on every
# system.
# NOTE(review): these conditionals are presumably referenced by the
# embedded libfabric build files -- confirm.
AC_DEFUN([_OPAL_COMMON_LIBFABRIC_SETUP_LIBFABRIC_EMBEDDED_CONDITIONALS],[
    AM_CONDITIONAL([HAVE_LD_VERSION_SCRIPT], [false])
    AM_CONDITIONAL([HAVE_DIRECT], [false])
])
# Internal helper macro: configure the build of the embedded copy of
# libfabric.  Sets $opal_common_libfabric_happy to 1 or 0.  When the
# verbs header is found it also sets
# $opal_common_libfabric_build_embedded to 1, fills in
# $opal_common_libfabric_CPPFLAGS and
# $opal_common_libfabric_embedded_CFLAGS, and runs the usnic provider
# checks (which may still reset the happy flag to 0).
AC_DEFUN([_OPAL_COMMON_LIBFABRIC_SETUP_LIBFABRIC_EMBEDDED],[
    AC_MSG_NOTICE([Setting up for EMBEDDED libfabric])

    # Stand in for the relevant parts of the libfabric configure.ac
    # script by hard-wiring a lot of simplified answers, just for the
    # sake of embedding here.
    AC_DEFINE([INCLUDE_VALGRIND], 0, [no valgrind])
    AC_DEFINE([STREAM_CLOEXEC], 0, [no streamcloexec])
    AC_DEFINE([HAVE_ATOMICS], 0, [no atomics])
    AC_DEFINE([HAVE_SYMVER_SUPPORT], 1, [assembler has .symver support])

    # Start out happy; a missing verbs header makes us unhappy
    opal_common_libfabric_happy=1
    AC_CHECK_HEADER([infiniband/verbs.h], [], [opal_common_libfabric_happy=0])

    # Add flags for libfabric core
    AS_IF([test $opal_common_libfabric_happy -eq 1],
          [opal_common_libfabric_build_embedded=1
           opal_common_libfabric_CPPFLAGS="-I$OPAL_TOP_SRCDIR/opal/mca/common/libfabric/libfabric/include"

           # Copy CFLAGS over one flag at a time and leave out
           # -pedantic because a bunch of code in libfabric
           # complains about it.
           for flag in $CFLAGS; do
               AS_IF([test "x$flag" != "x-pedantic"],
                     [opal_common_libfabric_embedded_CFLAGS="$opal_common_libfabric_embedded_CFLAGS $flag"])
           done

           # Do stuff for specific providers
           _OPAL_COMMON_LIBFABRIC_EMBEDDED_PROVIDER_USNIC
          ])
])
# --------------------------------------------------------
# Internal helper macro to setup for an external libfabric
#
# Expects at least one of $opal_libfabric_dir and
# $opal_libfabric_libdir to be non-empty.  Whichever one is missing
# is derived from the other; configure aborts if that fails.  Sets
# $opal_common_libfabric_happy to 1 or 0 depending on whether the
# external installation is usable.
# --------------------------------------------------------
AC_DEFUN([_OPAL_COMMON_LIBFABRIC_SETUP_LIBFABRIC_EXTERNAL],[
    AC_MSG_NOTICE([Setting up for EXTERNAL libfabric])

    # If the top dir was specified but the libdir was not, look for
    # it.  Note that if the user needs a specific libdir (i.e., if our
    # heuristic ordering below is not sufficient), they need to
    # specify it.
    AS_IF([test -z "$opal_libfabric_libdir" && test -n "$opal_libfabric_dir"],
          [_OPAL_COMMON_LIBFABRIC_CHECK_LIBDIR(["$opal_libfabric_dir/lib"])])
    AS_IF([test -z "$opal_libfabric_libdir" && test -n "$opal_libfabric_dir"],
          [_OPAL_COMMON_LIBFABRIC_CHECK_LIBDIR(["$opal_libfabric_dir/lib64"])])
    AS_IF([test -z "$opal_libfabric_libdir" && test -n "$opal_libfabric_dir"],
          [_OPAL_COMMON_LIBFABRIC_CHECK_LIBDIR(["$opal_libfabric_dir/lib32"])])
    AS_IF([test -z "$opal_libfabric_libdir" && test -n "$opal_libfabric_dir"],
          [AC_MSG_WARN([Could not find libfabric in the usual locations under $opal_libfabric_dir])
           AC_MSG_ERROR([Cannot continue])
          ])

    # If the libdir was specified, but the top dir was not, look for
    # it.  Note that if the user needs a specific top dir (i.e., if
    # our heuristic below is not sufficient), they need to specify it.
    # The test must be on $opal_libfabric_dir so that a top dir given
    # by the user is never overwritten by the guess.
    AS_IF([test -z "$opal_libfabric_dir" && test -n "$opal_libfabric_libdir"],
          [_OPAL_COMMON_LIBFABRIC_CHECK_INCDIR([`dirname "$opal_libfabric_libdir"`])])
    AS_IF([test -z "$opal_libfabric_dir" && test -n "$opal_libfabric_libdir"],
          [AC_MSG_WARN([Could not find include/rdma/fabric.h in the usual location relative to $opal_libfabric_libdir])
           AC_MSG_ERROR([Cannot continue])
          ])

    # Now actually check to ensure that the external libfabric works
    OPAL_CHECK_PACKAGE([opal_common_libfabric],
                       [rdma/fabric.h],
                       [fabric],
                       [fi_getinfo],
                       [],
                       [$opal_libfabric_dir],
                       [$opal_libfabric_libdir],
                       [opal_common_libfabric_happy=1],
                       [opal_common_libfabric_happy=0])

    # NOTE(review): LIBADD carries only -lfabric with no -L path;
    # presumably the library search path comes from flags set by
    # OPAL_CHECK_PACKAGE -- confirm.
    opal_common_libfabric_LIBADD="-lfabric"
])
# --------------------------------------------------------
# Internal helper macro to look for the libfabric libdir
#
# $1: candidate directory.  If it exists and contains at least one
# entry matching libfabric.*, it is stored in $opal_libfabric_libdir;
# otherwise $opal_libfabric_libdir is left untouched.
# --------------------------------------------------------
AC_DEFUN([_OPAL_COMMON_LIBFABRIC_CHECK_LIBDIR],[
    AS_IF([test -d "$1" && \
           test "x`ls $1/libfabric.* 2> /dev/null`" != "x"],
          [opal_libfabric_libdir="$1"])
])
# --------------------------------------------------------
# Internal helper macro to look for the libfabric dir
#
# $1: candidate top-level directory.  If it exists and contains
# include/rdma/fabric.h, it is stored in $opal_libfabric_dir;
# otherwise $opal_libfabric_dir is left untouched.
# --------------------------------------------------------
AC_DEFUN([_OPAL_COMMON_LIBFABRIC_CHECK_INCDIR],[
    AS_IF([test -d "$1" && \
           test -f "$1/include/rdma/fabric.h"],
          [opal_libfabric_dir="$1"])
])
# --------------------------------------------------------
# Internal helper macro to look for the things the usnic provider
# needs
#
# Resets $opal_common_libfabric_happy to 0 if linux/netlink.h or
# nl_connect in libnl cannot be found.  The CPPFLAGS and LIBADD
# values are filled in regardless of the outcome of the checks.
# --------------------------------------------------------
AC_DEFUN([_OPAL_COMMON_LIBFABRIC_EMBEDDED_PROVIDER_USNIC],[
    # What consumers of the embedded library link against, and what
    # the embedded library itself links against
    opal_common_libfabric_LIBADD="\$(OPAL_TOP_BUILDDIR)/opal/mca/common/libfabric/lib${OPAL_LIB_PREFIX}mca_common_libfabric.la"
    opal_common_libfabric_embedded_LIBADD="-lnl"

    # Header search paths for the usnic provider sources
    opal_common_libfabric_CPPFLAGS="$opal_common_libfabric_CPPFLAGS -I$OPAL_TOP_SRCDIR/opal/mca/common/libfabric/libfabric/prov/usnic/src"
    opal_common_libfabric_CPPFLAGS="$opal_common_libfabric_CPPFLAGS -I$OPAL_TOP_SRCDIR/opal/mca/common/libfabric/libfabric/prov/usnic/src/usnic_direct"

    # Prerequisites: the netlink header and libnl
    AC_CHECK_HEADER([linux/netlink.h], [], [opal_common_libfabric_happy=0], [
#include <sys/types.h>
#include <net/if.h>
])
    AC_CHECK_LIB([nl], [nl_connect], [], [opal_common_libfabric_happy=0])
])

Просмотреть файл

@ -0,0 +1,5 @@
Sean Hefty <sean.hefty@intel.com>
Reese Faucette <rfaucett@cisco.com>
Jeff Squyres <jsquyres@cisco.com>
Jianxin Xiong <jianxin.xiong@intel.com>
Sayantan Sur <sayantan.sur@intel.com>

378
opal/mca/common/libfabric/libfabric/COPYING Обычный файл
Просмотреть файл

@ -0,0 +1,378 @@
This software is available to you under a choice of one of two
licenses. You may choose to be licensed under the terms of the the
BSD license or the GNU General Public License (GPL) Version
2, both included below.
Copyright (c) 2005 Intel Corporation. All rights reserved.
==================================================================
BSD license
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
==================================================================
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Library General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Library General
Public License instead of this License.

Просмотреть файл

@ -0,0 +1,377 @@
# Default preprocessor flags: the in-tree include directory plus the
# directory names that are compiled in as string macros.
AM_CPPFLAGS = \
-I$(srcdir)/include \
-D_GNU_SOURCE \
-DSYSCONFDIR=\"$(sysconfdir)\" \
-DRDMADIR=\"@rdmadir@\" \
-DEXTDIR=\"$(pkglibdir)\"
# The core library, and the list that dl-built provider modules are
# appended to further down.
lib_LTLIBRARIES = src/libfabric.la
pkglib_LTLIBRARIES = $(DL_PROVIDERS)
ACLOCAL_AMFLAGS = -I config
AM_CFLAGS = -g -Wall
# Pass libfabric.map to the linker as a version script only when the
# HAVE_LD_VERSION_SCRIPT conditional is true; otherwise the flag is empty.
if HAVE_LD_VERSION_SCRIPT
libfabric_version_script = -Wl,--version-script=$(srcdir)/libfabric.map
else !HAVE_LD_VERSION_SCRIPT
libfabric_version_script =
endif !HAVE_LD_VERSION_SCRIPT
# internal utility functions shared by in-tree providers:
common_srcs = \
src/common.c \
src/enosys.c
# ensure dl-built providers link back to libfabric
linkback = -lfabric -Lsrc/.libs/
# Sources that are always part of libfabric itself. Providers that are
# not built as dl modules append their own files to this list below.
src_libfabric_la_SOURCES = \
include/fi.h \
include/fi_enosys.h \
include/fi_indexer.h \
include/fi_list.h \
include/fi_rbuf.h \
src/fabric.c \
src/fi_tostr.c \
$(common_srcs)
# Sockets provider. With HAVE_SOCKETS_DL it is built as its own module
# (together with the shared common sources); otherwise its files are
# compiled straight into libfabric.
if HAVE_SOCKETS
_sockets_files = \
prov/sockets/src/sock.h \
prov/sockets/src/sock_av.c \
prov/sockets/src/sock_dgram.c \
prov/sockets/src/sock_dom.c \
prov/sockets/src/sock_eq.c \
prov/sockets/src/sock_cq.c \
prov/sockets/src/sock_cntr.c \
prov/sockets/src/sock_poll.c \
prov/sockets/src/sock_rdm.c \
prov/sockets/src/sock_fabric.c \
prov/sockets/src/sock_ep.c \
prov/sockets/src/sock_ctx.c \
prov/sockets/src/sock_util.c \
prov/sockets/src/sock_util.h \
prov/sockets/src/indexer.c \
prov/sockets/src/list.c \
prov/sockets/src/list.h
if HAVE_SOCKETS_DL
pkglib_LTLIBRARIES += libsockets-fi.la
libsockets_fi_la_SOURCES = $(_sockets_files) $(common_srcs)
libsockets_fi_la_LDFLAGS = -module -avoid-version -shared -export-dynamic $(linkback)
else !HAVE_SOCKETS_DL
src_libfabric_la_SOURCES += $(_sockets_files)
endif !HAVE_SOCKETS_DL
endif HAVE_SOCKETS
# Verbs provider. The dl-module variant additionally links against
# libibverbs and librdmacm; the built-in variant only adds its source
# file to libfabric.
if HAVE_VERBS
_verbs_files = prov/verbs/src/fi_verbs.c
if HAVE_VERBS_DL
pkglib_LTLIBRARIES += libverbs-fi.la
libverbs_fi_la_SOURCES = $(_verbs_files) $(common_srcs)
libverbs_fi_la_LDFLAGS = -module -avoid-version -shared -export-dynamic -libverbs -lrdmacm $(linkback)
else !HAVE_VERBS_DL
src_libfabric_la_SOURCES += $(_verbs_files)
endif !HAVE_VERBS_DL
endif HAVE_VERBS
# usNIC provider. The file lists and extra preprocessor flags are
# collected here; how they are built (dl module or built-in) is decided
# in the HAVE_USNIC_DL conditional that follows.
if HAVE_USNIC
# Files under prov/usnic/src/usnic_direct.
libusnic_direct_sources = \
prov/usnic/src/usnic_direct/cq_desc.h \
prov/usnic/src/usnic_direct/cq_enet_desc.h \
prov/usnic/src/usnic_direct/kcompat.h \
prov/usnic/src/usnic_direct/kcompat_priv.h \
prov/usnic/src/usnic_direct/libnl1_utils.h \
prov/usnic/src/usnic_direct/libnl3_utils.h \
prov/usnic/src/usnic_direct/libnl_utils_common.c \
prov/usnic/src/usnic_direct/libnl_utils.h \
prov/usnic/src/usnic_direct/linux/delay.h \
prov/usnic/src/usnic_direct/linux/slab.h \
prov/usnic/src/usnic_direct/linux_types.h \
prov/usnic/src/usnic_direct/rq_enet_desc.h \
prov/usnic/src/usnic_direct/usd_caps.c \
prov/usnic/src/usnic_direct/usd_caps.h \
prov/usnic/src/usnic_direct/usd_dest.c \
prov/usnic/src/usnic_direct/usd_dest.h \
prov/usnic/src/usnic_direct/usd_device.c \
prov/usnic/src/usnic_direct/usd_device.h \
prov/usnic/src/usnic_direct/usd_event.c \
prov/usnic/src/usnic_direct/usd.h \
prov/usnic/src/usnic_direct/usd_ib_cmd.c \
prov/usnic/src/usnic_direct/usd_ib_cmd.h \
prov/usnic/src/usnic_direct/usd_ib_sysfs.c \
prov/usnic/src/usnic_direct/usd_ib_sysfs.h \
prov/usnic/src/usnic_direct/usd_mem.c \
prov/usnic/src/usnic_direct/usd_poll.c \
prov/usnic/src/usnic_direct/usd_post.c \
prov/usnic/src/usnic_direct/usd_post.h \
prov/usnic/src/usnic_direct/usd_post_raw_normal.c \
prov/usnic/src/usnic_direct/usd_post_udp_normal.c \
prov/usnic/src/usnic_direct/usd_post_udp_pio.c \
prov/usnic/src/usnic_direct/usd_queue.h \
prov/usnic/src/usnic_direct/usd_queues.c \
prov/usnic/src/usnic_direct/usd_socket.c \
prov/usnic/src/usnic_direct/usd_socket.h \
prov/usnic/src/usnic_direct/usd_time.h \
prov/usnic/src/usnic_direct/usd_util.h \
prov/usnic/src/usnic_direct/usd_vnic.c \
prov/usnic/src/usnic_direct/usd_vnic.h \
prov/usnic/src/usnic_direct/usnic_abi.h \
prov/usnic/src/usnic_direct/usnic_direct.h \
prov/usnic/src/usnic_direct/usnic_ib_abi.h \
prov/usnic/src/usnic_direct/usnic_ip_utils.c \
prov/usnic/src/usnic_direct/usnic_ip_utils.h \
prov/usnic/src/usnic_direct/usnic_user_utils.h \
prov/usnic/src/usnic_direct/vnic_cq.c \
prov/usnic/src/usnic_direct/vnic_cq.h \
prov/usnic/src/usnic_direct/vnic_dev.c \
prov/usnic/src/usnic_direct/vnic_devcmd.h \
prov/usnic/src/usnic_direct/vnic_dev.h \
prov/usnic/src/usnic_direct/vnic_enet.h \
prov/usnic/src/usnic_direct/vnic_resource.h \
prov/usnic/src/usnic_direct/vnic_rq.c \
prov/usnic/src/usnic_direct/vnic_rq.h \
prov/usnic/src/usnic_direct/vnic_stats.h \
prov/usnic/src/usnic_direct/vnic_wq.c \
prov/usnic/src/usnic_direct/vnic_wq.h \
prov/usnic/src/usnic_direct/wq_enet_desc.h
# Complete usnic provider file list: the usnic_direct files above plus
# the provider's own sources.
_usnic_files = \
$(libusnic_direct_sources) \
prov/usnic/src/fi_usnic.h \
prov/usnic/src/usdf.h \
prov/usnic/src/usdf_av.c \
prov/usnic/src/usdf_av.h \
prov/usnic/src/usdf_cm.c \
prov/usnic/src/usdf_cm.h \
prov/usnic/src/usdf_cq.c \
prov/usnic/src/usdf_dgram.c \
prov/usnic/src/usdf_dgram.h \
prov/usnic/src/usdf_domain.c \
prov/usnic/src/usdf_endpoint.c \
prov/usnic/src/usdf_endpoint.h \
prov/usnic/src/usdf_ep_dgram.c \
prov/usnic/src/usdf_ep_msg.c \
prov/usnic/src/usdf_eq.c \
prov/usnic/src/usdf_fabric.c \
prov/usnic/src/usdf_mem.c \
prov/usnic/src/usdf_msg.c \
prov/usnic/src/usdf_msg.h \
prov/usnic/src/usdf_pep.c \
prov/usnic/src/usdf_progress.c \
prov/usnic/src/usdf_progress.h \
prov/usnic/src/usdf_timer.c \
prov/usnic/src/usdf_timer.h
# Extra preprocessor flags needed when compiling the usnic files.
_usnic_cppflags = \
-D__LIBUSNIC__ \
-I$(top_srcdir)/prov/usnic/src/usnic_direct
if HAVE_USNIC_DL
# Build the usnic provider as its own dl module.
pkglib_LTLIBRARIES += libusnic-fi.la
libusnic_fi_la_CPPFLAGS = $(AM_CPPFLAGS) $(_usnic_cppflags)
libusnic_fi_la_SOURCES = $(_usnic_files) $(common_srcs)
libusnic_fi_la_LDFLAGS = -module -avoid-version -shared -export-dynamic
# Automake has no per-target "_LIBS" variable, so an assignment to
# libusnic_fi_la_LIBS never reaches the link command. _LIBADD is the
# per-library variable that puts $(linkback) on the module's link line,
# matching what the other dl-built providers get.
libusnic_fi_la_LIBADD = $(linkback)
else !HAVE_USNIC_DL
# Build the usnic provider directly into libfabric.
AM_CPPFLAGS += $(_usnic_cppflags)
src_libfabric_la_SOURCES += $(_usnic_files)
endif !HAVE_USNIC_DL
endif HAVE_USNIC
# PSM provider. With HAVE_PSM_DL it is built as its own module (together
# with the shared common sources); otherwise its files are compiled
# straight into libfabric.
if HAVE_PSM
_psm_files = \
prov/psm/src/psmx.h \
prov/psm/src/psmx_init.c \
prov/psm/src/psmx_domain.c \
prov/psm/src/psmx_cq.c \
prov/psm/src/psmx_cntr.c \
prov/psm/src/psmx_av.c \
prov/psm/src/psmx_ep.c \
prov/psm/src/psmx_cm.c \
prov/psm/src/psmx_tagged.c \
prov/psm/src/psmx_msg.c \
prov/psm/src/psmx_msg2.c \
prov/psm/src/psmx_rma.c \
prov/psm/src/psmx_atomic.c \
prov/psm/src/psmx_am.c \
prov/psm/src/psmx_mr.c \
prov/psm/src/psmx_wait.c \
prov/psm/src/psmx_poll.c \
prov/psm/src/psmx_util.c
if HAVE_PSM_DL
pkglib_LTLIBRARIES += libpsmx-fi.la
libpsmx_fi_la_SOURCES = $(_psm_files) $(common_srcs)
libpsmx_fi_la_LDFLAGS = -module -avoid-version -shared -export-dynamic $(linkback)
else !HAVE_PSM_DL
src_libfabric_la_SOURCES += $(_psm_files)
endif !HAVE_PSM_DL
endif HAVE_PSM
# Link flags for libfabric itself: the libtool version info plus the
# (possibly empty) version-script flag. The map file is listed as a
# dependency so that editing it triggers a relink.
src_libfabric_la_LDFLAGS = -version-info 1 -export-dynamic \
$(libfabric_version_script)
src_libfabric_la_DEPENDENCIES = $(srcdir)/libfabric.map
# Public headers, installed into $(includedir)/rdma.
rdmaincludedir = $(includedir)/rdma
rdmainclude_HEADERS = \
$(top_srcdir)/include/rdma/fabric.h \
$(top_srcdir)/include/rdma/fi_atomic.h \
$(top_srcdir)/include/rdma/fi_cm.h \
$(top_srcdir)/include/rdma/fi_domain.h \
$(top_srcdir)/include/rdma/fi_eq.h \
$(top_srcdir)/include/rdma/fi_prov.h \
$(top_srcdir)/include/rdma/fi_rma.h \
$(top_srcdir)/include/rdma/fi_endpoint.h \
$(top_srcdir)/include/rdma/fi_errno.h \
$(top_srcdir)/include/rdma/fi_tagged.h \
$(top_srcdir)/include/rdma/fi_trigger.h
# With HAVE_DIRECT, the fi_direct*.h headers of the provider named by
# $(PROVIDER_DIRECT) are installed as well, but (nodist_) are not put
# into the distribution tarball.
if HAVE_DIRECT
nodist_rdmainclude_HEADERS = \
$(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/fi_direct.h \
$(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/fi_direct_domain.h \
$(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/fi_direct_endpoint.h \
$(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/fi_direct_tagged.h \
$(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/fi_direct_rma.h \
$(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/fi_direct_atomic_def.h \
$(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/fi_direct_atomic.h \
$(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/fi_direct_cm.h \
$(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/fi_direct_eq.h \
$(top_srcdir)/prov/$(PROVIDER_DIRECT)/include/rdma/fi_direct_trigger.h
endif HAVE_DIRECT
# Manual pages to install (sections 3 and 7). The same list is reused in
# EXTRA_DIST so the pages are shipped in the distribution.
man_MANS = \
man/fabric.7 \
man/fi_accept.3 \
man/fi_alias.3 \
man/fi_atomic.3 \
man/fi_atomicv.3 \
man/fi_atomicto.3 \
man/fi_atomicmsg.3 \
man/fi_atomic_valid.3 \
man/fi_av.3 \
man/fi_av_bind.3 \
man/fi_av_insert.3 \
man/fi_av_insertsvc.3 \
man/fi_av_lookup.3 \
man/fi_av_open.3 \
man/fi_av_remove.3 \
man/fi_av_straddr.3 \
man/fi_cancel.3 \
man/fi_close.3 \
man/fi_cm.3 \
man/fi_compare_atomic.3 \
man/fi_compare_atomicv.3 \
man/fi_compare_atomicto.3 \
man/fi_compare_atomicmsg.3 \
man/fi_compare_atomic_valid.3 \
man/fi_connect.3 \
man/fi_cntr.3 \
man/fi_cntr_open.3 \
man/fi_cntr_read.3 \
man/fi_cntr_add.3 \
man/fi_cntr_set.3 \
man/fi_cntr_wait.3 \
man/fi_cq.3 \
man/fi_cq_open.3 \
man/fi_cq_read.3 \
man/fi_cq_readfrom.3 \
man/fi_cq_readerr.3 \
man/fi_cq_write.3 \
man/fi_cq_sread.3 \
man/fi_cq_sreadfrom.3 \
man/fi_cq_strerror.3 \
man/fi_direct.7 \
man/fi_domain.3 \
man/fi_domain_query.3 \
man/fi_domain_bind.3 \
man/fi_dupinfo.3 \
man/fi_ep_bind.3 \
man/fi_ep_sync.3 \
man/fi_eq.3 \
man/fi_eq_open.3 \
man/fi_eq_read.3 \
man/fi_eq_readerr.3 \
man/fi_eq_write.3 \
man/fi_eq_sread.3 \
man/fi_eq_strerror.3 \
man/fi_enable.3 \
man/fi_endpoint.3 \
man/fi_fabric.3 \
man/fi_fetch_atomic.3 \
man/fi_fetch_atomicv.3 \
man/fi_fetch_atomicto.3 \
man/fi_fetch_atomicmsg.3 \
man/fi_fetch_atomic_valid.3 \
man/fi_freeinfo.3 \
man/fi_getinfo.3 \
man/fi_getname.3 \
man/fi_getopt.3 \
man/fi_getpeer.3 \
man/fi_inject.3 \
man/fi_injectto.3 \
man/fi_join.3 \
man/fi_leave.3 \
man/fi_listen.3 \
man/fi_mr.3 \
man/fi_mr_reg.3 \
man/fi_mr_regv.3 \
man/fi_mr_regattr.3 \
man/fi_mr_desc.3 \
man/fi_mr_key.3 \
man/fi_mr_bind.3 \
man/fi_msg.3 \
man/fi_open.3 \
man/fi_pendpoint.3 \
man/fi_poll.3 \
man/fi_poll_add.3 \
man/fi_poll_del.3 \
man/fi_poll_open.3 \
man/fi_recv.3 \
man/fi_recvv.3 \
man/fi_recvfrom.3 \
man/fi_recvmsg.3 \
man/fi_reject.3 \
man/fi_rma.3 \
man/fi_rx_addr.3 \
man/fi_send.3 \
man/fi_senddata.3 \
man/fi_senddatato.3 \
man/fi_sendv.3 \
man/fi_sendto.3 \
man/fi_sendmsg.3 \
man/fi_setopt.3 \
man/fi_shutdown.3 \
man/fi_tagged.3 \
man/fi_tinject.3 \
man/fi_tinjectto.3 \
man/fi_tostr.3 \
man/fi_trecv.3 \
man/fi_trecvv.3 \
man/fi_trecvfrom.3 \
man/fi_trecvmsg.3 \
man/fi_trigger.3 \
man/fi_tsearch.3 \
man/fi_tsend.3 \
man/fi_tsenddata.3 \
man/fi_tsenddatato.3 \
man/fi_tsendv.3 \
man/fi_tsendto.3 \
man/fi_tsendmsg.3 \
man/fi_version.3 \
man/fi_wait.3 \
man/fi_wait_open.3
# Extra files shipped in the distribution tarball.
EXTRA_DIST = libfabric.map libfabric.spec.in config/distscript.pl $(man_MANS)
# At "make dist" time, copy the generated spec file into the distribution
# directory and then run distscript.pl on that directory with the
# package version.
dist-hook: libfabric.spec
cp libfabric.spec $(distdir)
"$(top_srcdir)/config/distscript.pl" "$(distdir)" "$(PACKAGE_VERSION)"

13
opal/mca/common/libfabric/libfabric/README Обычный файл
Просмотреть файл

@ -0,0 +1,13 @@
This README is for the userspace RDMA fabric library.
Version Libfabric v0.0.2
Released on 2014-11-11
Building
========
To build and install the library from this directory, run:
./autogen.sh && ./configure && make && make install
Typically, the autogen and configure steps only need to be done the first
time, unless configure.ac or Makefile.am changes.

Просмотреть файл

@ -0,0 +1,121 @@
/* config.h.in. Generated from configure.ac by autoheader. */
/* Set to 1 to use c11 atomic functions */
#undef HAVE_ATOMICS
/* Define to 1 if you have the declaration of `O_CLOEXEC', and to 0 if you
don't. */
#undef HAVE_DECL_O_CLOEXEC
/* Define to 1 if you have the declaration of `SOCK_CLOEXEC', and to 0 if you
don't. */
#undef HAVE_DECL_SOCK_CLOEXEC
/* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* Define to 1 if you have the <fcntl.h> header file. */
#undef HAVE_FCNTL_H
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if you have the `dl' library (-ldl). */
#undef HAVE_LIBDL
/* Define to 1 if you have the `ibverbs' library (-libverbs). */
#undef HAVE_LIBIBVERBS
/* Define to 1 if you have the `nl' library (-lnl). */
#undef HAVE_LIBNL
/* Define to 1 if you have the `psm_infinipath' library (-lpsm_infinipath). */
#undef HAVE_LIBPSM_INFINIPATH
/* Define to 1 if you have the `pthread' library (-lpthread). */
#undef HAVE_LIBPTHREAD
/* Define to 1 if you have the `rdmacm' library (-lrdmacm). */
#undef HAVE_LIBRDMACM
/* Define to 1 if you have the `rt' library (-lrt). */
#undef HAVE_LIBRT
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* assembler has .symver support */
#undef HAVE_SYMVER_SUPPORT
/* Define to 1 if you have the <sys/socket.h> header file. */
#undef HAVE_SYS_SOCKET_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define to 1 to enable valgrind annotations */
#undef INCLUDE_VALGRIND
/* Define to the sub-directory in which libtool stores uninstalled libraries.
*/
#undef LT_OBJDIR
/* Defined to 0 if not provided */
#undef O_CLOEXEC
/* Name of package */
#undef PACKAGE
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */
#undef PACKAGE_NAME
/* Define to the full name and version of this package. */
#undef PACKAGE_STRING
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the home page for this package. */
#undef PACKAGE_URL
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* The size of `long', as computed by sizeof. */
#undef SIZEOF_LONG
/* Defined to 0 if not provided */
#undef SOCK_CLOEXEC
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* fopen() modifier for setting close on exec flag */
#undef STREAM_CLOEXEC
/* Version number of package */
#undef VERSION
/* Define to empty if `const' does not conform to ANSI C. */
#undef const

270
opal/mca/common/libfabric/libfabric/config/ar-lib Исполняемый файл
Просмотреть файл

@ -0,0 +1,270 @@
#! /bin/sh
# Wrapper for Microsoft lib.exe
me=ar-lib
scriptversion=2012-03-01.08; # UTC
# Copyright (C) 2010-2013 Free Software Foundation, Inc.
# Written by Peter Rosin <peda@lysator.liu.se>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# This file is maintained in Automake, please report
# bugs to <bug-automake@gnu.org> or send patches to
# <automake-patches@gnu.org>.
# func_error message
# Print MESSAGE on standard error, prefixed with the script name held in
# $me, and terminate the script with exit status 1.
func_error ()
{
  echo "$me: $1" >&2
  exit 1
}
# Conversion flavour (mingw, cygwin or wine); stays empty until the first
# absolute file name has to be converted.
file_conv=

# func_file_conv build_file
# Convert a $build file name to $host form and leave the result in $file.
# Only absolute, non-UNC names are converted; everything else is stored
# in $file unchanged.  Currently only supports Windows hosts.
func_file_conv ()
{
  file=$1

  # Anything that is not an absolute, non-UNC name is passed through.
  case $file in
    / | /[!/]*) ;;
    *) return 0 ;;
  esac

  # Lazily determine how to convert absolute names, once per run.
  if test -z "$file_conv"; then
    case `uname -s` in
      MINGW*)  file_conv=mingw ;;
      CYGWIN*) file_conv=cygwin ;;
      *)       file_conv=wine ;;
    esac
  fi

  case $file_conv in
    mingw)
      file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
      ;;
    cygwin)
      file=`cygpath -m "$file" || echo "$file"`
      ;;
    wine)
      file=`winepath -w "$file" || echo "$file"`
      ;;
  esac
}
# func_at_file at_file operation archive
# Run OPERATION on ARCHIVE once for every member listed in AT_FILE.
# The member names are deliberately NOT passed through func_file_conv:
# the user would need to supply preconverted file names to binutils ar
# as well, at least for MinGW, so the @FILE content is used as written.
func_at_file ()
{
  at_file_contents=`cat "$1"`
  operation=$2
  archive=$3

  # Re-parse the file content as shell words so that quoted member names
  # stay in one piece; the leading "x" guards against an empty list.
  eval "set x $at_file_contents"
  shift

  for member in "$@"
  do
    $AR -NOLOGO $operation:"$member" "$archive" || exit $?
  done
}
# Handle the stand-alone invocations first: no argument at all is an
# error, while --help and --version print their text and exit.  Any other
# first argument falls through to the regular processing below.
case $1 in
'')
func_error "no command. Try '$0 --help' for more information."
;;
-h | --h*)
cat <<EOF
Usage: $me [--help] [--version] PROGRAM ACTION ARCHIVE [MEMBER...]
Members may be specified in a file named with @FILE.
EOF
exit $?
;;
-v | --v*)
echo "$me, version $scriptversion"
exit $?
;;
esac
# At least PROGRAM, ACTION and ARCHIVE are required.
if test $# -lt 3; then
func_error "you must specify a program, an action and an archive"
fi
# The first argument is the archiver program itself.
AR=$1
shift
# Arguments matching the option patterns below are appended to $AR; the
# first argument that matches none of them is taken as the action string
# and ends the loop.  Every pass re-checks that an action and an archive
# can still follow.
while :
do
if test $# -lt 2; then
func_error "you must specify a program, an action and an archive"
fi
case $1 in
-lib | -LIB \
| -ltcg | -LTCG \
| -machine* | -MACHINE* \
| -subsystem* | -SUBSYSTEM* \
| -verbose | -VERBOSE \
| -wx* | -WX* )
AR="$AR $1"
shift
;;
*)
action=$1
shift
break
;;
esac
done
# The next argument is the archive: $orig_archive keeps the name as
# given, $archive holds the result of func_file_conv.
orig_archive=$1
shift
func_file_conv "$orig_archive"
archive=$file
# strip leading dash in $action
action=${action#-}
# One flag per recognized action letter; all start out empty.
delete=
extract=
list=
quick=
replace=
index=
create=
# Consume the action string one character at a time, setting the flag
# that belongs to each letter and rejecting unknown letters.
while test -n "$action"
do
case $action in
d*) delete=yes ;;
x*) extract=yes ;;
t*) list=yes ;;
q*) quick=yes ;;
r*) replace=yes ;;
s*) index=yes ;;
S*) ;; # the index is always updated implicitly
c*) create=yes ;;
u*) ;; # TODO: don't ignore the update modifier
v*) ;; # TODO: don't ignore the verbose modifier
*)
func_error "unknown action specified"
;;
esac
action=${action#?}
done
# Accept exactly one of d/x/t/q/r (with or without "s"), or "s" alone;
# two or more of them, or none at all, is an error.
case $delete$extract$list$quick$replace,$index in
yes,* | ,yes)
;;
yesyes*)
func_error "more than one action specified"
;;
*)
func_error "no action specified"
;;
esac
# Carry out the requested action.  The remaining positional parameters
# are the member names (or @FILE references).
if test -n "$delete"; then
  if test ! -f "$orig_archive"; then
    func_error "archive not found"
  fi
  # Use the loop variable rather than $1: nothing shifts the positional
  # parameters in this loop, so $1 would name the first member on every
  # pass and later members would never be removed.
  for member
  do
    case $member in
      @*)
        func_at_file "${member#@}" -REMOVE "$archive"
        ;;
      *)
        func_file_conv "$member"
        $AR -NOLOGO -REMOVE:"$file" "$archive" || exit $?
        ;;
    esac
  done

elif test -n "$extract"; then
  if test ! -f "$orig_archive"; then
    func_error "archive not found"
  fi
  if test $# -gt 0; then
    # Same as for delete: iterate with $member, not the unshifted $1.
    for member
    do
      case $member in
        @*)
          func_at_file "${member#@}" -EXTRACT "$archive"
          ;;
        *)
          func_file_conv "$member"
          $AR -NOLOGO -EXTRACT:"$file" "$archive" || exit $?
          ;;
      esac
    done
  else
    # No members given: extract everything the archive lists.  The sed
    # doubles backslashes so that "read" hands them back unchanged.
    $AR -NOLOGO -LIST "$archive" | sed -e 's/\\/\\\\/g' | while read member
    do
      $AR -NOLOGO -EXTRACT:"$member" "$archive" || exit $?
    done
  fi

elif test -n "$quick$replace"; then
  if test ! -f "$orig_archive"; then
    if test -z "$create"; then
      echo "$me: creating $orig_archive"
    fi
    orig_archive=
  else
    orig_archive=$archive
  fi

  # Rotate through the positional parameters: each pass appends the
  # converted form of $1 to the end of the list and drops the original,
  # so after the loop "$@" holds the converted names in the same order.
  for member
  do
    case $1 in
      @*)
        func_file_conv "${1#@}"
        set x "$@" "@$file"
        ;;
      *)
        func_file_conv "$1"
        set x "$@" "$file"
        ;;
    esac
    shift
    shift
  done

  if test -n "$orig_archive"; then
    $AR -NOLOGO -OUT:"$archive" "$orig_archive" "$@" || exit $?
  else
    $AR -NOLOGO -OUT:"$archive" "$@" || exit $?
  fi

elif test -n "$list"; then
  if test ! -f "$orig_archive"; then
    func_error "archive not found"
  fi
  $AR -NOLOGO -LIST "$archive" || exit $?
fi

1552
opal/mca/common/libfabric/libfabric/config/config.guess поставляемый Исполняемый файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

1788
opal/mca/common/libfabric/libfabric/config/config.sub поставляемый Исполняемый файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

791
opal/mca/common/libfabric/libfabric/config/depcomp Исполняемый файл
Просмотреть файл

@ -0,0 +1,791 @@
#! /bin/sh
# depcomp - compile a program generating dependencies as side-effects
scriptversion=2013-05-30.07; # UTC
# Copyright (C) 1999-2013 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
case $1 in
'')
echo "$0: No command. Try '$0 --help' for more information." 1>&2
exit 1;
;;
-h | --h*)
cat <<\EOF
Usage: depcomp [--help] [--version] PROGRAM [ARGS]
Run PROGRAMS ARGS to compile a file, generating dependencies
as side-effects.
Environment variables:
depmode Dependency tracking mode.
source Source file read by 'PROGRAMS ARGS'.
object Object file output by 'PROGRAMS ARGS'.
DEPDIR directory where to store dependencies.
depfile Dependency file to output.
tmpdepfile Temporary file to use when outputting dependencies.
libtool Whether libtool is used (yes/no).
Report bugs to <bug-automake@gnu.org>.
EOF
exit $?
;;
-v | --v*)
echo "depcomp $scriptversion"
exit $?
;;
esac
# Get the directory component of the given path, and save it in the
# global variable '$dir'. Note that this directory component will
# be either empty or ending with a '/' character. This is deliberate.
set_dir_from ()
{
  # Start from "no directory component"; this is what callers get for
  # a bare file name.
  dir=
  case $1 in
    */*)
      # The path has at least one slash: cut everything after the last
      # one, keeping the slash itself.
      dir=`echo "$1" | sed -e 's|/[^/]*$|/|'`
      ;;
  esac
}
# Get the suffix-stripped basename of the given path, and save it in the
# global variable '$base'.
set_base_from ()
{
  # One sed script, two substitutions: the first removes the leading
  # directories, the second removes the last '.suffix' (if any).
  base=`echo "$1" | sed 's|^.*/||;s/\.[^.]*$//'`
}
# If no dependency file was actually created by the compiler invocation,
# we still have to create a dummy depfile, to avoid errors with the
# Makefile "include basename.Plo" scheme.
make_dummy_depfile ()
{
  # The file content is a single comment line; the target path comes
  # from the global '$depfile'.
  echo '#dummy' > "$depfile"
}
# Factor out some common post-processing of the generated depfile.
# Requires the auxiliary global variable '$tmpdepfile' to be set.
aix_post_process_depfile ()
{
  if test ! -f "$tmpdepfile"; then
    # The compiler produced no dependency file at all.
    make_dummy_depfile
  else
    # Each input line is of the form 'foo.o: dependency.h'.
    # First pass: rewrite the target so that every line reads
    #   $object: dependency.h
    sed -e "s,^.*\.[$lower]*:,$object:," < "$tmpdepfile" > "$depfile"
    # Second pass: append one empty rule per dependency
    #   dependency.h:
    # which is needed to avoid the deleted-header problem.
    sed -e "s,^.*\.[$lower]*:[$tab ]*,," -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
    rm -f "$tmpdepfile"
  fi
}
# A tabulation character.
tab=' '
# A newline character.
nl='
'
# Character ranges might be problematic outside the C locale.
# These definitions help.
upper=ABCDEFGHIJKLMNOPQRSTUVWXYZ
lower=abcdefghijklmnopqrstuvwxyz
digits=0123456789
alpha=${upper}${lower}
if test -z "$depmode" || test -z "$source" || test -z "$object"; then
echo "depcomp: Variables source, object and depmode must be set" 1>&2
exit 1
fi
# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
depfile=${depfile-`echo "$object" |
sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
rm -f "$tmpdepfile"
# Avoid interferences from the environment.
gccflag= dashmflag=
# Some modes work just like other modes, but use different flags. We
# parameterize here, but still list the modes in the big case below,
# to make depend.m4 easier to write. Note that we *cannot* use a case
# here, because this file can only contain one case statement.
if test "$depmode" = hp; then
# HP compiler uses -M and no extra arg.
gccflag=-M
depmode=gcc
fi
if test "$depmode" = dashXmstdout; then
# This is just like dashmstdout with a different argument.
dashmflag=-xM
depmode=dashmstdout
fi
cygpath_u="cygpath -u -f -"
if test "$depmode" = msvcmsys; then
# This is just like msvisualcpp but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u='sed s,\\\\,/,g'
depmode=msvisualcpp
fi
if test "$depmode" = msvc7msys; then
# This is just like msvc7 but w/o cygpath translation.
# Just convert the backslash-escaped backslashes to single forward
# slashes to satisfy depend.m4
cygpath_u='sed s,\\\\,/,g'
depmode=msvc7
fi
if test "$depmode" = xlc; then
# IBM C/C++ Compilers xlc/xlC can output gcc-like dependency information.
gccflag=-qmakedep=gcc,-MF
depmode=gcc
fi
case "$depmode" in
gcc3)
## gcc 3 implements dependency tracking that does exactly what
## we want. Yay! Note: for some reason libtool 1.4 doesn't like
## it if -MD -MP comes after the -MF stuff. Hmm.
## Unfortunately, FreeBSD c89 acceptance of flags depends upon
## the command line argument order; so add the flags where they
## appear in depend2.am. Note that the slowdown incurred here
## affects only configure: in makefiles, %FASTDEP% shortcuts this.
# Rebuild the positional parameters in place, inserting the dependency
# flags immediately before '-c'. Each pass appends the (possibly
# expanded) argument at the end, then drops the 'fnord' placeholder and
# the original argument from the front.
for arg
do
case $arg in
-c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;;
*) set fnord "$@" "$arg" ;;
esac
shift # fnord
shift # $arg
done
# Run the compiler. On failure, discard the partial dependency file and
# exit with the compiler's status.
"$@"
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
# No post-processing in this mode: the temporary file simply becomes
# the dependency file.
mv "$tmpdepfile" "$depfile"
;;
gcc)
## Note that this doesn't just cater to obsolete pre-3.x GCC compilers,
## but also to in-use compilers like IBM xlc/xlC and the HP C compiler.
## (see the conditional assignment to $gccflag above).
## There are various ways to get dependency output from gcc. Here's
## why we pick this rather obscure method:
## - Don't want to use -MD because we'd like the dependencies to end
## up in a subdir. Having to rename by hand is ugly.
## (We might end up doing this anyway to support other compilers.)
## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
## -MM, not -M (despite what the docs say). Also, it might not be
## supported by the other compilers which use the 'gcc' depmode.
## - Using -M directly means running the compiler twice (even worse
## than renaming).
# $gccflag is only non-empty here when one of the alias depmodes set
# it before this case statement; otherwise fall back to '-MD,'.
if test -z "$gccflag"; then
gccflag=-MD,
fi
# The flag and the temporary file name are passed as one -Wp, option.
"$@" -Wp,"$gccflag$tmpdepfile"
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
# Start the rule with our own object name, then append the compiler's
# dependency list with its own 'target:' prefix stripped.
echo "$object : \\" > "$depfile"
# The second -e expression handles DOS-style file names with drive
# letters.
sed -e 's/^[^:]*: / /' \
-e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
## This next piece of magic avoids the "deleted header file" problem.
## The problem is that when a header file which appears in a .P file
## is deleted, the dependency causes make to die (because there is
## typically no way to rebuild the header). We avoid this by adding
## dummy dependencies for each header file. Too bad gcc doesn't do
## this for us directly.
## Some versions of gcc put a space before the ':'. On the theory
## that the space means something, we add a space to the output as
## well. hp depmode also adds that space, but also prefixes the VPATH
## to the object. Take care to not repeat it in the output.
## Some versions of the HPUX 10.20 sed can't process this invocation
## correctly. Breaking it into two sed invocations is a workaround.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^\\$//' -e '/^$/d' -e "s|.*$object$||" -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
hp)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
sgi)
if test "$libtool" = yes; then
"$@" "-Wp,-MDupdate,$tmpdepfile"
else
"$@" -MDupdate "$tmpdepfile"
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files
echo "$object : \\" > "$depfile"
# Clip off the initial element (the dependent). Don't try to be
# clever and replace this with sed code, as IRIX sed won't handle
# lines with more than a fixed number of characters (4096 in
# IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
# the IRIX cc adds comments like '#:fec' to the end of the
# dependency line.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' \
| tr "$nl" ' ' >> "$depfile"
echo >> "$depfile"
# The second pass generates a dummy entry for each header file.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
>> "$depfile"
else
make_dummy_depfile
fi
rm -f "$tmpdepfile"
;;
xlc)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
aix)
# The C for AIX Compiler uses -M and outputs the dependencies
# in a .u file. In older versions, this file always lives in the
# current directory. Also, the AIX compiler puts '$object:' at the
# start of each line; $object doesn't have directory information.
# Version 6 uses the directory in both cases.
set_dir_from "$object"
set_base_from "$object"
if test "$libtool" = yes; then
tmpdepfile1=$dir$base.u
tmpdepfile2=$base.u
tmpdepfile3=$dir.libs/$base.u
"$@" -Wc,-M
else
tmpdepfile1=$dir$base.u
tmpdepfile2=$dir$base.u
tmpdepfile3=$dir$base.u
"$@" -M
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
do
test -f "$tmpdepfile" && break
done
aix_post_process_depfile
;;
tcc)
# tcc (Tiny C Compiler) understands '-MD -MF file' since version 0.9.26
# FIXME: That version is still under development at the moment of writing.
# Make sure that this statement remains true also for stable, released
# versions.
# It will wrap lines (doesn't matter whether long or short) with a
# trailing '\', as in:
#
# foo.o : \
# foo.c \
# foo.h \
#
# It will put a trailing '\' even on the last line, and will use leading
# spaces rather than leading tabs (at least since its commit 0394caf7
# "Emit spaces for -MD").
"$@" -MD -MF "$tmpdepfile"
stat=$?
# On compiler failure, discard the partial dependency file and exit
# with the compiler's status.
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
# Each non-empty line is of the form 'foo.o : \' or ' dep.h \'.
# We have to change lines of the first kind to '$object: \'.
sed -e "s|.*:|$object :|" < "$tmpdepfile" > "$depfile"
# And for each line of the second kind, we have to emit a 'dep.h:'
# dummy dependency, to avoid the deleted-header problem.
sed -n -e 's|^ *\(.*\) *\\$|\1:|p' < "$tmpdepfile" >> "$depfile"
rm -f "$tmpdepfile"
;;
## The order of this option in the case statement is important, since the
## shell code in configure will try each of these formats in the order
## listed in this file. A plain '-MD' option would be understood by many
## compilers, so we must ensure this comes after the gcc and icc options.
pgcc)
# Portland's C compiler understands '-MD'.
# Will always output deps to 'file.d' where file is the root name of the
# source file under compilation, even if file resides in a subdirectory.
# The object file name does not affect the name of the '.d' file.
# pgcc 10.2 will output
# foo.o: sub/foo.c sub/foo.h
# and will wrap long lines using '\' :
# foo.o: sub/foo.c ... \
# sub/foo.h ... \
# ...
# NOTE(review): '$dir' is set by the next call but is not referenced
# again anywhere in this arm.
set_dir_from "$object"
# Use the source, not the object, to determine the base name, since
# that's sadly what pgcc will do too.
set_base_from "$source"
tmpdepfile=$base.d
# For projects that build the same source file twice into different object
# files, the pgcc approach of using the *source* file root name can cause
# problems in parallel builds. Use a locking strategy to avoid stomping on
# the same $tmpdepfile.
lockdir=$base.d-lock
# While the lock may be held, signals 1, 2, 13 and 15 print a message,
# remove the lock directory and exit with status 1. The handler text is
# expanded now, so it embeds the current values of $0 and $lockdir.
trap "
echo '$0: caught signal, cleaning up...' >&2
rmdir '$lockdir'
exit 1
" 1 2 13 15
# $i is the number of attempts left. It is decremented once per second
# spent waiting on somebody else's lock and once more per pass of the
# outer loop.
numtries=100
i=$numtries
while test $i -gt 0; do
# mkdir is a portable test-and-set.
if mkdir "$lockdir" 2>/dev/null; then
# This process acquired the lock.
"$@" -MD
stat=$?
# Release the lock.
rmdir "$lockdir"
break
else
# If the lock is being held by a different process, wait
# until the winning process is done or we timeout.
while test -d "$lockdir" && test $i -gt 0; do
sleep 1
i=`expr $i - 1`
done
fi
i=`expr $i - 1`
done
# Reset the handlers for the signals trapped above.
trap - 1 2 13 15
# The loop only ends with $i still positive through the 'break' above,
# i.e. after the compiler has actually been run.
if test $i -le 0; then
echo "$0: failed to acquire lock after $numtries attempts" >&2
echo "$0: check lockdir '$lockdir'" >&2
exit 1
fi
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
# Each line is of the form `foo.o: dependent.h',
# or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
# Do two passes, one to just change these to
# `$object: dependent.h' and one to simply `dependent.h:'.
sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
# Some versions of the HPUX 10.20 sed can't process this invocation
# correctly. Breaking it into two sed invocations is a workaround.
sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
hp2)
# The "hp" stanza above does not work with aCC (C++) and HP's ia64
# compilers, which have integrated preprocessors. The correct option
# to use with these is +Maked; it writes dependencies to a file named
# 'foo.d', which lands next to the object file, wherever that
# happens to be.
# Much of this is similar to the tru64 case; see comments there.
set_dir_from "$object"
set_base_from "$object"
if test "$libtool" = yes; then
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir.libs/$base.d
"$@" -Wc,+Maked
else
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir$base.d
"$@" +Maked
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile1" "$tmpdepfile2"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2"
do
test -f "$tmpdepfile" && break
done
if test -f "$tmpdepfile"; then
sed -e "s,^.*\.[$lower]*:,$object:," "$tmpdepfile" > "$depfile"
# Add 'dependent.h:' lines.
sed -ne '2,${
s/^ *//
s/ \\*$//
s/$/:/
p
}' "$tmpdepfile" >> "$depfile"
else
make_dummy_depfile
fi
rm -f "$tmpdepfile" "$tmpdepfile2"
;;
tru64)
# The Tru64 compiler uses -MD to generate dependencies as a side
# effect. 'cc -MD -o foo.o ...' puts the dependencies into 'foo.o.d'.
# At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
# dependencies in 'foo.d' instead, so we check for that too.
# Subdirectories are respected.
set_dir_from "$object"
set_base_from "$object"
if test "$libtool" = yes; then
# Libtool generates 2 separate objects for the 2 libraries. These
# two compilations output dependencies in $dir.libs/$base.o.d and
# in $dir$base.o.d. We have to check for both files, because
# one of the two compilations can be disabled. We should prefer
# $dir$base.o.d over $dir.libs/$base.o.d because the latter is
# automatically cleaned when .libs/ is deleted, while ignoring
# the former would cause a distcleancheck panic.
tmpdepfile1=$dir$base.o.d # libtool 1.5
tmpdepfile2=$dir.libs/$base.o.d # Likewise.
tmpdepfile3=$dir.libs/$base.d # Compaq CCC V6.2-504
"$@" -Wc,-MD
else
tmpdepfile1=$dir$base.d
tmpdepfile2=$dir$base.d
tmpdepfile3=$dir$base.d
"$@" -MD
fi
stat=$?
if test $stat -ne 0; then
rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
exit $stat
fi
for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
do
test -f "$tmpdepfile" && break
done
# Same post-processing that is required for AIX mode.
aix_post_process_depfile
;;
msvc7)
if test "$libtool" = yes; then
showIncludes=-Wc,-showIncludes
else
showIncludes=-showIncludes
fi
"$@" $showIncludes > "$tmpdepfile"
stat=$?
grep -v '^Note: including file: ' "$tmpdepfile"
if test $stat -ne 0; then
rm -f "$tmpdepfile"
exit $stat
fi
rm -f "$depfile"
echo "$object : \\" > "$depfile"
# The first sed program below extracts the file names and escapes
# backslashes for cygpath. The second sed program outputs the file
# name when reading, but also accumulates all include files in the
# hold buffer in order to output them again at the end. This only
# works with sed implementations that can handle large buffers.
sed < "$tmpdepfile" -n '
/^Note: including file: *\(.*\)/ {
s//\1/
s/\\/\\\\/g
p
}' | $cygpath_u | sort -u | sed -n '
s/ /\\ /g
s/\(.*\)/'"$tab"'\1 \\/p
s/.\(.*\) \\/\1:/
H
$ {
s/.*/'"$tab"'/
G
p
}' >> "$depfile"
echo >> "$depfile" # make sure the fragment doesn't end with a backslash
rm -f "$tmpdepfile"
;;
msvc7msys)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
#nosideeffect)
# This comment above is used by automake to tell side-effect
# dependency tracking mechanisms from slower ones.
dashmstdout)
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout, regardless of -o.
# First do the real compilation; give up immediately if it fails.
"$@" || exit $?
# Remove the call to Libtool.
# Everything up to and including '--mode=compile' is dropped.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# Remove '-o $object'.
# Arguments to keep are appended at the end of the positional
# parameters and removed from the front; arguments to drop are only
# removed from the front.
IFS=" "
for arg
do
case $arg in
-o)
shift
;;
$object)
shift
;;
*)
set fnord "$@" "$arg"
shift # fnord
shift # $arg
;;
esac
done
# '-M' unless an alias depmode selected another flag earlier on.
test -z "$dashmflag" && dashmflag=-M
# Require at least two characters before searching for ':'
# in the target name. This is to cope with DOS-style filenames:
# a dependency such as 'c:/foo/bar' could be seen as target 'c' otherwise.
"$@" $dashmflag |
sed "s|^[$tab ]*[^:$tab ][^:][^:]*:[$tab ]*|$object: |" > "$tmpdepfile"
rm -f "$depfile"
cat < "$tmpdepfile" > "$depfile"
# Append one 'header :' line per dependency word.
# Some versions of the HPUX 10.20 sed can't process this sed invocation
# correctly. Breaking it into two sed invocations is a workaround.
tr ' ' "$nl" < "$tmpdepfile" \
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
dashXmstdout)
# This case only exists to satisfy depend.m4. It is never actually
# run, as this mode is specially recognized in the preamble.
exit 1
;;
makedepend)
# First do the real compilation; give up immediately if it fails.
"$@" || exit $?
# Remove any Libtool call
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# X makedepend
shift
# Rebuild the argument list for makedepend. On the first pass the
# positional parameters are emptied (the loop keeps walking the list it
# started with); after that only the wanted arguments are appended.
# 'eat=yes' makes the following argument be skipped.
cleared=no eat=no
for arg
do
case $cleared in
no)
set ""; shift
cleared=yes ;;
esac
if test $eat = yes; then
eat=no
continue
fi
case "$arg" in
-D*|-I*)
set fnord "$@" "$arg"; shift ;;
# Strip any option that makedepend may not understand. Remove
# the object too, otherwise makedepend will parse it as a source file.
-arch)
eat=yes ;;
-*|$object)
;;
*)
set fnord "$@" "$arg"; shift ;;
esac
done
# Suffix of the object file, leading dot included.
obj_suffix=`echo "$object" | sed 's/^.*\././'`
# The output file is created before makedepend is pointed at it.
touch "$tmpdepfile"
${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
rm -f "$depfile"
# makedepend may prepend the VPATH from the source file name to the object.
# No need to regex-escape $object, excess matching of '.' is harmless.
sed "s|^.*\($object *:\)|\1|" "$tmpdepfile" > "$depfile"
# Some versions of the HPUX 10.20 sed can't process the last invocation
# correctly. Breaking it into two sed invocations is a workaround.
sed '1,2d' "$tmpdepfile" \
| tr ' ' "$nl" \
| sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' \
| sed -e 's/$/ :/' >> "$depfile"
# NOTE(review): the '.bak' file is presumably a backup left behind by
# makedepend itself -- confirm.
rm -f "$tmpdepfile" "$tmpdepfile".bak
;;
cpp)
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
# Remove '-o $object'.
IFS=" "
for arg
do
case $arg in
-o)
shift
;;
$object)
shift
;;
*)
set fnord "$@" "$arg"
shift # fnord
shift # $arg
;;
esac
done
"$@" -E \
| sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
-e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \
| sed '$ s: \\$::' > "$tmpdepfile"
rm -f "$depfile"
echo "$object : \\" > "$depfile"
cat < "$tmpdepfile" >> "$depfile"
sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
rm -f "$tmpdepfile"
;;
msvisualcpp)
# Important note: in order to support this mode, a compiler *must*
# always write the preprocessed file to stdout.
"$@" || exit $?
# Remove the call to Libtool.
if test "$libtool" = yes; then
while test "X$1" != 'X--mode=compile'; do
shift
done
shift
fi
IFS=" "
for arg
do
case "$arg" in
-o)
shift
;;
$object)
shift
;;
"-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
set fnord "$@"
shift
shift
;;
*)
set fnord "$@" "$arg"
shift
shift
;;
esac
done
"$@" -E 2>/dev/null |
sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::\1:p' | $cygpath_u | sort -u > "$tmpdepfile"
rm -f "$depfile"
echo "$object : \\" > "$depfile"
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::'"$tab"'\1 \\:p' >> "$depfile"
echo "$tab" >> "$depfile"
sed < "$tmpdepfile" -n -e 's% %\\ %g' -e '/^\(.*\)$/ s::\1\::p' >> "$depfile"
rm -f "$tmpdepfile"
;;
msvcmsys)
# This case exists only to let depend.m4 do its work. It works by
# looking at the text of this script. This case will never be run,
# since it is checked for above.
exit 1
;;
none)
exec "$@"
;;
*)
echo "Unknown depmode $depmode" 1>&2
exit 1
;;
esac
exit 0
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

Просмотреть файл

@ -0,0 +1,59 @@
#!/usr/bin/env perl
use strict;
use warnings;
use POSIX qw(strftime);
# Positional arguments: the tree to operate on, and the version string
# to stamp into it.
my $builddir = shift(@ARGV);
my $version = shift(@ARGV);
# Sanity check: both arguments must be defined and true, and builddir
# must be an existing directory.
die "Must specify builddir, version"
if (!defined($builddir) || !$builddir || ! -d $builddir ||
!defined($version) || !$version);
# Date stamp in YYYY-MM-DD form, taken from the local time.
my $today = strftime "%Y-%m-%d", localtime;
#------------------------------------------------------------------------------
# Helper function to re-write files
#------------------------------------------------------------------------------
sub subst {
    # Rewrite $file in place, replacing every @VERSION@ token with
    # "Libfabric v$version" and every @DATE@ token with $today.  The
    # file is only written back when a substitution actually changed
    # something.  Dies if the file cannot be read or written.
    my $file = shift;

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as an open mode, and the handle cannot clash with any
    # other global bareword handle.
    open(my $in, '<', $file) || die "Can't read $file: $!";
    # join() yields '' (not undef) for an empty file, so the
    # substitutions below do not warn about uninitialized values.
    my $orig = join('', <$in>);
    close($in);

    my $copy = $orig;
    $copy =~ s/\@VERSION\@/Libfabric v$version/g;
    $copy =~ s/\@DATE\@/$today/g;

    if ($copy ne $orig) {
        print "*** VERSION/DATE-ifying $file...\n";
        open(my $out, '>', $file) || die "Can't write to $file: $!";
        print $out $copy;
        # Buffered write errors (e.g. a full disk) only show up at
        # close time, so check it.
        close($out) || die "Can't write to $file: $!";
    }
}
###############################################################################
# Change into the new distribution tree
###############################################################################
# Work relative to the top of the distribution tree.  Stop if the
# directory cannot be entered, rather than rewriting files in whatever
# directory the script happened to be started from.
chdir($builddir) || die "Can't chdir to $builddir: $!";
subst("README");

# Without this check, a missing "man" directory would make the scan
# below run against the top-level directory instead.
chdir("man") || die "Can't chdir to man directory: $!";
opendir(my $dh, ".") || die "Can't open man directory: $!";
# Keep only plain files whose name ends in a dot followed by one digit.
my @files = grep { /\.\d$/ && -f "./$_" } readdir($dh);
closedir $dh;

foreach my $file (@files) {
    subst($file);
}

exit(0);

Просмотреть файл

@ -0,0 +1,176 @@
dnl Macros to help setup FI providers
dnl
dnl Helper macro called from top-level configure.ac to get ready to
dnl configure providers.
dnl
AC_DEFUN([FI_PROVIDER_INIT],[
	dnl Accumulators that FI_PROVIDER_SETUP adds to, once per provider
	dnl that configures successfully.
	PROVIDERS_TO_BUILD=
	PROVIDERS_DL=
	PROVIDERS_STATIC=
	# Start the counter at a real number so that numeric tests on it
	# stay well-formed even when no provider ends up being built.
	PROVIDERS_COUNT=0
])
dnl
dnl Helper macro called from top-level configure.ac to finalize
dnl after all providers have been initialized
dnl
AC_DEFUN([FI_PROVIDER_FINI],[
	dnl Turn the three provider lists into output substitution variables.
	AC_SUBST([PROVIDERS_TO_BUILD])
	AC_SUBST([PROVIDERS_DL])
	AC_SUBST([PROVIDERS_STATIC])
])
dnl Helper macro called from top-level configure.ac to setup a
dnl provider.
dnl
dnl 1. Sets up --enable-<provider_name>
dnl 2. Checks for --enable-<provider_name>=dl;
dnl sets $<provider_name>_dl to 0 or 1
dnl 3. Sets $enable_<provider_name> to "yes" or "no"
dnl 4. Calls <provider_name>_CONFIGURE m4 macro
dnl 5. If a directory was provided in --enable-<provider_name>, ensure
dnl it is sane
dnl 6. Calls <provider_name>_CONDITIONALS m4 macro
dnl 7. Outputs whether this provider will be built or not, and if so,
dnl whether it is static or a DSO
dnl
dnl Arguments:
dnl
dnl $1: provider name (must be same as directory name)
dnl
dnl Shell variable outputs:
dnl
dnl enable_$1: yes, no, or auto
dnl $1_dl: 1 if the provider is supposed to be built as a DSO, 0 otherwise
dnl
dnl AC_DEFINE outputs:
dnl
dnl HAVE_$1_DL: same value as $1_dl
dnl
AC_DEFUN([FI_PROVIDER_SETUP],[
	AC_MSG_NOTICE([*** Configuring $1 provider])

	AC_ARG_ENABLE([$1],
	      [AS_HELP_STRING([--enable-$1],
			      [Enable $1 provider @<:@default=auto@:>@])
	      ],
	      [],
	      [enable_$1=auto])

	# Check the --enable-<$1> value
	# Anything other than yes, no, dl or auto is taken to be an
	# installation directory, which gets validated and then counts
	# as "yes".
	$1_dl=0
	AS_CASE([$enable_$1],
	[yes|no], [],
	[dl],     [enable_$1=yes $1_dl=1],
	[auto],   [],
	[FI_CHECK_PREFIX_DIR([$enable_$1])
	 enable_$1=yes]
	)

	# Call the provider's CONFIGURE and CONDITIONALS macros
	m4_include([prov/]$1[/configure.m4])
	_FI_PROVIDER_INVOKE($1, [CONFIGURE], [yes], [yes])
	_FI_PROVIDER_INVOKE($1, [CONDITIONALS], [no], [no])

	# See if the provider configured successfully
	AS_IF([test $$1_happy -eq 1],
		[PROVIDERS_TO_BUILD="$PROVIDERS_TO_BUILD $1"
		 PROVIDERS_COUNT=$((PROVIDERS_COUNT+1))
		 AS_IF([test $$1_dl -eq 1],
			[AC_MSG_NOTICE([$1 provider to be built as a DSO])
			 PROVIDERS_DL="prov/$1/lib$1.la $PROVIDERS_DL"
			 AS_IF([test x"$enable_static" = x"yes" &&
				test x"$enable_shared" = x"no"],
				[AC_MSG_WARN([$1 provider was selected to be built as DL])
				 AC_MSG_WARN([but libfabric is being built as static-only])
				 AC_MSG_ERROR([This is an impossible situation. Cannot continue.])])
			],
			[AC_MSG_NOTICE([$1 provider to be built statically])
			 PROVIDERS_STATIC="prov/$1/lib$1.la $PROVIDERS_STATIC"])
		],
		[AC_MSG_NOTICE([$1 provider disabled])])

	# Set conditionals for HAVE_<provider> and HAVE_<provider>_DL
	AM_CONDITIONAL([HAVE_]m4_translit([$1], [a-z], [A-Z]),
		[test $$1_happy -eq 1])
	AM_CONDITIONAL([HAVE_]m4_translit([$1], [a-z], [A-Z])[_DL],
		[test $$1_dl -eq 1])

	# If this provider was specifically requested but we can't
	# build it, error.
	AS_IF([test "$enable_$1 $$1_happy" = "yes 0"],
		[AC_MSG_WARN([$1 provider was requested, but cannot be compiled])
		 AC_MSG_ERROR([Cannot continue])
		])

	# If this provider was requested for direct build, ensure that
	# provider's fi_direct.h exists in tree.  Error otherwise.
	# The header is part of the sources, so it is looked up under the
	# source directory: a path relative to the build directory would
	# not be found in a VPATH build.  A plain file test is also usable
	# when cross-compiling.
	AS_IF([test x"$enable_direct" = x"$1"],
		[AS_IF([test ! -f "$srcdir/prov/$1/include/rdma/fi_direct.h"],
			[AC_MSG_WARN([$1 provider was requested as direct, but is missing required files])
			 AC_MSG_ERROR([Cannot continue])])])
])
dnl
dnl Helper macro that can be used to check that a user-provided directory
dnl is valid as the root of an installation tree. I.e., that it has an
dnl include and lib or lib64 directory. This helps prevent users from
dnl specifying incorrect/invalid directories on the configure command line
dnl (e.g., typoing a directory name and then wondering why a given
dnl provider chooses not to build).
dnl
dnl Arguments:
dnl
dnl $1: directory to check
dnl
AC_DEFUN([FI_CHECK_PREFIX_DIR],[
	# Check that the base directory exists
	AS_IF([test ! -d "$1"],
	      [AC_MSG_WARN([supplied directory "$1" does not exist])
	       AC_MSG_ERROR([Cannot continue])
	      ])

	# Check that base/include exists.  The new -I flag is put in
	# front of the existing value instead of replacing it, so that
	# preprocessor flags supplied by the user are not thrown away.
	AS_IF([test -d "$1/include"],
	      [CPPFLAGS="-I$1/include $CPPFLAGS"],
	      [AC_MSG_WARN([could not find "include" subdirectory in supplied "$1" directory])
	       AC_MSG_ERROR([Cannot continue])
	      ])

	# Check that base/lib or base/lib64 exists; "lib" is preferred
	# when both are present.  As above, existing linker flags are
	# kept.
	AS_IF([test -d "$1/lib"],
	      [LDFLAGS="-L$1/lib $LDFLAGS"],
	      [AS_IF([test -d "$1/lib64"],
		     [LDFLAGS="-L$1/lib64 $LDFLAGS"],
		     [AC_MSG_WARN([could not find "lib" or "lib64" subdirectories in supplied "$1" directory])
		      AC_MSG_ERROR([Cannot continue])
		     ])
	      ])
])
dnl ------------------------------------------------------------------------
dnl
dnl Internal; should not be called from provider .m4 scripts.
dnl Helper macro to invoke the AC_DEFUN'ed macros down in the providers
dnl
dnl Arguments:
dnl
dnl $1: name of the provider
dnl $2: suffix of the macro to invoke
dnl $3: whether to pass the happy/sad parameters to the invoked macro
dnl $4: whether the macro must exist or not
dnl
AC_DEFUN([_FI_PROVIDER_INVOKE],[
dnl The name of the macro to call is assembled as FI_<PROVIDER>_<suffix>,
dnl with the provider name converted to upper case.
dnl When the third argument is yes, the macro is called with two shell
dnl fragments that set <provider>_happy to 1 and to 0 respectively;
dnl otherwise it is called with an empty argument list.
dnl If the FI_<provider>_<suffix> macro is defined, invoke it.
m4_ifdef([FI_]m4_translit([$1], [a-z], [A-Z])[_$2],
[m4_if([$3], [yes],
[FI_]m4_translit([$1], [a-z], [A-Z])[_$2([$1_happy=1],[$1_happy=0])],
[FI_]m4_translit([$1], [a-z], [A-Z])[_$2()]
)],
dnl If $4 is yes and the macro does not exist, error
[m4_if([$4], [yes],
[m4_fatal([$1 provider did not define FI_]m4_translit([$1], [a-z], [A-Z])[_$2 macro in prov/$1/configure.m4])],
[])]
)
])

527
opal/mca/common/libfabric/libfabric/config/install-sh Исполняемый файл
Просмотреть файл

@ -0,0 +1,527 @@
#!/bin/sh
# install - install a program, script, or datafile
scriptversion=2011-11-20.07; # UTC
# This originates from X11R5 (mit/util/scripts/install.sh), which was
# later released in X11R6 (xc/config/util/install.sh) with the
# following copyright and license.
#
# Copyright (C) 1994 X Consortium
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name of the X Consortium shall not
# be used in advertising or otherwise to promote the sale, use or other deal-
# ings in this Software without prior written authorization from the X Consor-
# tium.
#
#
# FSF changes to this file are in the public domain.
#
# Calling this script install-sh is preferred over install.sh, to prevent
# 'make' implicit rules from creating a file called install from it
# when there is no Makefile.
#
# This script is compatible with the BSD install script, but was written
# from scratch.
# A literal newline, used to build IFS below.
nl='
'
# Reset IFS explicitly so word splitting does not depend on the caller's
# environment.
# NOTE(review): upstream install-sh has a TAB between the two quoted
# pieces; as written here the value is space, space, newline. Confirm the
# tab was not lost when this copy was produced.
IFS=" "" $nl"
# set DOITPROG to echo to test this script
# Don't use :- since 4.3BSD and earlier shells don't like it.
doit=${DOITPROG-}
# $doit_exec prefixes the final command of the subshells further down:
# plain 'exec' normally, or $DOITPROG itself when one was supplied.
if test -z "$doit"; then
doit_exec=exec
else
doit_exec=$doit
fi
# Put in absolute file names if you don't have them in your path;
# or use environment vars.
# Each helper program can be overridden through the upper-case environment
# variable of the same name; otherwise the bare command name is used.
chgrpprog=${CHGRPPROG-chgrp}
chmodprog=${CHMODPROG-chmod}
chownprog=${CHOWNPROG-chown}
cmpprog=${CMPPROG-cmp}
cpprog=${CPPROG-cp}
mkdirprog=${MKDIRPROG-mkdir}
mvprog=${MVPROG-mv}
rmprog=${RMPROG-rm}
stripprog=${STRIPPROG-strip}
# Tri-state cache describing whether 'set -f' works in this shell:
#   '?'   not probed yet
#   ''    supported   ("$posix_glob set -f" runs 'set -f')
#   ':'   unsupported ("$posix_glob set -f" becomes the no-op ': set -f')
posix_glob='?'
# Snippet to 'eval' before the first use of $posix_glob; it probes
# 'set -f' in a subshell and only does so while the value is still '?'.
initialize_posix_glob='
test "$posix_glob" != "?" || {
if (set -f) 2>/dev/null; then
posix_glob=
else
posix_glob=:
fi
}
'
# Empty until the main loop has probed 'mkdir -p'; afterwards it holds the
# command 'false' or ':' and is executed as such.
posix_mkdir=
# Desired mode of installed file.
mode=0755
# Command prefixes; an empty value means "skip this step".
# -g, -o and -s fill in chgrpcmd, chowncmd and stripcmd respectively.
chgrpcmd=
chmodcmd=$chmodprog
chowncmd=
mvcmd=$mvprog
rmcmd="$rmprog -f"
stripcmd=
# Per-file source/destination, set inside the main loop.
src=
dst=
# dir_arg is non-empty with -d; dst_arg holds the destination operand.
dir_arg=
dst_arg=
# copy_on_change is run as a command ('false'/'true'), set by -C.
copy_on_change=false
# Non-empty with -T: refuse a destination that is a directory.
no_target_directory=
# Help text printed by --help.  It is a double-quoted string, so $0 and
# the $...prog variables are expanded when this assignment runs.
usage="\
Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
or: $0 [OPTION]... SRCFILES... DIRECTORY
or: $0 [OPTION]... -t DIRECTORY SRCFILES...
or: $0 [OPTION]... -d DIRECTORIES...
In the 1st form, copy SRCFILE to DSTFILE.
In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
In the 4th, create DIRECTORIES.
Options:
--help display this help and exit.
--version display version info and exit.
-c (ignored)
-C install only if different (preserve the last data modification time)
-d create directories instead of installing files.
-g GROUP $chgrpprog installed files to GROUP.
-m MODE $chmodprog installed files to MODE.
-o USER $chownprog installed files to USER.
-s $stripprog installed files.
-t DIRECTORY install into DIRECTORY.
-T report an error if DSTFILE is a directory.
Environment variables override the default commands:
CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
RMPROG STRIPPROG
"
# Parse leading options.  The loop stops at the first operand that does
# not start with '-', or right after '--'.  Options that take a value
# (-g, -m, -o, -t) read it from $2 and perform one extra shift.
while test $# -ne 0; do
case $1 in
-c) ;;
-C) copy_on_change=true;;
-d) dir_arg=true;;
-g) chgrpcmd="$chgrpprog $2"
shift;;
--help) echo "$usage"; exit $?;;
-m) mode=$2
# Reject modes containing whitespace or glob characters: $mode is
# expanded unquoted later on (expr and chmod command lines).
# NOTE(review): the first two patterns below are identical here; upstream
# install-sh has a TAB in the second one. Confirm it was not lost.
case $mode in
*' '* | *' '* | *'
'* | *'*'* | *'?'* | *'['*)
echo "$0: invalid mode: $mode" >&2
exit 1;;
esac
shift;;
-o) chowncmd="$chownprog $2"
shift;;
-s) stripcmd=$stripprog;;
-t) dst_arg=$2
# Protect names problematic for 'test' and other utilities.
case $dst_arg in
-* | [=\(\)!]) dst_arg=./$dst_arg;;
esac
shift;;
-T) no_target_directory=true;;
--version) echo "$0 $scriptversion"; exit $?;;
--) shift
break;;
-*) echo "$0: invalid option: $1" >&2
exit 1;;
*) break;;
esac
shift
done
# Split the operands into "sources" (left in $@) and "destination"
# (stored in dst_arg) when neither -d nor -t was given.
if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
# When -d is used, all remaining arguments are directories to create.
# When -t is used, the destination is already specified.
# Otherwise, the last argument is the destination. Remove it from $@.
#
# The loop rotates the positional parameters: each pass drops the current
# first operand into dst_arg and re-appends the previous dst_arg to the
# end of $@.  After the last pass $@ holds every operand but the last, in
# the original order, and dst_arg holds the last one.
for arg
do
if test -n "$dst_arg"; then
# $@ is not empty: it contains at least $arg.
set fnord "$@" "$dst_arg"
shift # fnord
fi
shift # arg
dst_arg=$arg
# Protect names problematic for 'test' and other utilities.
case $dst_arg in
-* | [=\(\)!]) dst_arg=./$dst_arg;;
esac
done
fi
if test $# -eq 0; then
  # Nothing left to install.  That is fine for 'install-sh -d' (it can
  # happen when creating conditional directories) and an error otherwise.
  test -n "$dir_arg" && exit 0
  echo "$0: no input file specified." >&2
  exit 1
fi
# File-install mode only (no -d): install signal handlers and work out the
# umask to use while copying.
if test -z "$dir_arg"; then
# Exit with status 128 + signal number on signals 1, 2, 13 and 15.
do_exit='(exit $ret); exit $ret'
trap "ret=129; $do_exit" 1
trap "ret=130; $do_exit" 2
trap "ret=141; $do_exit" 13
trap "ret=143; $do_exit" 15
# Set umask so as not to create temps with too-generous modes.
# However, 'strip' requires both read and write access to temps.
case $mode in
# Optimize common cases.
*644) cp_umask=133;;
*755) cp_umask=22;;
# Other numeric modes: complement the low three digits with expr; when
# stripping, the extra '% 200' is appended to that expression.
*[0-7])
if test -z "$stripcmd"; then
u_plus_rw=
else
u_plus_rw='% 200'
fi
cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
# Symbolic modes: pass the mode itself as the umask operand, adding
# ',u+rw' when stripping.
*)
if test -z "$stripcmd"; then
u_plus_rw=
else
u_plus_rw=,u+rw
fi
cp_umask=$mode$u_plus_rw;;
esac
fi
# Main loop: handle every remaining operand.
#   * With -d ($dir_arg set) each operand is a directory to create.
#   * Otherwise each operand is a source file that is copied to $dst_arg,
#     or into it when $dst_arg is an existing directory.
# Any failure terminates the script with status 1.
for src
do
  # Protect names problematic for 'test' and other utilities.
  case $src in
    -* | [=\(\)!]) src=./$src;;
  esac

  if test -n "$dir_arg"; then
    dst=$src
    dstdir=$dst
    test -d "$dstdir"
    dstdir_status=$?
  else

    # Waiting for this to be detected by the "$cpprog $src $dsttmp" command
    # might cause directories to be created, which would be especially bad
    # if $src (and thus $dsttmp) contains '*'.
    if test ! -f "$src" && test ! -d "$src"; then
      echo "$0: $src does not exist." >&2
      exit 1
    fi

    if test -z "$dst_arg"; then
      echo "$0: no destination specified." >&2
      exit 1
    fi
    dst=$dst_arg

    # If destination is a directory, append the input filename; won't work
    # if double slashes aren't ignored.
    if test -d "$dst"; then
      if test -n "$no_target_directory"; then
        echo "$0: $dst_arg: Is a directory" >&2
        exit 1
      fi
      dstdir=$dst
      dst=$dstdir/`basename "$src"`
      dstdir_status=0
    else
      # Prefer dirname, but fall back on a substitute if dirname fails.
      dstdir=`
        (dirname "$dst") 2>/dev/null ||
        expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
             X"$dst" : 'X\(//\)[^/]' \| \
             X"$dst" : 'X\(//\)$' \| \
             X"$dst" : 'X\(/\)' \| . 2>/dev/null ||
        echo X"$dst" |
            sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
                   s//\1/
                   q
                 }
                 /^X\(\/\/\)[^/].*/{
                   s//\1/
                   q
                 }
                 /^X\(\/\/\)$/{
                   s//\1/
                   q
                 }
                 /^X\(\/\).*/{
                   s//\1/
                   q
                 }
                 s/.*/./; q'
      `

      test -d "$dstdir"
      dstdir_status=$?
    fi
  fi

  obsolete_mkdir_used=false

  # The destination directory is missing: create it (and its parents).
  if test $dstdir_status != 0; then
    # Probe 'mkdir -p' once; later iterations reuse $posix_mkdir.
    case $posix_mkdir in
      '')
        # Create intermediate dirs using mode 755 as modified by the umask.
        # This is like FreeBSD 'install' as of 1997-10-28.
        umask=`umask`
        case $stripcmd.$umask in
          # Optimize common cases.
          *[2367][2367]) mkdir_umask=$umask;;
          .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;

          *[0-7])
            mkdir_umask=`expr $umask + 22 \
              - $umask % 100 % 40 + $umask % 20 \
              - $umask % 10 % 4 + $umask % 2
            `;;
          *) mkdir_umask=$umask,go-w;;
        esac

        # With -d, create the new directory with the user-specified mode.
        # Otherwise, rely on $mkdir_umask.
        if test -n "$dir_arg"; then
          mkdir_mode=-m$mode
        else
          mkdir_mode=
        fi

        posix_mkdir=false
        case $umask in
          *[123567][0-7][0-7])
            # POSIX mkdir -p sets u+wx bits regardless of umask, which
            # is incompatible with FreeBSD 'install' when (umask & 300) != 0.
            ;;
          *)
            tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
            trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0

            if (umask $mkdir_umask &&
                exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
            then
              if test -z "$dir_arg" || {
                   # Check for POSIX incompatibilities with -m.
                   # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
                   # other-writable bit of parent directory when it shouldn't.
                   # FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
                   ls_ld_tmpdir=`ls -ld "$tmpdir"`
                   case $ls_ld_tmpdir in
                     d????-?r-*) different_mode=700;;
                     d????-?--*) different_mode=755;;
                     *) false;;
                   esac &&
                   $mkdirprog -m$different_mode -p -- "$tmpdir" && {
                     ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
                     test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
                   }
                 }
              then posix_mkdir=:
              fi
              rmdir "$tmpdir/d" "$tmpdir"
            else
              # Remove any dirs left behind by ancient mkdir implementations.
              rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
            fi
            trap '' 0;;
        esac;;
    esac

    if
      $posix_mkdir && (
        umask $mkdir_umask &&
        $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
      )
    then :
    else

      # The umask is ridiculous, or mkdir does not conform to POSIX,
      # or it failed possibly due to a race condition.  Create the
      # directory the slow way, step by step, checking for races as we go.

      case $dstdir in
        /*) prefix='/';;
        [-=\(\)!]*) prefix='./';;
        *)  prefix='';;
      esac

      eval "$initialize_posix_glob"

      # Split $dstdir on '/' into the positional parameters, with globbing
      # disabled (when supported) so components are not pathname-expanded.
      oIFS=$IFS
      IFS=/
      $posix_glob set -f
      set fnord $dstdir
      shift
      $posix_glob set +f
      IFS=$oIFS

      prefixes=

      for d
      do
        test X"$d" = X && continue

        prefix=$prefix$d
        if test -d "$prefix"; then
          prefixes=
        else
          if $posix_mkdir; then
            # Run 'umask' as a command here.  A plain 'umask=...' would
            # only assign a shell variable and leave the process umask
            # untouched, unlike the two other mkdir invocations in this
            # block.
            (umask $mkdir_umask &&
             $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
            # Don't fail if two instances are running concurrently.
            test -d "$prefix" || exit 1
          else
            # Collect the missing prefixes, single-quoted for the eval
            # below; embedded single quotes are escaped with sed.
            case $prefix in
              *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
              *) qprefix=$prefix;;
            esac
            prefixes="$prefixes '$qprefix'"
          fi
        fi
        prefix=$prefix/
      done

      if test -n "$prefixes"; then
        # Don't fail if two instances are running concurrently.
        (umask $mkdir_umask &&
         eval "\$doit_exec \$mkdirprog $prefixes") ||
          test -d "$dstdir" || exit 1
        obsolete_mkdir_used=true
      fi
    fi
  fi

  if test -n "$dir_arg"; then
    # Directory mode: apply owner and group, then the mode.  The chmod is
    # skipped only when the joined string equals "false", i.e. the
    # step-by-step mkdir path was not used and neither -o nor -g was given.
    { test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
    { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
    { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
      test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
  else

    # Make a couple of temp file names in the proper directory.
    dsttmp=$dstdir/_inst.$$_
    rmtmp=$dstdir/_rm.$$_

    # Trap to clean up those temp files at exit.
    trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0

    # Copy the file name to the temp name.
    (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&

    # and set any options; do chmod last to preserve setuid bits.
    #
    # If any of these fail, we abort the whole thing.  If we want to
    # ignore errors from any of these, just make sure not to ignore
    # errors from the above "$doit $cpprog $src $dsttmp" command.
    #
    { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
    { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
    { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
    { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&

    # If -C, don't bother to copy if it wouldn't change the file.
    # Fields 2, 4, 5 and 6 of the 'ls -dlL' output are compared first,
    # then the contents with $cmpprog.
    if $copy_on_change &&
       old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
       new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&

       eval "$initialize_posix_glob" &&
       $posix_glob set -f &&
       set X $old && old=:$2:$4:$5:$6 &&
       set X $new && new=:$2:$4:$5:$6 &&
       $posix_glob set +f &&

       test "$old" = "$new" &&
       $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
    then
      rm -f "$dsttmp"
    else
      # Rename the file to the real destination.
      $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||

      # The rename failed, perhaps because mv can't rename something else
      # to itself, or perhaps because mv is so ancient that it does not
      # support -f.
      {
        # Now remove or move aside any old file at destination location.
        # We try this two ways since rm can't unlink itself on some
        # systems and the destination file might be busy for other
        # reasons.  In this case, the final cleanup might fail but the new
        # file should still install successfully.
        {
          test ! -f "$dst" ||
          $doit $rmcmd -f "$dst" 2>/dev/null ||
          { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
            { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
          } ||
          { echo "$0: cannot unlink or rename $dst" >&2
            (exit 1); exit 1
          }
        } &&

        # Now rename the file to the real destination.
        $doit $mvcmd "$dsttmp" "$dst"
      }
    fi || exit 1

    trap '' 0
  fi
done
# Local variables:
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

7982
opal/mca/common/libfabric/libfabric/config/libtool.m4 поставляемый Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Разница между файлами не показана из-за своего большого размера Загрузить разницу

384
opal/mca/common/libfabric/libfabric/config/ltoptions.m4 поставляемый Обычный файл
Просмотреть файл

@ -0,0 +1,384 @@
# Helper functions for option handling. -*- Autoconf -*-
#
# Copyright (C) 2004, 2005, 2007, 2008, 2009 Free Software Foundation,
# Inc.
# Written by Gary V. Vaughan, 2004
#
# This file is free software; the Free Software Foundation gives
# unlimited permission to copy and/or distribute it, with or without
# modifications, as long as this notice is preserved.
# serial 7 ltoptions.m4
# This is to help aclocal find these macros, as it can't see m4_define.
# The body is a single-argument m4_if; the macro serves only as a marker
# definition for this file.
AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])])
# _LT_MANGLE_OPTION(MACRO-NAME, OPTION-NAME)
# ------------------------------------------
# Expand to the name of the flag macro that records OPTION-NAME as set for
# MACRO-NAME: the prefix _LT_OPTION_ followed by MACRO-NAME__OPTION-NAME
# in which every character other than a letter, a digit or an underscore
# is replaced by an underscore.
m4_define([_LT_MANGLE_OPTION],
[[_LT_OPTION_]m4_bpatsubst($1__$2, [[^a-zA-Z0-9_]], [_])])
# _LT_SET_OPTION(MACRO-NAME, OPTION-NAME)
# ---------------------------------------
# Set option OPTION-NAME for macro MACRO-NAME.  The mangled flag macro is
# always defined (with an empty body), which is what _LT_IF_OPTION and
# _LT_UNLESS_OPTIONS test.  Then, if a handler was registered for the
# pair with LT_OPTION_DEFINE, expand it; otherwise issue an m4_warning
# about the unknown option.
m4_define([_LT_SET_OPTION],
[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl
m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]),
_LT_MANGLE_DEFUN([$1], [$2]),
[m4_warning([Unknown $1 option `$2'])])[]dnl
])
# _LT_IF_OPTION(MACRO-NAME, OPTION-NAME, IF-SET, [IF-NOT-SET])
# ------------------------------------------------------------
# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
# "Set" means the flag macro named by _LT_MANGLE_OPTION is defined.
m4_define([_LT_IF_OPTION],
[m4_ifdef(_LT_MANGLE_OPTION([$1], [$2]), [$3], [$4])])
# _LT_UNLESS_OPTIONS(MACRO-NAME, OPTION-LIST, IF-NOT-SET)
# -------------------------------------------------------
# Execute IF-NOT-SET only when none of the options in the
# whitespace-separated OPTION-LIST is set for MACRO-NAME; a single set
# option is enough to suppress it.  A temporary marker macro (this
# macro's own name with a _found suffix) records a hit and is undefined
# again before the macro finishes.
m4_define([_LT_UNLESS_OPTIONS],
[m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
[m4_ifdef(_LT_MANGLE_OPTION([$1], _LT_Option),
[m4_define([$0_found])])])[]dnl
m4_ifdef([$0_found], [m4_undefine([$0_found])], [$3
])[]dnl
])
# _LT_SET_OPTIONS(MACRO-NAME, OPTION-LIST)
# ----------------------------------------
# OPTION-LIST is a space-separated list of Libtool options associated
# with MACRO-NAME.  Every option is passed to _LT_SET_OPTION, which
# dispatches to a handler declared with LT_OPTION_DEFINE when there is
# one and otherwise complains about the unknown option via m4_warning.
# For MACRO-NAME LT_INIT only, defaults are then applied for every option
# (or pair of opposing options) that was not mentioned.
m4_defun([_LT_SET_OPTIONS],
[# Set options
m4_foreach([_LT_Option], m4_split(m4_normalize([$2])),
[_LT_SET_OPTION([$1], _LT_Option)])
m4_if([$1],[LT_INIT],[
dnl
dnl Simply set some default values (i.e off) if boolean options were not
dnl specified:
_LT_UNLESS_OPTIONS([LT_INIT], [dlopen], [enable_dlopen=no
])
_LT_UNLESS_OPTIONS([LT_INIT], [win32-dll], [enable_win32_dll=no
])
dnl
dnl If no reference was made to various pairs of opposing options, then
dnl we run the default mode handler for the pair. For example, if neither
dnl `shared' nor `disable-shared' was passed, we enable building of shared
dnl archives by default:
_LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED])
_LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC])
_LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC])
_LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install],
[_LT_ENABLE_FAST_INSTALL])
])
])# _LT_SET_OPTIONS
## --------------------------------- ##
## Macros to handle LT_INIT options. ##
## --------------------------------- ##
# _LT_MANGLE_DEFUN(MACRO-NAME, OPTION-NAME)
# -----------------------------------------
# Expand to the name of the handler macro for the pair: the prefix
# _LT_OPTION_DEFUN_ followed by the upper-cased MACRO-NAME__OPTION-NAME
# in which every character other than an upper-case letter, a digit or
# an underscore is replaced by an underscore.
m4_define([_LT_MANGLE_DEFUN],
[[_LT_OPTION_DEFUN_]m4_bpatsubst(m4_toupper([$1__$2]), [[^A-Z0-9_]], [_])])
# LT_OPTION_DEFINE(MACRO-NAME, OPTION-NAME, CODE)
# -----------------------------------------------
# Register CODE as the handler for OPTION-NAME of MACRO-NAME by defining
# the macro named by _LT_MANGLE_DEFUN; _LT_SET_OPTION expands it when the
# option is set.
m4_define([LT_OPTION_DEFINE],
[m4_define(_LT_MANGLE_DEFUN([$1], [$2]), [$3])[]dnl
])# LT_OPTION_DEFINE
# dlopen
# ------
# Handler for the `dlopen' LT_INIT option: sets the shell variable
# enable_dlopen to yes.
LT_OPTION_DEFINE([LT_INIT], [dlopen], [enable_dlopen=yes
])
# Obsolete spelling: sets the option and emits an `obsolete' diagnostic.
AU_DEFUN([AC_LIBTOOL_DLOPEN],
[_LT_SET_OPTION([LT_INIT], [dlopen])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you
put the `dlopen' option into LT_INIT's first parameter.])
])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_LIBTOOL_DLOPEN], [])
# win32-dll
# ---------
# Declare package support for building win32 dll's.
# The handler sets enable_win32_dll=yes.  On cygwin, mingw, pw32 and cegcc
# hosts it looks up AS, DLLTOOL and OBJDUMP with AC_CHECK_TOOL (fallback
# value `false').  On every host, variables that are still empty
# afterwards default to as, dlltool and objdump, and all three are
# declared with _LT_DECL.
LT_OPTION_DEFINE([LT_INIT], [win32-dll],
[enable_win32_dll=yes
case $host in
*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*)
AC_CHECK_TOOL(AS, as, false)
AC_CHECK_TOOL(DLLTOOL, dlltool, false)
AC_CHECK_TOOL(OBJDUMP, objdump, false)
;;
esac
test -z "$AS" && AS=as
_LT_DECL([], [AS], [1], [Assembler program])dnl
test -z "$DLLTOOL" && DLLTOOL=dlltool
_LT_DECL([], [DLLTOOL], [1], [DLL creation program])dnl
test -z "$OBJDUMP" && OBJDUMP=objdump
_LT_DECL([], [OBJDUMP], [1], [Object dumper program])dnl
])# win32-dll
# Obsolete spelling: requires AC_CANONICAL_HOST, sets the option and emits
# an `obsolete' diagnostic.
AU_DEFUN([AC_LIBTOOL_WIN32_DLL],
[AC_REQUIRE([AC_CANONICAL_HOST])dnl
_LT_SET_OPTION([LT_INIT], [win32-dll])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you
put the `win32-dll' option into LT_INIT's first parameter.])
])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], [])
# _LT_ENABLE_SHARED([DEFAULT])
# ----------------------------
# Implement the --enable-shared flag, and support the `shared' and
# `disable-shared' LT_INIT options.
# DEFAULT is either `yes' or `no'.  If omitted, it defaults to `yes'.
#
# The flag accepts yes, no, or a list of package names separated by
# whitespace, $PATH_SEPARATOR or commas; with a list, enable_shared is
# yes only when the list contains the value of $PACKAGE (or the literal
# `default' when PACKAGE is unset).  The result is declared with _LT_DECL
# as build_libtool_libs.
m4_define([_LT_ENABLE_SHARED],
[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl
AC_ARG_ENABLE([shared],
[AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@],
[build shared libraries @<:@default=]_LT_ENABLE_SHARED_DEFAULT[@:>@])],
[p=${PACKAGE-default}
case $enableval in
yes) enable_shared=yes ;;
no) enable_shared=no ;;
*)
enable_shared=no
# Look at the argument we got. We use all the common list separators.
lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
for pkg in $enableval; do
IFS="$lt_save_ifs"
if test "X$pkg" = "X$p"; then
enable_shared=yes
fi
done
IFS="$lt_save_ifs"
;;
esac],
[enable_shared=]_LT_ENABLE_SHARED_DEFAULT)
_LT_DECL([build_libtool_libs], [enable_shared], [0],
[Whether or not to build shared libraries])
])# _LT_ENABLE_SHARED
# Handlers for the `shared' and `disable-shared' LT_INIT options.
LT_OPTION_DEFINE([LT_INIT], [shared], [_LT_ENABLE_SHARED([yes])])
LT_OPTION_DEFINE([LT_INIT], [disable-shared], [_LT_ENABLE_SHARED([no])])
# Old names:
# AC_ENABLE_SHARED selects `disable-shared' when its argument is `no' and
# `shared' otherwise; AC_DISABLE_SHARED always selects `disable-shared'.
# The AM_ spellings forward to the AC_ ones.
AC_DEFUN([AC_ENABLE_SHARED],
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[shared])
])
AC_DEFUN([AC_DISABLE_SHARED],
[_LT_SET_OPTION([LT_INIT], [disable-shared])
])
AU_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)])
AU_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AM_ENABLE_SHARED], [])
dnl AC_DEFUN([AM_DISABLE_SHARED], [])
# _LT_ENABLE_STATIC([DEFAULT])
# ----------------------------
# Implement the --enable-static flag, and support the `static' and
# `disable-static' LT_INIT options.
# DEFAULT is either `yes' or `no'.  If omitted, it defaults to `yes'.
#
# The flag accepts yes, no, or a list of package names separated by
# whitespace, $PATH_SEPARATOR or commas; with a list, enable_static is
# yes only when the list contains the value of $PACKAGE (or the literal
# `default' when PACKAGE is unset).  The result is declared with _LT_DECL
# as build_old_libs.
m4_define([_LT_ENABLE_STATIC],
[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl
AC_ARG_ENABLE([static],
[AS_HELP_STRING([--enable-static@<:@=PKGS@:>@],
[build static libraries @<:@default=]_LT_ENABLE_STATIC_DEFAULT[@:>@])],
[p=${PACKAGE-default}
case $enableval in
yes) enable_static=yes ;;
no) enable_static=no ;;
*)
enable_static=no
# Look at the argument we got. We use all the common list separators.
lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
for pkg in $enableval; do
IFS="$lt_save_ifs"
if test "X$pkg" = "X$p"; then
enable_static=yes
fi
done
IFS="$lt_save_ifs"
;;
esac],
[enable_static=]_LT_ENABLE_STATIC_DEFAULT)
_LT_DECL([build_old_libs], [enable_static], [0],
[Whether or not to build static libraries])
])# _LT_ENABLE_STATIC
# Handlers for the `static' and `disable-static' LT_INIT options.
LT_OPTION_DEFINE([LT_INIT], [static], [_LT_ENABLE_STATIC([yes])])
LT_OPTION_DEFINE([LT_INIT], [disable-static], [_LT_ENABLE_STATIC([no])])
# Old names:
# AC_ENABLE_STATIC selects `disable-static' when its argument is `no' and
# `static' otherwise; AC_DISABLE_STATIC always selects `disable-static'.
# The AM_ spellings forward to the AC_ ones.
AC_DEFUN([AC_ENABLE_STATIC],
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[static])
])
AC_DEFUN([AC_DISABLE_STATIC],
[_LT_SET_OPTION([LT_INIT], [disable-static])
])
AU_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)])
AU_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AM_ENABLE_STATIC], [])
dnl AC_DEFUN([AM_DISABLE_STATIC], [])
# _LT_ENABLE_FAST_INSTALL([DEFAULT])
# ----------------------------------
# Implement the --enable-fast-install flag, and support the `fast-install'
# and `disable-fast-install' LT_INIT options.
# DEFAULT is either `yes' or `no'.  If omitted, it defaults to `yes'.
#
# The flag accepts yes, no, or a list of package names separated by
# whitespace, $PATH_SEPARATOR or commas; with a list, enable_fast_install
# is yes only when the list contains the value of $PACKAGE (or the
# literal `default' when PACKAGE is unset).  The result is declared with
# _LT_DECL as fast_install.
m4_define([_LT_ENABLE_FAST_INSTALL],
[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl
AC_ARG_ENABLE([fast-install],
[AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@],
[optimize for fast installation @<:@default=]_LT_ENABLE_FAST_INSTALL_DEFAULT[@:>@])],
[p=${PACKAGE-default}
case $enableval in
yes) enable_fast_install=yes ;;
no) enable_fast_install=no ;;
*)
enable_fast_install=no
# Look at the argument we got. We use all the common list separators.
lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
for pkg in $enableval; do
IFS="$lt_save_ifs"
if test "X$pkg" = "X$p"; then
enable_fast_install=yes
fi
done
IFS="$lt_save_ifs"
;;
esac],
[enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT)
_LT_DECL([fast_install], [enable_fast_install], [0],
[Whether or not to optimize for fast installation])dnl
])# _LT_ENABLE_FAST_INSTALL
# Handlers for the `fast-install' and `disable-fast-install' LT_INIT options.
LT_OPTION_DEFINE([LT_INIT], [fast-install], [_LT_ENABLE_FAST_INSTALL([yes])])
LT_OPTION_DEFINE([LT_INIT], [disable-fast-install], [_LT_ENABLE_FAST_INSTALL([no])])
# Old names:
# Both obsolete spellings set the corresponding option and emit an
# `obsolete' diagnostic.  AC_ENABLE_FAST_INSTALL selects
# `disable-fast-install' when its argument is `no'.
AU_DEFUN([AC_ENABLE_FAST_INSTALL],
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you put
the `fast-install' option into LT_INIT's first parameter.])
])
AU_DEFUN([AC_DISABLE_FAST_INSTALL],
[_LT_SET_OPTION([LT_INIT], [disable-fast-install])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you put
the `disable-fast-install' option into LT_INIT's first parameter.])
])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], [])
dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], [])
# _LT_WITH_PIC([MODE])
# --------------------
# Implement the --with-pic flag, and support the `pic-only' and `no-pic'
# LT_INIT options.
# MODE is either `yes' or `no'.  If omitted, pic_mode is `default'
# (use both PIC and non-PIC objects).
#
# --with-pic accepts yes, no, or a list of package names separated by
# whitespace, $PATH_SEPARATOR or commas; with a list, pic_mode is yes
# when the list contains the value of $PACKAGE (or the literal `default'
# when PACKAGE is unset) and `default' otherwise.
#
# When --with-pic is not given at all, pic_mode takes MODE.  The fallback
# must live in the not-given branch of AC_ARG_WITH: assigning `default'
# there unconditionally and testing for an empty pic_mode afterwards
# would never see an empty value, so MODE would be ignored.
m4_define([_LT_WITH_PIC],
[AC_ARG_WITH([pic],
    [AS_HELP_STRING([--with-pic@<:@=PKGS@:>@],
        [try to use only PIC/non-PIC objects @<:@default=use both@:>@])],
    [lt_p=${PACKAGE-default}
    case $withval in
    yes|no) pic_mode=$withval ;;
    *)
      pic_mode=default
      # Look at the argument we got.  We use all the common list separators.
      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
      for lt_pkg in $withval; do
        IFS="$lt_save_ifs"
        if test "X$lt_pkg" = "X$lt_p"; then
          pic_mode=yes
        fi
      done
      IFS="$lt_save_ifs"
      ;;
    esac],
    [pic_mode=m4_default([$1], [default])])

_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl
])# _LT_WITH_PIC
# Handlers for the `pic-only' and `no-pic' LT_INIT options.
LT_OPTION_DEFINE([LT_INIT], [pic-only], [_LT_WITH_PIC([yes])])
LT_OPTION_DEFINE([LT_INIT], [no-pic], [_LT_WITH_PIC([no])])
# Old name:
# Sets the `pic-only' option and emits an `obsolete' diagnostic.
AU_DEFUN([AC_LIBTOOL_PICMODE],
[_LT_SET_OPTION([LT_INIT], [pic-only])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you
put the `pic-only' option into LT_INIT's first parameter.])
])
dnl aclocal-1.4 backwards compatibility:
dnl AC_DEFUN([AC_LIBTOOL_PICMODE], [])
## ----------------- ##
## LTDL_INIT Options ##
## ----------------- ##
# _LTDL_MODE starts out empty; the `nonrecursive', `recursive' and
# `subproject' LTDL_INIT options each redefine it to their own name.
m4_define([_LTDL_MODE], [])
LT_OPTION_DEFINE([LTDL_INIT], [nonrecursive],
[m4_define([_LTDL_MODE], [nonrecursive])])
LT_OPTION_DEFINE([LTDL_INIT], [recursive],
[m4_define([_LTDL_MODE], [recursive])])
LT_OPTION_DEFINE([LTDL_INIT], [subproject],
[m4_define([_LTDL_MODE], [subproject])])
# _LTDL_TYPE starts out empty; the `installable' and `convenience'
# LTDL_INIT options each redefine it to their own name.
m4_define([_LTDL_TYPE], [])
LT_OPTION_DEFINE([LTDL_INIT], [installable],
[m4_define([_LTDL_TYPE], [installable])])
LT_OPTION_DEFINE([LTDL_INIT], [convenience],
[m4_define([_LTDL_TYPE], [convenience])])

123
opal/mca/common/libfabric/libfabric/config/ltsugar.m4 поставляемый Обычный файл
Просмотреть файл

@ -0,0 +1,123 @@
# ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*-
#
# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
# Written by Gary V. Vaughan, 2004
#
# This file is free software; the Free Software Foundation gives
# unlimited permission to copy and/or distribute it, with or without
# modifications, as long as this notice is preserved.
# serial 6 ltsugar.m4
# This is to help aclocal find these macros, as it can't see m4_define.
# The body is a single-argument m4_if; the macro serves only as a marker
# definition for this file.
AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])])
# lt_join(SEP, ARG1, [ARG2...])
# -----------------------------
# Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their
# associated separator.
# Needed until we can rely on m4_join from Autoconf 2.62, since all earlier
# versions in m4sugar had bugs.
#
# Implementation: lt_join skips leading empty arguments by recursing into
# itself.  Once it has emitted the first non-empty argument it continues
# in the helper _lt_join (spelled below as an underscore followed by $0),
# which emits SEP before each further non-empty argument.
m4_define([lt_join],
[m4_if([$#], [1], [],
[$#], [2], [[$2]],
[m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])])
m4_define([_lt_join],
[m4_if([$#$2], [2], [],
[m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])])
# lt_car(LIST)
# lt_cdr(LIST)
# ------------
# Manipulate m4 lists.
# These macros are necessary as long as we still need to support
# Autoconf-2.59, which quotes differently.
#
# lt_car expands to its first argument, quoted.  lt_cdr is a fatal error
# without arguments, expands to nothing for a single argument, and
# otherwise expands to the quoted list of all arguments after the first.
# lt_unquote expands to its first argument.
m4_define([lt_car], [[$1]])
m4_define([lt_cdr],
[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])],
[$#], 1, [],
[m4_dquote(m4_shift($@))])])
m4_define([lt_unquote], $1)
# lt_append(MACRO-NAME, STRING, [SEPARATOR])
# ------------------------------------------
# Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'.
# Note that neither SEPARATOR nor STRING are expanded; they are appended
# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked).
# No SEPARATOR is output if MACRO-NAME was previously undefined (different
# than defined and empty).
#
# This macro is needed until we can rely on Autoconf 2.62, since earlier
# versions of m4sugar mistakenly expanded SEPARATOR but not STRING.
#
# The former content is fetched with m4_defn, so it is copied rather than
# expanded.
m4_define([lt_append],
[m4_define([$1],
m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])])
# lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...])
# ----------------------------------------------------------
# Produce a SEP delimited list of all paired combinations of elements of
# PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list
# has the form PREFIXmINFIXSUFFIXn.
# Needed until we can rely on m4_combine added in Autoconf 2.62.
#
# Expands to nothing when called with fewer than four arguments.
# The helper _Lt_sep is pushed with a self-replacing definition: its first
# expansion produces no text and redefines _Lt_sep to lt_car's definition,
# so SEP is only emitted from the second element on.
m4_define([lt_combine],
[m4_if(m4_eval([$# > 3]), [1],
[m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl
[[m4_foreach([_Lt_prefix], [$2],
[m4_foreach([_Lt_suffix],
]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[,
[_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])])
# lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ])
# -----------------------------------------------------------------------
# Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited
# by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ.
#
# Membership is tested by searching for SEPARATOR VARNAME SEPARATOR inside
# the current definition wrapped in SEPARATORs.  An undefined MACRO-NAME
# is treated as not containing VARNAME.
m4_define([lt_if_append_uniq],
[m4_ifdef([$1],
[m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1],
[lt_append([$1], [$2], [$3])$4],
[$5])],
[lt_append([$1], [$2], [$3])$4])])
# lt_dict_add(DICT, KEY, VALUE)
# -----------------------------
# Store VALUE under KEY by defining a macro whose name is the literal
# text DICT(KEY).
m4_define([lt_dict_add],
[m4_define([$1($2)], [$3])])
# lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE)
# --------------------------------------------
# Store VALUE under KEY and SUBKEY by defining a macro whose name is the
# literal text DICT(KEY:SUBKEY).
m4_define([lt_dict_add_subkey],
[m4_define([$1($2:$3)], [$4])])
# lt_dict_fetch(DICT, KEY, [SUBKEY])
# ----------------------------------
# Expand to the value stored for KEY (or for KEY and SUBKEY when SUBKEY is
# non-empty); expand to nothing when no such entry has been added.
m4_define([lt_dict_fetch],
[m4_ifval([$3],
m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]),
m4_ifdef([$1($2)], [m4_defn([$1($2)])]))])
# lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE])
# -----------------------------------------------------------------
# Expand IF-TRUE when the entry fetched by lt_dict_fetch equals VALUE,
# IF-FALSE otherwise.
m4_define([lt_if_dict_fetch],
[m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4],
[$5],
[$6])])
# lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...])
# --------------------------------------------------------------
# Expand to the list of those KEYs whose DICT entry (for SUBKEY, when
# given) equals VALUE, joined with SEPARATOR; the separator defaults to a
# comma followed by a space.  Expands to nothing when the first KEY is
# empty.
m4_define([lt_dict_filter],
[m4_if([$5], [], [],
[lt_join(m4_quote(m4_default([$4], [[, ]])),
lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]),
[lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl
])

23
opal/mca/common/libfabric/libfabric/config/ltversion.m4 поставляемый Обычный файл
Просмотреть файл

@ -0,0 +1,23 @@
# ltversion.m4 -- version numbers -*- Autoconf -*-
#
# Copyright (C) 2004 Free Software Foundation, Inc.
# Written by Scott James Remnant, 2004
#
# This file is free software; the Free Software Foundation gives
# unlimited permission to copy and/or distribute it, with or without
# modifications, as long as this notice is preserved.
# @configure_input@
# serial 3337 ltversion.m4
# This file is part of GNU Libtool
# Version and revision strings, available at m4 time.
m4_define([LT_PACKAGE_VERSION], [2.4.2])
m4_define([LT_PACKAGE_REVISION], [1.3337])
# LTVERSION_VERSION
# -----------------
# Set the shell variables macro_version and macro_revision to the same
# literal values and declare both with _LT_DECL.
AC_DEFUN([LTVERSION_VERSION],
[macro_version='2.4.2'
macro_revision='1.3337'
_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
_LT_DECL(, macro_revision, 0)
])

98
opal/mca/common/libfabric/libfabric/config/lt~obsolete.m4 поставляемый Обычный файл
Просмотреть файл

@ -0,0 +1,98 @@
# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*-
#
# Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
# Written by Scott James Remnant, 2004.
#
# This file is free software; the Free Software Foundation gives
# unlimited permission to copy and/or distribute it, with or without
# modifications, as long as this notice is preserved.
# serial 5 lt~obsolete.m4
# These exist entirely to fool aclocal when bootstrapping libtool.
#
# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN)
# which have later been changed to m4_define as they aren't part of the
# exported API, or moved to Autoconf or Automake where they belong.
#
# The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN
# in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us
# using a macro with the same name in our local m4/libtool.m4 it'll
# pull the old libtool.m4 in (it doesn't see our shiny new m4_define
# and doesn't know about Autoconf macros at all.)
#
# So we provide this file, which has a silly filename so it's always
# included after everything else. This provides aclocal with the
# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything
# because those macros already exist, or will be overwritten later.
# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6.
#
# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here.
# Yes, that means every name once taken will need to remain here until
# we give up compatibility with versions before 1.7, at which point
# we need to keep only those names which we still refer to.
# This is to help aclocal find these macros, as it can't see m4_define.
# LTOBSOLETE_VERSION is the marker macro for this file.
AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])])
# Each line below gives one obsolete macro name an empty AC_DEFUN
# definition, but only when no macro of that name is defined yet
# (m4_ifndef), so an existing definition is never replaced.
m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])])
m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])])
m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])])
m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])])
m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])])
m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])])
m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])])
m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])])
m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])])
m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])])
m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])])
m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])])
m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])])
m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])])
m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])])
m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])])
m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])])
m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])])
m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])])
m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])])
m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])])
m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])])
m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])])
m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])])
m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])])
m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])])
m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])])
m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])])
m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])])
m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])])
m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])])
m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])])
m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])])
m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])])
m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])])
m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])])
m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])])
m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])])
m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])])
m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])])
m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])])
m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])])
m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])])
m4_ifndef([_LT_AC_LANG_C_CONFIG], [AC_DEFUN([_LT_AC_LANG_C_CONFIG])])
m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])])
m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])])
m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])])
m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])])
m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])])
m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])])
m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])])
m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])])
m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])])
m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])])
m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])])
m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])])
m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])])
m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])])
m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])])
m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])])
m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])])

215
opal/mca/common/libfabric/libfabric/config/missing Исполняемый файл
Просмотреть файл

@ -0,0 +1,215 @@
#! /bin/sh
# Common wrapper for a few potentially missing GNU programs.
scriptversion=2012-06-26.16; # UTC
# Copyright (C) 1996-2013 Free Software Foundation, Inc.
# Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# At least one argument (an option or the program to run) is required.
test $# -ne 0 || {
  echo 1>&2 "Try '$0 --help' for more information"
  exit 1
}

# Handle the wrapper's own options; anything not starting with '-' is
# taken to be the program to run.
case $1 in

  --is-lightweight)
    # Used by our autoconf macros to check whether the available missing
    # script is modern enough.
    exit 0
    ;;

  --run)
    # Back-compat with the calling convention used by older automake.
    shift
    ;;

  -h|--h|--he|--hel|--help)
    echo "\
$0 [OPTION]... PROGRAM [ARGUMENT]...
Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due
to PROGRAM being missing or too old.
Options:
-h, --help display this help and exit
-v, --version output version information and exit
Supported PROGRAM values:
aclocal autoconf autoheader autom4te automake makeinfo
bison yacc flex lex help2man
Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and
'g' are ignored when checking the name.
Send bug reports to <bug-automake@gnu.org>."
    exit $?
    ;;

  -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
    echo "missing $scriptversion (GNU Automake)"
    exit $?
    ;;

  -*)
    echo 1>&2 "$0: unknown '$1' option"
    echo 1>&2 "Try '$0 --help' for more information"
    exit 1
    ;;

esac

# Run the given program and remember its exit status.
"$@"
st=$?

# If it succeeded, we are done.
if test $st -eq 0; then
  exit 0
fi

# Also exit now if it failed (or wasn't found) and '--version' or
# '--help' was passed; such an option is most likely used to detect
# whether the program is present and works.
case $2 in
  --version|--help)
    exit $st
    ;;
esac

# Exit code 63 means version mismatch.  This often happens when the user
# tries to use an ancient version of a tool on a file that requires a
# minimum version.  Exit code 127 means the program was missing.  Any
# other status means the program was found and executed, but failed:
# give up and pass the status on.
case $st in
  63)
    msg="probably too old"
    ;;
  127)
    msg="missing on your system"
    ;;
  *)
    exit $st
    ;;
esac

# Download locations quoted in the advice printed below.
perl_URL=http://www.perl.org/
flex_URL=http://flex.sourceforge.net/
gnu_software_URL=http://www.gnu.org/software
# program_details PROGRAM
# -----------------------
# Print, on stdout, which GNU package provides PROGRAM and what else that
# package needs in order to run.  Only the Automake tools (aclocal,
# automake) and the Autoconf tools (autoconf, autom4te, autoheader) are
# known; any other name prints nothing.  Reads $gnu_software_URL and
# $perl_URL.
program_details ()
{
  case $1 in
    aclocal|automake)
      printf '%s\n' \
        "The '$1' program is part of the GNU Automake package:" \
        "<$gnu_software_URL/automake>" \
        "It also requires GNU Autoconf, GNU m4 and Perl in order to run:" \
        "<$gnu_software_URL/autoconf>" \
        "<$gnu_software_URL/m4/>" \
        "<$perl_URL>"
      ;;
    autoconf|autom4te|autoheader)
      printf '%s\n' \
        "The '$1' program is part of the GNU Autoconf package:" \
        "<$gnu_software_URL/autoconf/>" \
        "It also requires GNU m4 and Perl in order to run:" \
        "<$gnu_software_URL/m4/>" \
        "<$perl_URL>"
      ;;
  esac
}
# give_advice PROGRAM
# -------------------
# Print, on stdout, an explanation of why PROGRAM was probably invoked and
# where to obtain it.  The first line reports "'PROGRAM' is $msg."; the
# following lines depend on the program name once any leading 'gnu-',
# 'gnu' or 'g' prefix has been stripped.  Reads $msg, $gnu_software_URL
# and $flex_URL, and sets the variables normalized_program and
# configure_deps as a side effect.
give_advice ()
{
  # Normalize program name to check for.
  normalized_program=`echo "$1" | sed '
s/^gnu-//; t
s/^gnu//; t
s/^g//; t'`

  printf '%s\n' "'$1' is $msg."

  configure_deps="'configure.ac' or m4 files included by 'configure.ac'"
  case $normalized_program in
    autoconf*)
      echo "You should only need it if you modified 'configure.ac',"
      echo "or m4 files included by it."
      program_details 'autoconf'
      ;;
    autoheader*)
      echo "You should only need it if you modified 'acconfig.h' or"
      echo "$configure_deps."
      program_details 'autoheader'
      ;;
    automake*)
      echo "You should only need it if you modified 'Makefile.am' or"
      echo "$configure_deps."
      program_details 'automake'
      ;;
    aclocal*)
      echo "You should only need it if you modified 'acinclude.m4' or"
      echo "$configure_deps."
      program_details 'aclocal'
      ;;
    autom4te*)
      # The tool is called 'autom4te'; the message used to misspell it.
      echo "You might have modified some maintainer files that require"
      echo "the 'autom4te' program to be rebuilt."
      program_details 'autom4te'
      ;;
    bison*|yacc*)
      echo "You should only need it if you modified a '.y' file."
      echo "You may want to install the GNU Bison package:"
      echo "<$gnu_software_URL/bison/>"
      ;;
    lex*|flex*)
      echo "You should only need it if you modified a '.l' file."
      echo "You may want to install the Fast Lexical Analyzer package:"
      echo "<$flex_URL>"
      ;;
    help2man*)
      echo "You should only need it if you modified a dependency" \
        "of a man page."
      echo "You may want to install the GNU Help2man package:"
      echo "<$gnu_software_URL/help2man/>"
      ;;
    makeinfo*)
      echo "You should only need it if you modified a '.texi' file, or"
      echo "any other file indirectly affecting the aspect of the manual."
      echo "You might want to install the Texinfo package:"
      echo "<$gnu_software_URL/texinfo/>"
      echo "The spurious makeinfo call might also be the consequence of"
      echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might"
      echo "want to install GNU make:"
      echo "<$gnu_software_URL/make/>"
      ;;
    *)
      echo "You might have modified some files without having the proper"
      echo "tools for further handling them. Check the 'README' file, it"
      echo "often tells you about the needed prerequisites for installing"
      echo "this package. You may also peek at any GNU archive site, in"
      echo "case some other package contains this missing '$1' program."
      ;;
  esac
}
# Send the advice to stderr: the first line is tagged as a warning and
# every following line gets a one-space prefix.
give_advice "$1" | sed -e '1s/^/WARNING: /' -e '2,$s/^/ /' >&2

# Propagate the correct exit status (expected to be 127 for a program
# not found, 63 for a program that failed due to version mismatch).
exit $st
# Local variables:
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:

Просмотреть файл

@ -0,0 +1,169 @@
dnl Process this file with autoconf to produce a configure script.
dnl
dnl Package identity and build-system bootstrap.  Auxiliary scripts and
dnl local m4 macros both live in config/; config.h is the generated
dnl configuration header.
AC_PREREQ(2.57)
AC_INIT([libfabric], [0.0.2], [linux-rdma@vger.kernel.org])
AC_CONFIG_SRCDIR([src/fabric.c])
AC_CONFIG_AUX_DIR(config)
AC_CONFIG_MACRO_DIR(config)
AC_CONFIG_HEADERS(config.h)
AM_INIT_AUTOMAKE([1.11 dist-bzip2 foreign -Wall -Werror subdir-objects parallel-tests])
dnl Turn on silent build rules by default when this Automake has them.
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
dnl --enable-debug: append debugging flags to CFLAGS.
dnl
dnl The action-if-given branch also runs for --disable-debug and for
dnl --enable-debug=no (enableval is then "no"), so the debug flags are
dnl only added when debugging was really requested.
AC_ARG_ENABLE([debug],
	[AS_HELP_STRING([--enable-debug],
			[Enable debugging @<:@default=no@:>@])
	],
	[AS_IF([test x"$enableval" != x"no"],
	       [CFLAGS="$CFLAGS -g -O0 -Wall"])],
	[enable_debug=no])
dnl Fix autoconf's habit of adding -g -O2 by default
AS_IF([test -z "$CFLAGS"],
      [CFLAGS='-fvisibility=hidden -O2 -DNDEBUG -Wall'])
dnl Call the archiver-detection macro only when this Automake defines it.
dnl NOTE(review): the macro name is spelled with a space in the shell
dnl comment below, presumably so that the literal token is not copied
dnl into the generated configure script -- confirm before "fixing" it.
# AM PROG_AR did not exist pre AM 1.11.x (where x is somewhere >0 and
# <3), but it is necessary in AM 1.12.x.
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
dnl --with-valgrind or --with-valgrind=DIR: enable valgrind annotations.
dnl When the value names an existing directory, DIR/include is appended
dnl to the preprocessor search path (the existing CPPFLAGS are kept).
AC_ARG_WITH([valgrind],
    [AS_HELP_STRING([--with-valgrind],
		    [Enable valgrind annotations @<:@default=no@:>@])])
if test "$with_valgrind" != "" && test "$with_valgrind" != "no"; then
	AC_DEFINE([INCLUDE_VALGRIND], 1,
		  [Define to 1 to enable valgrind annotations])
	if test -d "$with_valgrind"; then
		CPPFLAGS="$CPPFLAGS -I$with_valgrind/include"
	fi
fi
dnl --enable-direct=PROVIDER: request direct calls into one provider.
dnl enable_direct defaults to "no"; the value is checked against the
dnl number of configured providers further down in this file.
AC_ARG_ENABLE([direct],
[AS_HELP_STRING([--enable-direct=@<:@provider@:>@],
[Enable direct calls to a fabric provider @<:@default=no@:>@])
],
[],
[enable_direct=no])
dnl Checks for programs
AC_PROG_CC
dnl Checks for header files.
AC_HEADER_STDC
dnl Checks for typedefs, structures, and compiler characteristics.
AC_C_CONST
AC_CHECK_SIZEOF(long)
dnl Only build on Linux
AC_CHECK_HEADER([linux/types.h], [],
[AC_MSG_ERROR([libfabric only builds on Linux])])
dnl Set up libtool.
LT_INIT
dnl Close-on-exec support.  O_CLOEXEC and SOCK_CLOEXEC are defined to 0
dnl when the system headers do not declare them.
AC_CHECK_HEADERS([fcntl.h sys/socket.h])
AC_CHECK_DECLS([O_CLOEXEC],,[AC_DEFINE([O_CLOEXEC],[0],
[Defined to 0 if not provided])],
[[
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
]])
AC_CHECK_DECLS([SOCK_CLOEXEC],,[AC_DEFINE([SOCK_CLOEXEC],[0],
[Defined to 0 if not provided])],
[[
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
]])
dnl STREAM_CLOEXEC becomes "e" only when both declarations above were
dnl found; otherwise the cache variable is never assigned and the
dnl define expands to an empty string.
AC_CACHE_CHECK(for close on exec modifier for fopen(),
ac_cv_feature_stream_cloexec_flag,
[if test $ac_cv_have_decl_O_CLOEXEC = yes ; then
if test $ac_cv_have_decl_SOCK_CLOEXEC = yes ; then
ac_cv_feature_stream_cloexec_flag="e"
fi
fi])
AC_DEFINE_UNQUOTED([STREAM_CLOEXEC], "$ac_cv_feature_stream_cloexec_flag",
[fopen() modifier for setting close on exec flag])
dnl dlopen support is optional: libdl is required unless the user passed
dnl --without-dlopen (or --with-dlopen=no).
AC_ARG_WITH([dlopen],
AC_HELP_STRING([--with-dlopen],
[dl-loadable provider support @<:@default=yes@:>@]),
)
AS_IF([test x"$with_dlopen" != x"no"], [
AC_CHECK_LIB(dl, dlopen, [],
AC_MSG_ERROR([dlopen not found. libfabric requires libdl.]))
])
dnl Checks for libraries.  libpthread and librt are mandatory; configure
dnl stops with an error when either is missing.
AC_CHECK_LIB(pthread, pthread_mutex_init, [],
AC_MSG_ERROR([pthread_mutex_init() not found. libfabric requires libpthread.]))
AC_CHECK_LIB(rt, clock_gettime, [],
AC_MSG_ERROR([clock_gettime() not found. libfabric requires librt.]))
dnl Check for C11 atomics (<stdatomic.h>).  HAVE_ATOMICS is defined when
dnl the test program compiles and links.
dnl NOTE(review): the program is only linked, never run, so its return
dnl value (the __STDC_NO_ATOMICS__ branch) does not appear to influence
dnl the result -- confirm whether that is intended.
AC_MSG_CHECKING(compiler support for c11 atomics)
AC_TRY_LINK([#include <stdatomic.h>],
[#ifdef __STDC_NO_ATOMICS__
return 1;
#else
return 0;
#endif
],
[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_ATOMICS, 1, [Set to 1 to use c11 atomic functions])
],
[AC_MSG_RESULT(no)])
dnl When valgrind annotations were requested, the memcheck header must
dnl be usable; otherwise configure stops with an error.
if test "$with_valgrind" != "" && test "$with_valgrind" != "no"; then
AC_CHECK_HEADER(valgrind/memcheck.h, [],
AC_MSG_ERROR([valgrind requested but <valgrind/memcheck.h> not found.]))
fi
dnl Symbol versioning, part 1: the linker is considered to support
dnl --version-script when its --help output mentions "version-script".
AC_CACHE_CHECK(whether ld accepts --version-script, ac_cv_version_script,
if test -n "`$LD --help < /dev/null 2>/dev/null | grep version-script`"; then
ac_cv_version_script=yes
else
ac_cv_version_script=no
fi)
AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$ac_cv_version_script" = "yes")
dnl Symbol versioning, part 2: HAVE_SYMVER_SUPPORT is defined when a
dnl translation unit containing a .symver assembler directive compiles.
AC_CACHE_CHECK(for .symver assembler support, ac_cv_asm_symver_support,
[AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]],
[[asm("symbol:\n.symver symbol, api@ABI\n");]])],
[ac_cv_asm_symver_support=yes],
[ac_cv_asm_symver_support=no])])
if test $ac_cv_asm_symver_support = yes; then
AC_DEFINE([HAVE_SYMVER_SUPPORT], 1, [assembler has .symver support])
fi
dnl Provider-specific checks.  The FI_PROVIDER_* macros are defined
dnl outside this file; the checks below rely on them to set
dnl PROVIDERS_TO_BUILD and PROVIDERS_COUNT.
FI_PROVIDER_INIT
FI_PROVIDER_SETUP([psm])
FI_PROVIDER_SETUP([sockets])
FI_PROVIDER_SETUP([verbs])
FI_PROVIDER_SETUP([usnic])
FI_PROVIDER_FINI
# Make sure at least one provider was setup
AS_IF([test x"$PROVIDERS_TO_BUILD" = "x"],
[AC_MSG_NOTICE([No providers were configured])
AC_MSG_ERROR([Cannot continue])])
# If the user requested to build in direct mode, but
# we have more than one provider, error.
AS_IF([test x"$enable_direct" != x"no"],
[AS_IF([test "$PROVIDERS_COUNT" -eq "1"],
[AC_SUBST(PROVIDER_DIRECT, "$enable_direct")],
[AC_MSG_NOTICE([Only one provider can be chosen when using --enable-direct])
AC_MSG_ERROR(Cannot continue)])])
AM_CONDITIONAL([HAVE_DIRECT], [test x"$enable_direct" != x"no"])
dnl Files generated by config.status.
AC_CONFIG_FILES([Makefile libfabric.spec])
AC_OUTPUT

Просмотреть файл

@ -0,0 +1,220 @@
/*
* Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _FI_H_
#define _FI_H_
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <string.h>
#include <byteswap.h>
#include <endian.h>
#include <pthread.h>
#include <string.h>
#include <rdma/fabric.h>
#include <rdma/fi_prov.h>
#include <rdma/fi_atomic.h>
#ifdef HAVE_ATOMICS
# include <stdatomic.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Valgrind annotations.  When INCLUDE_VALGRIND is defined the memcheck
 * header is pulled in, and a warning is issued if it did not provide
 * VALGRIND_MAKE_MEM_DEFINED.
 */
#ifdef INCLUDE_VALGRIND
# include <valgrind/memcheck.h>
# ifndef VALGRIND_MAKE_MEM_DEFINED
# warning "Valgrind requested, but VALGRIND_MAKE_MEM_DEFINED undefined"
# endif
#endif
/* Without a definition from valgrind the annotation expands to nothing. */
#ifndef VALGRIND_MAKE_MEM_DEFINED
# define VALGRIND_MAKE_MEM_DEFINED(addr, len)
#endif
/*
 * 64-bit host <-> network byte order conversion.  On little-endian hosts
 * the eight bytes are swapped; on any other byte order the value is
 * returned unchanged.
 */
static inline uint64_t htonll(uint64_t x)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
	return bswap_64(x);
#else
	return x;
#endif
}

static inline uint64_t ntohll(uint64_t x)
{
#if __BYTE_ORDER == __LITTLE_ENDIAN
	return bswap_64(x);
#else
	return x;
#endif
}
/*
 * Smaller / larger of two values.  Every use of an argument is
 * parenthesized so that operands containing low-precedence operators
 * (e.g. an assignment) expand correctly.  Note that the selected
 * argument is evaluated twice, so avoid arguments with side effects
 * that must happen only once.
 */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
/*
 * flsll - find the last (most significant) set bit.
 *
 * Returns the 1-based position of the highest set bit of i, i.e. 1 for
 * the least significant bit and 64 for bit 63, or 0 when i is 0.
 *
 * The previous implementation applied ffsll() to the byte-swapped value.
 * A byte swap does not reverse the bit order, so the result was wrong
 * for most inputs (flsll(1) gave 8 on little-endian hosts).  The bits
 * are now counted directly, which is independent of host byte order.
 */
static inline int flsll(long long int i)
{
	unsigned long long bits = (unsigned long long) i;
	int pos = 0;

	while (bits) {
		bits >>= 1;
		pos++;
	}
	return pos;
}

/*
 * roundup_power_of_two - smallest power of two that is >= n.
 *
 * Returns 1 for n == 0 and n == 1.  Returns 0 when the result would not
 * fit in 64 bits (n > 2^63); both cases previously shifted by 64, which
 * is undefined behavior.
 */
static inline uint64_t roundup_power_of_two(uint64_t n)
{
	int shift;

	if (n <= 1)
		return 1;

	shift = flsll((long long int) (n - 1));
	return shift < 64 ? 1ULL << shift : 0;
}
/* 64-bit constant with every odd-numbered bit set (0xAA in each byte). */
#define FI_TAG_GENERIC 0xAAAAAAAAAAAAAAAAULL
/*
 * fastlock: a thin lock abstraction.  It maps to pthread spinlocks when
 * PT_LOCK_SPIN is defined and to pthread mutexes (default attributes)
 * otherwise.  Every macro takes a pointer to the lock.
 */
#if defined(PT_LOCK_SPIN)
#define fastlock_t pthread_spinlock_t
#define fastlock_init(lock) pthread_spin_init(lock, PTHREAD_PROCESS_PRIVATE)
#define fastlock_destroy(lock) pthread_spin_destroy(lock)
#define fastlock_acquire(lock) pthread_spin_lock(lock)
#define fastlock_release(lock) pthread_spin_unlock(lock)
#else
#define fastlock_t pthread_mutex_t
#define fastlock_init(lock) pthread_mutex_init(lock, NULL)
#define fastlock_destroy(lock) pthread_mutex_destroy(lock)
#define fastlock_acquire(lock) pthread_mutex_lock(lock)
#define fastlock_release(lock) pthread_mutex_unlock(lock)
#endif /* PT_LOCK_SPIN */
/*
 * Integer counter with atomic_inc/atomic_dec/atomic_set/atomic_get.
 * atomic_inc and atomic_dec return the value after the update and
 * atomic_set returns the value it stored.
 */
#ifdef HAVE_ATOMICS

/* C11 atomics are available: the counter is a plain atomic_int. */
typedef atomic_int atomic_t;

static inline int atomic_inc(atomic_t *atomic)
{
	int previous = atomic_fetch_add_explicit(atomic, 1, memory_order_acq_rel);

	return previous + 1;
}

static inline int atomic_dec(atomic_t *atomic)
{
	int previous = atomic_fetch_sub_explicit(atomic, 1, memory_order_acq_rel);

	return previous - 1;
}

static inline int atomic_set(atomic_t *atomic, int value)
{
	atomic_store(atomic, value);
	return value;
}

static inline int atomic_get(atomic_t *atomic)
{
	int current = atomic_load(atomic);

	return current;
}

#else

/* No C11 atomics: the counter is an int guarded by a fastlock. */
typedef struct {
	fastlock_t lock;
	int val;
} atomic_t;

static inline int atomic_inc(atomic_t *atomic)
{
	int updated;

	fastlock_acquire(&atomic->lock);
	atomic->val += 1;
	updated = atomic->val;
	fastlock_release(&atomic->lock);
	return updated;
}

static inline int atomic_dec(atomic_t *atomic)
{
	int updated;

	fastlock_acquire(&atomic->lock);
	atomic->val -= 1;
	updated = atomic->val;
	fastlock_release(&atomic->lock);
	return updated;
}

static inline int atomic_set(atomic_t *atomic, int value)
{
	fastlock_acquire(&atomic->lock);
	atomic->val = value;
	fastlock_release(&atomic->lock);
	return value;
}

/* Initialize the lock, then store the starting value. */
static inline void atomic_init(atomic_t *atomic, int value)
{
	fastlock_init(&atomic->lock);
	atomic->val = value;
}

/* Reads the value without taking the lock. */
static inline int atomic_get(atomic_t *atomic)
{
	return atomic->val;
}

#endif /* HAVE_ATOMICS */
/*
 * non exported symbols
 *
 * Helpers shared inside the library; their definitions live in other
 * source files.
 */
int fi_init(void);
int fi_read_file(const char *dir, const char *file, char *buf, size_t size);
int fi_poll_fd(int fd, int timeout);
int fi_wait_cond(pthread_cond_t *cond, pthread_mutex_t *mut, int timeout);
struct fi_info *fi_allocinfo_internal(void);
void fi_freeinfo_internal(struct fi_info *info);
int fi_sockaddr_len(struct sockaddr *addr);
size_t fi_datatype_size(enum fi_datatype datatype);
uint64_t fi_tag_bits(uint64_t mem_tag_format);
uint64_t fi_tag_format(uint64_t tag_bits);
int fi_version_register(uint32_t version, struct fi_provider *provider);
/*
 * Configuration directories, built by string-literal concatenation from
 * SYSCONFDIR and RDMADIR (both expected to be defined elsewhere, e.g. by
 * the build system).
 */
#define RDMA_CONF_DIR SYSCONFDIR "/" RDMADIR
#define FI_CONF_DIR RDMA_CONF_DIR "/fabric"
/* ABI version string used by default_symver(). */
#define DEFAULT_ABI "FABRIC_1.0"
/*
 * symbol -> external symbol mappings
 *
 * With HAVE_SYMVER_SUPPORT, symver() emits a ".symver name,api@ver"
 * assembler directive and default_symver() emits the "@@" form using
 * DEFAULT_ABI.  Without it, symver() expands to nothing and
 * default_symver() declares api as an alias of name.
 */
#ifdef HAVE_SYMVER_SUPPORT
# define symver(name, api, ver) \
asm(".symver " #name "," #api "@" #ver)
# define default_symver(name, api) \
asm(".symver " #name "," #api "@@" DEFAULT_ABI)
#else
# define symver(name, api, ver)
# define default_symver(name, api) \
extern __typeof(name) api __attribute__((alias(#name)))
#endif /* HAVE_SYMVER_SUPPORT */
#ifdef __cplusplus
}
#endif
#endif /* _FI_H_ */

Просмотреть файл

@ -0,0 +1,495 @@
/*
* Copyright (c) 2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _FI_ENOSYS_H_
#define _FI_ENOSYS_H_
#include <rdma/fabric.h>
#include <rdma/fi_atomic.h>
#include <rdma/fi_cm.h>
#include <rdma/fi_domain.h>
#include <rdma/fi_endpoint.h>
#include <rdma/fi_eq.h>
#include <rdma/fi_rma.h>
#include <rdma/fi_tagged.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Template initializer for struct fi_ops (kept as a comment), followed
 * by the fi_no_* prototypes it refers to.  Presumably each fi_no_*
 * function reports the operation as unsupported -- confirm in the
 * implementation.

static struct fi_ops X = {
.size = sizeof(struct fi_ops),
.close = X,
.bind = fi_no_bind,
.sync = fi_no_sync,
.control = fi_no_control,
.ops_open = fi_no_ops_open,
};
*/
int fi_no_bind(struct fid *fid, struct fid *bfid, uint64_t flags);
int fi_no_sync(struct fid *fid, uint64_t flags, void *context);
int fi_no_control(struct fid *fid, int command, void *arg);
int fi_no_ops_open(struct fid *fid, const char *name,
uint64_t flags, void **ops, void *context);
/*
 * Template initializer for struct fi_ops_fabric (kept as a comment),
 * followed by the fi_no_* prototypes it refers to.  Presumably each
 * fi_no_* function reports the operation as unsupported -- confirm in
 * the implementation.

static struct fi_ops_fabric X = {
.size = sizeof(struct fi_ops_fabric),
.domain = fi_no_domain,
.endpoint = fi_no_pendpoint,
.eq_open = fi_no_eq_open,
};
*/
int fi_no_domain(struct fid_fabric *fabric, struct fi_domain_attr *attr,
struct fid_domain **dom, void *context);
int fi_no_pendpoint(struct fid_fabric *fabric, struct fi_info *info,
struct fid_pep **pep, void *context);
int fi_no_eq_open(struct fid_fabric *fabric, struct fi_eq_attr *attr,
struct fid_eq **eq, void *context);
/*
 * Template initializer for struct fi_ops_atomic (kept as a comment),
 * followed by the fi_no_atomic_* prototypes it refers to.  Presumably
 * each fi_no_* function reports the operation as unsupported -- confirm
 * in the implementation.

static struct fi_ops_atomic X = {
.size = sizeof(struct fi_ops_atomic),
.write = fi_no_atomic_write,
.writev = fi_no_atomic_writev,
.writeto = fi_no_atomic_writeto,
.writemsg = fi_no_atomic_writemsg,
.inject = fi_no_atomic_inject,
.injectto = fi_no_atomic_injectto,
.readwrite = fi_no_atomic_readwrite,
.readwritev = fi_no_atomic_readwritev,
.readwriteto = fi_no_atomic_readwriteto,
.readwritemsg = fi_no_atomic_readwritemsg,
.compwrite = fi_no_atomic_compwrite,
.compwritev = fi_no_atomic_compwritev,
.compwriteto = fi_no_atomic_compwriteto,
.compwritemsg = fi_no_atomic_compwritemsg,
.writevalid = fi_no_atomic_writevalid,
.readwritevalid = fi_no_atomic_readwritevalid,
.compwritevalid = fi_no_atomic_compwritevalid,
};
*/
ssize_t fi_no_atomic_write(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t fi_no_atomic_writev(struct fid_ep *ep,
const struct fi_ioc *iov, void **desc, size_t count,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t fi_no_atomic_writeto(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t fi_no_atomic_writemsg(struct fid_ep *ep,
const struct fi_msg_atomic *msg, uint64_t flags);
ssize_t fi_no_atomic_inject(struct fid_ep *ep, const void *buf, size_t count,
uint64_t addr, uint64_t key, enum fi_datatype datatype,
enum fi_op op);
ssize_t fi_no_atomic_injectto(struct fid_ep *ep, const void *buf, size_t count,
fi_addr_t dest_addr, uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op);
ssize_t fi_no_atomic_readwrite(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
void *result, void *result_desc,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t fi_no_atomic_readwritev(struct fid_ep *ep,
const struct fi_ioc *iov, void **desc, size_t count,
struct fi_ioc *resultv, void **result_desc, size_t result_count,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t fi_no_atomic_readwriteto(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t fi_no_atomic_readwritemsg(struct fid_ep *ep,
const struct fi_msg_atomic *msg,
struct fi_ioc *resultv, void **result_desc, size_t result_count,
uint64_t flags);
ssize_t fi_no_atomic_compwrite(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
const void *compare, void *compare_desc,
void *result, void *result_desc,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t fi_no_atomic_compwritev(struct fid_ep *ep,
const struct fi_ioc *iov, void **desc, size_t count,
const struct fi_ioc *comparev, void **compare_desc, size_t compare_count,
struct fi_ioc *resultv, void **result_desc, size_t result_count,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t fi_no_atomic_compwriteto(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
const void *compare, void *compare_desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t fi_no_atomic_compwritemsg(struct fid_ep *ep,
const struct fi_msg_atomic *msg,
const struct fi_ioc *comparev, void **compare_desc, size_t compare_count,
struct fi_ioc *resultv, void **result_desc, size_t result_count,
uint64_t flags);
int fi_no_atomic_writevalid(struct fid_ep *ep,
enum fi_datatype datatype, enum fi_op op, size_t *count);
int fi_no_atomic_readwritevalid(struct fid_ep *ep,
enum fi_datatype datatype, enum fi_op op, size_t *count);
int fi_no_atomic_compwritevalid(struct fid_ep *ep,
enum fi_datatype datatype, enum fi_op op, size_t *count);
/*
 * Template initializer for struct fi_ops_cm (kept as a comment),
 * followed by the fi_no_* prototypes it refers to.  Presumably each
 * fi_no_* function reports the operation as unsupported -- confirm in
 * the implementation.
 * NOTE(review): the template shows ".getname = X" although fi_no_getname
 * is declared below -- confirm which one is intended.

static struct fi_ops_cm X = {
.size = sizeof(struct fi_ops_cm),
.getname = X,
.getpeer = fi_no_getpeer,
.connect = fi_no_connect,
.listen = fi_no_listen,
.accept = fi_no_accept,
.reject = fi_no_reject,
.shutdown = fi_no_shutdown,
.join = fi_no_join,
.leave = fi_no_leave,
};
*/
int fi_no_getname(fid_t fid, void *addr, size_t *addrlen);
int fi_no_getpeer(struct fid_ep *ep, void *addr, size_t *addrlen);
int fi_no_connect(struct fid_ep *ep, const void *addr,
const void *param, size_t paramlen);
int fi_no_listen(struct fid_pep *pep);
int fi_no_accept(struct fid_ep *ep, const void *param, size_t paramlen);
int fi_no_reject(struct fid_pep *pep, fi_connreq_t connreq,
const void *param, size_t paramlen);
int fi_no_shutdown(struct fid_ep *ep, uint64_t flags);
int fi_no_join(struct fid_ep *ep, void *addr, fi_addr_t *fi_addr,
uint64_t flags, void *context);
int fi_no_leave(struct fid_ep *ep, void *addr, fi_addr_t fi_addr,
uint64_t flags);
/*
static struct fi_ops_av X = {
.size = sizeof(struct fi_ops_av),
.insert = X,
.insertsvc = X,
.insertsym = X,
.remove = X,
.lookup = X,
.straddr = X,
};
*/
/*
 * Template initializer for struct fi_ops_domain (kept as a comment),
 * followed by the fi_no_* prototypes it refers to.  Presumably each
 * fi_no_* function reports the operation as unsupported -- confirm in
 * the implementation.

static struct fi_ops_domain X = {
.size = sizeof(struct fi_ops_domain),
.av_open = fi_no_av_open,
.cq_open = fi_no_cq_open,
.endpoint = fi_no_endpoint,
.cntr_open = fi_no_cntr_open,
.wait_open = fi_no_wait_open,
.poll_open = fi_no_poll_open,
};
*/
int fi_no_av_open(struct fid_domain *domain, struct fi_av_attr *attr,
struct fid_av **av, void *context);
int fi_no_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
struct fid_cq **cq, void *context);
int fi_no_endpoint(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context);
int fi_no_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
struct fid_cntr **cntr, void *context);
int fi_no_wait_open(struct fid_domain *domain, struct fi_wait_attr *attr,
struct fid_wait **waitset);
int fi_no_poll_open(struct fid_domain *domain, struct fi_poll_attr *attr,
struct fid_poll **pollset);
/*
 * Template initializer for struct fi_ops_mr (kept as a comment),
 * followed by the fi_no_mr_* prototypes it refers to.  Presumably each
 * fi_no_* function reports the operation as unsupported -- confirm in
 * the implementation.

static struct fi_ops_mr X = {
.size = sizeof(struct fi_ops_mr),
.reg = fi_no_mr_reg,
.regv = fi_no_mr_regv,
.regattr = fi_no_mr_regattr,
};
*/
int fi_no_mr_reg(struct fid_domain *domain, const void *buf, size_t len,
uint64_t access, uint64_t offset, uint64_t requested_key,
uint64_t flags, struct fid_mr **mr, void *context);
int fi_no_mr_regv(struct fid_domain *domain, const struct iovec *iov,
size_t count, uint64_t access,
uint64_t offset, uint64_t requested_key,
uint64_t flags, struct fid_mr **mr, void *context);
int fi_no_mr_regattr(struct fid_domain *domain, const struct fi_mr_attr *attr,
uint64_t flags, struct fid_mr **mr);
/*
static struct fi_ops_ep X = {
.size = sizeof(struct fi_ops_ep),
.enable = fi_no_enable,
.cancel = fi_no_cancel,
.getopt = fi_no_getopt,
.setopt = fi_no_setopt,
.tx_ctx = fi_no_tx_ctx,
.rx_ctx = fi_no_rx_ctx,
};
*/
int fi_no_enable(struct fid_ep *ep);
ssize_t fi_no_cancel(fid_t fid, void *context);
int fi_no_getopt(fid_t fid, int level, int optname,
void *optval, size_t *optlen);
int fi_no_setopt(fid_t fid, int level, int optname,
const void *optval, size_t optlen);
int fi_no_tx_ctx(struct fid_ep *ep, int index,
struct fi_tx_ctx_attr *attr, struct fid_ep **tx_ep,
void *context);
int fi_no_rx_ctx(struct fid_ep *ep, int index,
struct fi_rx_ctx_attr *attr, struct fid_ep **rx_ep,
void *context);
/*
static struct fi_ops_msg X = {
.size = sizeof(struct fi_ops_msg),
.recv = fi_no_msg_recv,
.recvv = fi_no_msg_recvv,
.recvfrom = fi_no_msg_recvfrom,
.recvmsg = fi_no_msg_recvmsg,
.send = fi_no_msg_send,
.sendv = fi_no_msg_sendv,
.sendto = fi_no_msg_sendto,
.sendmsg = fi_no_msg_sendmsg,
.inject = fi_no_msg_inject,
.injectto = fi_no_msg_injectto,
.senddata = fi_no_msg_senddata,
.senddatato = fi_no_msg_senddatato,
};
*/
ssize_t fi_no_msg_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
void *context);
ssize_t fi_no_msg_recvv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, void *context);
ssize_t fi_no_msg_recvfrom(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr, void *context);
ssize_t fi_no_msg_recvmsg(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags);
ssize_t fi_no_msg_send(struct fid_ep *ep, const void *buf, size_t len, void *desc,
void *context);
ssize_t fi_no_msg_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, void *context);
ssize_t fi_no_msg_sendto(struct fid_ep *ep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, void *context);
ssize_t fi_no_msg_sendmsg(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags);
ssize_t fi_no_msg_inject(struct fid_ep *ep, const void *buf, size_t len);
ssize_t fi_no_msg_injectto(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr);
ssize_t fi_no_msg_senddata(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, void *context);
ssize_t fi_no_msg_senddatato(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, fi_addr_t dest_addr, void *context);
/*
static struct fi_ops_wait X = {
.size = sizeof(struct fi_ops_wait),
.wait = X,
};
*/
/*
static struct fi_ops_poll X = {
.size = sizeof(struct fi_ops_poll),
.poll = X,
};
*/
/*
static struct fi_ops_eq X = {
.size = sizeof(struct fi_ops_eq),
.read = X,
.readerr = X,
.write = fi_no_eq_write,
.sread = fi_no_eq_sread,
.strerror = X,
};
*/
ssize_t fi_no_eq_write(struct fid_eq *eq, uint32_t event,
const void *buf, size_t len, uint64_t flags);
ssize_t fi_no_eq_sread(struct fid_eq *eq, uint32_t *event,
void *buf, size_t len, int timeout, uint64_t flags);
/*
static struct fi_ops_cq X = {
.size = sizeof(struct fi_ops_cq),
.read = X,
.readfrom = fi_no_cq_readfrom,
.readerr = X,
.write = fi_no_cq_write,
.sread = fi_no_cq_sread,
.sreadfrom = fi_no_cq_sreadfrom,
.strerror = X,
};
*/
ssize_t fi_no_cq_readfrom(struct fid_cq *cq, void *buf, size_t count,
fi_addr_t *src_addr);
ssize_t fi_no_cq_write(struct fid_cq *cq, const void *buf, size_t len);
ssize_t fi_no_cq_writeerr(struct fid_cq *cq, struct fi_cq_err_entry *buf,
size_t len, uint64_t flags);
ssize_t fi_no_cq_sread(struct fid_cq *cq, void *buf, size_t count,
const void *cond, int timeout);
ssize_t fi_no_cq_sreadfrom(struct fid_cq *cq, void *buf, size_t count,
fi_addr_t *src_addr, const void *cond, int timeout);
/*
static struct fi_ops_cntr X = {
.size = sizeof(struct fi_ops_cntr),
.read = X,
.readerr = X,
.add = fi_no_cntr_add,
.set = fi_no_cntr_set,
.wait = fi_no_cntr_wait,
};
*/
int fi_no_cntr_add(struct fid_cntr *cntr, uint64_t value);
int fi_no_cntr_set(struct fid_cntr *cntr, uint64_t value);
int fi_no_cntr_wait(struct fid_cntr *cntr, uint64_t threshold, int timeout);
/*
static struct fi_ops_rma X = {
.size = sizeof(struct fi_ops_rma),
.read = fi_no_rma_read,
.readv = fi_no_rma_readv,
.readfrom = fi_no_rma_readfrom,
.readmsg = fi_no_rma_readmsg,
.write = fi_no_rma_write,
.writev = fi_no_rma_writev,
.writeto = fi_no_rma_writeto,
.writemsg = fi_no_rma_writemsg,
.inject = fi_no_rma_inject,
.injectto = fi_no_rma_injectto,
.writedata = fi_no_rma_writedata,
.writedatato = fi_no_rma_writedatato,
};
*/
ssize_t fi_no_rma_read(struct fid_ep *ep, void *buf, size_t len, void *desc,
uint64_t addr, uint64_t key, void *context);
ssize_t fi_no_rma_readv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t addr, uint64_t key, void *context);
ssize_t fi_no_rma_readfrom(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr, uint64_t addr, uint64_t key,
void *context);
ssize_t fi_no_rma_readmsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
uint64_t flags);
ssize_t fi_no_rma_write(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t addr, uint64_t key, void *context);
ssize_t fi_no_rma_writev(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t addr, uint64_t key, void *context);
ssize_t fi_no_rma_writeto(struct fid_ep *ep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, uint64_t addr, uint64_t key,
void *context);
ssize_t fi_no_rma_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg,
uint64_t flags);
ssize_t fi_no_rma_inject(struct fid_ep *ep, const void *buf, size_t len,
uint64_t addr, uint64_t key);
ssize_t fi_no_rma_injectto(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr, uint64_t addr, uint64_t key);
ssize_t fi_no_rma_writedata(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, uint64_t addr, uint64_t key, void *context);
ssize_t fi_no_rma_writedatato(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, fi_addr_t dest_addr, uint64_t addr, uint64_t key,
void *context);
/*
static struct fi_ops_tagged X = {
.size = sizeof(struct fi_ops_tagged),
.recv = fi_no_tagged_recv,
.recvv = fi_no_tagged_recvv,
.recvfrom = fi_no_tagged_recvfrom,
.recvmsg = fi_no_tagged_recvmsg,
.send = fi_no_tagged_send,
.sendv = fi_no_tagged_sendv,
.sendto = fi_no_tagged_sendto,
.sendmsg = fi_no_tagged_sendmsg,
.inject = fi_no_tagged_inject,
.injectto = fi_no_tagged_injectto,
.senddata = fi_no_tagged_senddata,
.senddatato = fi_no_tagged_senddatato,
.search = fi_no_tagged_search,
};
*/
ssize_t fi_no_tagged_recv(struct fid_ep *ep, void *buf, size_t len, void *desc,
uint64_t tag, uint64_t ignore, void *context);
ssize_t fi_no_tagged_recvv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t tag, uint64_t ignore, void *context);
ssize_t fi_no_tagged_recvfrom(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr,
uint64_t tag, uint64_t ignore, void *context);
ssize_t fi_no_tagged_recvmsg(struct fid_ep *ep, const struct fi_msg_tagged *msg,
uint64_t flags);
ssize_t fi_no_tagged_send(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t tag, void *context);
ssize_t fi_no_tagged_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t tag, void *context);
ssize_t fi_no_tagged_sendto(struct fid_ep *ep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, uint64_t tag, void *context);
ssize_t fi_no_tagged_sendmsg(struct fid_ep *ep, const struct fi_msg_tagged *msg,
uint64_t flags);
ssize_t fi_no_tagged_inject(struct fid_ep *ep, const void *buf, size_t len,
uint64_t tag);
ssize_t fi_no_tagged_injectto(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr, uint64_t tag);
ssize_t fi_no_tagged_senddata(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, uint64_t tag, void *context);
ssize_t fi_no_tagged_senddatato(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, fi_addr_t dest_addr, uint64_t tag, void *context);
ssize_t fi_no_tagged_search(struct fid_ep *ep, uint64_t *tag, uint64_t ignore,
uint64_t flags, fi_addr_t *src_addr, size_t *len, void *context);
/*
* fi_ops_av
*/
int fi_no_av_insert(struct fid_av *av, const void *addr, size_t count,
fi_addr_t *fi_addr, uint64_t flags, void *context);
int fi_no_av_insertsvc(struct fid_av *av, const char *node,
const char *service, fi_addr_t *fi_addr, uint64_t flags,
void *context);
int fi_no_av_insertsym(struct fid_av *av, const char *node, size_t nodecnt,
const char *service, size_t svccnt, fi_addr_t *fi_addr,
uint64_t flags, void *context);
int fi_no_av_remove(struct fid_av *av, fi_addr_t *fi_addr, size_t count,
uint64_t flags);
#ifdef __cplusplus
}
#endif
#endif /* _FI_ENOSYS_H_ */

Просмотреть файл

@ -0,0 +1,107 @@
/*
* Copyright (c) 2011 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#if !defined(INDEXER_H)
#define INDEXER_H
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <sys/types.h>
/*
 * Indexer - to find a structure given an index.  Synchronization
 * must be provided by the caller.  Caller must initialize the
 * indexer by setting free_list and size to 0.
 *
 * An index is split into two parts: the upper bits select one of
 * IDX_ARRAY_SIZE second-level tables, the lower IDX_ENTRY_BITS bits
 * select a slot inside that table.
 */

/*
 * One slot of a second-level table.  The two members share storage;
 * presumably 'item' is valid for a slot in use and 'next' chains free
 * slots (see struct indexer.free_list) -- confirm against idx_insert().
 */
union idx_entry {
	void *item;
	int   next;
};

#define IDX_INDEX_BITS 16	/* total bits in an index */
#define IDX_ENTRY_BITS 10	/* low bits: slot within a second-level table */
#define IDX_ENTRY_SIZE (1 << IDX_ENTRY_BITS)
#define IDX_ARRAY_SIZE (1 << (IDX_INDEX_BITS - IDX_ENTRY_BITS))
#define IDX_MAX_INDEX  ((1 << IDX_INDEX_BITS) - 1)

struct indexer
{
	union idx_entry *array[IDX_ARRAY_SIZE];
	int free_list;
	int size;
};

/*
 * Split an index into (table, slot).  The argument is parenthesized so
 * that expressions such as 'base | off' expand with the intended
 * precedence.
 */
#define idx_array_index(index) ((index) >> IDX_ENTRY_BITS)
#define idx_entry_index(index) ((index) & (IDX_ENTRY_SIZE - 1))

int idx_insert(struct indexer *idx, void *item);
void *idx_remove(struct indexer *idx, int index);
void idx_replace(struct indexer *idx, int index, void *item);

/*
 * Return the item stored at 'index'.  No validation is performed: the
 * second-level table covering 'index' must exist and 'index' must be in
 * [0, IDX_MAX_INDEX].
 */
static inline void *idx_at(struct indexer *idx, int index)
{
	union idx_entry *table = idx->array[idx_array_index(index)];

	return table[idx_entry_index(index)].item;
}

/*
 * Index map - associates a structure with an index.  Synchronization
 * must be provided by the caller.  Caller must initialize the
 * index map by setting it to 0.
 */
struct index_map
{
	void **array[IDX_ARRAY_SIZE];
	int count[IDX_ARRAY_SIZE];
};

int idm_set(struct index_map *idm, int index, void *item);
void *idm_clear(struct index_map *idm, int index);

/*
 * Unchecked lookup: the second-level table covering 'index' must be
 * non-NULL and 'index' must be in [0, IDX_MAX_INDEX].
 */
static inline void *idm_at(struct index_map *idm, int index)
{
	void **entry;

	entry = idm->array[idx_array_index(index)];
	return entry[idx_entry_index(index)];
}

/*
 * Checked lookup.  Returns NULL when 'index' is outside
 * [0, IDX_MAX_INDEX] or when the second-level table covering it is
 * NULL; otherwise returns the stored pointer (which may itself be NULL).
 *
 * The lower-bound test matters: a negative index would otherwise be
 * shifted into a negative table subscript and read before 'array'.
 */
static inline void *idm_lookup(struct index_map *idm, int index)
{
	if (index < 0 || index > IDX_MAX_INDEX)
		return NULL;
	if (!idm->array[idx_array_index(index)])
		return NULL;
	return idm_at(idm, index);
}
#endif /* INDEXER_H */

Просмотреть файл

@ -0,0 +1,248 @@
/*
* Copyright (c) 2011 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#if !defined(LIST_H)
#define LIST_H
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <fi.h>
#include <rdma/fi_errno.h>
/*
 * Circular doubly linked list.  The list head is itself a
 * dlist_entry; an empty list is a head whose links point to itself.
 */
struct dlist_entry {
	struct dlist_entry *next;
	struct dlist_entry *prev;
};

/* Make 'head' an empty list (both links refer back to the head). */
static inline void dlist_init(struct dlist_entry *head)
{
	head->next = head;
	head->prev = head->next;
}

/* Non-zero when the head's forward link points back at the head. */
static inline int dlist_empty(struct dlist_entry *head)
{
	return head == head->next;
}

/* Link 'item' immediately after 'head'. */
static inline void
dlist_insert_after(struct dlist_entry *item, struct dlist_entry *head)
{
	struct dlist_entry *succ = head->next;

	item->next = succ;
	item->prev = head;
	succ->prev = item;
	head->next = item;
}

/* Link 'item' immediately before 'head' (i.e. after head's predecessor). */
static inline void
dlist_insert_before(struct dlist_entry *item, struct dlist_entry *head)
{
	struct dlist_entry *pred = head->prev;

	dlist_insert_after(item, pred);
}

/* With a list head as second argument these insert at the front / back. */
#define dlist_insert_head dlist_insert_after
#define dlist_insert_tail dlist_insert_before

/*
 * Unlink 'item' from its neighbours.  The links stored in 'item'
 * itself are left untouched.
 */
static inline void dlist_remove(struct dlist_entry *item)
{
	struct dlist_entry *pred = item->prev;
	struct dlist_entry *succ = item->next;

	pred->next = succ;
	succ->prev = pred;
}
/*
 * Single-linked list with head and tail pointers.
 *
 * Invariants maintained by the functions below:
 *   - an empty list has head == NULL and tail == NULL;
 *   - the last entry's 'next' is NULL, so the list can be walked
 *     from head without consulting tail.
 */
struct slist_entry {
	struct slist_entry *next;
};

struct slist {
	struct slist_entry *head;
	struct slist_entry *tail;
};

/* Reset 'list' to empty.  Both ends are cleared so no stale tail survives. */
static inline void slist_init(struct slist *list)
{
	list->head = NULL;
	list->tail = NULL;
}

/* Non-zero when the list holds no entries. */
static inline int slist_empty(struct slist *list)
{
	return !list->head;
}

/* Push 'item' at the front of 'list'. */
static inline void slist_insert_head(struct slist_entry *item, struct slist *list)
{
	if (slist_empty(list))
		list->tail = item;
	/* head is NULL for an empty list, which also terminates the chain */
	item->next = list->head;
	list->head = item;
}

/* Append 'item' at the back of 'list'. */
static inline void slist_insert_tail(struct slist_entry *item, struct slist *list)
{
	item->next = NULL;
	if (slist_empty(list))
		list->head = item;
	else
		list->tail->next = item;
	list->tail = item;
}

/*
 * Detach and return the first entry, or NULL when the list is empty.
 * Removing the only entry resets the list to the empty state.
 */
static inline struct slist_entry *slist_remove_head(struct slist *list)
{
	struct slist_entry *item = list->head;

	if (!item)
		return NULL;

	if (item == list->tail)
		slist_init(list);
	else
		list->head = item->next;
	return item;
}
/*
* Double-linked list with blocking wait-until-avail support
*/
/* Indices into dlistfd_head.fd[] (the two ends of the socket pair). */
enum {
LIST_READ_FD = 0,
LIST_WRITE_FD
};
/*
* List head plus a socket pair used as a wake-up channel.
* fd[] is filled by socketpair() in dlistfd_head_init(), which also puts
* fd[LIST_READ_FD] into non-blocking mode.
*/
struct dlistfd_head {
struct dlist_entry list;
/* incremented by dlistfd_reset() after it reads from fd[LIST_READ_FD] */
int fdrcnt;
/* incremented by dlistfd_signal() after it writes to fd[LIST_WRITE_FD] */
int fdwcnt;
int fd[2];
};
/*
 * Initialise 'head': empty list, zeroed signal counters, and a
 * socket pair whose read end is non-blocking.
 *
 * Returns 0 on success or a negated errno value from socketpair() /
 * fcntl().  On failure no descriptors are left open.
 */
static inline int dlistfd_head_init(struct dlistfd_head *head)
{
	int ret, saved_errno;

	dlist_init(&head->list);
	/*
	 * dlistfd_signal() only writes when the two counters are equal,
	 * so they must start from a known, matching value.
	 */
	head->fdrcnt = 0;
	head->fdwcnt = 0;

	ret = socketpair(AF_UNIX, SOCK_STREAM, 0, head->fd);
	if (ret < 0)
		return -errno;

	ret = fcntl(head->fd[LIST_READ_FD], F_SETFL, O_NONBLOCK);
	if (ret < 0) {
		/* close() may overwrite errno; keep the fcntl() failure code */
		saved_errno = errno;
		close(head->fd[0]);
		close(head->fd[1]);
		return -saved_errno;
	}

	return 0;
}
/* Close both ends of the socket pair (read end first, then write end). */
static inline void dlistfd_head_free(struct dlistfd_head *head)
{
	int end;

	for (end = 0; end < 2; end++)
		close(head->fd[end]);
}
/* Non-zero when the embedded list has no entries. */
static inline int dlistfd_empty(struct dlistfd_head *head)
{
	struct dlist_entry *entries = &head->list;

	return dlist_empty(entries);
}
/*
 * Make fd[LIST_READ_FD] readable if it is not already flagged.
 *
 * A token is written only while the write and read counters match, so
 * at most one token is outstanding.  The token's content is irrelevant;
 * it is pointer-sized ('sizeof head'), the same amount dlistfd_reset()
 * asks read() for.
 *
 * The write counter is advanced only when the whole token was written:
 * counting a failed write would record a wake-up that never reached the
 * socket.
 */
static inline void dlistfd_signal(struct dlistfd_head *head)
{
	ssize_t written;

	if (head->fdwcnt != head->fdrcnt)
		return;

	written = write(head->fd[LIST_WRITE_FD], head, sizeof head);
	if (written == (ssize_t) sizeof head)
		head->fdwcnt++;
}
/*
 * Consume the outstanding wake-up token once the list has drained.
 * Nothing happens unless the list is empty and the read counter is
 * behind the write counter.  The read counter is advanced regardless of
 * what read() returns.
 */
static inline void dlistfd_reset(struct dlistfd_head *head)
{
	void *token;

	if (!dlistfd_empty(head))
		return;
	if (head->fdrcnt >= head->fdwcnt)
		return;

	read(head->fd[LIST_READ_FD], &token, sizeof token);
	head->fdrcnt++;
}
/* Insert 'item' at the front of the list, then raise the wake-up signal. */
static inline void
dlistfd_insert_head(struct dlist_entry *item, struct dlistfd_head *head)
{
	struct dlist_entry *anchor = &head->list;

	dlist_insert_after(item, anchor);
	dlistfd_signal(head);
}
/* Insert 'item' at the back of the list, then raise the wake-up signal. */
static inline void
dlistfd_insert_tail(struct dlist_entry *item, struct dlistfd_head *head)
{
	struct dlist_entry *anchor = &head->list;

	dlist_insert_before(item, anchor);
	dlistfd_signal(head);
}
/*
 * Unlink 'item', then let dlistfd_reset() clear the wake-up signal if
 * that removal emptied the list.
 */
static inline void dlistfd_remove(struct dlist_entry *item, struct dlistfd_head *head)
{
	dlist_remove(item);

	dlistfd_reset(head);
}
/*
 * Wait until the list has an entry.
 *
 * Returns 1 immediately when the list is already non-empty.  Otherwise
 * polls the read descriptor via fi_poll_fd(): a negative result is
 * passed through, 0 is reported as -FI_ETIMEDOUT, and any other result
 * yields 1 or 0 according to whether the list is non-empty afterwards.
 */
static inline int dlistfd_wait_avail(struct dlistfd_head *head, int timeout)
{
	int rc;

	if (!dlistfd_empty(head))
		return 1;

	rc = fi_poll_fd(head->fd[LIST_READ_FD], timeout);
	if (rc < 0)
		return rc;
	if (rc == 0)
		return -FI_ETIMEDOUT;

	return !dlistfd_empty(head);
}
#endif /* LIST_H */

Просмотреть файл

@ -0,0 +1,277 @@
/*
* Copyright (c) 2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#if !defined(RBUF_H)
#define RBUF_H
#if HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>
#include <fcntl.h>
#include <fi.h>
/*
* Simple ring buffer
*
* rcnt, wcnt and wpos only ever grow; they are reduced to a buffer offset
* by masking with size_mask.
*/
struct ringbuf {
/* capacity of buf (rbinit() stores roundup_power_of_two(size) here) */
size_t size;
/* size - 1 */
size_t size_mask;
/* advanced by rbread() by the number of bytes read */
size_t rcnt;
/* committed write position: set to wpos by rbcommit() */
size_t wcnt;
/* advanced by rbwrite(); reset to wcnt by rbabort() */
size_t wpos;
/* storage allocated with calloc() in rbinit(), released by rbfree() */
void *buf;
};
/*
 * Prepare 'rb' with a zero-filled buffer whose capacity is
 * roundup_power_of_two(size), and reset all positions to 0.
 *
 * Returns 0 on success, -ENOMEM when the allocation fails.
 */
static inline int rbinit(struct ringbuf *rb, size_t size)
{
	rb->size = roundup_power_of_two(size);
	rb->size_mask = rb->size - 1;
	rb->wpos = 0;
	rb->wcnt = 0;
	rb->rcnt = 0;

	rb->buf = calloc(1, rb->size);
	return rb->buf ? 0 : -ENOMEM;
}
/* Release the storage allocated by rbinit(). */
static inline void rbfree(struct ringbuf *rb)
{
	void *storage = rb->buf;

	free(storage);
}
/* Non-zero when the committed-but-unread byte count has reached capacity. */
static inline int rbfull(struct ringbuf *rb)
{
	size_t pending = rb->wcnt - rb->rcnt;

	return pending >= rb->size;
}

/* Non-zero when every committed byte has been read. */
static inline int rbempty(struct ringbuf *rb)
{
	return rb->rcnt == rb->wcnt;
}

/* Number of committed bytes not yet read. */
static inline size_t rbused(struct ringbuf *rb)
{
	size_t pending = rb->wcnt - rb->rcnt;

	return pending;
}

/* Capacity minus rbused(). */
static inline size_t rbavail(struct ringbuf *rb)
{
	size_t pending = rbused(rb);

	return rb->size - pending;
}
/*
 * Copy 'len' bytes from 'buf' into the ring at the current write
 * position and advance wpos.  The data is not visible to readers until
 * rbcommit().
 *
 * No space check is done here; callers are expected to stay within
 * rbavail().
 *
 * When the copy runs past the end of the storage it is split in two:
 * the first 'endlen' bytes go to the end of the ring, and the remaining
 * bytes -- taken from buf + endlen -- go to the start.
 */
static inline void rbwrite(struct ringbuf *rb, const void *buf, size_t len)
{
	const char *src = (const char *) buf;
	char *ring = (char *) rb->buf;
	size_t offset = rb->wpos & rb->size_mask;
	size_t endlen = rb->size - offset;

	if (len <= endlen) {
		memcpy(ring + offset, src, len);
	} else {
		memcpy(ring + offset, src, endlen);
		memcpy(ring, src + endlen, len - endlen);
	}
	rb->wpos += len;
}
/* Publish everything written so far: wcnt catches up with wpos. */
static inline void rbcommit(struct ringbuf *rb)
{
	size_t written_to = rb->wpos;

	rb->wcnt = written_to;
}

/* Discard uncommitted writes: wpos falls back to wcnt. */
static inline void rbabort(struct ringbuf *rb)
{
	size_t committed = rb->wcnt;

	rb->wpos = committed;
}
/*
 * Copy 'len' bytes starting at the current read position into 'buf'
 * without consuming them.
 *
 * No availability check is done here; callers are expected to stay
 * within rbused().
 *
 * When the span runs past the end of the storage it is split in two:
 * the tail of the ring fills the first 'endlen' bytes of 'buf', and the
 * bytes from the start of the ring are placed at buf + endlen.
 */
static inline void rbpeek(struct ringbuf *rb, void *buf, size_t len)
{
	char *dst = (char *) buf;
	const char *ring = (const char *) rb->buf;
	size_t offset = rb->rcnt & rb->size_mask;
	size_t endlen = rb->size - offset;

	if (len <= endlen) {
		memcpy(dst, ring + offset, len);
	} else {
		memcpy(dst, ring + offset, endlen);
		memcpy(dst + endlen, ring, len - endlen);
	}
}
/* Copy 'len' bytes out via rbpeek(), then consume them by advancing rcnt. */
static inline void rbread(struct ringbuf *rb, void *buf, size_t len)
{
	rbpeek(rb, buf, len);

	rb->rcnt = rb->rcnt + len;
}
/*
* Ring buffer with blocking read support using an fd
*/
/* Indices into ringbuffd.fd[]; no explicit values, so RB_READ_FD is 0. */
enum {
RB_READ_FD,
RB_WRITE_FD
};
/*
* A ringbuf plus a socket pair used as a wake-up channel.
* fd[] is filled by socketpair() in rbfdinit(), which also puts
* fd[RB_READ_FD] into non-blocking mode.
*/
struct ringbuffd {
struct ringbuf rb;
/* incremented by rbfdreset() after it reads from fd[RB_READ_FD] */
int fdrcnt;
/* incremented by rbfdsignal() after it writes to fd[RB_WRITE_FD] */
int fdwcnt;
int fd[2];
};
/*
 * Initialise 'rbfd': zeroed signal counters, a ring buffer created by
 * rbinit(), and a socket pair whose read end is non-blocking.
 *
 * Returns 0 on success.  On failure returns rbinit()'s error or the
 * negated errno from socketpair() / fcntl(), with everything acquired
 * so far released again.
 */
static inline int rbfdinit(struct ringbuffd *rbfd, size_t size)
{
	int ret;

	rbfd->fdrcnt = 0;
	rbfd->fdwcnt = 0;

	/* rbinit() returns 0 on success; only a non-zero result is an error */
	ret = rbinit(&rbfd->rb, size);
	if (ret)
		return ret;

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, rbfd->fd) < 0) {
		/* capture errno before cleanup calls can overwrite it */
		ret = -errno;
		goto err1;
	}

	if (fcntl(rbfd->fd[RB_READ_FD], F_SETFL, O_NONBLOCK) < 0) {
		ret = -errno;
		goto err2;
	}

	return 0;

err2:
	close(rbfd->fd[0]);
	close(rbfd->fd[1]);
err1:
	rbfree(&rbfd->rb);
	return ret;
}
/* Release the ring storage, then close both ends of the socket pair. */
static inline void rbfdfree(struct ringbuffd *rbfd)
{
	int end;

	rbfree(&rbfd->rb);
	for (end = 0; end < 2; end++)
		close(rbfd->fd[end]);
}
/* The four queries below forward to the embedded ring buffer. */

/* See rbfull(). */
static inline int rbfdfull(struct ringbuffd *rbfd)
{
	struct ringbuf *ring = &rbfd->rb;

	return rbfull(ring);
}

/* See rbempty(). */
static inline int rbfdempty(struct ringbuffd *rbfd)
{
	struct ringbuf *ring = &rbfd->rb;

	return rbempty(ring);
}

/* See rbused(). */
static inline size_t rbfdused(struct ringbuffd *rbfd)
{
	struct ringbuf *ring = &rbfd->rb;

	return rbused(ring);
}

/* See rbavail(). */
static inline size_t rbfdavail(struct ringbuffd *rbfd)
{
	struct ringbuf *ring = &rbfd->rb;

	return rbavail(ring);
}
/*
 * Make fd[RB_READ_FD] readable if it is not already flagged.
 *
 * A token is written only while the write and read counters match, so
 * at most one token is outstanding.  The token's content is irrelevant;
 * it is pointer-sized ('sizeof rbfd'), the same amount rbfdreset() asks
 * read() for.
 *
 * The write counter is advanced only when the whole token was written:
 * counting a failed write would record a wake-up that never reached the
 * socket.
 */
static inline void rbfdsignal(struct ringbuffd *rbfd)
{
	ssize_t written;

	if (rbfd->fdwcnt != rbfd->fdrcnt)
		return;

	written = write(rbfd->fd[RB_WRITE_FD], rbfd, sizeof rbfd);
	if (written == (ssize_t) sizeof rbfd)
		rbfd->fdwcnt++;
}
/*
 * Consume the outstanding wake-up token once the ring has drained.
 * Nothing happens unless the ring is empty and the read counter is
 * behind the write counter.  The read counter is advanced regardless of
 * what read() returns.
 */
static inline void rbfdreset(struct ringbuffd *rbfd)
{
	void *token;

	if (!rbfdempty(rbfd))
		return;
	if (rbfd->fdrcnt >= rbfd->fdwcnt)
		return;

	read(rbfd->fd[RB_READ_FD], &token, sizeof token);
	rbfd->fdrcnt++;
}
/* Stage 'len' bytes in the ring; see rbwrite().  No signal is raised yet. */
static inline void rbfdwrite(struct ringbuffd *rbfd, const void *buf, size_t len)
{
	struct ringbuf *ring = &rbfd->rb;

	rbwrite(ring, buf, len);
}

/* Publish staged data (rbcommit()), then raise the wake-up signal. */
static inline void rbfdcommit(struct ringbuffd *rbfd)
{
	struct ringbuf *ring = &rbfd->rb;

	rbcommit(ring);
	rbfdsignal(rbfd);
}

/* Drop staged data; see rbabort(). */
static inline void rbfdabort(struct ringbuffd *rbfd)
{
	struct ringbuf *ring = &rbfd->rb;

	rbabort(ring);
}

/* Copy data out without consuming it; see rbpeek(). */
static inline void rbfdpeek(struct ringbuffd *rbfd, void *buf, size_t len)
{
	struct ringbuf *ring = &rbfd->rb;

	rbpeek(ring, buf, len);
}

/* Consume data (rbread()), then clear the signal if the ring drained. */
static inline void rbfdread(struct ringbuffd *rbfd, void *buf, size_t len)
{
	struct ringbuf *ring = &rbfd->rb;

	rbread(ring, buf, len);
	rbfdreset(rbfd);
}
/*
* Read from the ring, waiting on the read descriptor when it is empty.
*
* When committed data is present, at most 'len' bytes (capped at
* rbfdused()) are copied into 'buf' and that count is returned.
* Otherwise fi_poll_fd() is called with 'timeout'; the loop repeats while
* it returns 0 and the function returns its first non-zero result.
*
* NOTE(review): the return type is size_t but 'ret' is an int; if
* fi_poll_fd() reports errors as negative values they are converted to
* very large unsigned values here -- confirm how callers detect failure.
* NOTE(review): dlistfd_wait_avail() in list.h treats a 0 result from
* fi_poll_fd() as a timeout, whereas this loop polls again on 0 and
* returns (without reading) on any non-zero result -- confirm which
* convention fi_poll_fd() follows.
*/
static inline size_t rbfdsread(struct ringbuffd *rbfd, void *buf, size_t len,
int timeout)
{
size_t avail;
int ret;
do {
avail = rbfdused(rbfd);
if (avail) {
/* never hand out more than what has been committed */
len = MIN(len, avail);
rbfdread(rbfd, buf, len);
return len;
}
ret = fi_poll_fd(rbfd->fd[RB_READ_FD], timeout);
} while (!ret);
return ret;
}
#endif /* RBUF_H */

Просмотреть файл

@ -0,0 +1,440 @@
/*
* Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _FABRIC_H_
#define _FABRIC_H_
#include <stdint.h>
#include <stddef.h>
#include <assert.h>
#include <sys/socket.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifndef container_of
/*
 * Given a pointer to member 'field' of an object of type 'type',
 * yield a pointer to the enclosing object.
 *
 * 'ptr' is parenthesized so that an expression argument (for example a
 * conditional) is cast as a whole rather than only its first operand.
 */
#define container_of(ptr, type, field) \
	((type *) ((char *)(ptr) - offsetof(type, field)))
#endif
/*
* Version numbers of the interface described by this header, plus three
* *_MAX limits.
* NOTE(review): presumably the *_MAX values bound string lengths; whether
* they include the terminating NUL is not stated here -- confirm.
*/
enum {
FI_MAJOR_VERSION = 1,
FI_MINOR_VERSION = 0,
FI_PATH_MAX = 256,
FI_NAME_MAX = 64,
FI_VERSION_MAX = 64
};
/*
 * Version packing helpers: the major number lives above bit 16, the
 * minor number in the low 16 bits.  All macro arguments are
 * parenthesized so expression arguments expand with the intended
 * precedence.
 */
#define FI_VERSION(major, minor) (((major) << 16) | (minor))
#define FI_MAJOR(version)	((version) >> 16)
#define FI_MINOR(version)	((version) & 0xFFFF)
/* True when v1 is the same as or newer than v2 (major first, then minor). */
#define FI_VERSION_GE(v1, v2) \
	((FI_MAJOR(v1) > FI_MAJOR(v2)) || \
	 (FI_MAJOR(v1) == FI_MAJOR(v2) && FI_MINOR(v1) >= FI_MINOR(v2)))
uint32_t fi_version(void);
struct fid;
struct fid_fabric;
struct fid_domain;
struct fid_av;
struct fid_wait;
struct fid_poll;
struct fid_eq;
struct fid_cq;
struct fid_cntr;
struct fid_ep;
struct fid_pep;
struct fid_mr;
typedef struct fid *fid_t;
/*
 * Provider specific values are indicated by setting the high-order bit.
 *
 * The constant is unsigned: shifting a signed 1 into bit 31 overflows
 * int, and the resulting negative value would sign-extend when widened
 * to a 64-bit field.
 */
#define FI_PROV_SPECIFIC (1U << 31)
/* fi_info and operation flags - pass into endpoint ops calls.
* A user may also set these on an endpoint by using fcntl, which has the
* effect of applying them to all applicable operations.
*/
/* FI capabilities */
/* One bit each, as unsigned 64-bit constants; bits 0 and 6 are not assigned here. */
#define FI_MSG (1ULL << 1)
#define FI_RMA (1ULL << 2)
#define FI_TAGGED (1ULL << 3)
#define FI_ATOMICS (1ULL << 4)
#define FI_MULTICAST (1ULL << 5) /* multicast uses MSG ops */
#define FI_DYNAMIC_MR (1ULL << 7)
#define FI_NAMED_RX_CTX (1ULL << 8)
#define FI_BUFFERED_RECV (1ULL << 9)
/*
* Flags
* The 64-bit flag field is divided as follows:
* bits use
* 0 - 10 operation specific (used for a single call)
* 11 - 32 common (usable with multiple operations)
* 33 - 59 reserved
* 60 - 63 provider specific
*
* Every flag defined below falls in the "common" range (bits 11 - 31);
* bits 15, 22 and 23 are not assigned here.
*/
#define FI_INJECT (1ULL << 11)
#define FI_MULTI_RECV (1ULL << 12)
#define FI_SOURCE (1ULL << 13)
#define FI_SYMMETRIC (1ULL << 14)
#define FI_READ (1ULL << 16)
#define FI_WRITE (1ULL << 17)
#define FI_RECV (1ULL << 18)
#define FI_SEND (1ULL << 19)
#define FI_REMOTE_READ (1ULL << 20)
#define FI_REMOTE_WRITE (1ULL << 21)
#define FI_REMOTE_CQ_DATA (1ULL << 24)
#define FI_EVENT (1ULL << 25)
#define FI_REMOTE_SIGNAL (1ULL << 26)
#define FI_REMOTE_COMPLETE (1ULL << 27)
#define FI_CANCEL (1ULL << 28)
#define FI_MORE (1ULL << 29)
#define FI_PEEK (1ULL << 30)
#define FI_TRIGGER (1ULL << 31)
/*
* An address together with an element count.
* NOTE(review): the element size is not given here -- presumably it comes
* from the datatype of the operation using the vector; confirm.
*/
struct fi_ioc {
void *addr;
size_t count;
};
/*
* Format for transport addresses: sendto, writeto, etc.
* Enumerators take consecutive values starting at 0; the trailing comment
* on each names the C type the address is stored as.
*/
enum {
FI_ADDR_UNSPEC, /* void * */
FI_SOCKADDR, /* struct sockaddr */
FI_SOCKADDR_IN, /* struct sockaddr_in */
FI_SOCKADDR_IN6, /* struct sockaddr_in6 */
FI_SOCKADDR_IB, /* struct sockaddr_ib */
FI_ADDR_PSMX, /* uint64_t */
};
/* All-ones value of the 64-bit fi_addr_t. */
#define FI_ADDR_NOTAVAIL UINT64_MAX
typedef uint64_t fi_addr_t;
/* Opaque pointer; carried in fi_info.connreq and passed to fi_no_reject(). */
typedef void * fi_connreq_t;
/* Used for fi_domain_attr.control_progress and .data_progress. */
enum fi_progress {
FI_PROGRESS_UNSPEC,
FI_PROGRESS_AUTO,
FI_PROGRESS_MANUAL
};
/* Used for fi_domain_attr.threading. */
enum fi_threading {
FI_THREAD_UNSPEC,
FI_THREAD_SAFE,
FI_THREAD_PROGRESS
};
/*
* Ordering bits, one per constant (bits 0 - 8).
* NOTE(review): presumably combined into the msg_order fields of the
* attribute structures, with the three-letter suffix read as
* "<op> After <op>" over R(ead), W(rite), S(end) -- confirm.
*/
#define FI_ORDER_RAR (1 << 0)
#define FI_ORDER_RAW (1 << 1)
#define FI_ORDER_RAS (1 << 2)
#define FI_ORDER_WAR (1 << 3)
#define FI_ORDER_WAW (1 << 4)
#define FI_ORDER_WAS (1 << 5)
#define FI_ORDER_SAR (1 << 6)
#define FI_ORDER_SAW (1 << 7)
#define FI_ORDER_SAS (1 << 8)
/* Type of fi_info.ep_type; the two commented-out values are not defined. */
enum fi_ep_type {
FI_EP_UNSPEC,
FI_EP_MSG,
FI_EP_DGRAM,
FI_EP_RDM,
/* FI_EP_RAW, */
/* FI_EP_PACKET, */
};
/* Endpoint protocol
* If two providers support the same protocol, then they shall interoperate
* when the protocol capabilities match.
*
* Enumerators take consecutive values starting at 0.
* NOTE(review): presumably the values stored in fi_ep_attr.protocol -- confirm.
*/
enum {
FI_PROTO_UNSPEC,
FI_PROTO_RDMA_CM_IB_RC,
FI_PROTO_IWARP,
FI_PROTO_IB_UD,
FI_PROTO_PSMX,
FI_PROTO_UDP,
FI_PROTO_SOCK_RDS,
};
/* Mode bits */
/*
* One bit each (bits 0 - 4), as unsigned 64-bit constants.
* NOTE(review): presumably carried in fi_info.mode -- confirm.
*/
#define FI_CONTEXT (1ULL << 0)
#define FI_LOCAL_MR (1ULL << 1)
#define FI_WRITE_NONCOHERENT (1ULL << 2)
#define FI_PROV_MR_KEY (1ULL << 3)
#define FI_MSG_PREFIX (1ULL << 4)
/*
* Attribute blocks.  struct fi_info refers to one of each through its
* tx_attr, rx_attr, ep_attr, domain_attr and fabric_attr pointers.
*/

/* Referenced by fi_info.tx_attr; also the attribute type taken by fi_no_tx_ctx(). */
struct fi_tx_ctx_attr {
uint64_t caps;
uint64_t op_flags;
uint64_t msg_order;
size_t inject_size;
size_t size;
size_t iov_limit;
size_t op_alignment;
};
/*
* Referenced by fi_info.rx_attr; also the attribute type taken by
* fi_no_rx_ctx().  Same layout as fi_tx_ctx_attr except that the fourth
* member is total_buffered_recv instead of inject_size.
*/
struct fi_rx_ctx_attr {
uint64_t caps;
uint64_t op_flags;
uint64_t msg_order;
size_t total_buffered_recv;
size_t size;
size_t iov_limit;
size_t op_alignment;
};
/* Referenced by fi_info.ep_attr. */
struct fi_ep_attr {
uint32_t protocol;
size_t max_msg_size;
size_t inject_size;
size_t total_buffered_recv;
size_t msg_prefix_size;
size_t max_order_raw_size;
size_t max_order_war_size;
size_t max_order_waw_size;
uint64_t mem_tag_format;
uint64_t msg_order;
size_t tx_ctx_cnt;
size_t rx_ctx_cnt;
};
/* Referenced by fi_info.domain_attr. */
struct fi_domain_attr {
struct fid_domain *domain;
char *name;
enum fi_threading threading;
enum fi_progress control_progress;
enum fi_progress data_progress;
size_t mr_key_size;
size_t cq_data_size;
size_t ep_cnt;
size_t tx_ctx_cnt;
size_t rx_ctx_cnt;
size_t max_ep_tx_ctx;
size_t max_ep_rx_ctx;
size_t op_size;
size_t iov_size;
};
/* Referenced by fi_info.fabric_attr; also the attribute argument of fi_fabric(). */
struct fi_fabric_attr {
struct fid_fabric *fabric;
char *name;
char *prov_name;
uint32_t prov_version;
};
/*
* Description record exchanged with fi_getinfo(), fi_dupinfo() and
* fi_freeinfo().
* NOTE(review): 'next' presumably chains the results of fi_getinfo() into
* a NULL-terminated list -- confirm.
*/
struct fi_info {
struct fi_info *next;
uint64_t caps;
uint64_t mode;
enum fi_ep_type ep_type;
uint32_t addr_format;
/* lengths that accompany src_addr / dest_addr below */
size_t src_addrlen;
size_t dest_addrlen;
void *src_addr;
void *dest_addr;
fi_connreq_t connreq;
struct fi_tx_ctx_attr *tx_attr;
struct fi_rx_ctx_attr *rx_attr;
struct fi_ep_attr *ep_attr;
struct fi_domain_attr *domain_attr;
struct fi_fabric_attr *fabric_attr;
};
/*
* Object class identifiers; enumerators take consecutive values from 0.
* NOTE(review): presumably the values stored in fid.fclass -- confirm.
*/
enum {
FI_CLASS_UNSPEC,
FI_CLASS_FABRIC,
FI_CLASS_DOMAIN,
FI_CLASS_EP,
FI_CLASS_RX_CTX,
FI_CLASS_TX_CTX,
FI_CLASS_PEP,
FI_CLASS_INTERFACE,
FI_CLASS_AV,
FI_CLASS_MR,
FI_CLASS_EQ,
FI_CLASS_CQ,
FI_CLASS_CNTR,
FI_CLASS_WAIT,
FI_CLASS_POLL
};
struct fi_eq_attr;
/*
* Operation table common to every descriptor.  The inline wrappers
* fi_close(), fi_bind(), fi_sync(), fi_control() and fi_open_ops() call
* the matching member through fid->ops.
* 'size' is the value FI_CHECK_OP() compares against a member's offset.
*/
struct fi_ops {
size_t size;
int (*close)(struct fid *fid);
int (*bind)(struct fid *fid, struct fid *bfid, uint64_t flags);
int (*sync)(struct fid *fid, uint64_t flags, void *context);
int (*control)(struct fid *fid, int command, void *arg);
int (*ops_open)(struct fid *fid, const char *name,
uint64_t flags, void **ops, void *context);
};
/* All fabric interface descriptors must start with this structure */
struct fid {
size_t fclass;
void *context;
struct fi_ops *ops;
};
/* NOTE(review): presumably a value for the 'flags' argument of fi_getinfo() -- confirm. */
#define FI_NUMERICHOST (1ULL << 1)
int fi_getinfo(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info);
void fi_freeinfo(struct fi_info *info);
struct fi_info *fi_dupinfo(const struct fi_info *info);
/*
* Fabric-level operation table.  Each member returns an int and hands its
* result back through the pointer-to-pointer argument.
*/
struct fi_ops_fabric {
size_t size;
int (*domain)(struct fid_fabric *fabric, struct fi_info *info,
struct fid_domain **dom, void *context);
int (*endpoint)(struct fid_fabric *fabric, struct fi_info *info,
struct fid_pep **pep, void *context);
int (*eq_open)(struct fid_fabric *fabric, struct fi_eq_attr *attr,
struct fid_eq **eq, void *context);
};
/* Fabric descriptor: the common struct fid first, then the fabric operation table. */
struct fid_fabric {
struct fid fid;
struct fi_ops_fabric *ops;
};
int fi_fabric(struct fi_fabric_attr *attr, struct fid_fabric **fabric, void *context);
/*
 * True when operation 'op' is usable through the table 'ops' points to:
 * the table's recorded size must extend past the member's offset within
 * 'opstype', and the member itself must be non-NULL.
 *
 * 'ops' is parenthesized so that an expression such as '&table' can be
 * passed.
 */
#define FI_CHECK_OP(ops, opstype, op) \
	(((ops)->size > offsetof(opstype, op)) && (ops)->op)
static inline int fi_close(struct fid *fid)
{
return fid->ops->close(fid);
}
static inline int fi_bind(struct fid *fid, struct fid *bfid, uint64_t flags)
{
return fid->ops->bind(fid, bfid, flags);
}
static inline int fi_sync(struct fid *fid, uint64_t flags, void *context)
{
return fid->ops->sync(fid, flags, context);
}
/* Argument block that the fi_alias() wrapper passes to fi_control() with FI_ALIAS. */
struct fi_alias {
struct fid **fid;
uint64_t flags;
};
/* control commands */
/*
* Values for the 'command' argument of fi_control(); the trailing comment
* on each names the type of the accompanying 'arg'.
*/
enum {
FI_GETFIDFLAG, /* uint64_t flags */
FI_SETFIDFLAG, /* uint64_t flags */
FI_GETOPSFLAG, /* uint64_t flags */
FI_SETOPSFLAG, /* uint64_t flags */
/* Duplicate a fid_t. This allows for 2 fids that refer to a single
* HW resource. Each fid may reference functions that are optimized
* for different use cases.
*/
FI_ALIAS, /* struct fi_alias * */
FI_GETWAIT, /* void * wait object */
};
static inline int fi_control(struct fid *fid, int command, void *arg)
{
return fid->ops->control(fid, command, arg);
}
static inline int fi_alias(struct fid *fid, struct fid **alias_fid, uint64_t flags)
{
struct fi_alias alias;
alias.fid = alias_fid;
alias.flags = flags;
return fi_control(fid, FI_ALIAS, &alias);
}
/*
 * Call the fid's ops_open() op, forwarding `name`, `flags`, the `ops`
 * output location and `context` untouched, and return its result.
 */
static inline int
fi_open_ops(struct fid *fid, const char *name, uint64_t flags,
	    void **ops, void *context)
{
	int ret;

	ret = fid->ops->ops_open(fid, name, flags, ops, context);
	return ret;
}
/*
 * Selector passed to fi_tostr() as `datatype`, telling it how to interpret
 * the untyped `data` pointer.  Enumerators are numbered from 0 in
 * declaration order.
 */
enum fi_type {
FI_TYPE_INFO,
FI_TYPE_EP_TYPE,
FI_TYPE_CAPS,
FI_TYPE_OP_FLAGS,
FI_TYPE_ADDR_FORMAT,
FI_TYPE_TX_ATTR,
FI_TYPE_RX_ATTR,
FI_TYPE_EP_ATTR,
FI_TYPE_DOMAIN_ATTR,
FI_TYPE_FABRIC_ATTR,
FI_TYPE_THREADING,
FI_TYPE_PROGRESS,
FI_TYPE_PROTOCOL,
FI_TYPE_MSG_ORDER,
FI_TYPE_MODE,
FI_TYPE_AV_TYPE,
};
/*
 * Declaration only; returns a char pointer for `data` interpreted per
 * `datatype`.
 * NOTE(review): ownership/lifetime of the returned string is not visible
 * here -- confirm against the fi_tostr documentation before freeing or
 * caching it.
 */
char *fi_tostr(const void *data, enum fi_type datatype);
#ifndef FABRIC_DIRECT
/*
 * Opaque context storage: four pointer-sized slots.  Only defined here
 * when FABRIC_DIRECT is not set.
 * NOTE(review): presumably scratch space reserved for the provider when an
 * application passes a struct fi_context as an operation's `context` --
 * confirm.
 */
struct fi_context {
void *internal[4];
};
#else // FABRIC_DIRECT
#include <rdma/fi_direct.h>
#endif
#ifdef __cplusplus
}
#endif
#endif /* _FABRIC_H_ */

Просмотреть файл

@ -0,0 +1,371 @@
/*
* Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _FI_ATOMIC_H_
#define _FI_ATOMIC_H_
#include <assert.h>
#include <rdma/fabric.h>
#include <rdma/fi_endpoint.h>
#include <rdma/fi_rma.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifndef FABRIC_DIRECT
/*
 * Element datatypes for atomic operations (the `datatype` argument of the
 * fi_ops_atomic entries and the `datatype` field of struct fi_msg_atomic).
 * Enumerators are numbered from 0 in declaration order, so
 * FI_DATATYPE_LAST equals the number of datatypes listed before it.
 */
enum fi_datatype {
FI_INT8,
FI_UINT8,
FI_INT16,
FI_UINT16,
FI_INT32,
FI_UINT32,
FI_INT64,
FI_UINT64,
FI_FLOAT,
FI_DOUBLE,
FI_FLOAT_COMPLEX,
FI_DOUBLE_COMPLEX,
FI_LONG_DOUBLE,
FI_LONG_DOUBLE_COMPLEX,
/* Not a datatype: marks the end of the list. */
FI_DATATYPE_LAST
};
/*
 * Atomic operation codes (the `op` argument of the fi_ops_atomic entries
 * and the `op` field of struct fi_msg_atomic).  Enumerators are numbered
 * from 0 in declaration order, so FI_ATOMIC_OP_LAST equals the number of
 * operations listed before it.
 * NOTE(review): which ops are valid for write vs. fetch vs. compare calls
 * is not visible in this header -- see fi_atomic(3).
 */
enum fi_op {
FI_MIN,
FI_MAX,
FI_SUM,
FI_PROD,
FI_LOR,
FI_LAND,
FI_BOR,
FI_BAND,
FI_LXOR,
FI_BXOR,
FI_ATOMIC_READ,
FI_ATOMIC_WRITE,
FI_CSWAP,
FI_CSWAP_NE,
FI_CSWAP_LE,
FI_CSWAP_LT,
FI_CSWAP_GE,
FI_CSWAP_GT,
FI_MSWAP,
/* Not an operation: marks the end of the list. */
FI_ATOMIC_OP_LAST
};
#else
#include <rdma/fi_direct_atomic_def.h>
#endif /* FABRIC_DIRECT */
/*
 * Argument bundle consumed by the *msg atomic entry points (writemsg,
 * readwritemsg, compwritemsg in struct fi_ops_atomic).  Its fields mirror
 * the individual parameters of the non-msg variants.
 */
struct fi_msg_atomic {
/* Local I/O vector and how many entries it has (iov_count). */
const struct fi_ioc *msg_iov;
/* Descriptor array (same role as the `desc` parameter of writev). */
void **desc;
size_t iov_count;
/* Peer address (fi_addr_t), as `dest_addr` in the *to variants. */
fi_addr_t addr;
/* Remote I/O vector and how many entries it has (rma_iov_count). */
const struct fi_rma_ioc *rma_iov;
size_t rma_iov_count;
enum fi_datatype datatype;
enum fi_op op;
/* User context, as the `context` parameter of the non-msg variants. */
void *context;
/* NOTE(review): meaning of `data` is not visible in this header. */
uint64_t data;
};
/*
 * Atomic operations table, reached through fid_ep.atomic.  Every entry
 * takes the endpoint as its first argument.
 *
 * Three families are provided, each with buf / iovec ("v") / explicit
 * destination ("to") / message-struct ("msg") variants:
 *   write*      - called by fi_atomic*()          (no result buffer)
 *   readwrite*  - called by fi_fetch_atomic*()    (adds result buffers)
 *   compwrite*  - called by fi_compare_atomic*()  (adds compare + result)
 * plus two inject entries and three *valid query entries.
 *
 * `size` is the byte size of the table (see FI_CHECK_OP in fabric.h).
 */
struct fi_ops_atomic {
size_t size;
/* --- write family: called by fi_atomic / v / to / msg --- */
ssize_t (*write)(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t (*writev)(struct fid_ep *ep,
const struct fi_ioc *iov, void **desc, size_t count,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t (*writeto)(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t (*writemsg)(struct fid_ep *ep,
const struct fi_msg_atomic *msg, uint64_t flags);
/* --- inject: called by fi_inject_atomic / fi_inject_atomicto;
 *     note there is no desc and no context parameter --- */
ssize_t (*inject)(struct fid_ep *ep, const void *buf, size_t count,
uint64_t addr, uint64_t key, enum fi_datatype datatype,
enum fi_op op);
ssize_t (*injectto)(struct fid_ep *ep, const void *buf, size_t count,
fi_addr_t dest_addr, uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op);
/* --- readwrite family: called by fi_fetch_atomic / v / to / msg --- */
ssize_t (*readwrite)(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
void *result, void *result_desc,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t (*readwritev)(struct fid_ep *ep,
const struct fi_ioc *iov, void **desc, size_t count,
struct fi_ioc *resultv, void **result_desc, size_t result_count,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t (*readwriteto)(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t (*readwritemsg)(struct fid_ep *ep,
const struct fi_msg_atomic *msg,
struct fi_ioc *resultv, void **result_desc, size_t result_count,
uint64_t flags);
/* --- compwrite family: called by fi_compare_atomic / v / to / msg --- */
ssize_t (*compwrite)(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
const void *compare, void *compare_desc,
void *result, void *result_desc,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t (*compwritev)(struct fid_ep *ep,
const struct fi_ioc *iov, void **desc, size_t count,
const struct fi_ioc *comparev, void **compare_desc, size_t compare_count,
struct fi_ioc *resultv, void **result_desc, size_t result_count,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t (*compwriteto)(struct fid_ep *ep,
const void *buf, size_t count, void *desc,
const void *compare, void *compare_desc,
void *result, void *result_desc,
fi_addr_t dest_addr,
uint64_t addr, uint64_t key,
enum fi_datatype datatype, enum fi_op op, void *context);
ssize_t (*compwritemsg)(struct fid_ep *ep,
const struct fi_msg_atomic *msg,
const struct fi_ioc *comparev, void **compare_desc, size_t compare_count,
struct fi_ioc *resultv, void **result_desc, size_t result_count,
uint64_t flags);
/* --- validity queries: called by fi_atomicvalid,
 *     fi_fetch_atomicvalid and fi_compare_atomicvalid; each takes a
 *     datatype/op pair and a size_t output `count` --- */
int (*writevalid)(struct fid_ep *ep,
enum fi_datatype datatype, enum fi_op op, size_t *count);
int (*readwritevalid)(struct fid_ep *ep,
enum fi_datatype datatype, enum fi_op op, size_t *count);
int (*compwritevalid)(struct fid_ep *ep,
enum fi_datatype datatype, enum fi_op op, size_t *count);
};
#ifndef FABRIC_DIRECT
/*
 * Atomic "write" wrappers.  Each forwards its arguments, unchanged and in
 * order, to the matching entry of the endpoint's atomic ops table
 * (ep->atomic) and returns the provider's result.
 */

/* Single-buffer form: dispatches to ep->atomic->write. */
static inline ssize_t
fi_atomic(struct fid_ep *ep,
	  const void *buf, size_t count, void *desc,
	  uint64_t addr, uint64_t key,
	  enum fi_datatype datatype, enum fi_op op, void *context)
{
	ssize_t ret;

	ret = ep->atomic->write(ep, buf, count, desc, addr, key,
				datatype, op, context);
	return ret;
}

/* I/O-vector form: dispatches to ep->atomic->writev. */
static inline ssize_t
fi_atomicv(struct fid_ep *ep,
	   const struct fi_ioc *iov, void **desc, size_t count,
	   uint64_t addr, uint64_t key,
	   enum fi_datatype datatype, enum fi_op op, void *context)
{
	ssize_t ret;

	ret = ep->atomic->writev(ep, iov, desc, count, addr, key,
				 datatype, op, context);
	return ret;
}

/* Explicit-destination form: dispatches to ep->atomic->writeto. */
static inline ssize_t
fi_atomicto(struct fid_ep *ep,
	    const void *buf, size_t count, void *desc,
	    fi_addr_t dest_addr,
	    uint64_t addr, uint64_t key,
	    enum fi_datatype datatype, enum fi_op op, void *context)
{
	ssize_t ret;

	ret = ep->atomic->writeto(ep, buf, count, desc, dest_addr,
				  addr, key, datatype, op, context);
	return ret;
}

/* Message-struct form: dispatches to ep->atomic->writemsg. */
static inline ssize_t
fi_atomicmsg(struct fid_ep *ep,
	     const struct fi_msg_atomic *msg, uint64_t flags)
{
	ssize_t ret;

	ret = ep->atomic->writemsg(ep, msg, flags);
	return ret;
}
/*
 * Atomic inject wrappers.  These take no descriptor and no context
 * argument; they forward straight to ep->atomic->inject / injectto.
 */

/* Dispatches to ep->atomic->inject. */
static inline ssize_t
fi_inject_atomic(struct fid_ep *ep, const void *buf, size_t count,
		 uint64_t addr, uint64_t key, enum fi_datatype datatype,
		 enum fi_op op)
{
	ssize_t ret;

	ret = ep->atomic->inject(ep, buf, count, addr, key, datatype, op);
	return ret;
}

/* Dispatches to ep->atomic->injectto with an explicit destination. */
static inline ssize_t
fi_inject_atomicto(struct fid_ep *ep, const void *buf, size_t count,
		   fi_addr_t dest_addr, uint64_t addr, uint64_t key,
		   enum fi_datatype datatype, enum fi_op op)
{
	ssize_t ret;

	ret = ep->atomic->injectto(ep, buf, count, dest_addr, addr,
				   key, datatype, op);
	return ret;
}
/*
 * Fetching atomic wrappers.  Compared with the plain write wrappers these
 * carry additional result-buffer arguments; each forwards to the matching
 * readwrite* entry of ep->atomic and returns its result.
 */

/* Single-buffer form: dispatches to ep->atomic->readwrite. */
static inline ssize_t
fi_fetch_atomic(struct fid_ep *ep,
		const void *buf, size_t count, void *desc,
		void *result, void *result_desc,
		uint64_t addr, uint64_t key,
		enum fi_datatype datatype, enum fi_op op, void *context)
{
	ssize_t ret;

	ret = ep->atomic->readwrite(ep, buf, count, desc,
				    result, result_desc,
				    addr, key, datatype, op, context);
	return ret;
}

/* I/O-vector form: dispatches to ep->atomic->readwritev. */
static inline ssize_t
fi_fetch_atomicv(struct fid_ep *ep,
		 const struct fi_ioc *iov, void **desc, size_t count,
		 struct fi_ioc *resultv, void **result_desc, size_t result_count,
		 uint64_t addr, uint64_t key,
		 enum fi_datatype datatype, enum fi_op op, void *context)
{
	ssize_t ret;

	ret = ep->atomic->readwritev(ep, iov, desc, count,
				     resultv, result_desc, result_count,
				     addr, key, datatype, op, context);
	return ret;
}

/* Explicit-destination form: dispatches to ep->atomic->readwriteto. */
static inline ssize_t
fi_fetch_atomicto(struct fid_ep *ep,
		  const void *buf, size_t count, void *desc,
		  void *result, void *result_desc,
		  fi_addr_t dest_addr,
		  uint64_t addr, uint64_t key,
		  enum fi_datatype datatype, enum fi_op op, void *context)
{
	ssize_t ret;

	ret = ep->atomic->readwriteto(ep, buf, count, desc,
				      result, result_desc,
				      dest_addr, addr, key,
				      datatype, op, context);
	return ret;
}

/* Message-struct form: dispatches to ep->atomic->readwritemsg. */
static inline ssize_t
fi_fetch_atomicmsg(struct fid_ep *ep,
		   const struct fi_msg_atomic *msg,
		   struct fi_ioc *resultv, void **result_desc, size_t result_count,
		   uint64_t flags)
{
	ssize_t ret;

	ret = ep->atomic->readwritemsg(ep, msg, resultv, result_desc,
				       result_count, flags);
	return ret;
}
/*
 * Compare atomic wrappers.  These carry both compare and result buffer
 * arguments; each forwards to the matching compwrite* entry of ep->atomic
 * and returns its result.
 */

/* Single-buffer form: dispatches to ep->atomic->compwrite. */
static inline ssize_t
fi_compare_atomic(struct fid_ep *ep,
		  const void *buf, size_t count, void *desc,
		  const void *compare, void *compare_desc,
		  void *result, void *result_desc,
		  uint64_t addr, uint64_t key,
		  enum fi_datatype datatype, enum fi_op op, void *context)
{
	ssize_t ret;

	ret = ep->atomic->compwrite(ep, buf, count, desc,
				    compare, compare_desc,
				    result, result_desc,
				    addr, key, datatype, op, context);
	return ret;
}

/* I/O-vector form: dispatches to ep->atomic->compwritev. */
static inline ssize_t
fi_compare_atomicv(struct fid_ep *ep,
		   const struct fi_ioc *iov, void **desc, size_t count,
		   const struct fi_ioc *comparev, void **compare_desc, size_t compare_count,
		   struct fi_ioc *resultv, void **result_desc, size_t result_count,
		   uint64_t addr, uint64_t key,
		   enum fi_datatype datatype, enum fi_op op, void *context)
{
	ssize_t ret;

	ret = ep->atomic->compwritev(ep, iov, desc, count,
				     comparev, compare_desc, compare_count,
				     resultv, result_desc, result_count,
				     addr, key, datatype, op, context);
	return ret;
}

/* Explicit-destination form: dispatches to ep->atomic->compwriteto. */
static inline ssize_t
fi_compare_atomicto(struct fid_ep *ep,
		    const void *buf, size_t count, void *desc,
		    const void *compare, void *compare_desc,
		    void *result, void *result_desc,
		    fi_addr_t dest_addr,
		    uint64_t addr, uint64_t key,
		    enum fi_datatype datatype, enum fi_op op, void *context)
{
	ssize_t ret;

	ret = ep->atomic->compwriteto(ep, buf, count, desc,
				      compare, compare_desc,
				      result, result_desc,
				      dest_addr, addr, key,
				      datatype, op, context);
	return ret;
}

/* Message-struct form: dispatches to ep->atomic->compwritemsg. */
static inline ssize_t
fi_compare_atomicmsg(struct fid_ep *ep,
		     const struct fi_msg_atomic *msg,
		     const struct fi_ioc *comparev, void **compare_desc, size_t compare_count,
		     struct fi_ioc *resultv, void **result_desc, size_t result_count,
		     uint64_t flags)
{
	ssize_t ret;

	ret = ep->atomic->compwritemsg(ep, msg,
				       comparev, compare_desc, compare_count,
				       resultv, result_desc, result_count,
				       flags);
	return ret;
}
/*
 * Validity queries.  Each passes a datatype/op pair and the `count`
 * output pointer to the corresponding *valid entry of ep->atomic and
 * returns that entry's int result.
 */

/* Query via ep->atomic->writevalid. */
static inline int
fi_atomicvalid(struct fid_ep *ep,
	       enum fi_datatype datatype, enum fi_op op, size_t *count)
{
	int ret;

	ret = ep->atomic->writevalid(ep, datatype, op, count);
	return ret;
}

/* Query via ep->atomic->readwritevalid. */
static inline int
fi_fetch_atomicvalid(struct fid_ep *ep,
		     enum fi_datatype datatype, enum fi_op op, size_t *count)
{
	int ret;

	ret = ep->atomic->readwritevalid(ep, datatype, op, count);
	return ret;
}

/* Query via ep->atomic->compwritevalid. */
static inline int
fi_compare_atomicvalid(struct fid_ep *ep,
		       enum fi_datatype datatype, enum fi_op op, size_t *count)
{
	int ret;

	ret = ep->atomic->compwritevalid(ep, datatype, op, count);
	return ret;
}
#else // FABRIC_DIRECT
#include <rdma/fi_direct_atomic.h>
#endif
#ifdef __cplusplus
}
#endif
#endif /* _FI_ATOMIC_H_ */

Просмотреть файл

@ -0,0 +1,126 @@
/*
* Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _FI_CM_H_
#define _FI_CM_H_
#include <rdma/fi_endpoint.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Connection-management operations table, reached through fid_ep.cm and
 * fid_pep.cm.  Each entry has an inline wrapper of the same name prefixed
 * with "fi_" in this header.
 *
 * Note the differing first arguments: getname takes a generic fid_t,
 * listen and reject take a struct fid_pep, the rest take a struct fid_ep.
 *
 * `size` is the byte size of the table (see FI_CHECK_OP in fabric.h).
 */
struct fi_ops_cm {
size_t size;
/* addrlen is a pointer in both getters, so it can be written back. */
int (*getname)(fid_t fid, void *addr, size_t *addrlen);
int (*getpeer)(struct fid_ep *ep, void *addr, size_t *addrlen);
/* param/paramlen: caller-supplied data forwarded with the request. */
int (*connect)(struct fid_ep *ep, const void *addr,
const void *param, size_t paramlen);
int (*listen)(struct fid_pep *pep);
int (*accept)(struct fid_ep *ep, const void *param, size_t paramlen);
int (*reject)(struct fid_pep *pep, fi_connreq_t connreq,
const void *param, size_t paramlen);
int (*shutdown)(struct fid_ep *ep, uint64_t flags);
/* join reports an fi_addr_t through a pointer; leave takes it by value. */
int (*join)(struct fid_ep *ep, void *addr, fi_addr_t *fi_addr,
uint64_t flags, void *context);
int (*leave)(struct fid_ep *ep, void *addr, fi_addr_t fi_addr,
uint64_t flags);
};
#ifndef FABRIC_DIRECT
/*
 * Call the getname() CM op for a fid.  The fid is converted to its
 * enclosing struct fid_ep with container_of() in order to reach the cm
 * ops table; the original fid (not the ep) is what gets passed to the op.
 */
static inline int fi_getname(fid_t fid, void *addr, size_t *addrlen)
{
	struct fid_ep *ep;

	ep = container_of(fid, struct fid_ep, fid);
	return ep->cm->getname(fid, addr, addrlen);
}
/*
 * Connection-management wrappers.  Each forwards its arguments to the
 * same-named entry of the cm ops table of the endpoint (ep->cm) or the
 * passive endpoint (pep->cm) and returns that entry's int result.
 */

/* Dispatches to ep->cm->getpeer. */
static inline int fi_getpeer(struct fid_ep *ep, void *addr, size_t *addrlen)
{
	int ret;

	ret = ep->cm->getpeer(ep, addr, addrlen);
	return ret;
}

/* Dispatches to pep->cm->listen. */
static inline int fi_listen(struct fid_pep *pep)
{
	int ret;

	ret = pep->cm->listen(pep);
	return ret;
}

/* Dispatches to ep->cm->connect. */
static inline int
fi_connect(struct fid_ep *ep, const void *addr,
	   const void *param, size_t paramlen)
{
	int ret;

	ret = ep->cm->connect(ep, addr, param, paramlen);
	return ret;
}

/* Dispatches to ep->cm->accept. */
static inline int
fi_accept(struct fid_ep *ep, const void *param, size_t paramlen)
{
	int ret;

	ret = ep->cm->accept(ep, param, paramlen);
	return ret;
}

/* Dispatches to pep->cm->reject. */
static inline int
fi_reject(struct fid_pep *pep, fi_connreq_t connreq,
	  const void *param, size_t paramlen)
{
	int ret;

	ret = pep->cm->reject(pep, connreq, param, paramlen);
	return ret;
}

/* Dispatches to ep->cm->shutdown. */
static inline int fi_shutdown(struct fid_ep *ep, uint64_t flags)
{
	int ret;

	ret = ep->cm->shutdown(ep, flags);
	return ret;
}

/* Dispatches to ep->cm->join. */
static inline int
fi_join(struct fid_ep *ep, void *addr, fi_addr_t *fi_addr, uint64_t flags,
	void *context)
{
	int ret;

	ret = ep->cm->join(ep, addr, fi_addr, flags, context);
	return ret;
}

/* Dispatches to ep->cm->leave. */
static inline int
fi_leave(struct fid_ep *ep, void *addr, fi_addr_t fi_addr, uint64_t flags)
{
	int ret;

	ret = ep->cm->leave(ep, addr, fi_addr, flags);
	return ret;
}
#else // FABRIC_DIRECT
#include <rdma/fi_direct_cm.h>
#endif
#ifdef __cplusplus
}
#endif
#endif /* _FI_CM_H_ */

Просмотреть файл

@ -0,0 +1,261 @@
/*
* Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _FI_DOMAIN_H_
#define _FI_DOMAIN_H_
#include <rdma/fabric.h>
#include <rdma/fi_eq.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* AV = Address Vector
* Maps and stores transport/network addresses.
*/
/* Address vector kind, selected through fi_av_attr.type. */
enum fi_av_type {
FI_AV_MAP,
FI_AV_TABLE
};
/*
 * Attributes handed to fi_av_open() (the av_open entry of struct
 * fi_ops_domain) when creating an address vector.
 */
struct fi_av_attr {
enum fi_av_type type;
/*
 * Same name as the last argument of fi_rx_addr(), which treats it as the
 * number of high-order bits of an fi_addr_t that carry the rx index.
 */
int rx_ctx_bits;
/* NOTE(review): count / ep_per_node look like sizing hints -- confirm. */
size_t count;
size_t ep_per_node;
/* NOTE(review): name / map_addr semantics are not visible here. */
const char *name;
void *map_addr;
uint64_t flags;
};
/*
 * Address-vector operations table (fid_av.ops).  insert, insertsvc,
 * insertsym, remove and lookup are called by the fi_av_*() inline wrappers
 * in this header.
 *
 * `size` is the byte size of the table (see FI_CHECK_OP in fabric.h).
 */
struct fi_ops_av {
size_t size;
/* `count` addresses in; fi_addr_t values reported through `fi_addr`. */
int (*insert)(struct fid_av *av, const void *addr, size_t count,
fi_addr_t *fi_addr, uint64_t flags, void *context);
/* Insert by node/service strings rather than a raw address buffer. */
int (*insertsvc)(struct fid_av *av, const char *node,
const char *service, fi_addr_t *fi_addr,
uint64_t flags, void *context);
/* Like insertsvc but with node and service counts (nodecnt, svccnt). */
int (*insertsym)(struct fid_av *av, const char *node, size_t nodecnt,
const char *service, size_t svccnt, fi_addr_t *fi_addr,
uint64_t flags, void *context);
int (*remove)(struct fid_av *av, fi_addr_t *fi_addr, size_t count,
uint64_t flags);
/* addrlen is a pointer, so it can be written back. */
int (*lookup)(struct fid_av *av, fi_addr_t fi_addr, void *addr,
size_t *addrlen);
/* Returns a string for `addr`, given a caller buffer `buf` and `len`. */
const char * (*straddr)(struct fid_av *av, const void *addr,
char *buf, size_t *len);
};
/*
 * Address vector object: generic `fid` base (first member) plus a pointer
 * to the AV operations table.
 */
struct fid_av {
struct fid fid;
struct fi_ops_av *ops;
};
/*
* MR = Memory Region
* Tracks registered memory regions, primarily for remote access,
* but also for local access until we can remove that need.
*/
/*
 * Memory region object.  Unlike the other fid_* objects in this header it
 * carries no ops table, only data read by the accessors below it.
 */
struct fid_mr {
struct fid fid;
/* Returned by fi_mr_desc(). */
void *mem_desc;
/* Returned by fi_mr_key(). */
uint64_t key;
};
/*
 * Attribute bundle consumed by the regattr entry of struct fi_ops_mr.
 * The fields mirror the individual parameters of the reg/regv entries
 * (iovec + count, access, offset, requested_key, context).
 */
struct fi_mr_attr {
const struct iovec *mr_iov;
size_t iov_count;
uint64_t access;
uint64_t offset;
uint64_t requested_key;
void *context;
};
/* Forward declarations: only pointers to these are needed below. */
struct fi_cq_attr;
struct fi_cntr_attr;
/*
 * Domain operations table (fid_domain.ops).  Every entry opens a child
 * object of the domain and reports it through its output-pointer argument.
 * av_open, cq_open, endpoint and cntr_open have inline wrappers
 * (fi_av_open, fi_cq_open, fi_endpoint, fi_cntr_open); wait_open and
 * poll_open take no context argument.
 *
 * `size` is the byte size of the table (see FI_CHECK_OP in fabric.h).
 */
struct fi_ops_domain {
size_t size;
int (*av_open)(struct fid_domain *domain, struct fi_av_attr *attr,
struct fid_av **av, void *context);
int (*cq_open)(struct fid_domain *domain, struct fi_cq_attr *attr,
struct fid_cq **cq, void *context);
int (*endpoint)(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context);
int (*cntr_open)(struct fid_domain *domain, struct fi_cntr_attr *attr,
struct fid_cntr **cntr, void *context);
int (*wait_open)(struct fid_domain *domain, struct fi_wait_attr *attr,
struct fid_wait **waitset);
int (*poll_open)(struct fid_domain *domain, struct fi_poll_attr *attr,
struct fid_poll **pollset);
};
/* Memory registration flags */
#define FI_MR_OFFSET (1ULL << 0)
/*
 * Memory-registration operations table (fid_domain.mr).  All three entries
 * report the new region through a struct fid_mr ** argument.  Only `reg`
 * has an inline wrapper (fi_mr_reg) in this header.
 *
 * `size` is the byte size of the table (see FI_CHECK_OP in fabric.h).
 */
struct fi_ops_mr {
size_t size;
/* Register one contiguous buffer (buf, len). */
int (*reg)(struct fid_domain *domain, const void *buf, size_t len,
uint64_t access, uint64_t offset, uint64_t requested_key,
uint64_t flags, struct fid_mr **mr, void *context);
/* Register `count` iovec entries. */
int (*regv)(struct fid_domain *domain, const struct iovec *iov,
size_t count, uint64_t access,
uint64_t offset, uint64_t requested_key,
uint64_t flags, struct fid_mr **mr, void *context);
/* Register from a struct fi_mr_attr (which carries its own context). */
int (*regattr)(struct fid_domain *domain, const struct fi_mr_attr *attr,
uint64_t flags, struct fid_mr **mr);
};
/* Domain bind flags */
#define FI_REG_MR (1ULL << 0)
/*
 * Domain object: generic `fid` base (first member), the domain operations
 * table, and a separate memory-registration operations table.
 */
struct fid_domain {
struct fid fid;
struct fi_ops_domain *ops;
struct fi_ops_mr *mr;
};
#ifndef FABRIC_DIRECT
/*
 * Object-opening wrappers.  Each calls the matching entry of the parent's
 * ops table, which reports the new object through the output pointer, and
 * returns that entry's int result.
 */

/* Open a domain on a fabric via fabric->ops->domain. */
static inline int
fi_domain(struct fid_fabric *fabric, struct fi_info *info,
	  struct fid_domain **domain, void *context)
{
	int ret;

	ret = fabric->ops->domain(fabric, info, domain, context);
	return ret;
}

/* Open a completion queue on a domain via domain->ops->cq_open. */
static inline int
fi_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
	   struct fid_cq **cq, void *context)
{
	int ret;

	ret = domain->ops->cq_open(domain, attr, cq, context);
	return ret;
}

/* Open a counter on a domain via domain->ops->cntr_open. */
static inline int
fi_cntr_open(struct fid_domain *domain, struct fi_cntr_attr *attr,
	     struct fid_cntr **cntr, void *context)
{
	int ret;

	ret = domain->ops->cntr_open(domain, attr, cntr, context);
	return ret;
}
/*
 * Register a single buffer with a domain by calling domain->mr->reg with
 * the arguments unchanged; the new region is reported through `mr`.
 */
static inline int
fi_mr_reg(struct fid_domain *domain, const void *buf, size_t len,
	  uint64_t access, uint64_t offset, uint64_t requested_key,
	  uint64_t flags, struct fid_mr **mr, void *context)
{
	int ret;

	ret = domain->mr->reg(domain, buf, len, access, offset,
			      requested_key, flags, mr, context);
	return ret;
}

/* Accessor: the mem_desc field of a memory region. */
static inline void *fi_mr_desc(struct fid_mr *mr)
{
	void *desc = mr->mem_desc;

	return desc;
}

/* Accessor: the key field of a memory region. */
static inline uint64_t fi_mr_key(struct fid_mr *mr)
{
	uint64_t key = mr->key;

	return key;
}
/*
 * Address-vector wrappers.  fi_av_open goes through the domain's ops
 * table; the rest go through the AV's own ops table (av->ops).  All return
 * the int result of the entry they call.
 */

/* Open an address vector via domain->ops->av_open. */
static inline int
fi_av_open(struct fid_domain *domain, struct fi_av_attr *attr,
	   struct fid_av **av, void *context)
{
	int ret;

	ret = domain->ops->av_open(domain, attr, av, context);
	return ret;
}

/* Dispatches to av->ops->insert. */
static inline int
fi_av_insert(struct fid_av *av, const void *addr, size_t count,
	     fi_addr_t *fi_addr, uint64_t flags, void *context)
{
	int ret;

	ret = av->ops->insert(av, addr, count, fi_addr, flags, context);
	return ret;
}

/* Dispatches to av->ops->insertsvc. */
static inline int
fi_av_insertsvc(struct fid_av *av, const char *node, const char *service,
		fi_addr_t *fi_addr, uint64_t flags, void *context)
{
	int ret;

	ret = av->ops->insertsvc(av, node, service, fi_addr, flags, context);
	return ret;
}

/* Dispatches to av->ops->insertsym. */
static inline int
fi_av_insertsym(struct fid_av *av, const char *node, size_t nodecnt,
		const char *service, size_t svccnt,
		fi_addr_t *fi_addr, uint64_t flags, void *context)
{
	int ret;

	ret = av->ops->insertsym(av, node, nodecnt, service, svccnt,
				 fi_addr, flags, context);
	return ret;
}

/* Dispatches to av->ops->remove. */
static inline int
fi_av_remove(struct fid_av *av, fi_addr_t *fi_addr, size_t count, uint64_t flags)
{
	int ret;

	ret = av->ops->remove(av, fi_addr, count, flags);
	return ret;
}

/* Dispatches to av->ops->lookup. */
static inline int
fi_av_lookup(struct fid_av *av, fi_addr_t fi_addr, void *addr, size_t *addrlen)
{
	int ret;

	ret = av->ops->lookup(av, fi_addr, addr, addrlen);
	return ret;
}
/*
 * Combine an address with a receive-context index.
 *
 * The index is placed in the top `rx_ctx_bits` bits of the 64-bit value
 * and OR-ed with `fi_addr`.
 *
 * fi_addr     - base address value.
 * rx_index    - receive context index to encode.
 * rx_ctx_bits - number of high-order bits reserved for the index.
 *
 * Returns the combined address.  When rx_ctx_bits is outside 1..64 no bits
 * are available for the index, so `fi_addr` is returned unchanged; this
 * also avoids shifting a 64-bit value by 64 or by a negative count, both
 * of which are undefined behaviour in C.
 */
static inline fi_addr_t
fi_rx_addr(fi_addr_t fi_addr, int rx_index, int rx_ctx_bits)
{
	if (rx_ctx_bits <= 0 || rx_ctx_bits > 64)
		return fi_addr;

	return (fi_addr_t) (((uint64_t) rx_index << (64 - rx_ctx_bits)) | fi_addr);
}
/*
 * Sync an address vector: hands the AV's embedded fid to the generic
 * fi_sync() helper along with `flags` and `context`.
 */
static inline int fi_av_sync(struct fid_av *av, uint64_t flags, void *context)
{
	struct fid *base = &av->fid;

	return fi_sync(base, flags, context);
}
#else // FABRIC_DIRECT
#include <rdma/fi_direct_domain.h>
#endif
#ifdef __cplusplus
}
#endif
#endif /* _FI_DOMAIN_H_ */

Просмотреть файл

@ -0,0 +1,288 @@
/*
* Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _FI_ENDPOINT_H_
#define _FI_ENDPOINT_H_
#include <sys/socket.h>
#include <rdma/fabric.h>
#include <rdma/fi_domain.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Argument bundle consumed by the recvmsg / sendmsg entries of struct
 * fi_ops_msg.  Its fields mirror the individual parameters of the other
 * send/recv entries (iovec + count, desc array, peer address, context,
 * data).
 */
struct fi_msg {
const struct iovec *msg_iov;
void **desc;
size_t iov_count;
fi_addr_t addr;
void *context;
uint64_t data;
};
/* Endpoint option levels */
/* Values for the `level` argument of fi_getopt() / fi_setopt(). */
enum {
FI_OPT_ENDPOINT
};
/* FI_OPT_ENDPOINT option names */
/*
 * Values for the `optname` argument of fi_getopt() / fi_setopt(); the
 * trailing comment names the type of the option value.
 */
enum {
FI_OPT_MIN_MULTI_RECV, /* size_t */
};
/*
 * Endpoint control operations table (fid_ep.ops and fid_pep.ops).
 * Wrappers in this header: fi_enable, fi_cancel, fi_getopt, fi_setopt,
 * fi_tx_context (tx_ctx) and fi_rx_context (rx_ctx).
 *
 * cancel, getopt and setopt take a generic fid_t rather than a struct
 * fid_ep; their wrappers recover the endpoint with container_of().
 *
 * `size` is the byte size of the table (see FI_CHECK_OP in fabric.h).
 */
struct fi_ops_ep {
size_t size;
int (*enable)(struct fid_ep *ep);
ssize_t (*cancel)(fid_t fid, void *context);
/* optlen is a pointer here, so it can be written back. */
int (*getopt)(fid_t fid, int level, int optname,
void *optval, size_t *optlen);
int (*setopt)(fid_t fid, int level, int optname,
const void *optval, size_t optlen);
/* Report a struct fid_ep for transmit context `index` through tx_ep. */
int (*tx_ctx)(struct fid_ep *ep, int index,
struct fi_tx_ctx_attr *attr, struct fid_ep **tx_ep,
void *context);
/* Report a struct fid_ep for receive context `index` through rx_ep. */
int (*rx_ctx)(struct fid_ep *ep, int index,
struct fi_rx_ctx_attr *attr, struct fid_ep **rx_ep,
void *context);
};
/*
 * Message send/receive operations table (fid_ep.msg).  Each entry has an
 * inline wrapper of the same name prefixed with "fi_" in this header.
 *
 * Naming pattern visible in the signatures:
 *   plain      - single buffer (buf, len, desc)
 *   ...v       - iovec array with a desc array and a count
 *   ...from/to - adds an explicit fi_addr_t peer address
 *   ...msg     - takes a struct fi_msg plus flags
 *   inject...  - no desc and no context argument
 *   senddata.. - adds a uint64_t `data` argument
 *
 * `size` is the byte size of the table (see FI_CHECK_OP in fabric.h).
 */
struct fi_ops_msg {
size_t size;
ssize_t (*recv)(struct fid_ep *ep, void *buf, size_t len, void *desc,
void *context);
ssize_t (*recvv)(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, void *context);
ssize_t (*recvfrom)(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr, void *context);
ssize_t (*recvmsg)(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags);
ssize_t (*send)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
void *context);
ssize_t (*sendv)(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, void *context);
ssize_t (*sendto)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, void *context);
ssize_t (*sendmsg)(struct fid_ep *ep, const struct fi_msg *msg,
uint64_t flags);
ssize_t (*inject)(struct fid_ep *ep, const void *buf, size_t len);
ssize_t (*injectto)(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr);
ssize_t (*senddata)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, void *context);
ssize_t (*senddatato)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, fi_addr_t dest_addr, void *context);
};
/*
 * Forward declarations of the ops tables that struct fid_ep / fid_pep
 * point to; this header does not define them, and only pointers are
 * needed here.
 */
struct fi_ops_cm;
struct fi_ops_rma;
struct fi_ops_tagged;
struct fi_ops_atomic;
/* struct fi_ops_collectives; */
/*
* Calls which modify the properties of an endpoint (control, setopt, bind, ...)
* must be serialized against all other operations. Those calls may modify the
* operations referenced by an endpoint in order to optimize the data transfer code
* paths.
*
* A provider may allocate the minimal size structure needed to support the
* ops requested by the user.
*/
/*
 * Endpoint object: generic `fid` base (first member, which is what lets
 * container_of() recover the endpoint from a fid_t) followed by one ops
 * table pointer per functional area.
 */
struct fid_ep {
struct fid fid;
/* Control ops: enable, cancel, getopt/setopt, tx/rx contexts. */
struct fi_ops_ep *ops;
/* Connection management ops. */
struct fi_ops_cm *cm;
/* Message send/receive ops. */
struct fi_ops_msg *msg;
struct fi_ops_rma *rma;
struct fi_ops_tagged *tagged;
/* Atomic ops (struct fi_ops_atomic is defined in fi_atomic.h). */
struct fi_ops_atomic *atomic;
};
/*
 * Passive endpoint object: shares the first three members of struct
 * fid_ep (fid, ops, cm) but has no data-transfer ops tables.  It is the
 * type accepted by fi_listen() and fi_reject().
 */
struct fid_pep {
struct fid fid;
struct fi_ops_ep *ops;
struct fi_ops_cm *cm;
};
#ifndef FABRIC_DIRECT
static inline int
fi_pendpoint(struct fid_fabric *fabric, struct fi_info *info,
struct fid_pep **pep, void *context)
{
return fabric->ops->endpoint(fabric, info, pep, context);
}
static inline int
fi_endpoint(struct fid_domain *domain, struct fi_info *info,
struct fid_ep **ep, void *context)
{
return domain->ops->endpoint(domain, info, ep, context);
}
static inline int fi_ep_bind(struct fid_ep *ep, struct fid *bfid, uint64_t flags)
{
return ep->fid.ops->bind(&ep->fid, bfid, flags);
}
static inline int fi_enable(struct fid_ep *ep)
{
return ep->ops->enable(ep);
}
/*
 * fid_t-based endpoint wrappers.  Each converts the fid to its enclosing
 * struct fid_ep with container_of() to reach the endpoint ops table, then
 * calls the op with the original fid.
 */

/* Dispatches to ep->ops->cancel. */
static inline ssize_t fi_cancel(fid_t fid, void *context)
{
	struct fid_ep *ep;

	ep = container_of(fid, struct fid_ep, fid);
	return ep->ops->cancel(fid, context);
}

/* Dispatches to ep->ops->setopt. */
static inline int
fi_setopt(fid_t fid, int level, int optname,
	  const void *optval, size_t optlen)
{
	struct fid_ep *ep;

	ep = container_of(fid, struct fid_ep, fid);
	return ep->ops->setopt(fid, level, optname, optval, optlen);
}

/* Dispatches to ep->ops->getopt. */
static inline int
fi_getopt(fid_t fid, int level, int optname,
	  void *optval, size_t *optlen)
{
	struct fid_ep *ep;

	ep = container_of(fid, struct fid_ep, fid);
	return ep->ops->getopt(fid, level, optname, optval, optlen);
}
/*
 * Transmit / receive context wrappers.  Each asks the endpoint's ops table
 * for the context at `index`; the resulting endpoint is reported through
 * the output pointer.
 */

/* Dispatches to ep->ops->tx_ctx. */
static inline int
fi_tx_context(struct fid_ep *ep, int index, struct fi_tx_ctx_attr *attr,
	      struct fid_ep **tx_ep, void *context)
{
	int ret;

	ret = ep->ops->tx_ctx(ep, index, attr, tx_ep, context);
	return ret;
}

/* Dispatches to ep->ops->rx_ctx. */
static inline int
fi_rx_context(struct fid_ep *ep, int index, struct fi_rx_ctx_attr *attr,
	      struct fid_ep **rx_ep, void *context)
{
	int ret;

	ret = ep->ops->rx_ctx(ep, index, attr, rx_ep, context);
	return ret;
}
/*
 * Receive wrappers.  Each forwards its arguments unchanged to the
 * same-named entry of the endpoint's message ops table (ep->msg) and
 * returns that entry's ssize_t result.
 */

/* Single-buffer form: dispatches to ep->msg->recv. */
static inline ssize_t
fi_recv(struct fid_ep *ep, void *buf, size_t len, void *desc, void *context)
{
	ssize_t ret;

	ret = ep->msg->recv(ep, buf, len, desc, context);
	return ret;
}

/* iovec form: dispatches to ep->msg->recvv. */
static inline ssize_t
fi_recvv(struct fid_ep *ep, const struct iovec *iov, void **desc,
	 size_t count, void *context)
{
	ssize_t ret;

	ret = ep->msg->recvv(ep, iov, desc, count, context);
	return ret;
}

/* Explicit-source form: dispatches to ep->msg->recvfrom. */
static inline ssize_t
fi_recvfrom(struct fid_ep *ep, void *buf, size_t len, void *desc,
	    fi_addr_t src_addr, void *context)
{
	ssize_t ret;

	ret = ep->msg->recvfrom(ep, buf, len, desc, src_addr, context);
	return ret;
}

/* Message-struct form: dispatches to ep->msg->recvmsg. */
static inline ssize_t
fi_recvmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flags)
{
	ssize_t ret;

	ret = ep->msg->recvmsg(ep, msg, flags);
	return ret;
}
/*
 * Send wrappers.  Each forwards its arguments unchanged to the same-named
 * entry of the endpoint's message ops table (ep->msg) and returns that
 * entry's ssize_t result.
 */

/* Single-buffer form: dispatches to ep->msg->send. */
static inline ssize_t
fi_send(struct fid_ep *ep, const void *buf, size_t len, void *desc, void *context)
{
	ssize_t ret;

	ret = ep->msg->send(ep, buf, len, desc, context);
	return ret;
}

/* iovec form: dispatches to ep->msg->sendv. */
static inline ssize_t
fi_sendv(struct fid_ep *ep, const struct iovec *iov, void **desc,
	 size_t count, void *context)
{
	ssize_t ret;

	ret = ep->msg->sendv(ep, iov, desc, count, context);
	return ret;
}

/* Explicit-destination form: dispatches to ep->msg->sendto. */
static inline ssize_t
fi_sendto(struct fid_ep *ep, const void *buf, size_t len, void *desc,
	  fi_addr_t dest_addr, void *context)
{
	ssize_t ret;

	ret = ep->msg->sendto(ep, buf, len, desc, dest_addr, context);
	return ret;
}

/* Message-struct form: dispatches to ep->msg->sendmsg. */
static inline ssize_t
fi_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, uint64_t flags)
{
	ssize_t ret;

	ret = ep->msg->sendmsg(ep, msg, flags);
	return ret;
}
/*
 * Inject and send-with-data wrappers.  The inject forms take neither a
 * descriptor nor a context; the senddata forms add a uint64_t `data`
 * argument.  All forward to the same-named entry of ep->msg.
 */

/* Dispatches to ep->msg->inject. */
static inline ssize_t
fi_inject(struct fid_ep *ep, const void *buf, size_t len)
{
	ssize_t ret;

	ret = ep->msg->inject(ep, buf, len);
	return ret;
}

/* Dispatches to ep->msg->injectto with an explicit destination. */
static inline ssize_t
fi_injectto(struct fid_ep *ep, const void *buf, size_t len, fi_addr_t dest_addr)
{
	ssize_t ret;

	ret = ep->msg->injectto(ep, buf, len, dest_addr);
	return ret;
}

/* Dispatches to ep->msg->senddata. */
static inline ssize_t
fi_senddata(struct fid_ep *ep, const void *buf, size_t len, void *desc,
	    uint64_t data, void *context)
{
	ssize_t ret;

	ret = ep->msg->senddata(ep, buf, len, desc, data, context);
	return ret;
}

/* Dispatches to ep->msg->senddatato with an explicit destination. */
static inline ssize_t
fi_senddatato(struct fid_ep *ep, const void *buf, size_t len, void *desc,
	      uint64_t data, fi_addr_t dest_addr, void *context)
{
	ssize_t ret;

	ret = ep->msg->senddatato(ep, buf, len, desc, data, dest_addr, context);
	return ret;
}
#else // FABRIC_DIRECT
#include <rdma/fi_direct_endpoint.h>
#endif
#ifdef __cplusplus
}
#endif
#endif /* _FI_ENDPOINT_H_ */

Просмотреть файл

@ -0,0 +1,433 @@
/*
* Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _FI_EQ_H_
#define _FI_EQ_H_
#include <rdma/fabric.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Wait Set
* Allows associating multiple EQs and counters with a single wait object.
*/
/* Use fi_control GETWAIT to get underlying wait object(s) */
/*
 * Kind of wait object.  Used as the `wait_obj` field of struct
 * fi_wait_attr, struct fi_wait_obj_set and struct fi_eq_attr.
 */
enum fi_wait_obj {
FI_WAIT_NONE,
FI_WAIT_UNSPEC,
FI_WAIT_SET,
FI_WAIT_FD,
FI_WAIT_MUT_COND, /* pthread mutex & cond */
};
/* Attributes for the wait_open entry of struct fi_ops_domain. */
struct fi_wait_attr {
enum fi_wait_obj wait_obj;
uint64_t flags;
};
/*
 * Wait-set operations table.  `size` is the byte size of the table (see
 * FI_CHECK_OP in fabric.h).
 */
struct fi_ops_wait {
size_t size;
/* NOTE(review): units of `timeout` are not visible in this header. */
int (*wait)(struct fid_wait *waitset, int timeout);
};
/* Wait-set object: generic `fid` base plus its operations table. */
struct fid_wait {
struct fid fid;
struct fi_ops_wait *ops;
};
/*
 * Describes a group of wait objects of one kind.
 * NOTE(review): presumably `obj` points at `count` objects of the kind
 * named by `wait_obj` -- confirm how this is filled in (FI_GETWAIT?).
 */
struct fi_wait_obj_set {
size_t count;
enum fi_wait_obj wait_obj;
void *obj;
};
/*
* Poll Set
* Allows polling multiple event queues and counters for progress
*/
struct fi_poll_attr {
uint64_t flags;
};
struct fi_ops_poll {
size_t size;
int (*poll)(struct fid_poll *pollset, void **context, int count);
};
/* Poll set object: a base struct fid followed by a pointer to its operation table. */
struct fid_poll {
struct fid fid;
struct fi_ops_poll *ops;
};
/*
* EQ = Event Queue
* Used to report various control (not data transfer) events and operations.
*/
/*
 * Attributes passed to fi_eq_open() when creating an event queue.
 * NOTE(review): size is presumably the requested queue depth, and
 * wait_set presumably only matters when wait_obj is FI_WAIT_SET --
 * neither is established by this header; confirm.
 */
struct fi_eq_attr {
size_t size;
uint64_t flags;
enum fi_wait_obj wait_obj;
int signaling_vector;
struct fid_wait *wait_set;
};
/*
 * Standard EQ events.
 * Anonymous enum; values are 0, 1, 2 in declaration order.
 * NOTE(review): presumably these are the values carried in the `event`
 * argument of the EQ read/write operations -- confirm.
 */
enum {
FI_COMPLETE,
FI_CONNREQ,
FI_SHUTDOWN
};
/*
 * Basic EQ entry. Its three members are also the leading members of
 * struct fi_eq_err_entry.
 */
struct fi_eq_entry {
fid_t fid;
void *context;
uint64_t data;
};
/*
 * EQ error entry, the buffer type taken by the readerr operation of
 * struct fi_ops_eq. It starts with the same three members as
 * struct fi_eq_entry and adds error details.
 */
struct fi_eq_err_entry {
fid_t fid;
void *context;
uint64_t data;
int err;
int prov_errno;
/* err_data is available until the next time the EQ is read */
void *err_data;
};
/*
 * Connection-management EQ entry. The struct is variable-length: data
 * is a zero-length trailing array (a GNU C extension), so sizeof() does
 * not include any connection data.
 */
struct fi_eq_cm_entry {
fid_t fid;
/* user must call fi_freeinfo to release info */
struct fi_info *info;
/* connection data placed here, up to space provided */
uint8_t data[0];
};
/*
 * Operation table for an event queue. The fi_eq_read(), fi_eq_readerr(),
 * fi_eq_write(), fi_eq_sread() and fi_eq_strerror() inline wrappers in
 * this header each dispatch through the like-named member.
 */
struct fi_ops_eq {
/* NOTE(review): presumably the size of this table in bytes -- confirm */
size_t size;
ssize_t (*read)(struct fid_eq *eq, uint32_t *event,
void *buf, size_t len, uint64_t flags);
ssize_t (*readerr)(struct fid_eq *eq, struct fi_eq_err_entry *buf,
size_t len, uint64_t flags);
ssize_t (*write)(struct fid_eq *eq, uint32_t event,
const void *buf, size_t len, uint64_t flags);
/* Same parameters as read plus a timeout (units not stated here). */
ssize_t (*sread)(struct fid_eq *eq, uint32_t *event,
void *buf, size_t len, int timeout, uint64_t flags);
const char * (*strerror)(struct fid_eq *eq, int prov_errno,
const void *err_data, void *buf, size_t len);
};
/* Event queue object: a base struct fid followed by a pointer to its operation table. */
struct fid_eq {
struct fid fid;
struct fi_ops_eq *ops;
};
/*
* CQ = Completion Queue
* Used to report the completion of data transfer operations.
*/
/*
 * CQ entry format selector; type of the format member of struct fi_cq_attr.
 * NOTE(review): presumably CONTEXT/MSG/DATA/TAGGED select
 * struct fi_cq_entry / fi_cq_msg_entry / fi_cq_data_entry /
 * fi_cq_tagged_entry respectively -- confirm against the man pages.
 */
enum fi_cq_format {
FI_CQ_FORMAT_UNSPEC,
FI_CQ_FORMAT_CONTEXT,
FI_CQ_FORMAT_MSG,
FI_CQ_FORMAT_DATA,
FI_CQ_FORMAT_TAGGED,
};
/* Smallest CQ entry layout: only the operation context pointer. */
struct fi_cq_entry {
void *op_context;
};
/* CQ entry layout: the member of struct fi_cq_entry followed by flags and len. */
struct fi_cq_msg_entry {
void *op_context;
uint64_t flags;
size_t len;
};
/* CQ entry layout: the members of struct fi_cq_msg_entry followed by buf and data. */
struct fi_cq_data_entry {
void *op_context;
uint64_t flags;
size_t len;
void *buf;
/* data depends on operation and/or flags - e.g. remote EQ data */
uint64_t data;
};
/* CQ entry layout: the members of struct fi_cq_data_entry followed by tag. */
struct fi_cq_tagged_entry {
void *op_context;
uint64_t flags;
size_t len;
void *buf;
uint64_t data;
uint64_t tag;
};
/*
 * CQ error entry, the buffer type taken by the readerr and writeerr
 * operations of struct fi_ops_cq. It starts with the same members as
 * struct fi_cq_tagged_entry and adds error details.
 */
struct fi_cq_err_entry {
void *op_context;
uint64_t flags;
size_t len;
void *buf;
uint64_t data;
uint64_t tag;
/* NOTE(review): presumably an overflow length -- not defined in this header; confirm */
size_t olen;
int err;
int prov_errno;
/* err_data is available until the next time the CQ is read */
void *err_data;
};
/*
 * CQ wait condition; type of the wait_cond member of struct fi_cq_attr.
 * NOTE(review): the "size_t threshold" note presumably describes what the
 * `cond` argument of sread/sreadfrom points to -- confirm.
 */
enum fi_cq_wait_cond {
FI_CQ_COND_NONE,
FI_CQ_COND_THRESHOLD /* size_t threshold */
};
/*
 * Completion queue attributes. Compared with struct fi_eq_attr this adds
 * the entry format and the wait condition.
 * NOTE(review): size is presumably the requested queue depth -- confirm.
 */
struct fi_cq_attr {
size_t size;
uint64_t flags;
enum fi_cq_format format;
enum fi_wait_obj wait_obj;
int signaling_vector;
enum fi_cq_wait_cond wait_cond;
struct fid_wait *wait_set;
};
/*
 * Operation table for a completion queue. Each fi_cq_*() inline wrapper
 * in this header dispatches through the like-named member.
 */
struct fi_ops_cq {
/* NOTE(review): presumably the size of this table in bytes -- confirm */
size_t size;
ssize_t (*read)(struct fid_cq *cq, void *buf, size_t count);
/* Same as read, with an additional src_addr output pointer. */
ssize_t (*readfrom)(struct fid_cq *cq, void *buf, size_t count,
fi_addr_t *src_addr);
ssize_t (*readerr)(struct fid_cq *cq, struct fi_cq_err_entry *buf,
size_t len, uint64_t flags);
ssize_t (*write)(struct fid_cq *cq, const void *buf, size_t len);
ssize_t (*writeerr)(struct fid_cq *cq, struct fi_cq_err_entry *buf,
size_t len, uint64_t flags);
/* Same parameters as read plus a wait condition and timeout. */
ssize_t (*sread)(struct fid_cq *cq, void *buf, size_t count,
const void *cond, int timeout);
/* Same parameters as readfrom plus a wait condition and timeout. */
ssize_t (*sreadfrom)(struct fid_cq *cq, void *buf, size_t count,
fi_addr_t *src_addr, const void *cond, int timeout);
const char * (*strerror)(struct fid_cq *cq, int prov_errno,
const void *err_data, void *buf, size_t len);
};
/* Completion queue object: a base struct fid followed by a pointer to its operation table. */
struct fid_cq {
struct fid fid;
struct fi_ops_cq *ops;
};
/*
* CNTR = Counter
* Used to report the number of completed asynchronous operations.
*/
/*
 * Kinds of event a counter counts; type of the events member of
 * struct fi_cntr_attr. Only one value is defined.
 */
enum fi_cntr_events {
FI_CNTR_EVENTS_COMP
};
/* Counter attributes: what to count, how to wait, and a flags word. */
struct fi_cntr_attr {
enum fi_cntr_events events;
enum fi_wait_obj wait_obj;
struct fid_wait *wait_set;
uint64_t flags;
};
/*
 * Operation table for a counter. fi_cntr_read(), fi_cntr_readerr(),
 * fi_cntr_add(), fi_cntr_set() and fi_cntr_wait() each dispatch through
 * the like-named member.
 */
struct fi_ops_cntr {
/* NOTE(review): presumably the size of this table in bytes -- confirm */
size_t size;
uint64_t (*read)(struct fid_cntr *cntr);
uint64_t (*readerr)(struct fid_cntr *cntr);
int (*add)(struct fid_cntr *cntr, uint64_t value);
int (*set)(struct fid_cntr *cntr, uint64_t value);
int (*wait)(struct fid_cntr *cntr, uint64_t threshold, int timeout);
};
/* Counter object: a base struct fid followed by a pointer to its operation table. */
struct fid_cntr {
struct fid fid;
struct fi_ops_cntr *ops;
};
#ifndef FABRIC_DIRECT
static inline int
fi_wait(struct fid_wait *waitset, int timeout)
{
return waitset->ops->wait(waitset, timeout);
}
static inline int
fi_poll(struct fid_poll *pollset, void **context, int count)
{
return pollset->ops->poll(pollset, context, count);
}
/*
 * Open an event queue on a fabric by calling the fabric's eq_open
 * operation. All arguments are forwarded unchanged and the operation's
 * result is returned as-is.
 */
static inline int fi_eq_open(struct fid_fabric *fabric,
			     struct fi_eq_attr *attr,
			     struct fid_eq **eq,
			     void *context)
{
	return (*fabric->ops->eq_open)(fabric, attr, eq, context);
}
static inline ssize_t
fi_eq_read(struct fid_eq *eq, uint32_t *event, void *buf,
size_t len, uint64_t flags)
{
return eq->ops->read(eq, event, buf, len, flags);
}
static inline ssize_t
fi_eq_readerr(struct fid_eq *eq, struct fi_eq_err_entry *buf,
size_t len, uint64_t flags)
{
return eq->ops->readerr(eq, buf, len, flags);
}
static inline ssize_t
fi_eq_write(struct fid_eq *eq, uint32_t event, const void *buf,
size_t len, uint64_t flags)
{
return eq->ops->write(eq, event, buf, len, flags);
}
static inline ssize_t
fi_eq_sread(struct fid_eq *eq, uint32_t *event, void *buf, size_t len,
int timeout, uint64_t flags)
{
return eq->ops->sread(eq, event, buf, len, timeout, flags);
}
static inline const char *
fi_eq_strerror(struct fid_eq *eq, int prov_errno, const void *err_data,
void *buf, size_t len)
{
return eq->ops->strerror(eq, prov_errno, err_data, buf, len);
}
static inline ssize_t fi_cq_read(struct fid_cq *cq, void *buf, size_t count)
{
return cq->ops->read(cq, buf, count);
}
/*
 * Call the readfrom operation of a completion queue.
 * Arguments are forwarded unchanged; the operation's result is returned.
 */
static inline ssize_t fi_cq_readfrom(struct fid_cq *cq, void *buf,
				     size_t count, fi_addr_t *src_addr)
{
	struct fi_ops_cq *cq_ops = cq->ops;

	return cq_ops->readfrom(cq, buf, count, src_addr);
}
static inline ssize_t
fi_cq_readerr(struct fid_cq *cq, struct fi_cq_err_entry *buf, size_t len,
uint64_t flags)
{
return cq->ops->readerr(cq, buf, len, flags);
}
static inline ssize_t fi_cq_write(struct fid_cq *cq, const void *buf, size_t len)
{
return cq->ops->write(cq, buf, len);
}
static inline ssize_t fi_cq_writeerr(struct fid_cq *cq, struct fi_cq_err_entry *buf,
size_t len, uint64_t flags)
{
return cq->ops->writeerr(cq, buf, len, flags);
}
static inline ssize_t
fi_cq_sread(struct fid_cq *cq, void *buf, size_t count, const void *cond, int timeout)
{
return cq->ops->sread(cq, buf, count, cond, timeout);
}
/*
 * Call the sreadfrom operation of a completion queue (readfrom with a
 * wait condition and timeout). Arguments are forwarded unchanged; the
 * operation's result is returned.
 */
static inline ssize_t fi_cq_sreadfrom(struct fid_cq *cq, void *buf,
				      size_t count, fi_addr_t *src_addr,
				      const void *cond, int timeout)
{
	struct fi_ops_cq *cq_ops = cq->ops;

	return cq_ops->sreadfrom(cq, buf, count, src_addr, cond, timeout);
}
static inline const char *
fi_cq_strerror(struct fid_cq *cq, int prov_errno, const void *err_data,
void *buf, size_t len)
{
return cq->ops->strerror(cq, prov_errno, err_data, buf, len);
}
static inline uint64_t fi_cntr_read(struct fid_cntr *cntr)
{
return cntr->ops->read(cntr);
}
static inline uint64_t fi_cntr_readerr(struct fid_cntr *cntr)
{
return cntr->ops->readerr(cntr);
}
static inline int fi_cntr_add(struct fid_cntr *cntr, uint64_t value)
{
return cntr->ops->add(cntr, value);
}
static inline int fi_cntr_set(struct fid_cntr *cntr, uint64_t value)
{
return cntr->ops->set(cntr, value);
}
static inline int
fi_cntr_wait(struct fid_cntr *cntr, uint64_t threshold, int timeout)
{
return cntr->ops->wait(cntr, threshold, timeout);
}
#else // FABRIC_DIRECT
#include <rdma/fi_direct_eq.h>
#endif
#ifdef __cplusplus
}
#endif
#endif /* _FI_EQ_H_ */

Просмотреть файл

@ -0,0 +1,192 @@
/*
* Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _FI_ERRNO_H_
#define _FI_ERRNO_H_
#include <errno.h>
#ifdef __cplusplus
extern "C" {
#endif
/* FI directly mapped errno values */
//#define FI_EPERM EPERM /* Operation not permitted */
#define FI_ENOENT ENOENT /* No such file or directory */
//#define FI_ESRCH ESRCH /* No such process */
//#define FI_EINTR EINTR /* Interrupted system call */
#define FI_EIO EIO /* I/O error */
//#define FI_ENXIO ENXIO /* No such device or address */
#define FI_E2BIG E2BIG /* Argument list too long */
//#define FI_ENOEXEC ENOEXEC /* Exec format error */
#define FI_EBADF EBADF /* Bad file number */
//#define FI_ECHILD ECHILD /* No child processes */
#define FI_EAGAIN EAGAIN /* Try again */
#define FI_ENOMEM ENOMEM /* Out of memory */
#define FI_EACCES EACCES /* Permission denied */
//#define FI_EFAULT EFAULT /* Bad address */
//#define FI_ENOTBLK ENOTBLK /* Block device required */
#define FI_EBUSY EBUSY /* Device or resource busy */
//#define FI_EEXIST EEXIST /* File exists */
//#define FI_EXDEV EXDEV /* Cross-device link */
#define FI_ENODEV ENODEV /* No such device */
//#define FI_ENOTDIR ENOTDIR /* Not a directory */
//#define FI_EISDIR EISDIR /* Is a directory */
#define FI_EINVAL EINVAL /* Invalid argument */
//#define FI_ENFILE ENFILE /* File table overflow */
#define FI_EMFILE EMFILE /* Too many open files */
//#define FI_ENOTTY ENOTTY /* Not a typewriter */
//#define FI_ETXTBSY ETXTBSY /* Text file busy */
//#define FI_EFBIG EFBIG /* File too large */
#define FI_ENOSPC ENOSPC /* No space left on device */
//#define FI_ESPIPE ESPIPE /* Illegal seek */
//#define FI_EROFS EROFS /* Read-only file system */
//#define FI_EMLINK EMLINK /* Too many links */
//#define FI_EPIPE EPIPE /* Broken pipe */
//#define FI_EDOM EDOM /* Math argument out of domain of func */
//#define FI_ERANGE ERANGE /* Math result not representable */
//#define FI_EDEADLK EDEADLK /* Resource deadlock would occur */
//#define FI_ENAMETOOLONG ENAMETOOLONG /* File name too long */
//#define FI_ENOLCK ENOLCK /* No record locks available */
#define FI_ENOSYS ENOSYS /* Function not implemented */
//#define FI_ENOTEMPTY ENOTEMPTY /* Directory not empty */
//#define FI_ELOOP ELOOP /* Too many symbolic links encountered */
//#define FI_EWOULDBLOCK EWOULDBLOCK /* Operation would block */
#define FI_ENOMSG ENOMSG /* No message of desired type */
//#define FI_EIDRM EIDRM /* Identifier removed */
//#define FI_ECHRNG ECHRNG /* Channel number out of range */
//#define FI_EL2NSYNC EL2NSYNC /* Level 2 not synchronized */
//#define FI_EL3HLT EL3HLT /* Level 3 halted */
//#define FI_EL3RST EL3RST /* Level 3 reset */
//#define FI_ELNRNG ELNRNG /* Link number out of range */
//#define FI_EUNATCH EUNATCH /* Protocol driver not attached */
//#define FI_ENOCSI ENOCSI /* No CSI structure available */
//#define FI_EL2HLT EL2HLT /* Level 2 halted */
//#define FI_EBADE EBADE /* Invalid exchange */
//#define FI_EBADR EBADR /* Invalid request descriptor */
//#define FI_EXFULL EXFULL /* Exchange full */
//#define FI_ENOANO ENOANO /* No anode */
//#define FI_EBADRQC EBADRQC /* Invalid request code */
//#define FI_EBADSLT EBADSLT /* Invalid slot */
//#define FI_EDEADLOCK EDEADLOCK /* Resource deadlock would occur */
//#define FI_EBFONT EBFONT /* Bad font file format */
//#define FI_ENOSTR ENOSTR /* Device not a stream */
#define FI_ENODATA ENODATA /* No data available */
//#define FI_ETIME ETIME /* Timer expired */
//#define FI_ENOSR ENOSR /* Out of streams resources */
//#define FI_ENONET ENONET /* Machine is not on the network */
//#define FI_ENOPKG ENOPKG /* Package not installed */
//#define FI_EREMOTE EREMOTE /* Object is remote */
//#define FI_ENOLINK ENOLINK /* Link has been severed */
//#define FI_EADV EADV /* Advertise error */
//#define FI_ESRMNT ESRMNT /* Srmount error */
//#define FI_ECOMM ECOMM /* Communication error on send */
//#define FI_EPROTO EPROTO /* Protocol error */
//#define FI_EMULTIHOP EMULTIHOP /* Multihop attempted */
//#define FI_EDOTDOT EDOTDOT /* RFS specific error */
//#define FI_EBADMSG EBADMSG /* Not a data message */
//#define FI_EOVERFLOW EOVERFLOW /* Value too large for defined data type */
//#define FI_ENOTUNIQ ENOTUNIQ /* Name not unique on network */
//#define FI_EBADFD EBADFD /* File descriptor in bad state */
//#define FI_EREMCHG EREMCHG /* Remote address changed */
//#define FI_ELIBACC ELIBACC /* Can not access a needed shared library */
//#define FI_ELIBBAD ELIBBAD /* Accessing a corrupted shared library */
//#define FI_ELIBSCN ELIBSCN /* .lib section in a.out corrupted */
//#define FI_ELIBMAX ELIBMAX /* Attempting to link in too many shared libraries */
//#define FI_ELIBEXEC ELIBEXEC /* Cannot exec a shared library directly */
//#define FI_EILSEQ EILSEQ /* Illegal byte sequence */
//#define FI_ERESTART ERESTART /* Interrupted system call should be restarted */
//#define FI_ESTRPIPE ESTRPIPE /* Streams pipe error */
//#define FI_EUSERS EUSERS /* Too many users */
//#define FI_ENOTSOCK ENOTSOCK /* Socket operation on non-socket */
//#define FI_EDESTADDRREQ EDESTADDRREQ /* Destination address required */
#define FI_EMSGSIZE EMSGSIZE /* Message too long */
//#define FI_EPROTOTYPE EPROTOTYPE /* Protocol wrong type for endpoint */
#define FI_ENOPROTOOPT ENOPROTOOPT /* Protocol not available */
//#define FI_EPROTONOSUPPORT EPROTONOSUPPORT /* Protocol not supported */
//#define FI_ESOCKTNOSUPPORT ESOCKTNOSUPPORT /* Socket type not supported */
#define FI_EOPNOTSUPP EOPNOTSUPP /* Operation not supported on transport endpoint */
//#define FI_EPFNOSUPPORT EPFNOSUPPORT /* Protocol family not supported */
//#define FI_EAFNOSUPPORT EAFNOSUPPORT /* Address family not supported by protocol */
#define FI_EADDRINUSE EADDRINUSE /* Address already in use */
#define FI_EADDRNOTAVAIL EADDRNOTAVAIL /* Cannot assign requested address */
#define FI_ENETDOWN ENETDOWN /* Network is down */
#define FI_ENETUNREACH ENETUNREACH /* Network is unreachable */
//#define FI_ENETRESET ENETRESET /* Network dropped connection because of reset */
#define FI_ECONNABORTED ECONNABORTED /* Software caused connection abort */
#define FI_ECONNRESET ECONNRESET /* Connection reset by peer */
//#define FI_ENOBUFS ENOBUFS /* No buffer space available */
#define FI_EISCONN EISCONN /* Transport endpoint is already connected */
#define FI_ENOTCONN ENOTCONN /* Transport endpoint is not connected */
#define FI_ESHUTDOWN ESHUTDOWN /* Cannot send after transport endpoint shutdown */
//#define FI_ETOOMANYREFS ETOOMANYREFS /* Too many references: cannot splice */
#define FI_ETIMEDOUT ETIMEDOUT /* Connection timed out */
#define FI_ECONNREFUSED ECONNREFUSED /* Connection refused */
//#define FI_EHOSTDOWN EHOSTDOWN /* Host is down */
#define FI_EHOSTUNREACH EHOSTUNREACH /* No route to host */
#define FI_EALREADY EALREADY /* Operation already in progress */
#define FI_EINPROGRESS EINPROGRESS /* Operation now in progress */
//#define FI_ESTALE ESTALE /* Stale NFS file handle */
//#define FI_EUCLEAN EUCLEAN /* Structure needs cleaning */
//#define FI_ENOTNAM ENOTNAM /* Not a XENIX named type file */
//#define FI_ENAVAIL ENAVAIL /* No XENIX semaphores available */
//#define FI_EISNAM EISNAM /* Is a named type file */
#define FI_EREMOTEIO EREMOTEIO /* Remote I/O error */
//#define FI_EDQUOT EDQUOT /* Quota exceeded */
//#define FI_ENOMEDIUM ENOMEDIUM /* No medium found */
//#define FI_EMEDIUMTYPE EMEDIUMTYPE /* Wrong medium type */
#define FI_ECANCELED ECANCELED /* Operation Canceled */
#define FI_ENOKEY ENOKEY /* Required key not available */
//#define FI_EKEYEXPIRED EKEYEXPIRED /* Key has expired */
//#define FI_EKEYREVOKED EKEYREVOKED /* Key has been revoked */
#define FI_EKEYREJECTED EKEYREJECTED /* Key was rejected by service */
//#define FI_EOWNERDEAD EOWNERDEAD /* Owner died */
//#define FI_ENOTRECOVERABLE ENOTRECOVERABLE /* State not recoverable */
/*
 * FI specific return values: >= 256
 * Unlike the FI_E* names above, which alias <errno.h> constants, these
 * are defined here as literal numbers, contiguous from 256.
 * NOTE(review): presumably 256 was chosen to stay clear of system errno
 * values -- confirm.
 */
#define FI_EOTHER 256 /* Unspecified error */
#define FI_ETOOSMALL 257 /* Provided buffer is too small */
#define FI_EOPBADSTATE 258 /* Operation not permitted in current state */
#define FI_EAVAIL 259 /* Error available */
#define FI_EBADFLAGS 260 /* Flags not supported */
#define FI_ENOEQ 261 /* Missing or unavailable event queue */
#define FI_EDOMAIN 262 /* Invalid resource domain */
/*
 * Return a string for an error number. Declared here, defined elsewhere.
 * NOTE(review): presumably accepts both the errno-mapped and the
 * FI-specific codes defined in this header -- confirm in the implementation.
 */
const char *fi_strerror(int errnum);
#ifdef __cplusplus
}
#endif
#endif /* _FI_ERRNO_H_ */

Просмотреть файл

@ -0,0 +1,77 @@
/*
* Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
* Copyright (c) 2005, 2006 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2005 PathScale, Inc. All rights reserved.
* Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _FI_PROV_H_
#define _FI_PROV_H_
#include <rdma/fabric.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Extension that low-level drivers should add to their .so filename
* (probably via libtool "-release" option). For example a low-level
* driver named "libfoo" should build a plug-in named "libfoo-fi.so".
*/
#define FI_LIB_EXTENSION "fi"
/*
* Adjacent string literals concatenate, so this expands to "fi.so".
* The '-' in the "libfoo-fi.so" example above is not part of either macro.
*/
#define FI_LIB_SUFFIX FI_LIB_EXTENSION ".so"
#define FI_LIB_CLASS_NAME "libfabric"
/*
 * Provider descriptor passed to fi_register_provider(): a name, a
 * version and the provider's entry points.
 */
struct fi_provider {
const char *name;
/* NOTE(review): presumably encoded with FI_VERSION(major, minor) like the API version -- confirm */
uint32_t version;
int (*getinfo)(uint32_t version, const char *node, const char *service,
uint64_t flags, struct fi_info *hints, struct fi_info **info);
int (*freeinfo)(struct fi_info *info);
struct fi_info *(*dupinfo)(const struct fi_info *info);
int (*fabric)(struct fi_fabric_attr *attr, struct fid_fabric **fabric,
void *context);
};
/*
 * Register a provider against a given API version. Declared here,
 * defined elsewhere; the fi_register() inline wrapper in this header
 * calls it with the version this header was compiled with.
 * NOTE(review): return convention (presumably 0 on success) is not
 * visible here -- confirm.
 */
int fi_register_provider(uint32_t fi_version, struct fi_provider *provider);
/*
 * Register a provider using the API version baked into this header
 * (FI_MAJOR_VERSION / FI_MINOR_VERSION at compile time). Returns the
 * result of fi_register_provider() unchanged.
 */
static inline int
fi_register(struct fi_provider *provider)
{
	const uint32_t header_version =
		FI_VERSION(FI_MAJOR_VERSION, FI_MINOR_VERSION);

	return fi_register_provider(header_version, provider);
}
#ifdef __cplusplus
}
#endif
#endif /* _FI_PROV_H_ */

Просмотреть файл

@ -0,0 +1,195 @@
/*
* Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _FI_RMA_H_
#define _FI_RMA_H_
#include <assert.h>
#include <rdma/fabric.h>
#include <rdma/fi_endpoint.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * One RMA segment: address, length and key. Used as the element type of
 * fi_msg_rma.rma_iov.
 * NOTE(review): addr/key presumably identify remote memory -- confirm.
 */
struct fi_rma_iov {
uint64_t addr;
size_t len;
uint64_t key;
};
/*
 * Same shape as struct fi_rma_iov but carrying a count instead of a
 * length. Not referenced elsewhere in this header.
 * NOTE(review): presumably a count of typed elements, e.g. for atomic
 * operations -- confirm.
 */
struct fi_rma_ioc {
uint64_t addr;
size_t count;
uint64_t key;
};
/*
 * Argument bundle for the readmsg/writemsg RMA operations: a local
 * iovec list with descriptors, a peer address, an RMA segment list, a
 * context pointer and a data word.
 * NOTE(review): desc presumably has iov_count entries parallel to
 * msg_iov -- confirm.
 */
struct fi_msg_rma {
const struct iovec *msg_iov;
void **desc;
size_t iov_count;
fi_addr_t addr;
const struct fi_rma_iov *rma_iov;
size_t rma_iov_count;
void *context;
uint64_t data;
};
/*
 * RMA operation table, reached through ep->rma by the inline wrappers in
 * this header. The wrappers map onto members by name, except that
 * fi_inject_write() calls inject and fi_inject_writeto() calls injectto.
 */
struct fi_ops_rma {
/* NOTE(review): presumably the size of this table in bytes -- confirm */
size_t size;
ssize_t (*read)(struct fid_ep *ep, void *buf, size_t len, void *desc,
uint64_t addr, uint64_t key, void *context);
ssize_t (*readv)(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t addr, uint64_t key, void *context);
ssize_t (*readfrom)(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr, uint64_t addr, uint64_t key,
void *context);
ssize_t (*readmsg)(struct fid_ep *ep, const struct fi_msg_rma *msg,
uint64_t flags);
ssize_t (*write)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t addr, uint64_t key, void *context);
ssize_t (*writev)(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t addr, uint64_t key, void *context);
ssize_t (*writeto)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, uint64_t addr, uint64_t key,
void *context);
ssize_t (*writemsg)(struct fid_ep *ep, const struct fi_msg_rma *msg,
uint64_t flags);
/* The inject variants take no desc and no context argument. */
ssize_t (*inject)(struct fid_ep *ep, const void *buf, size_t len,
uint64_t addr, uint64_t key);
ssize_t (*injectto)(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr, uint64_t addr, uint64_t key);
/* The writedata variants add a 64-bit data argument after desc. */
ssize_t (*writedata)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, uint64_t addr, uint64_t key, void *context);
ssize_t (*writedatato)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, fi_addr_t dest_addr, uint64_t addr, uint64_t key,
void *context);
};
#ifndef FABRIC_DIRECT
/*
 * Call the endpoint's RMA read operation.
 * Arguments are forwarded unchanged; the operation's result is returned.
 * ep and ep->rma are dereferenced unchecked.
 */
static inline ssize_t fi_read(struct fid_ep *ep, void *buf, size_t len,
			      void *desc, uint64_t addr, uint64_t key,
			      void *context)
{
	return (*ep->rma->read)(ep, buf, len, desc, addr, key, context);
}
/*
 * Call the endpoint's RMA readv operation (iovec form of read).
 * Arguments are forwarded unchanged; the operation's result is returned.
 */
static inline ssize_t fi_readv(struct fid_ep *ep, const struct iovec *iov,
			       void **desc, size_t count, uint64_t addr,
			       uint64_t key, void *context)
{
	return (*ep->rma->readv)(ep, iov, desc, count, addr, key, context);
}
/*
 * Call the endpoint's RMA readfrom operation (read with an explicit
 * source address). Arguments are forwarded unchanged; the operation's
 * result is returned.
 */
static inline ssize_t fi_readfrom(struct fid_ep *ep, void *buf, size_t len,
				  void *desc, fi_addr_t src_addr,
				  uint64_t addr, uint64_t key, void *context)
{
	return (*ep->rma->readfrom)(ep, buf, len, desc, src_addr,
				    addr, key, context);
}
/*
 * Call the endpoint's RMA readmsg operation with a struct fi_msg_rma
 * argument bundle and flags. The operation's result is returned.
 */
static inline ssize_t fi_readmsg(struct fid_ep *ep,
				 const struct fi_msg_rma *msg,
				 uint64_t flags)
{
	return (*ep->rma->readmsg)(ep, msg, flags);
}
/*
 * Call the endpoint's RMA write operation.
 * Arguments are forwarded unchanged; the operation's result is returned.
 */
static inline ssize_t fi_write(struct fid_ep *ep, const void *buf, size_t len,
			       void *desc, uint64_t addr, uint64_t key,
			       void *context)
{
	return (*ep->rma->write)(ep, buf, len, desc, addr, key, context);
}
/*
 * Call the endpoint's RMA writev operation (iovec form of write).
 * Arguments are forwarded unchanged; the operation's result is returned.
 */
static inline ssize_t fi_writev(struct fid_ep *ep, const struct iovec *iov,
				void **desc, size_t count, uint64_t addr,
				uint64_t key, void *context)
{
	return (*ep->rma->writev)(ep, iov, desc, count, addr, key, context);
}
/*
 * Call the endpoint's RMA writeto operation (write with an explicit
 * destination address). Arguments are forwarded unchanged; the
 * operation's result is returned.
 */
static inline ssize_t fi_writeto(struct fid_ep *ep, const void *buf,
				 size_t len, void *desc, fi_addr_t dst_addr,
				 uint64_t addr, uint64_t key, void *context)
{
	return (*ep->rma->writeto)(ep, buf, len, desc, dst_addr,
				   addr, key, context);
}
/*
 * Call the endpoint's RMA writemsg operation with a struct fi_msg_rma
 * argument bundle and flags. The operation's result is returned.
 */
static inline ssize_t fi_writemsg(struct fid_ep *ep,
				  const struct fi_msg_rma *msg,
				  uint64_t flags)
{
	return (*ep->rma->writemsg)(ep, msg, flags);
}
/*
 * Call the endpoint's RMA inject operation. The wrapper name differs
 * from the table member: fi_inject_write() -> rma->inject. No desc or
 * context argument is taken.
 */
static inline ssize_t fi_inject_write(struct fid_ep *ep, const void *buf,
				      size_t len, uint64_t addr, uint64_t key)
{
	return (*ep->rma->inject)(ep, buf, len, addr, key);
}
/*
 * Call the endpoint's RMA injectto operation. The wrapper name differs
 * from the table member: fi_inject_writeto() -> rma->injectto. No desc
 * or context argument is taken.
 */
static inline ssize_t fi_inject_writeto(struct fid_ep *ep, const void *buf,
					size_t len, fi_addr_t dest_addr,
					uint64_t addr, uint64_t key)
{
	return (*ep->rma->injectto)(ep, buf, len, dest_addr, addr, key);
}
/*
 * Call the endpoint's RMA writedata operation (write carrying an extra
 * 64-bit data word). Arguments are forwarded unchanged; the operation's
 * result is returned.
 */
static inline ssize_t fi_writedata(struct fid_ep *ep, const void *buf,
				   size_t len, void *desc, uint64_t data,
				   uint64_t addr, uint64_t key, void *context)
{
	return (*ep->rma->writedata)(ep, buf, len, desc, data,
				     addr, key, context);
}
/*
 * Call the endpoint's RMA writedatato operation (writedata with an
 * explicit destination address). Arguments are forwarded unchanged; the
 * operation's result is returned.
 */
static inline ssize_t fi_writedatato(struct fid_ep *ep, const void *buf,
				     size_t len, void *desc, uint64_t data,
				     fi_addr_t dest_addr, uint64_t addr,
				     uint64_t key, void *context)
{
	return (*ep->rma->writedatato)(ep, buf, len, desc, data, dest_addr,
				       addr, key, context);
}
#else // FABRIC_DIRECT
#include <rdma/fi_direct_rma.h>
#endif
#ifdef __cplusplus
}
#endif
#endif /* _FI_RMA_H_ */

Просмотреть файл

@ -0,0 +1,192 @@
/*
* Copyright (c) 2013-2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _FI_TAGGED_H_
#define _FI_TAGGED_H_
#include <assert.h>
#include <rdma/fabric.h>
#include <rdma/fi_endpoint.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Flag bit 0 (64-bit). Not referenced elsewhere in this header.
 * NOTE(review): presumably passed in a `flags` argument of the tagged
 * operations (e.g. search) -- confirm against the man pages.
 */
#define FI_CLAIM (1ULL << 0)
/*
 * Argument bundle for the tagged recvmsg/sendmsg operations: an iovec
 * list with descriptors, a peer address, tag and ignore words, a
 * context pointer and a data word.
 * NOTE(review): ignore is presumably a mask of tag bits to disregard
 * when matching -- confirm.
 */
struct fi_msg_tagged {
const struct iovec *msg_iov;
void **desc;
size_t iov_count;
fi_addr_t addr;
uint64_t tag;
uint64_t ignore;
void *context;
uint64_t data;
};
/*
 * Tagged-message operation table, reached through ep->tagged by the
 * fi_t*() inline wrappers in this header (fi_trecv -> recv,
 * fi_tsend -> send, fi_tinject -> inject, fi_tsearch -> search, ...).
 */
struct fi_ops_tagged {
/* NOTE(review): presumably the size of this table in bytes -- confirm */
size_t size;
/* Receive variants take both a tag and an ignore word. */
ssize_t (*recv)(struct fid_ep *ep, void *buf, size_t len, void *desc,
uint64_t tag, uint64_t ignore, void *context);
ssize_t (*recvv)(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t tag, uint64_t ignore, void *context);
ssize_t (*recvfrom)(struct fid_ep *ep, void *buf, size_t len, void *desc,
fi_addr_t src_addr,
uint64_t tag, uint64_t ignore, void *context);
ssize_t (*recvmsg)(struct fid_ep *ep, const struct fi_msg_tagged *msg,
uint64_t flags);
/* Send variants take a tag only. */
ssize_t (*send)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t tag, void *context);
ssize_t (*sendv)(struct fid_ep *ep, const struct iovec *iov, void **desc,
size_t count, uint64_t tag, void *context);
ssize_t (*sendto)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
fi_addr_t dest_addr, uint64_t tag, void *context);
ssize_t (*sendmsg)(struct fid_ep *ep, const struct fi_msg_tagged *msg,
uint64_t flags);
/* The inject variants take no desc and no context argument. */
ssize_t (*inject)(struct fid_ep *ep, const void *buf, size_t len,
uint64_t tag);
ssize_t (*injectto)(struct fid_ep *ep, const void *buf, size_t len,
fi_addr_t dest_addr, uint64_t tag);
ssize_t (*senddata)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, uint64_t tag, void *context);
ssize_t (*senddatato)(struct fid_ep *ep, const void *buf, size_t len, void *desc,
uint64_t data, fi_addr_t dest_addr, uint64_t tag, void *context);
/* tag, src_addr and len are pointers, i.e. in/out or output parameters. */
ssize_t (*search)(struct fid_ep *ep, uint64_t *tag, uint64_t ignore,
uint64_t flags, fi_addr_t *src_addr, size_t *len, void *context);
};
#ifndef FABRIC_DIRECT
/*
 * Call the endpoint's tagged recv operation.
 * Arguments are forwarded unchanged; the operation's result is returned.
 * ep and ep->tagged are dereferenced unchecked.
 */
static inline ssize_t fi_trecv(struct fid_ep *ep, void *buf, size_t len,
			       void *desc, uint64_t tag, uint64_t ignore,
			       void *context)
{
	return (*ep->tagged->recv)(ep, buf, len, desc, tag, ignore, context);
}
/*
 * Call the endpoint's tagged recvv operation (iovec form of recv).
 * Arguments are forwarded unchanged; the operation's result is returned.
 */
static inline ssize_t fi_trecvv(struct fid_ep *ep, const struct iovec *iov,
				void **desc, size_t count, uint64_t tag,
				uint64_t ignore, void *context)
{
	return (*ep->tagged->recvv)(ep, iov, desc, count, tag, ignore,
				    context);
}
/*
 * Call the endpoint's tagged recvfrom operation (recv with an explicit
 * source address). Arguments are forwarded unchanged; the operation's
 * result is returned.
 */
static inline ssize_t fi_trecvfrom(struct fid_ep *ep, void *buf, size_t len,
				   void *desc, fi_addr_t src_addr,
				   uint64_t tag, uint64_t ignore,
				   void *context)
{
	return (*ep->tagged->recvfrom)(ep, buf, len, desc, src_addr,
				       tag, ignore, context);
}
/*
 * Call the endpoint's tagged recvmsg operation with a
 * struct fi_msg_tagged argument bundle and flags. The operation's
 * result is returned.
 */
static inline ssize_t fi_trecvmsg(struct fid_ep *ep,
				  const struct fi_msg_tagged *msg,
				  uint64_t flags)
{
	return (*ep->tagged->recvmsg)(ep, msg, flags);
}
/*
 * Call the endpoint's tagged send operation.
 * Arguments are forwarded unchanged; the operation's result is returned.
 */
static inline ssize_t fi_tsend(struct fid_ep *ep, const void *buf, size_t len,
			       void *desc, uint64_t tag, void *context)
{
	return (*ep->tagged->send)(ep, buf, len, desc, tag, context);
}
/*
 * Call the endpoint's tagged sendv operation (iovec form of send).
 * Arguments are forwarded unchanged; the operation's result is returned.
 */
static inline ssize_t fi_tsendv(struct fid_ep *ep, const struct iovec *iov,
				void **desc, size_t count, uint64_t tag,
				void *context)
{
	return (*ep->tagged->sendv)(ep, iov, desc, count, tag, context);
}
/*
 * Call the endpoint's tagged sendto operation (send with an explicit
 * destination address). Arguments are forwarded unchanged; the
 * operation's result is returned.
 */
static inline ssize_t fi_tsendto(struct fid_ep *ep, const void *buf,
				 size_t len, void *desc, fi_addr_t dest_addr,
				 uint64_t tag, void *context)
{
	return (*ep->tagged->sendto)(ep, buf, len, desc, dest_addr,
				     tag, context);
}
/*
 * Call the endpoint's tagged sendmsg operation with a
 * struct fi_msg_tagged argument bundle and flags. The operation's
 * result is returned.
 */
static inline ssize_t fi_tsendmsg(struct fid_ep *ep,
				  const struct fi_msg_tagged *msg,
				  uint64_t flags)
{
	return (*ep->tagged->sendmsg)(ep, msg, flags);
}
/*
 * Call the endpoint's tagged inject operation. No desc or context
 * argument is taken. The operation's result is returned.
 */
static inline ssize_t fi_tinject(struct fid_ep *ep, const void *buf,
				 size_t len, uint64_t tag)
{
	return (*ep->tagged->inject)(ep, buf, len, tag);
}
/*
 * Call the endpoint's tagged injectto operation (inject with an explicit
 * destination address). The operation's result is returned.
 */
static inline ssize_t fi_tinjectto(struct fid_ep *ep, const void *buf,
				   size_t len, fi_addr_t dest_addr,
				   uint64_t tag)
{
	return (*ep->tagged->injectto)(ep, buf, len, dest_addr, tag);
}
/*
 * Call the endpoint's tagged senddata operation (send carrying an extra
 * 64-bit data word). Arguments are forwarded unchanged; the operation's
 * result is returned.
 */
static inline ssize_t fi_tsenddata(struct fid_ep *ep, const void *buf,
				   size_t len, void *desc, uint64_t data,
				   uint64_t tag, void *context)
{
	return (*ep->tagged->senddata)(ep, buf, len, desc, data, tag,
				       context);
}
/*
 * Call the endpoint's tagged senddatato operation (senddata with an
 * explicit destination address). Arguments are forwarded unchanged; the
 * operation's result is returned.
 */
static inline ssize_t fi_tsenddatato(struct fid_ep *ep, const void *buf,
				     size_t len, void *desc, uint64_t data,
				     fi_addr_t dest_addr, uint64_t tag,
				     void *context)
{
	return (*ep->tagged->senddatato)(ep, buf, len, desc, data,
					 dest_addr, tag, context);
}
/*
 * Call the endpoint's tagged search operation.
 * tag, src_addr and len are passed by pointer straight through to the
 * operation; the operation's result is returned.
 */
static inline ssize_t fi_tsearch(struct fid_ep *ep, uint64_t *tag,
				 uint64_t ignore, uint64_t flags,
				 fi_addr_t *src_addr, size_t *len,
				 void *context)
{
	return (*ep->tagged->search)(ep, tag, ignore, flags, src_addr,
				     len, context);
}
#else // FABRIC_DIRECT
#include <rdma/fi_direct_tagged.h>
#endif
#ifdef __cplusplus
}
#endif
#endif /* _FI_TAGGED_H_ */

Просмотреть файл

@ -0,0 +1,74 @@
/*
* Copyright (c) 2014 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef _FI_TRIGGER_H_
#define _FI_TRIGGER_H_
#include <stdint.h>
#include <stddef.h>
#include <rdma/fabric.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Trigger event kinds; used as the type of the event_type member of
 * struct fi_triggered_context.  Only one kind is declared in this header.
 */
enum fi_trigger_event {
FI_TRIGGER_THRESHOLD,
};
/*
 * Counter/threshold pair; embedded as the `threshold` member of the union
 * in struct fi_triggered_context.
 * NOTE(review): presumably the trigger fires once `cntr` reaches
 * `threshold` -- confirm against the fi_trigger documentation.
 */
struct fi_trigger_threshold {
struct fid_cntr *cntr;
size_t threshold;
};
#ifndef FABRIC_DIRECT
/*
 * Size must match struct fi_context.
 *
 * event_type selects the trigger kind; the anonymous union holds the
 * parameters for that kind (anonymous unions require C11 or a compiler
 * extension).
 * NOTE(review): internal[3] presumably exists to pad the union so that the
 * size constraint above holds -- confirm against struct fi_context.
 */
struct fi_triggered_context {
enum fi_trigger_event event_type;
union {
struct fi_trigger_threshold threshold;
void *internal[3];
};
};
#else // FABRIC_DIRECT
#include <rdma/fi_direct_trigger.h>
#endif
#ifdef __cplusplus
}
#endif
#endif /* _FI_TRIGGER_H_ */

Просмотреть файл

@ -0,0 +1,12 @@
/*
 * Symbol version map.  The names listed under "global" are exported and
 * bound to the FABRIC_1.0 version node; "local: *" gives every other
 * symbol local binding so it is not exported.
 */
FABRIC_1.0 {
global:
fi_getinfo;
fi_freeinfo;
fi_dupinfo;
fi_fabric;
fi_version;
fi_strerror;
fi_register_provider;
fi_tostr;
local: *;
};

Просмотреть файл

@ -0,0 +1,71 @@
# Single definition of the upstream release number.  Version and the setup
# step below both derive from it, so the tarball name and the unpack
# directory always agree.
%define ver 0.0.2
Name: libfabric
Version: %{ver}
Release: 1%{?dist}
Summary: Userspace RDMA Fabric Interfaces
Group: System Environment/Libraries
License: GPLv2 or BSD
Url: http://www.github.com/ofiwg/libfabric
Source: http://www.openfabrics.org/downloads/fabrics/%{name}-%{version}.tar.gz
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
%description
libfabric provides a userspace API to access high-performance fabric
services, such as RDMA.
%package devel
Summary: Development files for the libfabric library
Group: System Environment/Libraries
# The devel package ships the unversioned .so symlink, which dangles unless
# the matching runtime package is installed.
Requires: %{name} = %{version}-%{release}
%description devel
Development files for the libfabric library.
%package utils
Summary: Examples for the libfabric library
Group: System Environment/Libraries
Requires: %{name} = %{version}-%{release}
%description utils
Example test programs for the libfabric library.
%prep
%setup -q -n %{name}-%{ver}
%build
%configure
make %{?_smp_mflags}
%install
rm -rf $RPM_BUILD_ROOT
%makeinstall
# remove unpackaged files from the buildroot
rm -f $RPM_BUILD_ROOT%{_libdir}/*.la
%clean
rm -rf $RPM_BUILD_ROOT
%post -p /sbin/ldconfig
%postun -p /sbin/ldconfig
%files
%defattr(-,root,root,-)
%{_libdir}/lib*.so.*
%doc AUTHORS COPYING README
%files devel
%defattr(-,root,root,-)
%{_libdir}/libfabric*.so
%{_libdir}/*.a
%{_includedir}/*
%{_mandir}/man3/*
%{_mandir}/man7/*
%files utils
%defattr(-,root,root,-)
%{_bindir}/*
%{_mandir}/man1/*
%changelog

Просмотреть файл

@ -0,0 +1,71 @@
# Single definition of the upstream release number, substituted at configure
# time.  Version and the setup step below both derive from it, so the
# tarball name and the unpack directory cannot drift apart when the package
# version is bumped.
%define ver @VERSION@
Name: libfabric
Version: %{ver}
Release: 1%{?dist}
Summary: Userspace RDMA Fabric Interfaces
Group: System Environment/Libraries
License: GPLv2 or BSD
Url: http://www.github.com/ofiwg/libfabric
Source: http://www.openfabrics.org/downloads/fabrics/%{name}-%{version}.tar.gz
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
%description
libfabric provides a userspace API to access high-performance fabric
services, such as RDMA.
%package devel
Summary: Development files for the libfabric library
Group: System Environment/Libraries
# The devel package ships the unversioned .so symlink, which dangles unless
# the matching runtime package is installed.
Requires: %{name} = %{version}-%{release}
%description devel
Development files for the libfabric library.
%package utils
Summary: Examples for the libfabric library
Group: System Environment/Libraries
Requires: %{name} = %{version}-%{release}
%description utils
Example test programs for the libfabric library.
%prep
%setup -q -n %{name}-%{ver}
%build
%configure
make %{?_smp_mflags}
%install
rm -rf $RPM_BUILD_ROOT
%makeinstall
# remove unpackaged files from the buildroot
rm -f $RPM_BUILD_ROOT%{_libdir}/*.la
%clean
rm -rf $RPM_BUILD_ROOT
%post -p /sbin/ldconfig
%postun -p /sbin/ldconfig
%files
%defattr(-,root,root,-)
%{_libdir}/lib*.so.*
%doc AUTHORS COPYING README
%files devel
%defattr(-,root,root,-)
%{_libdir}/libfabric*.so
%{_libdir}/*.a
%{_includedir}/*
%{_mandir}/man3/*
%{_mandir}/man7/*
%files utils
%defattr(-,root,root,-)
%{_bindir}/*
%{_mandir}/man1/*
%changelog

Просмотреть файл

@ -0,0 +1,189 @@
.TH "FI_DIRECT" 7 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.SH NAME
Fabric Interface Library
.SH SYNOPSIS
.B #include <rdma/fabric.h>
.PP
Libfabric is a high-performance fabric software library designed to
provide low-latency interfaces to fabric hardware.
.SH OVERVIEW
Libfabric provides 'process direct I/O' to application software communicating
across fabric software and hardware. Process direct I/O, historically
referred to as RDMA, allows an application to directly access network
resources without operating system interventions. Data transfers can
occur directly to and from application memory.
.PP
There are two components to the libfabric software:
.IP "Fabric Providers" 12
Conceptually, a fabric provider may be viewed as a local hardware
NIC driver, though a provider is not limited by this definition. The first
component of libfabric is a general purpose framework that is capable of
handling different types of fabric hardware. All fabric hardware
devices and their software drivers are required to support this framework.
Devices and the drivers that plug into the libfabric framework are
referred to as fabric providers, or simply providers. Provider details
may be found in fi_prov.
.PP
.IP "Fabric Interfaces" 12
The second component is a set of communication operations. Libfabric
defines several sets of communication functions that providers can support.
It is not required that providers implement all the interfaces that are
defined; however, providers clearly indicate which interfaces they do
support.
.sp
The fabric interfaces are designed such that they are
cohesive and not simply a union of disjoint interfaces. The interfaces are
logically divided into two groups: control interfaces and communication
operations. The control interfaces are a common set of operations that
provide access to local communication resources, such as address vectors
and event queues. The communication operations expose particular models
of communication and fabric functionality, such as message queues, remote
memory access, and atomic operations. Communication operations are
associated with fabric endpoints.
.sp
Applications will typically use the control interfaces to discover local
capabilities and allocate necessary resources. They will then allocate and
configure a communication endpoint to send and receive data, or perform
other types of data transfers, with remote endpoints.
.SH "CONTROL INTERFACES"
The control interface APIs provide applications with access to network resources.
This involves listing all the interfaces available, obtaining the capabilities
of the interfaces and opening a provider.
.IP "fi_getinfo - Fabric Information" 12
The fi_getinfo call is the base call used to discover and request fabric
services offered by the system. Applications can use this call to
indicate the type of communication that they desire. The results from
fi_getinfo, fi_info, are used to reserve and configure fabric resources.
.sp
fi_getinfo returns a list of fi_info structures. Each structure references
a single fabric provider, indicating the interfaces that the provider
supports, along with a named set of resources. A fabric provider may
include multiple fi_info structures in the returned list.
.IP "fi_fabric - Fabric Domain" 12
A fabric domain represents a collection of hardware and software resources
that access a single physical or virtual network. All network ports on a
system that can communicate with each other through the fabric belong to
the same fabric domain. A fabric domain shares network addresses and
can span multiple providers. libfabric supports systems connected to
multiple fabrics.
.IP "fi_domain - Access Domains" 12
An access domain represents a single logical connection into a fabric.
It may map to a single physical or virtual NIC or a port.
An access domain defines the boundary across which fabric resources
may be associated. Each access domain belongs to a single fabric domain.
.IP "fi_endpoint - Fabric Endpoint" 12
A fabric endpoint is a communication portal. An endpoint may be either
active or passive. Passive endpoints are used to listen for connection
requests. Active endpoints can perform data transfers. Endpoints are
configured with specific communication capabilities and data transfer
interfaces.
.IP "fi_eq - Event Queue" 12
Event queues are used to collect
and report the completion of asynchronous operations. For example, the
completion of a data transfer operation submitted over a fabric endpoint
may write an event to an event queue associated with the endpoint.
There are multiple types of event queues, and the format of the events
that they report is controlled by applications.
.IP "fi_cntr - Event Counters" 12
Event counters are used to report the number of completed asynchronous
operations. Event counters are considered light-weight, in that a
completion simply increments a counter, rather than placing an entry into
an event queue.
.IP "fi_mr - Memory Region" 12
Memory regions describe application local memory buffers. In order for
fabric resources to access application memory, the application must first
grant permission to the fabric provider by constructing a memory region.
Memory regions are required for specific types of data transfer operations,
such as RMA transfers (see below).
.IP "fi_av - Address Vector" 12
Address vectors are used to map higher level addresses, such as IP
addresses, which may be more natural for an application to use, into
fabric specific addresses. The use of address vectors allows providers
to reduce the amount of memory required to maintain large address
look-up tables, and eliminate expensive address resolution and look-up
methods during data transfer operations.
.SH "DATA TRANSFER INTERFACES"
Fabric endpoints are associated with multiple data transfer interfaces.
Each interface set is designed to support a specific style of communication,
with an endpoint allowing the different interfaces to be used in conjunction.
The following data transfer interfaces are defined by libfabric.
.IP "fi_msg - Message Queue" 12
Message queues expose a simple, message-based FIFO queue interface to
the application. Message data transfers allow applications to send and
receive data with message boundaries being maintained.
.IP "fi_tagged - Tagged Message Queues" 12
Tagged message queues expose send/receive data transfer operations
built on the concept of tagged messaging. The tagged message queue is
conceptually similar to standard message queues, but with the addition
of 64-bit tags for each message. Sent messages are matched with receive
buffers that are tagged with a similar value.
.IP "fi_rma - Remote Memory Access" 12
RMA transfers are one-sided operations that read or write data directly
to a remote memory region. Other than defining the appropriate memory
region, RMA operations do not require interaction at the target side for
the data transfer to complete.
.IP "fi_atomic - Atomic" 12
Atomic operations can perform one of several operations on a remote
memory region. Atomic operations include well-known functionality, such
as atomic-add and compare-and-swap, plus several other pre-defined calls.
Unlike other data transfer interfaces, atomic operations are aware of the
data formatting at the target memory region.
.SH "PROVIDER REQUIREMENTS"
Libfabric provides a general framework for supporting multiple types of
fabric objects and their related interfaces. Fabric providers have a large
amount of flexibility in selecting which components they are able and
willing to support, based on specific hardware constraints. To assist in
the development of applications, libfabric specifies the following
requirements that must be met by any fabric provider, if requested by an
application. (Note that the instantiation of a specific fabric object is
subject to application configuration parameters and need not meet these
requirements).
.IP \(bu
A fabric provider must support at least one endpoint type.
.IP \(bu
All endpoints must support the message queue data transfer interface.
.IP \(bu
An endpoint that advertises support for a specific endpoint capability
must support the corresponding data transfer interface.
.IP \(bu
Endpoints must support operations to send and receive data for any data
transfer operations that they support.
.IP \(bu
Connectionless endpoints must support all relevant 'to/from' data transfer
routines. (sendto / recvfrom / writeto / readfrom / etc.)
.IP \(bu
Connectionless endpoints must support the CM interfaces for getname, getpeer,
and connect.
.IP \(bu
Connectionless endpoints that support multicast operations must support the
CM interfaces join and leave.
.IP \(bu
Connection-oriented interfaces must support the CM interfaces getname, getpeer,
connect, listen, accept, reject, and shutdown.
.IP \(bu
All endpoints must support all relevant 'msg' data transfer routines.
(sendmsg / recvmsg / writemsg / readmsg / etc.)
.IP \(bu
Access domains must support opening address vector maps and tables.
.IP \(bu
Address vectors associated with domains that may be identified using IP
addresses must support FI_SOCKADDR_IN and FI_SOCKADDR_IN6 input formats.
.IP \(bu
Address vectors must support FI_ADDR, FI_ADDR_INDEX, and FI_AV output formats.
.IP \(bu
Access domains must support opening event queues and counters.
.IP \(bu
Event queues must support the FI_EQ_FORMAT_CONTEXT format.
.IP \(bu
Event queues associated with data transfer completions must support the
FI_EQ_FORMAT_DATA format.
.IP \(bu
Event queues associated with tagged message transfers must support the
FI_EQ_FORMAT_TAGGED format.
.IP \(bu
A provider is expected to be forward compatible, and must be able to be
compiled against expanded fi_xxx_ops structures that define new functions
added after the provider was written. Any unknown functions must be set
to NULL.
.SH "SEE ALSO"
fi_getinfo(3), fi_endpoint(3), fi_domain(3), fi_av(3), fi_eq(3), fi_mr(3)

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_cm.3

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_endpoint.3

Просмотреть файл

@ -0,0 +1,467 @@
.TH "FI_ATOMIC" 3 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.SH NAME
fi_atomic \- Remote atomic functions
.PP
fi_atomic / fi_atomicv
.br
fi_atomicto / fi_atomicmsg
.RS
Initiates an atomic operation to remote memory
.RE
.P
fi_fetch_atomic / fi_fetch_atomicv
.br
fi_fetch_atomicto / fi_fetch_atomicmsg
.RS
Initiates an atomic operation to remote memory, retrieving the initial
value.
.RE
.P
fi_compare_atomic / fi_compare_atomicv
.br
fi_compare_atomicto / fi_compare_atomicmsg
.RS
Initiates an atomic compare-operation to remote memory, retrieving the initial
value.
.RE
.P
fi_atomicvalid / fi_fetch_atomicvalid / fi_compare_atomicvalid
.RS
Indicates if a provider supports a specific atomic operation
.RE
.SH SYNOPSIS
.B #include <rdma/fi_atomic.h>
.HP
.BI "ssize_t fi_atomic(struct fid_ep *" ep ","
.BI "const void * " buf ", size_t " count ", void *" desc ","
.BI "uint64_t " addr ", uint64_t " key ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", void * " context ");"
.HP
.BI "ssize_t fi_atomicv(struct fid_ep *" ep ","
.BI "const struct fi_ioc * " iov ", void **" desc ", size_t " count ","
.BI "uint64_t " addr ", uint64_t " key ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", void * " context ");"
.HP
.BI "ssize_t fi_atomicto(struct fid_ep *" ep ","
.BI "const void * " buf ", size_t " count ", void *" desc ","
.BI "fi_addr_t " dest_addr ","
.BI "uint64_t " addr ", uint64_t " key ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", void * " context ");"
.HP
.BI "ssize_t fi_atomicmsg(struct fid_ep *" ep ","
.BI "const struct fi_msg_atomic * " msg ","
.BI "uint64_t " flags ");"
.HP
.BI "ssize_t fi_inject_atomic(struct fid_ep *" ep ","
.BI "const void *" buf ","
.BI "size_t " count ","
.BI "uint64_t " addr ","
.BI "uint64_t " key ","
.BI "enum fi_datatype " datatype ","
.BI "enum fi_op " op ");"
.HP
.BI "ssize_t fi_inject_atomicto(struct fid_ep *" ep ","
.BI "const void *" buf ","
.BI "size_t " count ","
.BI "fi_addr_t " dest_addr ","
.BI "uint64_t " addr ","
.BI "uint64_t " key ","
.BI "enum fi_datatype " datatype ","
.BI "enum fi_op " op ");"
.PP
.HP
.BI "ssize_t fi_fetch_atomic(struct fid_ep *" ep ","
.BI "const void * " buf ", size_t " count ", void *" desc ","
.BI "void * " result ", void *" result_desc ","
.BI "uint64_t " addr ", uint64_t " key ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", void * " context ");"
.HP
.BI "ssize_t fi_fetch_atomicv(struct fid_ep *" ep ","
.BI "const struct fi_ioc * " iov ", void **" desc ", size_t " count ","
.BI "struct fi_ioc * " resultv ", void **" result_desc ", size_t " result_count ","
.BI "uint64_t " addr ", uint64_t " key ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", void * " context ");"
.HP
.BI "ssize_t fi_fetch_atomicto(struct fid_ep *" ep ","
.BI "const void * " buf ", size_t " count ", void *" desc ","
.BI "void * " result ", void *" result_desc ","
.BI "fi_addr_t " dest_addr ","
.BI "uint64_t " addr ", uint64_t " key ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", void * " context ");"
.HP
.BI "ssize_t fi_fetch_atomicmsg(struct fid_ep *" ep ","
.BI "const struct fi_msg_atomic * " msg ","
.BI "struct fi_ioc * " resultv ", void **" result_desc ", size_t " result_count ","
.BI "uint64_t " flags ");"
.PP
.HP
.BI "ssize_t fi_compare_atomic(struct fid_ep *" ep ","
.BI "const void * " buf ", size_t " count ", void *" desc ","
.BI "const void * " compare ", void *" compare_desc ","
.BI "void * " result ", void *" result_desc ","
.BI "uint64_t " addr ", uint64_t " key ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", void * " context ");"
.HP
.BI "ssize_t fi_compare_atomicv(struct fid_ep *" ep ","
.BI "const struct fi_ioc * " iov ", void **" desc ", size_t " count ","
.BI "const struct fi_ioc * " comparev ", void **" compare_desc ", size_t " compare_count ","
.BI "struct fi_ioc * " resultv ", void **" result_desc ", size_t " result_count ","
.BI "uint64_t " addr ", uint64_t " key ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", void * " context ");"
.HP
.BI "ssize_t fi_compare_atomicto(struct fid_ep *" ep ","
.BI "const void * " buf ", size_t " count ", void *" desc ","
.BI "const void * " compare ", void *" compare_desc ","
.BI "void * " result ", void *" result_desc ","
.BI "fi_addr_t " dest_addr ","
.BI "uint64_t " addr ", uint64_t " key ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", void * " context ");"
.HP
.BI "ssize_t fi_compare_atomicmsg(struct fid_ep *" ep ","
.BI "const struct fi_msg_atomic * " msg ","
.BI "const struct fi_ioc * " comparev ", void **" compare_desc ", size_t " compare_count ","
.BI "struct fi_ioc * " resultv ", void **" result_desc ", size_t " result_count ","
.BI "uint64_t " flags ");"
.PP
.HP
.BI "int fi_atomicvalid(struct fid_ep *" ep ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", size_t " count ");"
.HP
.BI "int fi_fetch_atomicvalid(struct fid_ep *" ep ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", size_t " count ");"
.HP
.BI "int fi_compare_atomicvalid(struct fid_ep *" ep ","
.BI "enum fi_datatype " datatype ", enum fi_op " op ", size_t " count ");"
.SH ARGUMENTS
.IP "ep"
Fabric endpoint on which to initiate atomic operation.
.IP "buf"
Local data buffer that specifies first operand of atomic operation
.IP "iov / comparev / resultv"
Vectored data buffer(s).
.IP "count / compare_count / result_count"
Count of vectored data entries.
.IP "addr"
Address of remote memory to access.
.IP "key"
Protection key associated with the remote memory.
.IP "datatype"
Datatype associated with atomic operands
.IP "op"
Atomic operation to perform
.IP "compare"
Local compare buffer, containing comparison data.
.IP "result"
Local data buffer to store initial value of remote buffer
.IP "desc / compare_desc / result_desc"
Data descriptor associated with the local data buffer, local compare buffer,
and local result buffer, respectively.
.IP "dest_addr"
Destination address for connectionless atomic operations
.IP "msg"
Message descriptor for atomic operations
.IP "flags"
Additional flags to apply for the atomic operation
.IP "context"
User specified pointer to associate with the operation.
.SH "DESCRIPTION"
Atomic transfers are used to read and update data located in remote memory
regions in an atomic fashion. Conceptually, they are similar to local
atomic operations of a similar nature (e.g. atomic increment, compare and
swap, etc.). Updates to remote data involve one of several operations on
the data, and act on specific types of data, as listed below. As such,
atomic transfers have knowledge of the format of the data being
accessed. A single atomic function may operate across an array of data
applying an atomic operation to each entry, but the atomicity of an
operation is limited to a single datatype or entry.
.SS "Atomic Data Types"
Atomic functions may operate on one of the following identified data types.
A given atomic function may support any datatype, subject to provider
implementation constraints.
.IP "FI_INT8"
Signed 8-bit integer.
.IP "FI_UINT8"
Unsigned 8-bit integer.
.IP "FI_INT16"
Signed 16-bit integer.
.IP "FI_UINT16"
Unsigned 16-bit integer.
.IP "FI_INT32"
Signed 32-bit integer.
.IP "FI_UINT32"
Unsigned 32-bit integer.
.IP "FI_INT64"
Signed 64-bit integer.
.IP "FI_UINT64"
Unsigned 64-bit integer.
.IP "FI_FLOAT"
A single-precision floating point value (IEEE 754).
.IP "FI_DOUBLE"
A double-precision floating point value (IEEE 754).
.IP "FI_FLOAT_COMPLEX"
An ordered pair of single-precision floating point values (IEEE 754),
with the first value representing the real portion of a complex
number and the second representing the imaginary portion.
.IP "FI_DOUBLE_COMPLEX"
An ordered pair of double-precision floating point values (IEEE 754),
with the first value representing the real portion of a complex
number and the second representing the imaginary portion.
.IP "FI_LONG_DOUBLE"
A double-extended precision floating point value (IEEE 754).
.IP "FI_LONG_DOUBLE_COMPLEX"
An ordered pair of double-extended precision floating point values (IEEE 754),
with the first value representing the real portion of a complex
number and the second representing the imaginary portion.
.SS "Atomic Operations"
The following atomic operations are defined. An atomic operation often acts
against a target value in the remote memory buffer and source value provided
with the atomic function. It may also carry source data to replace the
target value in compare and swap operations. A conceptual description of
each operation is provided.
.IP "FI_MIN"
Minimum
.nf
if (buf[i] < addr[i])
addr[i] = buf[i]
.fi
.IP "FI_MAX"
Maximum
.nf
if (buf[i] > addr[i])
addr[i] = buf[i]
.fi
.IP "FI_SUM"
Sum
.nf
addr[i] = addr[i] + buf[i]
.fi
.IP "FI_PROD"
Product
.nf
addr[i] = addr[i] * buf[i]
.fi
.IP "FI_LOR"
Logical OR
.nf
addr[i] = (addr[i] || buf[i])
.fi
.IP "FI_LAND"
Logical AND
.nf
addr[i] = (addr[i] && buf[i])
.fi
.IP "FI_BOR"
Bitwise OR
.nf
addr[i] = addr[i] | buf[i]
.fi
.IP "FI_BAND"
Bitwise AND
.nf
addr[i] = addr[i] & buf[i]
.fi
.IP "FI_LXOR"
Logical exclusive-OR (XOR)
.nf
addr[i] = ((addr[i] && !buf[i]) || (!addr[i] && buf[i]))
.fi
.IP "FI_BXOR"
Bitwise exclusive-OR (XOR)
.nf
addr[i] = addr[i] ^ buf[i]
.fi
.IP "FI_ATOMIC_READ"
Read data atomically
.nf
buf[i] = addr[i]
.fi
.IP "FI_ATOMIC_WRITE"
Write data atomically
.nf
addr[i] = buf[i]
.fi
.IP "FI_CSWAP"
Compare values and if equal swap with data
.nf
if (addr[i] == compare[i])
addr[i] = buf[i]
.fi
.IP "FI_CSWAP_NE"
Compare values and if not equal swap with data
.nf
if (addr[i] != compare[i])
addr[i] = buf[i]
.fi
.IP "FI_CSWAP_LE"
Compare values and if less than or equal swap with data
.nf
if (addr[i] <= compare[i])
addr[i] = buf[i]
.fi
.IP "FI_CSWAP_LT"
Compare values and if less than swap with data
.nf
if (addr[i] < compare[i])
addr[i] = buf[i]
.fi
.IP "FI_CSWAP_GE"
Compare values and if greater than or equal swap with data
.nf
if (addr[i] >= compare[i])
addr[i] = buf[i]
.fi
.IP "FI_CSWAP_GT"
Compare values and if greater than swap with data
.nf
if (addr[i] > compare[i])
addr[i] = buf[i]
.fi
.IP "FI_MSWAP"
Swap masked bits with data
.nf
addr[i] = (buf[i] & compare[i]) | (addr[i] & ~compare[i])
.fi
.SS "Base Atomic Functions"
The base atomic functions -- fi_atomic, fi_atomicv, fi_atomicto,
fi_atomicmsg -- are used to transmit data to a remote node,
where the specified atomic operation is performed against the target data.
The result of a base atomic function is stored at the remote memory region.
The main difference between atomic functions is the number and type of
parameters that they accept as input. Otherwise, they perform the same
general function.
.PP
The call fi_atomic transfers the data contained in the user-specified data
buffer to a remote node. The local endpoint must be connected to
a remote endpoint or destination before fi_atomic is called. Unless the
endpoint has been configured differently, the data buffer passed into
fi_atomic must not be touched by the application until the fi_atomic
call completes asynchronously. The target buffer of a base atomic
operation must allow for remote read and/or write access, as appropriate.
.PP
The fi_atomicv call adds support for a scatter-gather list to fi_atomic.
The fi_atomicv transfers the set of data buffers referenced by
the iov parameter to the remote node for processing.
.PP
The fi_atomicto function is equivalent to fi_atomic for unconnected endpoints.
.PP
The fi_inject_atomic call is an optimized version of fi_atomic. The
fi_inject_atomic function behaves as if the FI_INJECT transfer flag were set,
and FI_EVENT were not. That is, the data buffer is available for reuse
immediately on returning from fi_inject_atomic, and no completion event
will be generated for this atomic. The completion event will be suppressed
even if the endpoint has not been configured with FI_EVENT. See the flags
discussion below for more details.
.PP
The fi_inject_atomicto is equivalent to fi_inject_atomic for unconnected
endpoints.
.PP
The fi_atomicmsg call supports atomic functions over both connected and unconnected
endpoints, with the ability to control the atomic operation per call through the
use of flags. The fi_atomicmsg function takes a struct fi_msg_atomic as input.
.PP
.nf
struct fi_msg_atomic {
const struct fi_ioc *msg_iov; /* local scatter-gather array */
void **desc; /* local access descriptors */
size_t iov_count;/* # elements in ioc */
const void *addr; /* optional endpoint address */
const struct fi_rma_ioc *rma_iov; /* remote SGL */
size_t rma_iov_count;/* # elements in remote SGL */
enum fi_datatype datatype; /* operand datatype */
enum fi_op op; /* atomic operation */
void *context; /* user-defined context */
uint64_t data; /* optional data */
};
struct fi_rma_ioc {
uint64_t addr; /* target address */
size_t count; /* # target operands */
uint64_t key; /* access key */
};
.fi
.SS "Fetch-Atomic Functions"
The fetch atomic functions -- fi_fetch_atomic,
fi_fetch_atomicv, fi_fetch_atomicto, and
fi_fetch_atomicmsg -- behave similarly to the equivalent base atomic function.
The difference between the fetch and base atomic calls is that the fetch
atomic routines return the initial value that was stored at the target
to the user. The initial value is read into the user provided result
buffer. The target buffer of fetch-atomic operations must be enabled
for remote read access.
.PP
The following list of atomic operations are usable with both the base
atomic and fetch atomic operations: FI_MIN, FI_MAX, FI_SUM, FI_PROD,
FI_LOR, FI_LAND, FI_BOR, FI_BAND, FI_LXOR, FI_BXOR, FI_ATOMIC_READ,
and FI_ATOMIC_WRITE.
.SS "Compare-Atomic Functions"
The compare atomic functions -- fi_compare_atomic,
fi_compare_atomicv, fi_compare_atomicto, and
fi_compare_atomicmsg -- are used for operations that require comparing
the target data against a value before performing a swap operation.
The compare atomic functions support: FI_CSWAP, FI_CSWAP_NE, FI_CSWAP_LE,
FI_CSWAP_LT, FI_CSWAP_GE, FI_CSWAP_GT, and FI_MSWAP.
.SS "Atomic Valid Functions"
The atomic valid functions -- fi_atomicvalid, fi_fetch_atomicvalid,
and fi_compare_atomicvalid -- indicate which operations the local provider
supports. Needed operations not supported by the provider must be emulated
by the application. Each valid call corresponds to a set of atomic functions.
fi_atomicvalid checks whether a provider supports a specific base atomic
operation for a given datatype and operation. fi_fetch_atomicvalid indicates
if a provider supports a specific fetch-atomic operation for a given datatype
and operation. And fi_compare_atomicvalid checks if a provider supports a
specified compare-atomic operation for a given datatype and operation.
.PP
If an operation is supported, an atomic valid call will return 0, along with
a count of atomic data units that a single function call will operate on.
.SS "Completions"
Completed atomic operations are reported to the user through one or more event
collectors associated with the endpoint. Users provide a context which is
associated with each operation and is returned to the user
as part of the event completion. See fi_eq for completion event details.
.PP
Updates to the target buffer of an atomic operation are visible to
processes running on the target system either after a completion has been
generated, or after the completion of an operation initiated after the
atomic call with a fencing operation occurring in between. For example,
the target process may be notified by the initiator sending a message
after the atomic call completes, or sending a fenced message immediately
after initiating the atomic operation.
.SH "FLAGS"
The fi_atomicmsg, fi_fetch_atomicmsg, and fi_compare_atomicmsg calls allow
the user to specify flags which can change the default data transfer operation.
Flags specified with atomic message operations override most flags
previously configured with the endpoint, except where noted (see fi_control).
The following list of flags are usable with atomic message calls.
.IP "FI_EVENT"
Indicates that a completion entry should be generated for the specified
operation. The endpoint must be bound to an event queue
with FI_EVENT that corresponds to the specified operation, or this flag
is ignored.
.IP "FI_MORE"
Indicates that the user has additional requests that will immediately be
posted after the current call returns. Use of this flag may improve
performance by enabling the provider to optimize its access to the fabric
hardware.
.IP "FI_REMOTE_SIGNAL"
Indicates that a completion event at the target process should be generated
for the given operation. The remote endpoint must be configured with
FI_REMOTE_SIGNAL, or this flag will be ignored by the target.
.IP "FI_INJECT"
Indicates that the outbound non-const data buffers (buf and compare parameters)
should be returned to user immediately after the call returns, even if the
operation is handled asynchronously. This may require that the underlying
provider implementation copy the data into a local buffer and transfer out of
that buffer. The use of output result buffers is not affected by this flag.
.SH "RETURN VALUE"
Returns 0 on success. On error, a negative value corresponding to fabric
errno is returned. Fabric errno values are defined in
.IR "rdma/fi_errno.h" .
.SH "ERRORS"
.IP "-FI_EOPNOTSUPP"
The requested atomic operation is not supported on this endpoint.
.IP "-FI_EMSGSIZE"
The number of atomic operations in a single request exceeds that supported
by the underlying provider.
.SH "NOTES"
.SH "SEE ALSO"
fi_getinfo(3), fi_endpoint(3), fi_domain(3), fi_eq(3), fi_rma(3)

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_atomic.3

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_atomic.3

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_atomic.3

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_atomic.3

Просмотреть файл

@ -0,0 +1,361 @@
.TH "FI_AV" 3 "2014-11-11" "Libfabric v0.0.2" "Libfabric Programmer's Manual" libfabric
.SH NAME
fi_av \- Address vector operations
.PP
fi_av_open / fi_close
.RS
Open or close an address vector
.RE
.PP
fi_av_bind
.RS
Associate an address vector with an event queue.
.RE
.PP
fi_av_insert / fi_av_insertsvc / fi_av_remove
.RS
Insert/remove an address into/from the address vector.
.RE
.PP
fi_av_lookup
.RS
Retrieve an address stored in the address vector.
.RE
.PP
fi_av_straddr
.RS
Convert an address into a printable string.
.RE
.SH SYNOPSIS
.B #include <rdma/fi_domain.h>
.HP
.BI "int fi_av_open(struct fid_domain *" domain ", struct fi_av_attr *" attr ", "
.BI "struct fid_av **" av ", void * " context ");"
.HP
.BI "int fi_close(struct fid *" av ");"
.PP
.HP
.BI "int fi_av_bind(struct fid_av *" av ", struct fid *" eq ", uint64_t " flags ");"
.PP
.HP
.BI "int fi_av_insert(struct fid_av *" av ", void *" addr ", size_t " count ", "
.BI "fi_addr_t *" fi_addr ", uint64_t " flags ", void *" context ");"
.HP
.BI "int fi_av_insertsvc(struct fid_av *" av ", const char *" node ", "
.BI "const char *" service ", fi_addr_t *" fi_addr ", uint64_t " flags ", "
.BI "void *" context ");"
.HP
.BI "int fi_av_insertsym(struct fid_av *" av ", const char *" node ", "
.BI "size_t " nodecnt ", const char *" service ", size_t " svccnt ", "
.BI "fi_addr_t *" fi_addr ", uint64_t " flags ", void *" context ");"
.HP
.BI "int fi_av_remove(struct fid_av *" av ", fi_addr_t " fi_addr ", size_t " count ", "
.BI "uint64_t " flags ");"
.HP
.BI "int fi_av_lookup(struct fid_av *" av ", fi_addr_t " fi_addr ", "
.BI "void *" addr ", size_t *" addrlen ");"
.HP
.BI "fi_addr_t fi_rx_addr(fi_addr_t " fi_addr ", int " rx_index ", "
.BI "int " rx_ctx_bits ");"
.HP
.BI "const char * fi_av_straddr(struct fid_av *" av ", const void *" addr ", "
.BI "void *" buf ", size_t " len ");"
.SH ARGUMENTS
.IP "domain"
Resource domain
.IP "av"
Address vector
.IP "eq"
Event queue
.IP "attr"
Address vector attributes
.IP "context"
User specified context associated with the address vector or insert operation.
.IP "addr"
Buffer containing one or more addresses to insert into address vector.
.IP "addrlen"
On input, specifies size of addr buffer. On output, stores number of bytes
written to addr buffer.
.IP "fi_addr"
For insert, a reference to an array where returned fabric addresses
will be written.
.br
For remove, one or more fabric addresses to remove.
.IP "count"
Number of addresses to insert/remove from an AV.
.IP "flags"
Additional flags to apply to the operation.
.SH "DESCRIPTION"
Address vectors are used to map higher level addresses, which may be
more natural for an application to use, into fabric specific addresses.
The mapping of addresses is fabric and provider specific, but may involve
lengthy address resolution and fabric management protocols. AV operations
are synchronous by default, but may be set to operate asynchronously by
specifying the FI_EVENT flag to
.B fi_av_open.
When requesting asynchronous operation, the
application must first bind an event queue to the AV before inserting
addresses.
.SS "fi_av_open"
fi_av_open allocates or opens an address vector. The properties and behavior of
the address vector are defined by struct fi_av_attr.
.PP
.nf
struct fi_av_attr {
enum fi_av_type type; /* type of AV */
int rx_ctx_bits; /* address bits to identify rx ctx */
size_t count; /* # entries for AV */
size_t ep_per_node; /* # endpoints per fabric address */
const char *name; /* system name of AV */
void *map_addr; /* base mmap address */
uint64_t flags; /* operation flags */
};
.fi
.IP "type"
An AV type corresponds to a conceptual implementation of an address vector.
The type specifies how an application views data stored in the AV,
including how it may be accessed. Valid values are:
.RS
.IP "FI_AV_MAP"
Addresses which are inserted into an AV are mapped to a native fabric
address for use by the application. The use of FI_AV_MAP requires that
an application store the returned fi_addr_t value that is associated with
each inserted address. The advantage of using FI_AV_MAP is that the returned
fi_addr_t value may contain encoded address data, which is immediately
available when processing data transfer requests. This can eliminate
or reduce the number of memory lookups needed when initiating a transfer.
The disadvantage of FI_AV_MAP is the increase in memory usage needed to
store the returned addresses.
Addresses are stored in the AV using a provider specific
mechanism, including, but not limited to a tree, hash table, or maintained
on the heap.
.IP "FI_AV_TABLE"
Addresses which are inserted into an AV of type FI_AV_TABLE are accessible
using a simple index. Conceptually, the AV may be treated as an array
of addresses, though the provider may implement the AV using a variety
of mechanisms. When FI_AV_TABLE is used, the returned fi_addr_t is an
index, with the index for an inserted address the same as its
insertion order into the table.
The index of the first address inserted into an FI_AV_TABLE will be 0, and
successive insertions will be given sequential indices.
Sequential indices will be assigned across insertion calls on the same AV.
.RE
.IP "Receive Context Bits (rx_ctx_bits)"
The receive context bits field is only for use with scalable endpoints. It
indicates the number of bits reserved in a returned fi_addr_t, which will
be used to identify a specific target receive context. See fi_rx_addr()
and fi_endpoint(3) for additional details on receive contexts. The
requested number of bits should be selected such that 2 ^ rx_ctx_bits >=
rx_ctx_cnt for the endpoint.
.IP "count"
Indicates the expected number of addresses that will be inserted into
the AV. The provider uses this to optimize resource allocations.
.IP "ep_per_node"
This field indicates the number of endpoints that will be associated
with a specific fabric, or network, address. If the number of endpoints
per node is unknown, this value should be set to 0.
The provider uses this value to optimize resource allocations.
For example, distributed, parallel applications may set this to the number
of processes allocated per node, times the number of endpoints each process
will open.
.IP "name"
An optional system name associated with the address vector to create or open.
Address vectors may be shared across multiple processes which access
the same named domain on the same node. The name field allows the
underlying provider to identify a shared AV.
.sp
If the name field is non-NULL and the AV is not opened for read-only
access, a named AV will be created, if it does not already exist.
.IP "map_addr"
The map_addr determines the base FI_ADDR address that a provider
should use when sharing an AV of type FI_AV_MAP between processes.
Processes that provide the same value for map_addr to a shared AV may
use the same FI_ADDR values returned from an fi_av_insert call.
.sp
The map_addr may be used by the provider to mmap memory allocated for a
shared AV between processes; however, the provider is not required to use
the map_addr in this fashion. The only requirement is that an FI_ADDR
returned as part of an fi_av_insert call on one process is usable on
another process which opens an AV of the same name at the same map_addr
value. The relationship between the map_addr and any returned FI_ADDR
is not defined.
.sp
If name is non-NULL and map_addr is 0, then the map_addr used by the
provider will be returned through the attribute structure. The map_addr
field is ignored if name is NULL.
.IP "flags"
The following flags may be used when opening an AV.
.RS
.IP "FI_EVENT"
When the flag FI_EVENT is specified, all insert operations on this
AV will occur
asynchronously. There will be one EQ error entry generated for each
failed address insertion, followed by one non-error event indicating that the
insertion operation has completed.
There will always be one non-error completion event for each
insert operation, even if all addresses fail. The context field in all
completions will be the context specified to the insert call, and the data
field in the final completion entry will report the number of addresses
successfully inserted.
.sp
If an AV is opened with FI_EVENT, any insertions attempted before an EQ
is bound to the AV will fail with -FI_ENOEQ.
.sp
Error completions for failed insertions will contain the index of the failed
address in the index field of the error completion entry.
.sp
Note that the order of delivery of insert completions may not match
the order in which the calls to fi_av_insert were made. The only guarantee
is that all error completions for a given call to fi_av_insert will precede
the single associated non-error completion.
.IP "FI_READ"
Opens an AV for read-only access. An AV opened for read-only access
must be named (name attribute specified), and the AV must exist.
.IP "FI_SYMMETRIC"
Indicates that each node will be associated with the
same number of endpoints, the same transport addresses will be allocated
on each node, and the transport addresses will be sequential. This feature
targets distributed applications on large fabrics and allows for
highly-optimized storage of remote endpoint addressing.
.RE
.SS "fi_close"
The fi_close call is used to release all resources associated with an
address vector. Note that any events queued on an event queue referencing
the AV are left untouched. It is recommended that callers retrieve all
events associated with the AV before closing it.
.SS "fi_av_bind"
Associates an event queue with the AV. If an AV has been opened with
.B FI_EVENT,
then an event queue must be bound to the AV before any insertion
calls are attempted. Any calls to insert addresses before an event queue
has been bound will fail with -FI_ENOEQ.
.SS "fi_av_insert"
The fi_av_insert call inserts zero or more addresses into an AV. The number
of addresses is specified through the count parameter. The addr parameter
references an array of addresses to insert into the AV. Addresses
inserted into an address vector must be in the same format as specified
in struct fi_info:addr_format for the corresponding domain. A NULL
value for an address may be used to indicate that an entry should be
associated with 'any' address (similar to the IPv4 address of 0.0.0.0).
.PP
For AV's of type FI_AV_MAP, once inserted addresses have been mapped,
the mapped values are written into the buffer referenced by fi_addr.
The fi_addr buffer must remain valid until the
AV insertion has completed and an event has been generated
to an associated event queue. The value of the returned fi_addr should
be considered opaque by the application for AVs of type FI_AV_MAP.
The returned value may point to an internal structure or a provider
specific encoding of low-level addressing data, for example. In the latter
case, use of FI_AV_MAP may be able to avoid memory references during data
transfer operations.
.PP
For AV's of type FI_AV_TABLE, addresses are placed into the table in
order. That is, the first address inserted may be referenced at
index 0. The fi_addr parameter may be NULL in this case. Otherwise,
fi_addr must reference an array of fi_addr_t, and the buffer must
remain valid until the insertion operation completes. Note that if fi_addr
is NULL and synchronous operation is requested, individual insertion failures
cannot be reported and the application must use other calls, such as
.B fi_av_lookup
to learn which specific addresses failed to insert.
When addresses
are inserted into an AV of type FI_AV_TABLE, the assigned fi_addr values
will be simple indices corresponding to the entry into the table where the
address was inserted. Addresses are indexed in order of their insertion.
Index values accumulate across successive insert calls in the order the calls
are made, not necessarily in the order the insertions complete.
.IP "flags"
The following flag may be passed to fi_av_insert:
.RS
.IP "FI_MORE"
In order to allow optimized address insertion, the application may
specify the FI_MORE flag to the insert call to give a hint to the provider
that more insertion requests will follow, allowing the provider to aggregate
insertion requests if desired. Providers are free to ignore FI_MORE.
.RE
.SS "fi_av_insertsvc"
The fi_av_insertsvc call behaves similarly to fi_av_insert, but allows the
application to specify the node and service names, similar to the
fi_getinfo inputs, rather than an encoded address. The node and service
parameters are defined the same as fi_getinfo(3). Node should be a string
that corresponds to a hostname or network address. The service string
corresponds to a textual representation of a transport address.
.SS "fi_av_insertsym"
fi_av_insertsym performs a symmetric insert that inserts a sequential
range of nodes and/or service addresses into an AV. The svccnt parameter
indicates the number of transport (endpoint) addresses to insert into the AV
for each node address, with the service parameter specifying the starting
transport address. Inserted transport addresses will be of the range
{service, service + svccnt - 1}, inclusive. All service addresses for a
node will be inserted before the next node is inserted.
.PP
The nodecnt parameter indicates the number of node (network) addresses to
insert into the AV, with the node parameter specifying the starting
node address. Inserted node addresses will be of the range
{node, node + nodecnt - 1}, inclusive. If node is a non-numeric string,
such as a hostname, it must contain a numeric suffix if nodecnt > 1.
.PP
As an example, if node = "10.1.1.1", nodecnt = 2, service = "5000", and
svccnt = 2, the following addresses will be inserted into the AV in the
order shown: 10.1.1.1:5000, 10.1.1.1:5001, 10.1.1.2:5000, 10.1.1.2:5001.
If node were replaced by the hostname "host10", the addresses would be:
host10:5000, host10:5001, host11:5000, host11:5001.
.PP
The total number of inserted addresses will be nodecnt x svccnt.
.SS "fi_av_remove"
fi_av_remove removes a set of addresses from an address vector. All
resources associated with the indicated addresses are released, and
no future references to either the mapped address (in the case of
FI_AV_MAP) or index (FI_AV_TABLE) are allowed.
.PP
The use of fi_av_remove is an optimization that applications may use
to free memory allocated with addresses that will no longer be accessed.
Inserted addresses are not required to be removed. fi_close will
automatically clean up any resources associated with addresses remaining
in the AV when it is invoked.
.SS "fi_av_lookup"
This call returns the address stored in the address vector that corresponds
to the given fi_addr. The returned address is in the same format as those
stored by the AV. On input, the addrlen parameter should
indicate the size of the addr buffer. If the actual address is larger than
what can fit into the buffer, it will be truncated. On output, addrlen
is set to the size of the buffer needed to store the address, which may
be larger than the input value.
.SS "fi_rx_addr"
This function is used to convert an endpoint address, returned by fi_av_insert,
into an address that specifies a target receive context. The specified
fi_addr parameter must either be a value returned from fi_av_insert, in the
case of FI_AV_MAP, or an index, in the case of FI_AV_TABLE. The value for
rx_ctx_bits must match that specified in the AV attributes for the given
address.
.P
Connected endpoints that support multiple receive contexts, but are not
associated with address vectors should specify FI_ADDR_NOTAVAIL for the
fi_addr parameter.
.SS "fi_av_straddr"
The fi_av_straddr function converts the provided address into a printable
string. The specified address must be of the same format as those
stored by the AV, though the address itself is not required to have been
inserted. On input, the len parameter should specify the size of the buffer
referenced by buf. On output, the actual size needed to write the entire
string will be returned. This size may be larger than the input len. If
the provided buffer is too small, the results will be truncated. fi_av_straddr
returns a pointer to buf.
.SH "NOTES"
Providers may implement AV's using a variety of mechanisms. Specifically,
a provider may begin resolving inserted addresses as soon as they have
been added to an AV, even if asynchronous operation has been specified.
Similarly, a provider may lazily release resources from removed entries.
.SH "RETURN VALUES"
The insert calls return the number of addresses successfully inserted or
the number of asynchronous insertions initiated if FI_EVENT is set.
.PP
Other calls return 0 on success.
.PP
On error, a negative value corresponding to
fabric errno is returned.
Fabric errno values are defined in
.IR "rdma/fi_errno.h".
.SH "ERRORS"
.SH "SEE ALSO"
fi_getinfo(3), fi_endpoint(3), fi_domain(3), fi_eq(3)

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_av.3

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_av.3

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_av.3

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_av.3

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_av.3

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_av.3

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_av.3

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_endpoint.3

Просмотреть файл

@ -0,0 +1 @@
.so man3/fi_domain.3

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше