1
1
KNC is effectively dead.  Remove corresponding SCIF
support in Open MPI.

Signed-off-by: Howard Pritchard <howardp@lanl.gov>
Этот коммит содержится в:
Howard Pritchard 2018-09-19 08:35:05 -06:00
родитель 3dae8703a5
Коммит b9ac3d8931
20 изменённых файлов: 4 добавлений и 2501 удалений

4
README
Просмотреть файл

@ -605,7 +605,6 @@ Network Support
- Loopback (send-to-self)
- Shared memory
- TCP
- Intel Phi SCIF
- SMCUDA
- Cisco usNIC
- uGNI (Cray Gemini, Aries)
@ -1000,9 +999,6 @@ NETWORKING SUPPORT / OPTIONS
covers most cases. This option is only needed for special
configurations.
--with-scif=<dir>
Look in directory for Intel SCIF support libraries
--with-verbs=<directory>
Specify the directory where the verbs (also known as OpenFabrics
verbs, or Linux verbs, and previously known as OpenIB) libraries and

Просмотреть файл

@ -88,12 +88,8 @@ EXTRA_DIST = \
platform/lanl/darwin/mic-common \
platform/lanl/darwin/debug \
platform/lanl/darwin/debug.conf \
platform/lanl/darwin/debug-mic \
platform/lanl/darwin/debug-mic.conf \
platform/lanl/darwin/optimized \
platform/lanl/darwin/optimized.conf \
platform/lanl/darwin/optimized-mic \
platform/lanl/darwin/optimized-mic.conf \
platform/snl/portals4-m5 \
platform/snl/portals4-orte \
platform/ibm/debug-ppc32-gcc \

Просмотреть файл

@ -1,100 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the default system-wide MCA parameters defaults file.
# Specifically, the MCA parameter "mca_param_files" defaults to a
# value of
# "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf"
# (this file is the latter of the two). So if the default value of
# mca_param_files is not changed, this file is used to set system-wide
# MCA parameters. This file can therefore be used to set system-wide
# default MCA parameters for all users. Of course, users can override
# these values if they want, but this file is an excellent location
# for setting system-specific MCA parameters for those users who don't
# know / care enough to investigate the proper values for them.
# Note that this file is only applicable where it is visible (in a
# filesystem sense). Specifically, MPI processes each read this file
# during their startup to determine what default values for MCA
# parameters should be used. mpirun does not bundle up the values in
# this file from the node where it was run and send them to all nodes;
# the default value decisions are effectively distributed. Hence,
# these values are only applicable on nodes that "see" this file. If
# $sysconf is a directory on a local disk, it is likely that changes
# to this file will need to be propagated to other nodes. If $sysconf
# is a directory that is shared via a networked filesystem, changes to
# this file will be visible to all nodes that share this $sysconf.
# The format is straightforward: one per line, mca_param_name =
# rvalue. Quoting is ignored (so if you use quotes or escape
# characters, they'll be included as part of the value). For example:
# Disable run-time MPI parameter checking
# mpi_param_check = 0
# Note that the value "~/" will be expanded to the current user's home
# directory. For example:
# Change component loading path
# component_path = /usr/local/lib/openmpi:~/my_openmpi_components
# See "ompi_info --param all all" for a full listing of Open MPI MCA
# parameters available and their default values.
#
# Basic behavior to smooth startup
mca_base_component_show_load_errors = 0
opal_set_max_sys_limits = 1
orte_report_launch_progress = 1
# Define timeout for daemons to report back during launch
orte_startup_timeout = 10000
## Protect the shared file systems
orte_no_session_dirs = /panfs,/scratch,/users,/usr/projects
orte_tmpdir_base = /tmp
## Require an allocation to run - protects the frontend
## from inadvertent job executions
orte_allocation_required = 1
## Add the interface for out-of-band communication
## and set it up
oob_tcp_if_include=mic0
oob_tcp_peer_retries = 1000
oob_tcp_sndbuf = 32768
oob_tcp_rcvbuf = 32768
## Define the MPI interconnects
btl = sm,scif,openib,self
## Setup OpenIB - just in case
btl_openib_want_fork_support = 0
btl_openib_receive_queues = S,4096,1024:S,12288,512:S,65536,512
## Enable cpu affinity
hwloc_base_binding_policy = core
## Setup MPI options
mpi_show_handle_leaks = 1
mpi_warn_on_fork = 1
#mpi_abort_print_stack = 1

Просмотреть файл

@ -10,7 +10,7 @@
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
# Copyright (c) 2011-2018 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$
#
@ -84,7 +84,7 @@ oob_tcp_sndbuf = 32768
oob_tcp_rcvbuf = 32768
## Define the MPI interconnects
btl = sm,scif,openib,self
btl = sm,openib,self
## Setup OpenIB - just in case
btl_openib_want_fork_support = 0

Просмотреть файл

@ -1,100 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the default system-wide MCA parameters defaults file.
# Specifically, the MCA parameter "mca_param_files" defaults to a
# value of
# "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf"
# (this file is the latter of the two). So if the default value of
# mca_param_files is not changed, this file is used to set system-wide
# MCA parameters. This file can therefore be used to set system-wide
# default MCA parameters for all users. Of course, users can override
# these values if they want, but this file is an excellent location
# for setting system-specific MCA parameters for those users who don't
# know / care enough to investigate the proper values for them.
# Note that this file is only applicable where it is visible (in a
# filesystem sense). Specifically, MPI processes each read this file
# during their startup to determine what default values for MCA
# parameters should be used. mpirun does not bundle up the values in
# this file from the node where it was run and send them to all nodes;
# the default value decisions are effectively distributed. Hence,
# these values are only applicable on nodes that "see" this file. If
# $sysconf is a directory on a local disk, it is likely that changes
# to this file will need to be propagated to other nodes. If $sysconf
# is a directory that is shared via a networked filesystem, changes to
# this file will be visible to all nodes that share this $sysconf.
# The format is straightforward: one per line, mca_param_name =
# rvalue. Quoting is ignored (so if you use quotes or escape
# characters, they'll be included as part of the value). For example:
# Disable run-time MPI parameter checking
# mpi_param_check = 0
# Note that the value "~/" will be expanded to the current user's home
# directory. For example:
# Change component loading path
# component_path = /usr/local/lib/openmpi:~/my_openmpi_components
# See "ompi_info --param all all" for a full listing of Open MPI MCA
# parameters available and their default values.
#
# Basic behavior to smooth startup
mca_base_component_show_load_errors = 0
opal_set_max_sys_limits = 1
orte_report_launch_progress = 1
# Define timeout for daemons to report back during launch
orte_startup_timeout = 10000
## Protect the shared file systems
orte_no_session_dirs = /panfs,/scratch,/users,/usr/projects
orte_tmpdir_base = /tmp
## Require an allocation to run - protects the frontend
## from inadvertent job executions
orte_allocation_required = 1
## Add the interface for out-of-band communication
## and set it up
oob_tcp_if_include = mic0
oob_tcp_peer_retries = 1000
oob_tcp_sndbuf = 32768
oob_tcp_rcvbuf = 32768
## Define the MPI interconnects
btl = sm,scif,openib,self
## Setup OpenIB - just in case
btl_openib_want_fork_support = 0
btl_openib_receive_queues = S,4096,1024:S,12288,512:S,65536,512
## Enable cpu affinity
hwloc_base_binding_policy = core
## Setup MPI options
mpi_show_handle_leaks = 0
mpi_warn_on_fork = 1
#mpi_abort_print_stack = 0

Просмотреть файл

@ -10,7 +10,7 @@
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
# Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
@ -84,7 +84,7 @@ oob_tcp_sndbuf = 32768
oob_tcp_rcvbuf = 32768
## Define the MPI interconnects
btl = sm,scif,openib,self
btl = sm,openib,self
## Setup OpenIB - just in case
btl_openib_want_fork_support = 0

Просмотреть файл

@ -1,50 +0,0 @@
# -*- indent-tabs-mode:nil -*-
#
# Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
# reserved.
#
# Additional copyrights may follow
#
# $HEADER$
#
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
AM_CPPFLAGS = $(btl_scif_CPPFLAGS)
if MCA_BUILD_opal_btl_scif_DSO
component_noinst =
component_install = mca_btl_scif.la
else
component_noinst = libmca_btl_scif.la
component_install =
endif
scif_SOURCES = \
btl_scif_component.c \
btl_scif_module.c \
btl_scif_add_procs.c \
btl_scif_endpoint.h \
btl_scif_endpoint.c \
btl_scif_frag.c \
btl_scif_frag.h \
btl_scif_send.c \
btl_scif_put.c \
btl_scif_get.c \
btl_scif.h
mcacomponentdir = $(opallibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_btl_scif_la_SOURCES = $(scif_SOURCES)
nodist_mca_btl_scif_la_SOURCES = $(scif_nodist_SOURCES)
mca_btl_scif_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la \
$(btl_scif_LIBS)
mca_btl_scif_la_LDFLAGS = -module -avoid-version $(btl_scif_LDFLAGS)
noinst_LTLIBRARIES = $(component_noinst)
libmca_btl_scif_la_SOURCES = $(scif_SOURCES)
nodist_libmca_btl_scif_la_SOURCES = $(scif_nodist_SOURCES)
libmca_btl_scif_la_LIBADD = $(btl_scif_LIBS)
libmca_btl_scif_la_LDFLAGS = -module -avoid-version $(btl_scif_LDFLAGS)

Просмотреть файл

@ -1,249 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_SCIF_H
#define MCA_BTL_SCIF_H
#include "opal_config.h"
#include "opal/util/output.h"
#include "opal_stdint.h"
#include "opal/util/proc.h"
#include "opal/mca/btl/btl.h"
#include "opal/mca/btl/base/base.h"
#include "opal/mca/btl/base/btl_base_error.h"
#include "opal/mca/rcache/rcache.h"
#include "opal/mca/rcache/base/base.h"
#include <scif.h>
#include <errno.h>
#include <stdint.h>
#include <sys/types.h>
#include <assert.h>
#include <sys/time.h>
/* Turn on timers for debug builds */
#if OPAL_ENABLE_DEBUG
/* #define SCIF_TIMING */
#endif
#if defined(SCIF_TIMING)
#include <sys/time.h>
#include <math.h>
/**
 * Compute the elapsed time between two timestamps.
 *
 * @param end   (IN)  later timestamp
 * @param start (IN)  earlier timestamp
 * @param diff  (OUT) receives end - start, with tv_nsec normalized to be
 *                    non-negative by borrowing one second when needed
 */
static inline void timerspecsub (struct timespec *end, struct timespec *start,
                                 struct timespec *diff)
{
    time_t sec = end->tv_sec - start->tv_sec;
    long nsec = end->tv_nsec - start->tv_nsec;

    /* borrow a second when the nanosecond field underflows */
    if (nsec < 0) {
        sec -= 1;
        nsec += 1000000000;
    }

    diff->tv_nsec = nsec;
    diff->tv_sec = sec;
}
/*
 * Fold the time elapsed since `start` into a pair of timer statistics.
 *
 * Reads CLOCK_PROCESS_CPUTIME_ID, subtracts `start` (a struct timespec
 * lvalue) with timerspecsub(), converts the difference to seconds as a
 * double, adds it to `agg`, and raises `max` to that value if it is larger.
 * Note that `max` is evaluated twice, so it must not have side effects.
 */
#define SCIF_UPDATE_TIMER(agg, max, start) \
do { \
struct timespec _te, _diff; \
double _tmpd; \
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &_te); \
timerspecsub(&_te, &(start), &_diff); \
_tmpd = (double) _diff.tv_sec + (double) _diff.tv_nsec / 1000000000.0; \
(agg) += _tmpd; \
(max) = fmax ((max), _tmpd); \
} while (0)
#endif
/* Per-process data handed to the modex (see OPAL_MODEX_SEND usage in the
 * component): just the SCIF port identifier of this process. */
typedef struct mca_btl_scif_modex_t {
/* SCIF port ID; copied from the module's listening port_id when sent */
struct scif_portID port_id;
} mca_btl_scif_modex_t;
/* State of the SCIF BTL module. */
typedef struct mca_btl_scif_module_t {
/* base BTL module; must stay first so the module can be cast to/from
 * mca_btl_base_module_t */
mca_btl_base_module_t super;
/* listening endpoint */
scif_epd_t scif_fd;
/* listening port */
struct scif_portID port_id;
/* number of entries in the endpoints array */
size_t endpoint_count;
/* array of peer endpoints (one per reachable on-host peer) */
struct mca_btl_base_endpoint_t *endpoints;
/* NOTE(review): presumably fragments whose send must be retried -- confirm
 * against the send path */
opal_list_t failed_frags;
/* fragments for DMA */
opal_free_list_t dma_frags;
/* fragments for eager send */
opal_free_list_t eager_frags;
/* thread that polls the listening endpoint for incoming connections */
pthread_t listen_thread;
/* read by the listening thread to decide when to stop accepting */
volatile bool exiting;
/* true once the listening thread has been started */
bool listening;
/* registration cache used for memory registered with SCIF */
mca_rcache_base_module_t *rcache;
} mca_btl_scif_module_t;
/* State of the SCIF BTL component: the base component plus the values of
 * the MCA variables it registers and (optionally) performance timers. */
typedef struct mca_btl_scif_component_t {
/* base BTL component */
mca_btl_base_component_3_0_0_t super;
/* DMA free list settings */
/* initial fragment free list size (MCA var "free_list_num") */
int scif_free_list_num;
/* maximum fragment free list size (MCA var "free_list_max") */
int scif_free_list_max;
/* fragment free list size increment (MCA var "free_list_inc") */
int scif_free_list_inc;
/* size of the memory segment allocated for each remote process
 * (MCA var "segment_size") */
unsigned int segment_size;
/* use the CPU instead of DMA for RMA copies (MCA var "rma_use_cpu") */
bool rma_use_cpu;
/* use synchronous RMA instead of an RMA fence (MCA var "rma_sync") */
bool rma_sync;
#if defined(SCIF_TIMING)
/* performance timers */
/* aggregate / maximum time spent acquiring send buffers */
double aquire_buffer_time;
double aquire_buffer_time_max;
/* aggregate / maximum time spent writing to send buffers */
double send_time;
double send_time_max;
/* aggregate / maximum time spent writing to send buffers in sendi */
double sendi_time;
double sendi_time_max;
/* aggregate / maximum time spent in DMA reads, and number of get calls */
double get_time;
double get_time_max;
unsigned long get_count;
/* aggregate / maximum time spent in DMA writes, and number of put calls */
double put_time;
double put_time_max;
unsigned long put_count;
#endif
} mca_btl_scif_component_t;
int mca_btl_scif_module_init (void);
/**
* BML->BTL notification of change in the process list.
*
* location: btl_scif_add_procs.c
*
* @param btl (IN) BTL module
* @param nprocs (IN) Number of processes
* @param procs (IN) Array of processes
* @param peers (OUT) Array of mca_btl_base_endpoint_t structures filled in by the BTL.
* @param reachable (OUT) Bitmask indicating set of peer processes that are reachable by this BTL.
* @return OPAL_SUCCESS or error status on failure.
*/
int
mca_btl_scif_add_procs (struct mca_btl_base_module_t* btl,
size_t nprocs,
struct opal_proc_t **procs,
struct mca_btl_base_endpoint_t **peers,
opal_bitmap_t *reachable);
/**
* Notification of change to the process list.
*
* location: btl_scif_add_procs.c
*
* @param btl (IN) BTL module
* @param nprocs (IN) Number of processes
* @param procs (IN) Set of processes
* @param peers (IN) Set of peer addressing information.
* @return Status indicating if cleanup was successful
*/
int
mca_btl_scif_del_procs (struct mca_btl_base_module_t *btl,
size_t nprocs,
struct opal_proc_t **procs,
struct mca_btl_base_endpoint_t **peers);
/**
* Initiate an asynchronous send.
*
* location: btl_scif_send.c
*
* @param btl (IN) BTL module
* @param btl_peer (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
* @param tag (IN) The tag value used to notify the peer.
*/
int
mca_btl_scif_send (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *btl_peer,
struct mca_btl_base_descriptor_t *descriptor,
mca_btl_base_tag_t tag);
int mca_btl_scif_sendi (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
struct opal_convertor_t *convertor,
void *header, size_t header_size,
size_t payload_size, uint8_t order,
uint32_t flags, mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t **descriptor);
/**
* Initiate a get operation.
*
* location: btl_scif_get.c
*/
int mca_btl_scif_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
/**
* Initiate a put operation.
*
* location: btl_scif_put.c
*/
int mca_btl_scif_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
mca_btl_base_descriptor_t *
mca_btl_scif_alloc(struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
uint8_t order, size_t size, uint32_t flags);
int mca_btl_scif_progress_send_wait_list (struct mca_btl_base_endpoint_t *endpoint);
struct mca_btl_scif_reg_t;
/* SCIF's definition of the BTL registration handle: the SCIF offset of a
 * registered region and the base address that was registered. */
struct mca_btl_base_registration_handle_t {
/** scif offset */
off_t scif_offset;
/** base address of this scif region */
uintptr_t scif_base;
};
/* A registration handle paired with a pointer back to the rcache
 * registration that owns it. */
struct mca_btl_scif_registration_handle_t {
/* the handle exposed through the BTL interface */
mca_btl_base_registration_handle_t btl_handle;
/* owning registration */
struct mca_btl_scif_reg_t *reg;
};
typedef struct mca_btl_scif_registration_handle_t mca_btl_scif_registration_handle_t;
/* rcache registration extended with SCIF-specific data. */
typedef struct mca_btl_scif_reg_t {
/* base rcache registration; must stay first so the registration can be
 * cast to/from mca_rcache_base_registration_t */
mca_rcache_base_registration_t base;
/** per-endpoint btl handles for this registration */
mca_btl_scif_registration_handle_t *handles;
} mca_btl_scif_reg_t;
/* Global structures */
OPAL_MODULE_DECLSPEC extern mca_btl_scif_component_t mca_btl_scif_component;
OPAL_MODULE_DECLSPEC extern mca_btl_scif_module_t mca_btl_scif_module;
#endif

Просмотреть файл

@ -1,259 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/util/sys_limits.h"
#include "btl_scif.h"
#include "btl_scif_frag.h"
static int mca_btl_scif_setup_rcache (mca_btl_scif_module_t *scif_module);
static void *mca_btl_scif_connect_accept (void *arg);
/**
 * BML->BTL notification of a change in the process list.
 *
 * Counts the peers that live on the local host (excluding the calling
 * process), allocates the endpoint array and the registration cache / free
 * lists on first use, initializes one endpoint per on-host peer, marks those
 * peers reachable, and finally starts the connection-listening thread if it
 * is not running yet.
 *
 * @param btl       (IN)  BTL module (a mca_btl_scif_module_t)
 * @param nprocs    (IN)  Number of entries in procs / peers
 * @param procs     (IN)  Array of processes
 * @param peers     (OUT) Per-process endpoint; set to NULL for off-host
 *                        processes, left untouched for the local process
 * @param reachable (OUT) Bitmap with one bit set per reachable peer
 *
 * @return OPAL_SUCCESS, OPAL_ERR_OUT_OF_RESOURCE if the endpoint array can
 *         not be allocated, OPAL_ERROR if the listening thread can not be
 *         created, or the error returned by a failing helper.
 */
int mca_btl_scif_add_procs(struct mca_btl_base_module_t* btl,
                           size_t nprocs,
                           struct opal_proc_t **procs,
                           struct mca_btl_base_endpoint_t **peers,
                           opal_bitmap_t *reachable) {
    mca_btl_scif_module_t *scif_module = (mca_btl_scif_module_t *) btl;
    size_t procs_on_board, i, board_proc;
    opal_proc_t *my_proc = opal_proc_local_get();
    int rc;

    /* determine how many procs are on this board */
    for (i = 0, procs_on_board = 0 ; i < nprocs ; ++i) {
        struct opal_proc_t *opal_proc = procs[i];

        if (my_proc == opal_proc) {
            continue;
        }

        if (!OPAL_PROC_ON_LOCAL_HOST(opal_proc->proc_flags)) {
            /* scif can only be used with procs on this board */
            continue;
        }

        procs_on_board++;
    }

    /* allocate space for the detected peers and setup the rcache */
    if (NULL == scif_module->endpoints) {
        scif_module->endpoints = calloc (procs_on_board, sizeof (mca_btl_base_endpoint_t));
        if (OPAL_UNLIKELY(NULL == scif_module->endpoints)) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }

        rc = mca_btl_scif_setup_rcache (scif_module);
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
            BTL_ERROR(("btl/scif error setting up rcache or free lists"));
            return rc;
        }
    }

    for (i = 0, board_proc = 0 ; i < nprocs ; ++i) {
        struct opal_proc_t *opal_proc = procs[i];

        if (my_proc == opal_proc) {
            continue;
        }

        if (!OPAL_PROC_ON_LOCAL_HOST(opal_proc->proc_flags)) {
            /* scif can only be used with procs on this board */
            peers[i] = NULL;
            continue;
        }

        /* Initialize endpoints */
        rc = mca_btl_scif_ep_init (scif_module->endpoints + board_proc, (mca_btl_scif_module_t *) btl, opal_proc);
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
            BTL_ERROR(("btl/scif error initializing endpoint"));
            return rc;
        }

        scif_module->endpoints[board_proc].id = board_proc;

        /* Set the reachable bit; do not report a peer as usable if the
         * bitmap could not be updated */
        rc = opal_bitmap_set_bit (reachable, i);
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
            BTL_ERROR(("btl/scif error setting reachable bit"));
            return rc;
        }

        /* Store a reference to this peer */
        peers[i] = scif_module->endpoints + board_proc;

        board_proc++;
    }

    BTL_VERBOSE(("%lu procs on board\n", (unsigned long) procs_on_board));

    scif_module->endpoint_count = procs_on_board;

    if (!mca_btl_scif_module.listening) {
        /* start listening thread. pthread_create() reports failure with a
         * positive error number, so any non-zero value is an error. */
        rc = pthread_create (&mca_btl_scif_module.listen_thread, NULL, mca_btl_scif_connect_accept, NULL);
        if (0 != rc) {
            return OPAL_ERROR;
        }
        mca_btl_scif_module.listening = true;
    }

    return OPAL_SUCCESS;
}
/**
 * Thread body: wait for incoming SCIF connections on the module's listening
 * endpoint and hand each one to the passive-connect path.
 *
 * The loop ends when scif_poll() does not report exactly one ready
 * descriptor, when the reported events are anything other than SCIF_POLLIN,
 * or when the module's `exiting` flag is set (in which case the pending
 * connection is accepted and immediately closed).
 *
 * @param arg unused
 * @return always NULL
 */
static void *mca_btl_scif_connect_accept (void *arg)
{
    struct scif_pollepd pollepd = {.epd = mca_btl_scif_module.scif_fd, .events = SCIF_POLLIN, .revents = 0};
    int rc;

    (void) arg;

    BTL_VERBOSE(("btl/scif: listening for new connections"));

    /* listen for connections */
    while (1) {
        pollepd.revents = 0;

        rc = scif_poll (&pollepd, 1, -1);
        if (1 != rc) {
            break;
        }

        if (SCIF_POLLIN != pollepd.revents) {
            break;
        }

        if (mca_btl_scif_module.exiting) {
            /* accept the connection so scif_connect() does not timeout */
            struct scif_portID peer;
            scif_epd_t newepd;

            /* only close the new endpoint if the accept actually produced
             * one; on failure newepd is uninitialized */
            if (0 == scif_accept(mca_btl_scif_module.scif_fd, &peer, &newepd, SCIF_ACCEPT_SYNC)) {
                scif_close(newepd);
            }
            break;
        }

        rc = mca_btl_scif_ep_connect_start_passive ();
        if (OPAL_SUCCESS != rc) {
            BTL_VERBOSE(("btl/scif: error accepting scif connection"));
            continue;
        }
    }

    BTL_VERBOSE(("btl/scif: stopped listening for new connections"));

    return NULL;
}
/**
 * Notification that processes are being removed.
 *
 * Currently a no-op: no per-peer state is released here.
 *
 * @param btl    (IN) BTL module (unused)
 * @param nprocs (IN) Number of processes (unused)
 * @param procs  (IN) Set of processes (unused)
 * @param peers  (IN) Set of peer endpoints (unused)
 * @return OPAL_SUCCESS always
 */
int mca_btl_scif_del_procs (struct mca_btl_base_module_t *btl,
                            size_t nprocs, struct opal_proc_t **procs,
                            struct mca_btl_base_endpoint_t **peers)
{
    (void) btl;
    (void) nprocs;
    (void) procs;
    (void) peers;

    /* do nothing for now */
    return OPAL_SUCCESS;
}
/**
 * rcache deregistration callback.
 *
 * Unregisters the region from every endpoint that is connected and holds a
 * valid SCIF offset for it, then releases the per-endpoint handle array.
 *
 * @param reg_data (IN) callback context (unused)
 * @param reg      (IN) registration being torn down (a mca_btl_scif_reg_t)
 * @return OPAL_SUCCESS always; scif_unregister() failures are ignored
 */
static int scif_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg)
{
    mca_btl_scif_reg_t *scif_reg = (mca_btl_scif_reg_t *)reg;
    size_t size = (size_t)((uintptr_t) reg->bound - (uintptr_t) reg->base);
    size_t i;

    (void) reg_data;

    /* the handle array may be missing if registration failed early */
    if (NULL != scif_reg->handles) {
        /* unregister the region from all connected endpoints */
        for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) {
            if ((off_t)-1 != scif_reg->handles[i].btl_handle.scif_offset &&
                MCA_BTL_SCIF_EP_STATE_CONNECTED == mca_btl_scif_module.endpoints[i].state) {
                (void) scif_unregister(mca_btl_scif_module.endpoints[i].scif_epd,
                                       scif_reg->handles[i].btl_handle.scif_offset, size);
            }
        }

        free (scif_reg->handles);
        /* avoid leaving a dangling pointer in the registration */
        scif_reg->handles = NULL;
    }

    return OPAL_SUCCESS;
}
/**
 * rcache registration callback.
 *
 * Allocates one handle per endpoint, then registers [base, base + size)
 * read/write with every endpoint that is currently connected. Handles of
 * endpoints that are not connected keep a SCIF offset of -1.
 *
 * @param reg_data (IN) callback context, forwarded to scif_dereg_mem on failure
 * @param base     (IN) start of the region to register
 * @param size     (IN) length of the region in bytes
 * @param reg      (IN/OUT) registration to fill in (a mca_btl_scif_reg_t)
 * @return OPAL_SUCCESS, or OPAL_ERR_OUT_OF_RESOURCE if the handle array can
 *         not be allocated or any scif_register() call fails (in which case
 *         everything registered so far is undone).
 */
static int scif_reg_mem (void *reg_data, void *base, size_t size,
                         mca_rcache_base_registration_t *reg)
{
    mca_btl_scif_reg_t *scif_reg = (mca_btl_scif_reg_t *)reg;
    const size_t endpoint_count = mca_btl_scif_module.endpoint_count;
    size_t i;

    scif_reg->handles = calloc (endpoint_count, sizeof (scif_reg->handles[0]));
    /* calloc may legitimately return NULL for a zero-element request */
    if (NULL == scif_reg->handles && 0 != endpoint_count) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* initialize all scif offsets to -1 and initialize the pointer back to the rcache registration */
    for (i = 0 ; i < endpoint_count ; ++i) {
        scif_reg->handles[i].btl_handle.scif_offset = -1;
        scif_reg->handles[i].btl_handle.scif_base = (uintptr_t) base;
        scif_reg->handles[i].reg = scif_reg;
    }

    /* register the pointer with all connected endpoints */
    for (i = 0 ; i < endpoint_count ; ++i) {
        if (MCA_BTL_SCIF_EP_STATE_CONNECTED != mca_btl_scif_module.endpoints[i].state) {
            continue;
        }

        scif_reg->handles[i].btl_handle.scif_offset = scif_register (mca_btl_scif_module.endpoints[i].scif_epd,
                                                                     base, size, 0, SCIF_PROT_READ |
                                                                     SCIF_PROT_WRITE, 0);
        if (SCIF_REGISTER_FAILED == scif_reg->handles[i].btl_handle.scif_offset) {
            /* cleanup: undo the registrations made so far and release the
             * handle array, which must not be referenced afterwards */
            scif_dereg_mem (reg_data, reg);
            scif_reg->handles = NULL;
            return OPAL_ERR_OUT_OF_RESOURCE;
        }
    }

    return OPAL_SUCCESS;
}
/**
 * Create the registration cache and the fragment free lists of the module.
 *
 * @param scif_module (IN/OUT) module whose rcache, dma_frags and eager_frags
 *                             members are initialized
 * @return OPAL_SUCCESS, OPAL_ERROR if the "grdma" rcache can not be created,
 *         or the error returned by opal_free_list_init().
 */
static int mca_btl_scif_setup_rcache (mca_btl_scif_module_t *scif_module)
{
    /* designated initializer: any member not named here starts out zeroed
     * instead of holding stack garbage */
    mca_rcache_base_resources_t rcache_resources = {
        .cache_name = "scif",
        .reg_data = (void *) scif_module,
        .sizeof_reg = sizeof (mca_btl_scif_reg_t),
        .register_mem = scif_reg_mem,
        .deregister_mem = scif_dereg_mem,
    };
    int rc;

    /* initialize the grdma rcache */
    scif_module->rcache = mca_rcache_base_module_create ("grdma", scif_module, &rcache_resources);
    if (NULL == scif_module->rcache) {
        BTL_ERROR(("error creating grdma rcache"));
        return OPAL_ERROR;
    }

    /* setup free lists for fragments. dma fragments will be used for
     * rma operations and in-place sends. eager frags will be used for
     * buffered sends. */
    rc = opal_free_list_init (&scif_module->dma_frags,
                              sizeof (mca_btl_scif_dma_frag_t), 64,
                              OBJ_CLASS(mca_btl_scif_dma_frag_t),
                              128, opal_getpagesize (),
                              mca_btl_scif_component.scif_free_list_num,
                              mca_btl_scif_component.scif_free_list_max,
                              mca_btl_scif_component.scif_free_list_inc,
                              NULL, 0, NULL, NULL, NULL);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        /* report this failure the same way as the eager list below */
        BTL_ERROR(("error creating dma fragment free list"));
        return rc;
    }

    rc = opal_free_list_init (&scif_module->eager_frags,
                              sizeof (mca_btl_scif_eager_frag_t), 8,
                              OBJ_CLASS(mca_btl_scif_eager_frag_t),
                              128 + scif_module->super.btl_eager_limit, 64,
                              mca_btl_scif_component.scif_free_list_num,
                              mca_btl_scif_component.scif_free_list_max,
                              mca_btl_scif_component.scif_free_list_inc,
                              NULL, 0, NULL, NULL, NULL);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        BTL_ERROR(("error creating eager receive fragment free list"));
        return rc;
    }

    return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,386 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "btl_scif.h"
#include "btl_scif_frag.h"
#include "opal/runtime/opal_params.h"
#include "opal/include/opal/align.h"
#include "opal/memoryhooks/memory.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/mca/base/mca_base_pvar.h"
#include <scif.h>
static int btl_scif_component_register(void);
static int btl_scif_component_open(void);
static int btl_scif_component_close(void);
static mca_btl_base_module_t **mca_btl_scif_component_init(int *, bool, bool);
static int mca_btl_scif_component_progress(void);
/*
 * The single SCIF BTL component instance. Only the base component
 * (`super`) is initialized here; the remaining members are given their
 * defaults in btl_scif_component_register().
 */
mca_btl_scif_component_t mca_btl_scif_component = {
{
/* First, the mca_base_component_t struct containing meta information
about the component itself */
.btl_version = {
MCA_BTL_DEFAULT_VERSION("scif"),
/* component life-cycle callbacks */
.mca_open_component = btl_scif_component_open,
.mca_close_component = btl_scif_component_close,
.mca_register_component_params = btl_scif_component_register,
},
.btl_data = {
.param_field = MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* BTL entry points: module creation and progress */
.btl_init = mca_btl_scif_component_init,
.btl_progress = mca_btl_scif_component_progress,
}
};
/*
 * Register the component's MCA variables and set the module defaults.
 *
 * Each tunable is first assigned its default value and then registered so
 * the MCA variable system can override it: free_list_num, free_list_max,
 * free_list_inc, segment_size, rma_use_cpu and rma_sync. When SCIF_TIMING
 * is defined, the aggregate timers and the get/put counters are also
 * registered as read-only performance variables (the *_max fields are not
 * registered here). Finally the default BTL module limits/flags are set and
 * the generic BTL parameters are registered.
 *
 * Return values of the individual registration calls are ignored.
 *
 * Returns OPAL_SUCCESS always.
 */
static int btl_scif_component_register(void)
{
(void) mca_base_var_group_component_register(&mca_btl_scif_component.super.btl_version,
"SCIF byte transport layer");
/* fragment free list sizing */
mca_btl_scif_component.scif_free_list_num = 8;
(void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version,
"free_list_num", "Initial fragment free list size",
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_scif_component.scif_free_list_num);
mca_btl_scif_component.scif_free_list_max = 16384;
(void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version,
"free_list_max", "Maximum fragment free list size",
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_scif_component.scif_free_list_max);
mca_btl_scif_component.scif_free_list_inc = 64;
(void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version,
"free_list_inc", "Fragment free list size increment",
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_scif_component.scif_free_list_inc);
/* per-peer memory segment size */
mca_btl_scif_component.segment_size = 8 * 1024;
(void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version,
"segment_size", "Size of memory segment to "
"allocate for each remote process (default: "
"8k)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_scif_component.segment_size);
/* RMA behavior switches */
mca_btl_scif_component.rma_use_cpu = false;
(void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version,
"rma_use_cpu", "Use CPU instead of DMA "
"for RMA copies (default: false)", MCA_BASE_VAR_TYPE_BOOL,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_scif_component.rma_use_cpu);
mca_btl_scif_component.rma_sync = true;
(void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version,
"rma_sync", "Use synchronous RMA instead of "
"an RMA fence (default: true)", MCA_BASE_VAR_TYPE_BOOL,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_scif_component.rma_sync);
#if defined(SCIF_TIMING)
/* read-only, continuous performance variables for the debug timers */
mca_btl_scif_component.aquire_buffer_time = 0.0;
(void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
"aquire_buffer_time", "Aggregate time spent "
"aquiring send buffers", OPAL_INFO_LVL_9,
MCA_BASE_PVAR_CLASS_AGGREGATE, MCA_BASE_VAR_TYPE_DOUBLE,
NULL, MCA_BASE_VAR_BIND_NO_OBJECT, MCA_BASE_PVAR_FLAG_READONLY |
MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL, NULL, NULL,
&mca_btl_scif_component.aquire_buffer_time);
mca_btl_scif_component.send_time = 0.0;
(void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
"send_time", "Aggregate time spent writing to "
"send buffers", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE,
MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
NULL, NULL, NULL, &mca_btl_scif_component.send_time);
mca_btl_scif_component.sendi_time = 0.0;
(void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
"sendi_time", "Aggregate time spent writing to "
"send buffers in sendi", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE,
MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
NULL, NULL, NULL, &mca_btl_scif_component.sendi_time);
mca_btl_scif_component.get_time = 0.0;
(void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
"get_time", "Aggregate time spent in DMA read (scif_readfrom)",
OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE,
MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
NULL, NULL, NULL, &mca_btl_scif_component.get_time);
mca_btl_scif_component.get_count = 0;
(void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
"get_count", "Number of times btl_scif_get was called",
OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_COUNTER,
MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
NULL, NULL, NULL, &mca_btl_scif_component.get_count);
mca_btl_scif_component.put_time = 0.0;
(void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
"put_time", "Aggregate time spent in DMA write (scif_writeto)",
OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE,
MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
NULL, NULL, NULL, &mca_btl_scif_component.put_time);
mca_btl_scif_component.put_count = 0;
(void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
"put_count", "Number of times btl_scif_put was called",
OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_COUNTER,
MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
NULL, NULL, NULL, &mca_btl_scif_component.put_count);
#endif
/* default module parameters; set before the generic BTL registration below */
mca_btl_scif_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 1;
mca_btl_scif_module.super.btl_eager_limit = 1 * 1024;
mca_btl_scif_module.super.btl_rndv_eager_limit = 1 * 1024;
mca_btl_scif_module.super.btl_rdma_pipeline_frag_size = 4 * 1024 * 1024;
mca_btl_scif_module.super.btl_max_send_size = 1 * 1024;
mca_btl_scif_module.super.btl_rdma_pipeline_send_length = 1 * 1024;
/* threshold for put */
mca_btl_scif_module.super.btl_min_rdma_pipeline_size = 1 * 1024;
mca_btl_scif_module.super.btl_flags = MCA_BTL_FLAGS_SEND |
MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE;
mca_btl_scif_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
mca_btl_scif_module.super.btl_bandwidth = 50000; /* Mbs */
mca_btl_scif_module.super.btl_latency = 2; /* Microsecs */
/* Call the BTL base to register its MCA params */
mca_btl_base_param_register(&mca_btl_scif_component.super.btl_version,
&mca_btl_scif_module.super);
return OPAL_SUCCESS;
}
/* No setup is performed here; this function always returns OPAL_SUCCESS. */
static int btl_scif_component_open(void)
{
return OPAL_SUCCESS;
}
/* No teardown is performed here; this function always returns OPAL_SUCCESS. */
static int btl_scif_component_close(void)
{
return OPAL_SUCCESS;
}
/**
 * Reconcile the leave-pinned settings with what the memory hooks support.
 *
 * If the hooks report both free and munmap support, an unset (-1)
 * opal_leave_pinned becomes the negation of opal_leave_pinned_pipeline.
 * Otherwise both opal_leave_pinned and opal_leave_pinned_pipeline are
 * forced to 0.
 */
static void mca_btl_scif_autoset_leave_pinned (void) {
    const int required = OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT;
    const int supported = opal_mem_hooks_support_level();

    if (required != (required & supported)) {
        /* the hooks we depend on are missing: disable both modes */
        opal_leave_pinned = 0;
        opal_leave_pinned_pipeline = 0;
        return;
    }

    /* Set leave pinned to 1 if leave pinned pipeline is not set */
    if (-1 == opal_leave_pinned) {
        opal_leave_pinned = !opal_leave_pinned_pipeline;
    }
}
/**
 * Publish this process's SCIF port id through the modex.
 *
 * @return the status stored by OPAL_MODEX_SEND
 */
static int mca_btl_scif_modex_send (void)
{
    mca_btl_scif_modex_t local_info;
    int status;

    /* zero the whole structure (including any padding) before filling it in */
    memset(&local_info, 0, sizeof(local_info));
    local_info.port_id = mca_btl_scif_module.port_id;

    OPAL_MODEX_SEND(status, OPAL_PMIX_LOCAL,
                    &mca_btl_scif_component.super.btl_version,
                    &local_info, sizeof (local_info));

    return status;
}
/**
 * Initialize the scif component and hand back its single module.
 *
 * Clamps the eager limit to 32768, grows the segment size to at least four
 * eager packets, rounds it up to a multiple of 4096, initializes the module
 * and publishes the modex data.
 *
 * @param num_btl_modules          (OUT) set to 1 on success
 * @param enable_progress_threads  not used by this function
 * @param enable_mpi_threads       not used by this function
 *
 * @return a calloc'ed one-element module array, or NULL on any failure
 */
static mca_btl_base_module_t **mca_btl_scif_component_init (int *num_btl_modules,
                                                            bool enable_progress_threads,
                                                            bool enable_mpi_threads)
{
    struct mca_btl_base_module_t **module_list;
    int status;

    BTL_VERBOSE(("btl/scif initializing"));

    signal (SIGSEGV, SIG_DFL);

    /* we currently need the memory hooks to determine when
     * registrations are no longer valid. */
    mca_btl_scif_autoset_leave_pinned ();

    /* cap the eager limit */
    if (mca_btl_scif_module.super.btl_eager_limit > 32768) {
        mca_btl_scif_module.super.btl_eager_limit = 32768;
    }

    /* the segment should be large enough to hold at least one eager packet */
    if (mca_btl_scif_component.segment_size < 4 * mca_btl_scif_module.super.btl_eager_limit) {
        mca_btl_scif_component.segment_size = 4 * mca_btl_scif_module.super.btl_eager_limit;
    }

    /* round up to a multiple of 4096 */
    mca_btl_scif_component.segment_size = (mca_btl_scif_component.segment_size + 0xfff) & ~0xfff;

    module_list = calloc (1, sizeof (*module_list));
    if (OPAL_UNLIKELY(NULL == module_list)) {
        BTL_ERROR(("Malloc failed : %s:%d", __FILE__, __LINE__));
        return NULL;
    }

    /* initialize the module */
    status = mca_btl_scif_module_init ();
    if (OPAL_SUCCESS != status) {
        BTL_VERBOSE(("btl/scif error initializing module"));
        free (module_list);
        return NULL;
    }

    module_list[0] = &mca_btl_scif_module.super;
    mca_btl_scif_module.exiting = false;
    mca_btl_scif_module.listening = false;

    status = mca_btl_scif_modex_send ();
    if (OPAL_SUCCESS != status) {
        BTL_VERBOSE(("btl/scif error sending modex"));
        free (module_list);
        return NULL;
    }

    *num_btl_modules = 1;

    BTL_VERBOSE(("btl/scif done initializing modules"));

    return module_list;
}
/*
 * Process fragments waiting in an endpoint's receive buffer.
 *
 * Walks the buffer from the local read position towards the position
 * published through endp[0], invoking the active-message callback registered
 * for each fragment's tag, then publishes the new read position through
 * startp[0]. The per-call budget is five loop iterations.
 *
 * Returns 0 immediately when the two positions are equal; otherwise returns
 * 5 minus the remaining budget.
 */
static int mca_btl_scif_progress_recvs (mca_btl_base_endpoint_t *ep)
{
const mca_btl_active_message_callback_t *reg;
unsigned int start = ep->recv_buffer.start;
unsigned int end = ep->recv_buffer.endp[0];
mca_btl_scif_base_frag_t frag;
mca_btl_scif_frag_hdr_t *hdr;
/* changing this value does not appear to have a significant impact
* on performance */
int frags_per_loop = 5;
if (end == start) {
return 0;
}
/* strip bit 31 so only the byte offset remains.
* NOTE(review): bit 31 looks like a wrap-around flag (it is toggled below
* whenever the read position returns to offset 64) -- confirm against the
* sender side. */
end &= ~ (1 << 31);
start &= ~ (1 << 31);
/* force all prior reads to complete before continuing */
opal_atomic_rmb ();
do {
/* the fragment header sits at the current read offset */
hdr = (mca_btl_scif_frag_hdr_t *) (ep->recv_buffer.buffer + start);
/* force all prior reads to complete before continuing */
MB();
BTL_VERBOSE(("got frag with header {.tag = %d, .size = %d} from offset %u",
hdr->tag, hdr->size, start));
#if defined(SCIF_USE_SEQ)
/* stop at the first header whose sequence number is not the expected one */
if (hdr->seq != ep->seq_expected) {
break;
}
ep->seq_expected++;
#endif
/* tag 0xff is a message to skip the rest of the buffer; every other
* tag is dispatched to its registered callback */
if (0xff != hdr->tag) {
reg = mca_btl_base_active_message_trigger + hdr->tag;
/* fragment fits entirely in the remaining buffer space. some
* btl users do not handle fragmented data so we can't split
* the fragment without introducing another copy here. this
* limitation has not appeared to cause any performance
* problems. */
/* describe the payload (the bytes right after the header) with a
* temporary stack descriptor */
frag.base.des_segment_count = 1;
frag.segments[0].seg_len = hdr->size;
frag.segments[0].seg_addr.pval = (void *) (hdr + 1);
frag.base.des_segments = frag.segments;
/* call the registered callback function */
reg->cbfunc(&mca_btl_scif_module.super, hdr->tag, &frag.base, reg->cbdata);
}
/* advance past header + payload, rounded up to a 64-byte boundary */
start = (start + hdr->size + sizeof (*hdr) + 63) & ~63;
/* skip unusable space at the end of the buffer */
if (mca_btl_scif_component.segment_size == start) {
/* wrap to offset 64 and flip bit 31 of the stored position */
start = 64;
ep->recv_buffer.start = ((ep->recv_buffer.start & (1 << 31)) ^ (1 << 31)) | 64;
} else {
/* keep bit 31 as is and store the new offset */
ep->recv_buffer.start = (ep->recv_buffer.start & (1 << 31)) | start;
}
} while (start != end && --frags_per_loop);
/* let the sender know where we stopped */
ep->recv_buffer.startp[0] = ep->recv_buffer.start;
/* return the number of fragments processed.
* NOTE(review): the budget is only decremented when start != end, so the
* iteration that catches up with the sender is not counted -- confirm this
* is intended. */
return 5 - frags_per_loop;
}
/**
 * Retry fragments queued on the endpoint's wait list, if there are any.
 *
 * @return the result of mca_btl_scif_progress_send_wait_list() when the wait
 *         list is non-empty, 0 otherwise
 */
static int mca_btl_scif_progress_sends (mca_btl_base_endpoint_t *ep)
{
    /* an empty wait list is the expected case */
    return OPAL_UNLIKELY(0 != opal_list_get_size (&ep->frag_wait_list))
        ? mca_btl_scif_progress_send_wait_list (ep)
        : 0;
}
/**
 * Progress every connected endpoint of the scif module.
 *
 * @return the sum of the values returned by the receive and send progress
 *         functions over all connected endpoints
 */
static int mca_btl_scif_component_progress (void)
{
    unsigned int idx;
    int progressed = 0;

    for (idx = 0 ; idx < mca_btl_scif_module.endpoint_count ; ++idx) {
        /* endpoints that are not connected are skipped */
        if (MCA_BTL_SCIF_EP_STATE_CONNECTED != mca_btl_scif_module.endpoints[idx].state) {
            continue;
        }

        /* poll for incoming fragments */
        progressed += mca_btl_scif_progress_recvs (mca_btl_scif_module.endpoints + idx);

        /* if any fragments are waiting try to send them now */
        progressed += mca_btl_scif_progress_sends (mca_btl_scif_module.endpoints + idx);
    }

    return progressed;
}

Просмотреть файл

@ -1,301 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "btl_scif.h"
#include "btl_scif_endpoint.h"
#include "opal/mca/memchecker/base/base.h"
#include "opal/util/sys_limits.h"
/* Object constructor: zero every endpoint field that follows the embedded
 * super object, then construct the lock and the fragment wait list. */
static void mca_btl_scif_ep_construct (mca_btl_base_endpoint_t *ep) {
    const size_t super_len = sizeof (ep->super);
    char *own_fields = (char *) ep + super_len;

    memset (own_fields, 0, sizeof (*ep) - super_len);

    OBJ_CONSTRUCT(&ep->lock, opal_mutex_t);
    OBJ_CONSTRUCT(&ep->frag_wait_list, opal_list_t);
}
/* Object destructor: unmap the send buffer, unregister and free the receive
 * buffer, close the scif endpoint, then destruct the lock and wait list. */
static void mca_btl_scif_ep_destruct (mca_btl_base_endpoint_t *ep) {
    /* mapping of the peer's segment */
    if (NULL != ep->send_buffer.buffer) {
        scif_munmap (ep->send_buffer.buffer, mca_btl_scif_component.segment_size);
    }

    /* locally allocated segment registered with scif */
    if (NULL != ep->recv_buffer.buffer) {
        scif_unregister (ep->scif_epd, ep->recv_buffer.scif_offset,
                         mca_btl_scif_component.segment_size);
        free (ep->recv_buffer.buffer);
    }

    /* any non-zero descriptor value is closed */
    if (0 != ep->scif_epd) {
        scif_close (ep->scif_epd);
    }

    OBJ_DESTRUCT(&ep->lock);
    OBJ_DESTRUCT(&ep->frag_wait_list);
}
/* Class instance for scif endpoints: parent class opal_list_item_t, with the
 * constructor and destructor defined above. */
OBJ_CLASS_INSTANCE(mca_btl_scif_endpoint_t, opal_list_item_t,
mca_btl_scif_ep_construct, mca_btl_scif_ep_destruct);
/* Unregister and free the endpoint's receive buffer (if one exists) and
 * reset the buffer pointer and scif offset to their "none" values. */
static void mca_btl_scif_ep_free_buffer (mca_btl_base_endpoint_t *ep) {
    if (NULL == ep->recv_buffer.buffer) {
        /* nothing was allocated */
        return;
    }

    scif_unregister (ep->scif_epd, ep->recv_buffer.scif_offset,
                     mca_btl_scif_component.segment_size);
    free (ep->recv_buffer.buffer);

    ep->recv_buffer.buffer = NULL;
    ep->recv_buffer.scif_offset = (off_t) -1;
}
/**
 * Allocate, zero and scif-register the receive buffer of an endpoint.
 *
 * The buffer is page aligned and mca_btl_scif_component.segment_size bytes
 * long. Its first two 32-bit words are used as the start/end indices and are
 * both initialized to 64.
 *
 * @param ep  endpoint whose recv_buffer is set up (ep->scif_epd must be valid)
 *
 * @return OPAL_SUCCESS on success, OPAL_ERR_OUT_OF_RESOURCE if the allocation
 *         fails, OPAL_ERROR if scif_register() fails. On failure
 *         ep->recv_buffer.buffer is NULL.
 */
static inline int mca_btl_scif_ep_get_buffer (mca_btl_base_endpoint_t *ep) {
    void *buffer = NULL;
    int rc;

    /* posix_memalign() returns 0 on success and a POSITIVE error number on
     * failure, so the result must be compared against 0 (a "< 0" test never
     * fires). The pointer is undefined on failure, hence the local. */
    rc = posix_memalign (&buffer, opal_getpagesize(), mca_btl_scif_component.segment_size);
    if (0 != rc) {
        ep->recv_buffer.buffer = NULL;
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    ep->recv_buffer.buffer = buffer;
    memset (ep->recv_buffer.buffer, 0, mca_btl_scif_component.segment_size);

    ep->recv_buffer.scif_offset = scif_register (ep->scif_epd, ep->recv_buffer.buffer,
                                                 mca_btl_scif_component.segment_size, 0,
                                                 SCIF_PROT_READ | SCIF_PROT_WRITE, 0);
    if (SCIF_REGISTER_FAILED == ep->recv_buffer.scif_offset) {
        BTL_VERBOSE(("failed to register a scif buffer of size %d. errno = %d",
                     mca_btl_scif_component.segment_size, errno));
        free (ep->recv_buffer.buffer);
        ep->recv_buffer.buffer = NULL;
        return OPAL_ERROR;
    }

    /* the start/end indices live in the first two words of the buffer */
    ep->recv_buffer.startp = (uint32_t *) ep->recv_buffer.buffer;
    ep->recv_buffer.endp = ep->recv_buffer.startp + 1;
    ep->recv_buffer.startp[0] = ep->recv_buffer.endp[0] = 64;

    BTL_VERBOSE(("allocated buffer of size %d bytes. with scif registration %lu",
                 mca_btl_scif_component.segment_size, (unsigned long) ep->recv_buffer.scif_offset));

    return OPAL_SUCCESS;
}
/**
 * Complete a connection on an endpoint whose scif descriptor is connected.
 *
 * Allocates and registers the local receive buffer, exchanges the registered
 * offsets with the peer (the passive side receives first, the active side
 * sends first), maps the peer's buffer as the local send buffer and marks the
 * endpoint connected.
 *
 * must be called with the endpoint lock held
 *
 * @param ep       endpoint being connected
 * @param passive  true when this side accepted the connection
 *
 * @return OPAL_SUCCESS, the error from the buffer allocation, or OPAL_ERROR
 *         if the exchange or the mapping fails (the receive buffer is
 *         released in the latter two cases)
 */
static int mca_btl_scif_ep_connect_finish (mca_btl_base_endpoint_t *ep, bool passive) {
    void *mapped;
    int rc;

    rc = mca_btl_scif_ep_get_buffer (ep);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        BTL_VERBOSE(("error allocating buffer for scif peer"));
        return rc;
    }

    /* the two sides use opposite send/recv orders so the blocking calls pair up */
    if (passive) {
        rc = scif_recv (ep->scif_epd, &ep->send_buffer.scif_offset,
                        sizeof (ep->send_buffer.scif_offset), SCIF_RECV_BLOCK);
        if (OPAL_LIKELY(-1 != rc)) {
            rc = scif_send (ep->scif_epd, &ep->recv_buffer.scif_offset,
                            sizeof (ep->recv_buffer.scif_offset), SCIF_SEND_BLOCK);
        }
    } else {
        rc = scif_send (ep->scif_epd, &ep->recv_buffer.scif_offset,
                        sizeof (ep->recv_buffer.scif_offset), SCIF_SEND_BLOCK);
        if (OPAL_LIKELY(-1 != rc)) {
            rc = scif_recv (ep->scif_epd, &ep->send_buffer.scif_offset,
                            sizeof (ep->send_buffer.scif_offset), SCIF_RECV_BLOCK);
        }
    }

    if (OPAL_UNLIKELY(-1 == rc)) {
        BTL_VERBOSE(("error exchanging connection data with peer %d", ep->peer_proc->proc_name.vpid));
        mca_btl_scif_ep_free_buffer (ep);
        return OPAL_ERROR;
    }

    BTL_VERBOSE(("remote peer %d has scif offset %lu", ep->peer_proc->proc_name.vpid,
                 (unsigned long) ep->send_buffer.scif_offset));

    mapped = scif_mmap (0, mca_btl_scif_component.segment_size,
                        SCIF_PROT_READ | SCIF_PROT_WRITE,
                        0, ep->scif_epd, ep->send_buffer.scif_offset);
    /* scif_mmap() reports failure like mmap(): it returns (void *) -1
     * (SCIF_MMAP_FAILED), not NULL. Both values are rejected here. */
    if (OPAL_UNLIKELY(NULL == mapped || (void *) -1 == mapped)) {
        BTL_VERBOSE(("error in scif_mmap"));
        /* keep the destructor from trying to unmap an invalid address */
        ep->send_buffer.buffer = NULL;
        mca_btl_scif_ep_free_buffer (ep);
        return OPAL_ERROR;
    }

    ep->send_buffer.buffer = mapped;

    opal_memchecker_base_mem_defined (ep->send_buffer.buffer, mca_btl_scif_component.segment_size);

    BTL_VERBOSE(("remote peer %d buffer mapped to local pointer %p", ep->peer_proc->proc_name.vpid,
                 ep->send_buffer.buffer));

    /* setup the circular send buffers */
    ep->send_buffer.start = ep->send_buffer.end = 64;

    ep->send_buffer.startp = (uint32_t *) ep->send_buffer.buffer;
    ep->send_buffer.endp = ep->send_buffer.startp + 1;

    ep->recv_buffer.start = 64;

    /* connection complete */
    ep->state = MCA_BTL_SCIF_EP_STATE_CONNECTED;

    BTL_VERBOSE(("btl/scif connection to remote peer %d established", ep->peer_proc->proc_name.vpid));

    return OPAL_SUCCESS;
}
/*
 * Accept one incoming connection on the module's listening endpoint and, if
 * appropriate, finish the connection on the passive side.
 *
 * Returns OPAL_ERROR if the accept or the initial receive fails or the peer
 * is unknown, OPAL_SUCCESS if the request is deliberately dropped because the
 * endpoint is already connected (or an active connection takes precedence),
 * otherwise the result of mca_btl_scif_ep_connect_finish().
 */
int mca_btl_scif_ep_connect_start_passive (void) {
mca_btl_base_endpoint_t *ep = NULL;
opal_process_name_t remote_name;
struct scif_portID port_id;
unsigned int i;
scif_epd_t epd;
int rc;
/* accept the connection request. if the endpoint is already connecting we
* may close this endpoint and allow mca_btl_scif_ep_connect_start_active
* to finish the connection. */
rc = scif_accept (mca_btl_scif_module.scif_fd, &port_id, &epd, SCIF_ACCEPT_SYNC);
if (OPAL_UNLIKELY(0 > rc)) {
BTL_VERBOSE(("error accepting connecton from scif peer. %d", errno));
return OPAL_ERROR;
}
/* determine which peer sent the connection request */
rc = scif_recv (epd, &remote_name, sizeof (remote_name), SCIF_RECV_BLOCK);
if (OPAL_UNLIKELY(-1 == rc)) {
BTL_VERBOSE(("error in scif_recv"));
scif_close (epd);
return OPAL_ERROR;
}
BTL_VERBOSE(("got connection request from vpid %d on port %u on node %u",
remote_name.vpid, port_id.port, port_id.node));
/* look up the endpoint whose peer has the same vpid as the sender */
for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) {
if (mca_btl_scif_module.endpoints[i].peer_proc->proc_name.vpid ==
remote_name.vpid) {
ep = mca_btl_scif_module.endpoints + i;
break;
}
}
/* peer not found */
if (i == mca_btl_scif_module.endpoint_count) {
BTL_VERBOSE(("remote peer %d unknown", remote_name.vpid));
scif_close (epd);
return OPAL_ERROR;
}
/* simultaneous connections (active side): drop this request when we are
* already connected, or when we are connecting and the peer's port number
* is lower than ours. this first check is made without the endpoint lock. */
if ((MCA_BTL_SCIF_EP_STATE_CONNECTING == ep->state &&
ep->port_id.port < mca_btl_scif_module.port_id.port) ||
MCA_BTL_SCIF_EP_STATE_CONNECTED == ep->state) {
BTL_VERBOSE(("active connection in progress. connection request from peer %d rejected", remote_name.vpid));
scif_close (epd);
return OPAL_SUCCESS;
}
opal_mutex_lock (&ep->lock);
/* re-check under the lock: the state may have become connected meanwhile */
if (MCA_BTL_SCIF_EP_STATE_CONNECTED == ep->state) {
opal_mutex_unlock (&ep->lock);
scif_close (epd);
return OPAL_SUCCESS;
}
BTL_VERBOSE(("accepted connection from port %d", ep->port_id.port));
ep->state = MCA_BTL_SCIF_EP_STATE_CONNECTING;
ep->scif_epd = epd;
rc = mca_btl_scif_ep_connect_finish (ep, true);
if (OPAL_SUCCESS != rc) {
/* roll the endpoint back so a later attempt can start from scratch */
scif_close (ep->scif_epd);
ep->scif_epd = -1;
ep->state = MCA_BTL_SCIF_EP_STATE_INIT;
}
opal_mutex_unlock (&ep->lock);
return rc;
}
/*
 * Actively connect to the peer of an endpoint.
 *
 * Holds the endpoint lock for the whole attempt. Returns OPAL_SUCCESS if the
 * endpoint has already left the INIT state or the connection completes,
 * OPAL_ERR_RESOURCE_BUSY if scif_connect() fails, OPAL_ERROR if the scif
 * endpoint cannot be created or the name cannot be sent, or the error from
 * mca_btl_scif_ep_connect_finish(). On any failure the endpoint is returned
 * to the INIT state.
 */
static inline int mca_btl_scif_ep_connect_start_active (mca_btl_base_endpoint_t *ep) {
int rc = OPAL_SUCCESS;
BTL_VERBOSE(("initiaiting connection to remote peer %d with port: %u on local scif node: %u",
ep->peer_proc->proc_name.vpid, ep->port_id.port, ep->port_id.node));
opal_mutex_lock (&ep->lock);
/* single-pass loop: "break" jumps to the common cleanup/unlock code below */
do {
if (MCA_BTL_SCIF_EP_STATE_INIT != ep->state) {
/* the accept thread has already finished this connection */
rc = OPAL_SUCCESS;
break;
}
ep->state = MCA_BTL_SCIF_EP_STATE_CONNECTING;
ep->scif_epd = scif_open ();
if (OPAL_UNLIKELY(SCIF_OPEN_FAILED == ep->scif_epd)) {
BTL_VERBOSE(("error creating new scif endpoint"));
rc = OPAL_ERROR;
break;
}
rc = scif_connect (ep->scif_epd, &ep->port_id);
if (OPAL_UNLIKELY(-1 == rc)) {
/* the connection attempt failed. this could mean the peer is currently
* processing connections. we will try again later. */
BTL_VERBOSE(("error connecting to scif peer. %d", errno));
rc = OPAL_ERR_RESOURCE_BUSY;
break;
}
/* tell the peer who we are; the passive side reads this name first */
rc = scif_send (ep->scif_epd, &OPAL_PROC_MY_NAME, sizeof (OPAL_PROC_MY_NAME), SCIF_SEND_BLOCK);
if (OPAL_UNLIKELY(-1 == rc)) {
BTL_VERBOSE(("error in scif_send"));
rc = OPAL_ERROR;
break;
}
/* build connection data */
rc = mca_btl_scif_ep_connect_finish (ep, false);
} while (0);
if (OPAL_SUCCESS != rc) {
/* undo the attempt: close the descriptor and go back to INIT */
scif_close (ep->scif_epd);
ep->scif_epd = -1;
ep->state = MCA_BTL_SCIF_EP_STATE_INIT;
}
opal_mutex_unlock (&ep->lock);
return rc;
}
/**
 * Make sure a connection to the endpoint's peer exists or is under way.
 *
 * @param ep  endpoint to connect
 *
 * @return OPAL_SUCCESS if the endpoint is connected (or the active connect
 *         succeeded), OPAL_ERR_RESOURCE_BUSY while a connection is in
 *         progress, otherwise the error from the active connection attempt
 */
int mca_btl_scif_ep_connect (mca_btl_base_endpoint_t *ep) {
    /* common case: nothing to do */
    if (OPAL_LIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED == ep->state)) {
        return OPAL_SUCCESS;
    }

    /* someone else is already working on this endpoint */
    if (MCA_BTL_SCIF_EP_STATE_CONNECTING == ep->state) {
        return OPAL_ERR_RESOURCE_BUSY;
    }

    if (MCA_BTL_SCIF_EP_STATE_INIT != ep->state) {
        return OPAL_SUCCESS;
    }

    /* the active attempt yields OPAL_SUCCESS or the error to report */
    return mca_btl_scif_ep_connect_start_active (ep);
}

Просмотреть файл

@ -1,110 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_SCIF_ENDPOINT_H
#define MCA_BTL_SCIF_ENDPOINT_H
#include "btl_scif.h"
#include "opal/mca/pmix/pmix.h"
/* Connection state of a scif endpoint. */
typedef enum mca_btl_scif_endpoint_state_t {
/* state assigned by mca_btl_scif_ep_init() */
MCA_BTL_SCIF_EP_STATE_INIT,
/* a connection attempt is in progress */
MCA_BTL_SCIF_EP_STATE_CONNECTING,
/* the connection is established */
MCA_BTL_SCIF_EP_STATE_CONNECTED
} mca_btl_scif_endpoint_state_t;
/* Bookkeeping for one direction (send or receive) of an endpoint's buffer. */
typedef struct mca_btl_scif_endpoint_buffer_t {
/* base address of the buffer */
unsigned char *buffer;
/* scif offset associated with the buffer */
off_t scif_offset;
/* locally tracked start and end positions */
unsigned int start, end;
/* pointers to the start and end positions.
* NOTE(review): presumably these point into the buffer itself so that both
* peers can see them -- confirm in the .c file. */
uint32_t *startp, *endp;
} mca_btl_scif_endpoint_buffer_t;
/* Per-peer endpoint of the scif BTL. mca_btl_scif_endpoint_t is an alias for
 * this type. */
typedef struct mca_btl_base_endpoint_t {
/* parent object (list item) */
opal_list_item_t super;
/* module this endpoint belongs to */
mca_btl_scif_module_t *btl;
/* location in the module endpoints array */
int id;
/* lock for this endpoint */
opal_mutex_t lock;
/* scif endpoint */
scif_epd_t scif_epd;
/* connection information */
struct scif_portID port_id;
/* buffer information */
mca_btl_scif_endpoint_buffer_t send_buffer;
mca_btl_scif_endpoint_buffer_t recv_buffer;
/* current connect state */
mca_btl_scif_endpoint_state_t state;
/* frags waiting for resources */
opal_list_t frag_wait_list;
/* associated process */
opal_proc_t *peer_proc;
#if defined(SCIF_USE_SEQ)
/* sequence counters, only present when SCIF_USE_SEQ is defined */
uint32_t seq_next;
uint32_t seq_expected;
#endif
} mca_btl_base_endpoint_t;
typedef mca_btl_base_endpoint_t mca_btl_scif_endpoint_t;
/* class declaration for the endpoint object */
OBJ_CLASS_DECLARATION(mca_btl_scif_endpoint_t);
/* connect (or check the connection state of) an endpoint */
int mca_btl_scif_ep_connect (mca_btl_scif_endpoint_t *ep);
/* handle one incoming connection request on the passive side */
int mca_btl_scif_ep_connect_start_passive (void);
/**
 * Construct an endpoint object and fill it in from the peer's modex data.
 *
 * @param endpoint   storage for the endpoint; constructed in place
 * @param btl        module the endpoint belongs to
 * @param peer_proc  process this endpoint talks to
 *
 * @return OPAL_SUCCESS, or the status produced by OPAL_MODEX_RECV on failure
 */
static inline int mca_btl_scif_ep_init (mca_btl_scif_endpoint_t *endpoint,
                                        mca_btl_scif_module_t *btl,
                                        opal_proc_t *peer_proc) {
    mca_btl_scif_modex_t *peer_modex;
    size_t modex_len;
    int status;

    OBJ_CONSTRUCT(endpoint, mca_btl_scif_endpoint_t);
    endpoint->state = MCA_BTL_SCIF_EP_STATE_INIT;

    /* fetch the port id the peer published */
    OPAL_MODEX_RECV(status, &mca_btl_scif_component.super.btl_version,
                    &peer_proc->proc_name, (void **) &peer_modex, &modex_len);
    if (OPAL_SUCCESS != status) {
        return status;
    }

    assert (modex_len == sizeof (endpoint->port_id));

    endpoint->port_id = peer_modex->port_id;
    endpoint->peer_proc = peer_proc;
    endpoint->btl = btl;

#if defined(SCIF_USE_SEQ)
    endpoint->seq_next = 0x00001010;
    endpoint->seq_expected = 0x00001010;
#endif

    /* the modex buffer is released here once its contents are copied */
    free (peer_modex);

    return OPAL_SUCCESS;
}
/* Destruct an endpoint object in place; always returns OPAL_SUCCESS. */
static inline int mca_btl_scif_ep_release (mca_btl_scif_endpoint_t *ep)
{
OBJ_DESTRUCT(ep);
return OPAL_SUCCESS;
}
#endif /* MCA_BTL_SCIF_ENDPOINT_H */

Просмотреть файл

@ -1,31 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "btl_scif.h"
#include "btl_scif_frag.h"
/* Constructor for scif fragments: clear every field after the embedded base
 * descriptor and point the first segment at the descriptor's buffer pointer. */
static inline void mca_btl_scif_base_frag_constructor (mca_btl_scif_base_frag_t *frag)
{
    const size_t base_len = sizeof (frag->base);
    char *own_fields = (char *) frag + base_len;

    memset (own_fields, 0, sizeof (*frag) - base_len);

    frag->segments[0].seg_addr.pval = frag->base.super.ptr;
}
/* Constructor variant for eager fragments. Performs the same work as the
 * base fragment constructor: zero the fields that follow the base descriptor
 * and aim segment 0 at the descriptor's buffer pointer. */
static inline void mca_btl_scif_eager_frag_constructor (mca_btl_scif_base_frag_t *frag)
{
    const size_t skip = sizeof (frag->base);
    const size_t remaining = sizeof (*frag) - skip;

    memset ((char *) frag + skip, 0, remaining);

    frag->segments[0].seg_addr.pval = frag->base.super.ptr;
}
/* Class instances for the two fragment kinds. Both derive from
 * mca_btl_base_descriptor_t, use the base fragment constructor and have no
 * destructor.
 * NOTE(review): mca_btl_scif_eager_frag_constructor is not referenced here;
 * confirm whether the eager class was meant to use it. */
OBJ_CLASS_INSTANCE(mca_btl_scif_eager_frag_t, mca_btl_base_descriptor_t,
mca_btl_scif_base_frag_constructor, NULL);
OBJ_CLASS_INSTANCE(mca_btl_scif_dma_frag_t, mca_btl_base_descriptor_t,
mca_btl_scif_base_frag_constructor, NULL);

Просмотреть файл

@ -1,95 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#if !defined(MCA_BTL_SCIF_FRAG_H)
#define MCA_BTL_SCIF_FRAG_H
#include "btl_scif.h"
#include "btl_scif_endpoint.h"
/* Header carried by every scif fragment. */
typedef struct mca_btl_scif_frag_hdr_t {
#if defined(SCIF_USE_SEQ)
/* sequence number, only present when SCIF_USE_SEQ is defined */
uint32_t seq;
#endif
/* message tag */
uint8_t tag;
/* header flags */
uint8_t flags;
/* size field.
* NOTE(review): presumably the payload size in bytes, excluding this
* header -- confirm against the send path. */
uint16_t size;
} mca_btl_scif_frag_hdr_t;
/* forward declaration so the callback type below can refer to the fragment */
struct mca_btl_scif_base_frag_t;
/* callback type taking a fragment and an int */
typedef void (*frag_cb_t) (struct mca_btl_scif_base_frag_t *, int);
/* Fragment descriptor used by the scif BTL. */
typedef struct mca_btl_scif_base_frag_t {
/* generic BTL descriptor; placed first so the fragment can be cast to it */
mca_btl_base_descriptor_t base;
/* fragment header */
mca_btl_scif_frag_hdr_t hdr;
/* up to two segments describing the fragment's data */
mca_btl_base_segment_t segments[2];
/* endpoint the fragment was allocated for */
mca_btl_base_endpoint_t *endpoint;
/* rcache registration attached to the fragment, or NULL */
mca_btl_scif_reg_t *registration;
/* free list the fragment is returned to */
opal_free_list_t *my_list;
} mca_btl_scif_base_frag_t;
/* the dma and eager fragment types are aliases of the base fragment type */
typedef mca_btl_scif_base_frag_t mca_btl_scif_dma_frag_t;
typedef mca_btl_scif_base_frag_t mca_btl_scif_eager_frag_t;
/* class declarations for the two fragment kinds */
OBJ_CLASS_DECLARATION(mca_btl_scif_dma_frag_t);
OBJ_CLASS_DECLARATION(mca_btl_scif_eager_frag_t);
/**
 * Take a fragment from a free list and bind it to an endpoint.
 *
 * @param ep    endpoint recorded in the fragment
 * @param list  free list to allocate from (also recorded in the fragment)
 * @param frag  (OUT) the fragment, or NULL when the list yields none
 *
 * @return OPAL_SUCCESS, or OPAL_ERR_OUT_OF_RESOURCE when no fragment is
 *         available
 */
static inline int mca_btl_scif_frag_alloc (mca_btl_base_endpoint_t *ep,
                                           opal_free_list_t *list,
                                           mca_btl_scif_base_frag_t **frag)
{
    mca_btl_scif_base_frag_t *item = (mca_btl_scif_base_frag_t *) opal_free_list_get (list);

    /* the out parameter is written even when the allocation fails */
    *frag = item;

    if (OPAL_LIKELY(NULL != item)) {
        item->my_list = list;
        item->endpoint = ep;
    }

    return (NULL != item) ? OPAL_SUCCESS : OPAL_ERR_OUT_OF_RESOURCE;
}
/**
 * Give a fragment back to the free list it came from.
 *
 * Any attached rcache registration is dropped first and the segments are
 * reset to their post-construction values.
 *
 * @return OPAL_SUCCESS always
 */
static inline int mca_btl_scif_frag_return (mca_btl_scif_base_frag_t *frag)
{
    if (NULL != frag->registration) {
        frag->endpoint->btl->rcache->rcache_deregister (frag->endpoint->btl->rcache,
                                                        &frag->registration->base);
        frag->registration = NULL;
    }

    /* reset both segments; segment 0 points at the fragment's own buffer again */
    frag->segments[1].seg_len = 0;
    frag->segments[0].seg_len = 0;
    frag->segments[0].seg_addr.pval = frag->base.super.ptr;

    opal_free_list_return (frag->my_list, (opal_free_list_item_t *) frag);

    return OPAL_SUCCESS;
}
/**
 * Finish a fragment: run its completion callback if the descriptor asks for
 * one, then return the fragment if the BTL owns it.
 *
 * @param frag  fragment that completed
 * @param rc    status forwarded to the callback
 */
static inline void mca_btl_scif_frag_complete (mca_btl_scif_base_frag_t *frag, int rc) {
    BTL_VERBOSE(("frag complete. flags = %d", frag->base.des_flags));

    /* call callback if specified */
    if (0 != (frag->base.des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) {
        frag->base.des_cbfunc(&frag->endpoint->btl->super, frag->endpoint, &frag->base, rc);
    }

    /* the flags are read again after the callback has run */
    if (0 == (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) {
        return;
    }

    mca_btl_scif_frag_return (frag);
}
/* Allocate a fragment for endpoint `ep` from its module's eager free list.
 * `frag` must be an lvalue: its address is passed to the allocator. `ep` is
 * evaluated twice. */
#define MCA_BTL_SCIF_FRAG_ALLOC_EAGER(ep, frag) \
mca_btl_scif_frag_alloc((ep), &(ep)->btl->eager_frags, &(frag))
/* Same as above, but allocating from the module's dma free list. */
#define MCA_BTL_SCIF_FRAG_ALLOC_DMA(ep, frag) \
mca_btl_scif_frag_alloc((ep), &(ep)->btl->dma_frags, &(frag))
#endif /* MCA_BTL_SCIF_FRAG_H */

Просмотреть файл

@ -1,75 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "btl_scif_frag.h"
#include <sys/time.h>
/* smaller of a and b; note that the selected argument is evaluated twice */
#define lmin(a,b) ((a) < (b) ? (a) : (b))
/**
* Initiate a get operation.
*
* Reads `size` bytes from the peer's registered memory into local registered
* memory with scif_readfrom(). Unless the synchronous RMA flag is set, the
* call then waits on a fence, so the transfer is complete when the callback
* is invoked.
*
* @param btl            module handle, forwarded to the callback
* @param endpoint       connected endpoint whose scif descriptor is used
* @param local_address  destination address in local memory
* @param remote_address source address in the peer's memory
* @param local_handle   registration handle covering local_address
* @param remote_handle  registration handle covering remote_address
* @param size           number of bytes to read
* @param flags          not used by this function
* @param order          not used by this function
* @param cbfunc         completion callback; always called with OPAL_SUCCESS
*                       once the read has been issued successfully
* @param cbcontext      forwarded to cbfunc
* @param cbdata         forwarded to cbfunc
*
* @return OPAL_SUCCESS, or OPAL_ERROR if scif_readfrom() fails (the callback
*         is not called in that case)
*/
int mca_btl_scif_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
int rc, mark, scif_flags = 0;
off_t roffset, loffset;
#if defined(SCIF_TIMING)
struct timespec ts;
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
mca_btl_scif_component.get_count++;
#endif
BTL_VERBOSE(("Using DMA Get from remote address %" PRIx64 " to local address %p",
remote_address, local_address));
/* translate both addresses into scif offsets: registered offset plus the
* distance of the address from the base of its registration */
roffset = remote_handle->scif_offset + (off_t)(remote_address - remote_handle->scif_base);
loffset = local_handle->scif_offset + (off_t)((intptr_t)local_address - local_handle->scif_base);
/* build the RMA flags from the component settings */
if (mca_btl_scif_component.rma_use_cpu) {
scif_flags = SCIF_RMA_USECPU;
}
if (mca_btl_scif_component.rma_sync) {
scif_flags |= SCIF_RMA_SYNC;
}
/* start the read */
rc = scif_readfrom (endpoint->scif_epd, loffset, size, roffset, scif_flags);
if (OPAL_UNLIKELY(-1 == rc)) {
return OPAL_ERROR;
}
if (!(scif_flags & SCIF_RMA_SYNC)) {
/* according to the scif documentation it is better to use a fence rather
* than using the SCIF_RMA_SYNC flag with scif_readfrom.
* NOTE(review): the return values of the two fence calls are not checked. */
scif_fence_mark (endpoint->scif_epd, SCIF_FENCE_INIT_SELF, &mark);
scif_fence_wait (endpoint->scif_epd, mark);
}
#if defined(SCIF_TIMING)
SCIF_UPDATE_TIMER(mca_btl_scif_component.get_time,
mca_btl_scif_component.get_time_max, ts);
#endif
/* always call the callback function */
cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);
return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,308 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "btl_scif.h"
#include "btl_scif_frag.h"
#include "btl_scif_endpoint.h"
/* Forward declarations of the file-local functions referenced by the module
 * table below. */
/* return a descriptor to the BTL */
static int
mca_btl_scif_free (struct mca_btl_base_module_t *btl,
mca_btl_base_descriptor_t *des);
/* tear down the module */
static int
mca_btl_scif_module_finalize (struct mca_btl_base_module_t* btl);
/* register a memory region for use with an endpoint */
static mca_btl_base_registration_handle_t *mca_btl_scif_register_mem (struct mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint,
void *base, size_t size, uint32_t flags);
/* release a registration obtained from mca_btl_scif_register_mem */
static int mca_btl_scif_deregister_mem (struct mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle);
/* build a send descriptor from a convertor */
static struct mca_btl_base_descriptor_t *
mca_btl_scif_prepare_src (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
struct opal_convertor_t *convertor,
uint8_t order, size_t reserve, size_t *size,
uint32_t flags);
/* The single scif module instance. Only the function table of the embedded
 * base module is initialized here; all other members start out zeroed. */
mca_btl_scif_module_t mca_btl_scif_module = {
.super = {
.btl_component = &mca_btl_scif_component.super,
.btl_add_procs = mca_btl_scif_add_procs,
.btl_del_procs = mca_btl_scif_del_procs,
.btl_finalize = mca_btl_scif_module_finalize,
.btl_alloc = mca_btl_scif_alloc,
.btl_free = mca_btl_scif_free,
.btl_prepare_src = mca_btl_scif_prepare_src,
.btl_send = mca_btl_scif_send,
.btl_sendi = mca_btl_scif_sendi,
.btl_put = mca_btl_scif_put,
.btl_get = mca_btl_scif_get,
.btl_register_mem = mca_btl_scif_register_mem,
.btl_deregister_mem = mca_btl_scif_deregister_mem,
}
};
/**
 * Initialize the scif module: open the listening scif endpoint, bind it to a
 * port, look up the local node id, start listening and construct the
 * fragment free lists.
 *
 * On any failure after the endpoint was opened, the endpoint is closed again
 * and mca_btl_scif_module.scif_fd is reset to -1.
 *
 * @return OPAL_SUCCESS on success, OPAL_ERROR otherwise
 */
int mca_btl_scif_module_init (void)
{
    int rc;

    /* create an endpoint to listen for connections */
    mca_btl_scif_module.scif_fd = scif_open ();
    if (-1 == mca_btl_scif_module.scif_fd) {
        BTL_VERBOSE(("scif_open failed. errno = %d", errno));
        return OPAL_ERROR;
    }

    /* bind the endpoint to a port. the raw int result is checked before it
     * is stored: once narrowed into the port field, a -1 failure value may
     * no longer compare equal to -1 */
    rc = scif_bind (mca_btl_scif_module.scif_fd, 0);
    if (rc < 0) {
        BTL_VERBOSE(("scif_bind failed. errno = %d", errno));
        goto fail;
    }
    mca_btl_scif_module.port_id.port = rc;

    /* determine this processes node id */
    rc = scif_get_nodeIDs (NULL, 0, &mca_btl_scif_module.port_id.node);
    if (-1 == rc) {
        BTL_VERBOSE(("btl/scif error getting node id of this node"));
        /* do not leak the endpoint opened above */
        goto fail;
    }

    /* Listen for connections */
    /* TODO - base the maximum backlog off something */
    rc = scif_listen (mca_btl_scif_module.scif_fd, 64);
    if (-1 == rc) {
        BTL_VERBOSE(("scif_listen failed. errno = %d", errno));
        goto fail;
    }

    BTL_VERBOSE(("btl/scif: listening @ port %u on node %u\n",
                 mca_btl_scif_module.port_id.port, mca_btl_scif_module.port_id.node));

    OBJ_CONSTRUCT(&mca_btl_scif_module.dma_frags, opal_free_list_t);
    OBJ_CONSTRUCT(&mca_btl_scif_module.eager_frags, opal_free_list_t);

    return OPAL_SUCCESS;

fail:
    /* common cleanup for every failure after scif_open() succeeded */
    scif_close (mca_btl_scif_module.scif_fd);
    mca_btl_scif_module.scif_fd = -1;

    return OPAL_ERROR;
}
/*
 * Tear down the scif module: destruct the fragment free lists, release all
 * endpoints, destroy the registration cache, stop the listening thread and
 * close the listening endpoint. Always returns OPAL_SUCCESS.
 */
static int
mca_btl_scif_module_finalize (struct mca_btl_base_module_t *btl)
{
mca_btl_scif_module_t *scif_module = (mca_btl_scif_module_t *) btl;
unsigned int i;
OBJ_DESTRUCT(&mca_btl_scif_module.dma_frags);
OBJ_DESTRUCT(&mca_btl_scif_module.eager_frags);
/* flag the shutdown.
* NOTE(review): presumably the listening thread checks this flag to leave
* its accept loop -- confirm in the thread function. */
mca_btl_scif_module.exiting = true;
/* close all open connections and release endpoints */
if (NULL != scif_module->endpoints) {
for (i = 0 ; i < scif_module->endpoint_count ; ++i) {
mca_btl_scif_ep_release (scif_module->endpoints + i);
}
free (scif_module->endpoints);
scif_module->endpoint_count = 0;
scif_module->endpoints = NULL;
}
if (NULL != scif_module->rcache) {
mca_rcache_base_module_destroy (scif_module->rcache);
scif_module->rcache = NULL;
}
/* close the listening endpoint */
if (mca_btl_scif_module.listening && -1 != mca_btl_scif_module.scif_fd) {
/* wake up the scif thread by connecting to our own listening port, then
* wait for the thread to finish before closing both descriptors.
* NOTE(review): the results of scif_open/scif_connect are not checked. */
scif_epd_t tmpfd;
tmpfd = scif_open();
scif_connect (tmpfd, &mca_btl_scif_module.port_id);
pthread_join(mca_btl_scif_module.listen_thread, NULL);
scif_close(tmpfd);
scif_close (mca_btl_scif_module.scif_fd);
}
mca_btl_scif_module.scif_fd = -1;
return OPAL_SUCCESS;
}
/**
 * Allocate a send descriptor with room for `size` bytes.
 *
 * Only requests that fit within the eager limit can be satisfied; they are
 * served from the eager fragment list.
 *
 * @param btl       module handle (not used)
 * @param endpoint  endpoint the fragment is allocated for
 * @param order     stored in the descriptor
 * @param size      requested payload size; becomes the length of segment 0
 * @param flags     stored in the descriptor
 *
 * @return the descriptor, or NULL if size exceeds the eager limit or no
 *         fragment is available
 */
mca_btl_base_descriptor_t *
mca_btl_scif_alloc(struct mca_btl_base_module_t *btl,
                   struct mca_btl_base_endpoint_t *endpoint,
                   uint8_t order, size_t size, uint32_t flags)
{
    mca_btl_scif_base_frag_t *frag = NULL;

    BTL_VERBOSE(("allocating fragment of size: %u", (unsigned int)size));

    /* requests larger than the eager limit cannot be served */
    if (size > mca_btl_scif_module.super.btl_eager_limit) {
        return NULL;
    }

    (void) MCA_BTL_SCIF_FRAG_ALLOC_EAGER(endpoint, frag);
    if (OPAL_UNLIKELY(NULL == frag)) {
        return NULL;
    }

    BTL_VERBOSE(("btl/scif_module allocated frag of size: %u, flags: %x. frag = %p",
                 (unsigned int)size, flags, (void *) frag));

    frag->segments[0].seg_len = size;

    frag->base.des_segments = frag->segments;
    frag->base.des_segment_count = 1;
    frag->base.des_flags = flags;
    frag->base.order = order;

    return &frag->base;
}
/**
 * Return a descriptor obtained from this BTL.
 *
 * @param btl  module handle (not used)
 * @param des  descriptor to release; it is a scif fragment
 *
 * @return the result of mca_btl_scif_frag_return()
 */
static int
mca_btl_scif_free (struct mca_btl_base_module_t *btl,
                   mca_btl_base_descriptor_t *des)
{
    mca_btl_scif_base_frag_t *frag = (mca_btl_scif_base_frag_t *) des;

    return mca_btl_scif_frag_return (frag);
}
/**
 * Register a memory region for RMA with a specific endpoint.
 *
 * The region is first registered with the module's registration cache; the
 * cached registration is then registered with the endpoint's scif descriptor
 * unless that was already done.
 *
 * @param btl       module handle (the global module is used)
 * @param endpoint  endpoint to register with; MCA_BTL_ENDPOINT_ANY is not
 *                  supported
 * @param base      start of the region
 * @param size      length of the region in bytes
 * @param flags     registration flags; only the access bits are used
 *
 * @return the registration handle for this endpoint, or NULL if the endpoint
 *         is MCA_BTL_ENDPOINT_ANY, is not (yet) connected, or either
 *         registration step fails
 */
static mca_btl_base_registration_handle_t *mca_btl_scif_register_mem (struct mca_btl_base_module_t *btl,
                                                                      mca_btl_base_endpoint_t *endpoint,
                                                                      void *base, size_t size, uint32_t flags)
{
    mca_btl_scif_module_t *scif_module = &mca_btl_scif_module;
    mca_btl_scif_reg_t *scif_reg;
    int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
    int rc;

    if (MCA_BTL_ENDPOINT_ANY == endpoint) {
        /* it probably isn't possible to support registering memory to use with any endpoint so
         * return NULL */
        return NULL;
    }

    if (OPAL_LIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) {
        /* the endpoint needs to be connected before the fragment can be
         * registered. the return code is deliberately ignored: the state is
         * re-checked right below */
        (void) mca_btl_scif_ep_connect (endpoint);
        if (OPAL_LIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) {
            /* not yet connected */
            return NULL;
        }
    }

    rc = scif_module->rcache->rcache_register (scif_module->rcache, base, size, 0, access_flags,
                                               (mca_rcache_base_registration_t **) &scif_reg);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return NULL;
    }

    /* register the memory location with this peer if it isn't already */
    if ((off_t) -1 == scif_reg->handles[endpoint->id].btl_handle.scif_offset) {
        size_t seg_size = (size_t)((uintptr_t) scif_reg->base.bound - (uintptr_t) scif_reg->base.base) + 1;
        off_t offset;

        /* NTH: until we determine a way to pass permissions to the rcache just make all segments
         * read/write */
        offset = scif_register (endpoint->scif_epd, scif_reg->base.base, seg_size, 0, SCIF_PROT_READ |
                                SCIF_PROT_WRITE, 0);
        if (OPAL_UNLIKELY((off_t) -1 == offset)) {
            /* scif_register() failed: do not hand out a handle with an invalid
             * offset, and drop the cache reference taken above */
            BTL_VERBOSE(("failed to register fragment for scif DMA transaction. errno = %d", errno));
            scif_module->rcache->rcache_deregister (scif_module->rcache, &scif_reg->base);
            return NULL;
        }

        scif_reg->handles[endpoint->id].btl_handle.scif_offset = offset;

        BTL_VERBOSE(("registered fragment for scif DMA transaction. offset = %lu",
                     (unsigned long) scif_reg->handles[endpoint->id].btl_handle.scif_offset));
    }

    return &scif_reg->handles[endpoint->id].btl_handle;
}
/**
 * Release a registration handle returned by mca_btl_scif_register_mem().
 *
 * The registration behind the handle is handed back to the module's
 * registration cache.
 *
 * @param btl     module handle (the global module is used)
 * @param handle  handle to release
 *
 * @return OPAL_SUCCESS always
 */
static int mca_btl_scif_deregister_mem (struct mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle)
{
    /* the handle is embedded in a scif handle that points back at its registration */
    mca_btl_scif_reg_t *owner = ((mca_btl_scif_registration_handle_t *) handle)->reg;
    mca_btl_scif_module_t *module = &mca_btl_scif_module;

    module->rcache->rcache_deregister (module->rcache, &owner->base);

    return OPAL_SUCCESS;
}
/**
 * Build a send descriptor from a convertor.
 *
 * Two strategies are used:
 *  - in-place: when the module allows in-place sends, the convertor needs no
 *    intermediate buffers and reserve is at most 128 bytes, the user data is
 *    referenced directly as a second segment of a dma fragment;
 *  - buffered: otherwise the data is packed into an eager fragment right
 *    after the reserved bytes.
 *
 * @param btl        module handle (not used)
 * @param endpoint   destination endpoint
 * @param convertor  source of the user data
 * @param order      stored in the descriptor
 * @param reserve    bytes reserved at the front of segment 0
 * @param size       (IN/OUT) bytes requested; updated to the bytes actually
 *                   packed on the buffered path
 * @param flags      stored in the descriptor
 *
 * @return the descriptor, or NULL if no fragment is available or packing fails
 */
static inline struct mca_btl_base_descriptor_t *
mca_btl_scif_prepare_src_send (struct mca_btl_base_module_t *btl,
                               mca_btl_base_endpoint_t *endpoint,
                               struct opal_convertor_t *convertor,
                               uint8_t order, size_t reserve, size_t *size,
                               uint32_t flags)
{
    mca_btl_scif_base_frag_t *frag = NULL;
    size_t packed_len = *size;
    uint32_t niov = 1;
    struct iovec pack_iov;
    int status;

    if (OPAL_LIKELY((mca_btl_scif_module.super.btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) &&
                    !opal_convertor_need_buffers (convertor) &&
                    reserve <= 128)) {
        /* inplace send: segment 1 references the user's data directly */
        void *user_data;

        opal_convertor_get_current_pointer (convertor, &user_data);

        (void) MCA_BTL_SCIF_FRAG_ALLOC_DMA(endpoint, frag);
        if (OPAL_UNLIKELY(NULL == frag)) {
            return NULL;
        }

        frag->base.des_segment_count = 2;
        frag->segments[0].seg_len = reserve;
        frag->segments[1].seg_len = *size;
        frag->segments[1].seg_addr.pval = user_data;
    } else {
        /* buffered send: pack the data behind the reserved area */
        (void) MCA_BTL_SCIF_FRAG_ALLOC_EAGER(endpoint, frag);
        if (OPAL_UNLIKELY(NULL == frag)) {
            return NULL;
        }

        if (0 != *size) {
            pack_iov.iov_base = (IOVBASE_TYPE *) ((uintptr_t) frag->segments[0].seg_addr.pval + reserve);
            pack_iov.iov_len = *size;

            status = opal_convertor_pack (convertor, &pack_iov, &niov, &packed_len);
            if (OPAL_UNLIKELY(status < 0)) {
                mca_btl_scif_frag_return (frag);
                return NULL;
            }

            /* report how much was actually packed */
            *size = packed_len;
        }

        frag->base.des_segment_count = 1;
        frag->segments[0].seg_len = reserve + *size;
    }

    frag->base.des_flags = flags;
    frag->base.order = order;
    frag->base.des_segments = frag->segments;

    return &frag->base;
}
/**
 * prepare_src entry point of the SCIF BTL.
 *
 * Every argument is forwarded unchanged to mca_btl_scif_prepare_src_send();
 * see that function for the meaning of the parameters and the result.
 *
 * @returns the descriptor produced by mca_btl_scif_prepare_src_send(), which
 *          may be NULL.
 */
static mca_btl_base_descriptor_t *mca_btl_scif_prepare_src (struct mca_btl_base_module_t *btl,
                                                            mca_btl_base_endpoint_t *endpoint,
                                                            struct opal_convertor_t *convertor,
                                                            uint8_t order, size_t reserve, size_t *size,
                                                            uint32_t flags)
{
    mca_btl_base_descriptor_t *descriptor;

    descriptor = mca_btl_scif_prepare_src_send (btl, endpoint, convertor, order, reserve, size, flags);

    return descriptor;
}

Просмотреть файл

@ -1,72 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "btl_scif_frag.h"
/* Smaller of two values.  This is a function-like macro: whichever argument
 * is selected is evaluated a second time, so do not pass expressions with
 * side effects. */
#define lmin(a,b) ((a) < (b) ? (a) : (b))
/**
 * Initiate a put operation.
 *
 * Translates the local and remote virtual addresses into offsets within the
 * corresponding SCIF registered windows, writes size bytes from the local
 * window to the remote window with scif_writeto() and, unless the transfer
 * was requested as synchronous, waits on a fence for it to finish.  The
 * completion callback is invoked before returning.
 *
 * @param btl             BTL module, passed through to the callback
 * @param endpoint        peer endpoint; its scif_epd is used for the transfer
 * @param local_address   source address inside the local registration
 * @param remote_address  destination address inside the remote registration
 * @param local_handle    registration handle covering local_address
 * @param remote_handle   registration handle covering remote_address
 * @param size            number of bytes to transfer
 * @param flags           unused
 * @param order           unused
 * @param cbfunc          completion callback; always called on success
 * @param cbcontext       passed through to cbfunc
 * @param cbdata          passed through to cbfunc
 *
 * @returns OPAL_SUCCESS after the callback has run, or OPAL_ERROR when
 *          scif_writeto() fails (the callback is not invoked in that case).
 */
int mca_btl_scif_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
                      uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
                      mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
                      int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
    int rc, mark, scif_flags = 0;
    off_t roffset, loffset;
#if defined(SCIF_TIMING)
    struct timespec ts;

    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);

    /* NOTE(review): the put path accounts against the get_* statistics;
     * confirm whether dedicated put counters exist in the component. */
    mca_btl_scif_component.get_count++;
#endif

    BTL_VERBOSE(("Using DMA Put from local address %p to remote address %" PRIx64,
                 local_address, remote_address));

    /* window offset = registered offset + distance from the registered base */
    roffset = remote_handle->scif_offset + (off_t)(remote_address - remote_handle->scif_base);
    loffset = local_handle->scif_offset + (off_t)((intptr_t) local_address - local_handle->scif_base);

    if (mca_btl_scif_component.rma_use_cpu) {
        scif_flags = SCIF_RMA_USECPU;
    }

    if (mca_btl_scif_component.rma_sync) {
        scif_flags |= SCIF_RMA_SYNC;
    }

    /* start the write.  A put must only write: a subsequent read would copy
     * the remote window back over the local source buffer and would also
     * overwrite the status of the write. */
    rc = scif_writeto (endpoint->scif_epd, loffset, size, roffset, scif_flags);
    if (OPAL_UNLIKELY(-1 == rc)) {
        return OPAL_ERROR;
    }

    if (!(scif_flags & SCIF_RMA_SYNC)) {
        /* according to the scif documentation it is better to use a fence rather
         * than using the SCIF_RMA_SYNC flag with scif_writeto */
        scif_fence_mark (endpoint->scif_epd, SCIF_FENCE_INIT_SELF, &mark);
        scif_fence_wait (endpoint->scif_epd, mark);
    }

#if defined(SCIF_TIMING)
    SCIF_UPDATE_TIMER(mca_btl_scif_component.get_time,
                      mca_btl_scif_component.get_time_max, ts);
#endif

    /* always call the callback function */
    cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);

    return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,299 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "btl_scif.h"
#include "btl_scif_frag.h"
/* Contiguous free bytes available at offset e of the send ring.
 *   s   - start offset (high bit already masked off)
 *   e   - end offset (high bit already masked off)
 *   hbm - non-zero when the high bits of the raw start and end values match
 * When s is ahead of e the free span is s - e.  When s equals e and the high
 * bits differ the result is also s - e, i.e. 0.  In every other case the
 * span runs from e to the end of the segment
 * (mca_btl_scif_component.segment_size - e).
 * Arguments may be evaluated more than once; hbm is parenthesized so that an
 * expression argument is negated as a whole. */
#define BUFFER_FREE(s,e,hbm) (((s) > (e) || ((s) == (e) && !(hbm))) ? (s) - (e) : (mca_btl_scif_component.segment_size - (e)))
/**
 * Attempt to reserve a contiguous region in the send buffer of an endpoint.
 *
 * The requested size is increased by sizeof (mca_btl_scif_frag_hdr_t) so the
 * region holds the fragment header as well.  On success *dst is set to the
 * beginning of the region and the endpoint's cached end value is advanced
 * past it, rounded up to a multiple of 64 bytes.
 *
 * @param endpoint  endpoint whose send_buffer is used
 * @param size      payload size in bytes, without the fragment header
 * @param dst       out: start of the reserved region
 *
 * @returns OPAL_SUCCESS when a region was reserved, OPAL_ERR_OUT_OF_RESOURCE
 *          when there is not enough contiguous space even after refreshing
 *          the cached start value.
 *
 * NOTE(review): (1 << 31) shifts into the sign bit of a signed int; consider
 * an unsigned constant once the types of send_buffer.start/end are confirmed.
 */
static inline int mca_btl_scif_send_get_buffer (mca_btl_base_endpoint_t *endpoint, size_t size, unsigned char * restrict *dst)
{
/* the high bit helps determine if the buffer is empty or full */
bool hbm = (endpoint->send_buffer.start >> 31) == (endpoint->send_buffer.end >> 31);
const unsigned int segment_size = mca_btl_scif_component.segment_size;
/* offsets with the high (wrap) bit masked off */
unsigned int start = endpoint->send_buffer.start & ~ (1 << 31);
unsigned int end = endpoint->send_buffer.end & ~ (1 << 31);
unsigned int buffer_free = BUFFER_FREE(start, end, hbm);
#if defined(SCIF_TIMING)
struct timespec ts;
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
#endif
/* need space for the fragment + the header */
size += sizeof (mca_btl_scif_frag_hdr_t);
/* check if we need to free up space for this fragment */
if (OPAL_UNLIKELY(buffer_free < size)) {
BTL_VERBOSE(("not enough room for a fragment of size %u. in use buffer segment: {start: %x, end: %x, high bit matches: %d}\n",
(unsigned) size, start, end, (int) hbm));
/* read the current start pointer from the remote peer */
start = endpoint->send_buffer.start = endpoint->send_buffer.startp[0];
start &= ~ (1 << 31);
/* recompute the free space from the refreshed start value */
hbm = (endpoint->send_buffer.start >> 31) == (endpoint->send_buffer.end >> 31);
buffer_free = BUFFER_FREE(start, end, hbm);
opal_atomic_rmb ();
/* if this is the end of the buffer. does the fragment fit? */
if (OPAL_UNLIKELY(buffer_free > 0 && buffer_free < size && start <= end)) {
/* the gap up to the end of the segment is too small: write a header with
 * tag 0xff whose size covers the rest of the gap (presumably so the
 * receiver skips it -- confirm against the receive path) and wrap */
mca_btl_scif_frag_hdr_t hdr;
hdr.size = buffer_free - sizeof (mca_btl_scif_frag_hdr_t);
hdr.tag = 0xff;
#if defined(SCIF_USE_SEQ)
hdr.seq = endpoint->seq_next++;
((uint64_t *) (endpoint->send_buffer.buffer + end))[0] = *((uint64_t *) &hdr);
#else
((uint32_t *) (endpoint->send_buffer.buffer + end))[0] = *((uint32_t *) &hdr);
#endif
/* toggle the high bit */
/* wrap to offset 64 (presumably the first 64 bytes of the segment are
 * reserved -- TODO confirm) */
end = 64;
endpoint->send_buffer.end = ((endpoint->send_buffer.end & (1 << 31)) ^ (1 << 31)) | end;
hbm = (endpoint->send_buffer.start >> 31) == (endpoint->send_buffer.end >> 31);
buffer_free = BUFFER_FREE(start, end, hbm);
}
/* still no room after refreshing (and possibly wrapping): give up */
if (OPAL_UNLIKELY(buffer_free < size)) {
#if defined(SCIF_TIMING)
SCIF_UPDATE_TIMER(mca_btl_scif_component.aquire_buffer_time, mca_btl_scif_component.aquire_buffer_time_max, ts);
#endif
return OPAL_ERR_OUT_OF_RESOURCE;
}
}
BTL_VERBOSE(("writing fragment of size %u to offset %u {start: %x, end: %x} of peer's buffer. free = %u",
(unsigned int) size, end, start, end, buffer_free));
*dst = endpoint->send_buffer.buffer + end;
/* align the buffer on a 64 byte boundary */
end = (end + size + 63) & ~63;
/* landing exactly on the end of the segment wraps to offset 64 and toggles
 * the high bit; otherwise the high bit is kept and only the offset moves */
if (OPAL_UNLIKELY(segment_size == end)) {
endpoint->send_buffer.end = ((endpoint->send_buffer.end & (1 << 31)) ^ (1 << 31)) | 64;
} else {
endpoint->send_buffer.end = (endpoint->send_buffer.end & (1 << 31)) | end;
}
#if defined(SCIF_TIMING)
SCIF_UPDATE_TIMER(mca_btl_scif_component.aquire_buffer_time, mca_btl_scif_component.aquire_buffer_time_max, ts);
#endif
return OPAL_SUCCESS;
}
/**
 * Publish the locally cached end value of the send buffer to the peer.
 *
 * The cached send_buffer.end is stored through send_buffer.endp.  When the
 * peer's port_id.node differs from the local module's, the store is preceded
 * by a read through send_buffer.startp and followed by a refresh of the
 * cached start value; otherwise it is preceded by MB().
 *
 * @param endpoint  endpoint whose end value is published
 */
static void mark_buffer (struct mca_btl_base_endpoint_t *endpoint)
{
if (endpoint->port_id.node != mca_btl_scif_module.port_id.node) {
/* force the PCIe bus to flush by reading from the remote node */
volatile uint32_t start = endpoint->send_buffer.startp[0]; (void)start;
endpoint->send_buffer.endp[0] = endpoint->send_buffer.end;
/* pick up the peer's current start value while we are here */
endpoint->send_buffer.start = endpoint->send_buffer.startp[0];
} else {
/* peer is on the same node: issue MB() before publishing the new end */
MB();
endpoint->send_buffer.endp[0] = endpoint->send_buffer.end;
}
}
/**
 * Copy one fragment into the send buffer of an endpoint.
 *
 * A region of frag->hdr.size bytes (plus header) is reserved with
 * mca_btl_scif_send_get_buffer().  Segment 0 and, when non-empty, segment 1
 * are copied behind the header slot first; the header itself is stored last,
 * followed by a write memory barrier.  The fragment is then completed with
 * OPAL_SUCCESS.
 *
 * @param endpoint  destination endpoint
 * @param frag      fragment to send; hdr.size and hdr.tag must already be set
 *
 * @returns 1 when the fragment was written and completed,
 *          OPAL_ERR_OUT_OF_RESOURCE when no buffer space could be reserved.
 */
static int mca_btl_scif_send_frag (struct mca_btl_base_endpoint_t *endpoint,
                                   mca_btl_scif_base_frag_t *frag)
{
    const size_t wire_size = frag->hdr.size;
    unsigned char * restrict dst;
    unsigned char * restrict head;
#if defined(SCIF_TIMING)
    struct timespec ts;
#endif

    BTL_VERBOSE(("btl/scif sending descriptor %p from %d -> %d. length = %" PRIu64, (void *) frag,
                 OPAL_PROC_MY_NAME.vpid, endpoint->peer_proc->proc_name.vpid, frag->segments[0].seg_len));

    if (OPAL_UNLIKELY(OPAL_SUCCESS != mca_btl_scif_send_get_buffer (endpoint, wire_size, &dst))) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

#if defined(SCIF_TIMING)
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
#endif

    /* payload first: segment 0, then the optional second segment */
    head = (unsigned char * restrict) frag->segments[0].seg_addr.pval;
    memcpy (dst + sizeof (frag->hdr), head, frag->segments[0].seg_len);

    if (0 != frag->segments[1].seg_len) {
        memcpy (dst + sizeof (frag->hdr) + frag->segments[0].seg_len,
                frag->segments[1].seg_addr.pval,
                frag->segments[1].seg_len);
    }

    /* the header goes last: writing the tag signals that the fragment is
     * available */
#if defined(SCIF_USE_SEQ)
    frag->hdr.seq = endpoint->seq_next++;
    ((uint64_t *) dst)[0] = *((uint64_t *) &frag->hdr);
#else
    ((uint32_t *) dst)[0] = *((uint32_t *) &frag->hdr);
#endif

    opal_atomic_wmb ();

#if defined(SCIF_TIMING)
    SCIF_UPDATE_TIMER(mca_btl_scif_component.send_time, mca_btl_scif_component.send_time_max, ts);
#endif

    /* the data has left the fragment; complete it */
    mca_btl_scif_frag_complete (frag, OPAL_SUCCESS);

    return 1;
}
/**
 * Send a prepared descriptor to a peer.
 *
 * The fragment header is filled in with the tag and the combined length of
 * both segments.  If the endpoint is not connected a connection attempt is
 * made.  When the endpoint is connected and the fragment fits into the send
 * buffer it is written immediately and the new end value is published;
 * otherwise the descriptor is flagged MCA_BTL_DES_SEND_ALWAYS_CALLBACK and
 * appended to the endpoint's frag_wait_list.
 *
 * @param btl         BTL module (unused)
 * @param endpoint    destination endpoint
 * @param descriptor  descriptor to send
 * @param tag         tag stored in the fragment header
 *
 * @returns 1 when the fragment was sent right away, OPAL_SUCCESS when it was
 *          queued for later.
 */
int mca_btl_scif_send (struct mca_btl_base_module_t *btl,
                       struct mca_btl_base_endpoint_t *endpoint,
                       struct mca_btl_base_descriptor_t *descriptor,
                       mca_btl_base_tag_t tag)
{
    mca_btl_scif_base_frag_t *frag = (mca_btl_scif_base_frag_t *) descriptor;

    frag->hdr.size = frag->segments[0].seg_len + frag->segments[1].seg_len;
    frag->hdr.tag = tag;

    if (OPAL_UNLIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) {
        /* only the resulting endpoint state is examined, not the return code */
        (void) mca_btl_scif_ep_connect (endpoint);
    }

    if (OPAL_LIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED == endpoint->state &&
                    1 == mca_btl_scif_send_frag (endpoint, frag))) {
        mark_buffer (endpoint);
        return 1;
    }

    /* the receiver was not ready to handle the fragment. queue up the fragment. */
    descriptor->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
    opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) descriptor);

    return OPAL_SUCCESS;
}
/**
 * Immediate send: pack a header and payload straight into the peer's buffer
 * without allocating a descriptor.
 *
 * A region of header_size + payload_size bytes is reserved in the endpoint's
 * send buffer.  The caller's header is copied behind the fragment header
 * slot, the payload is packed after it with the convertor, and the fragment
 * header is stored last, followed by a write barrier and mark_buffer().
 *
 * No descriptor is ever produced by this function.  On every failure path
 * *descriptor (when descriptor is non-NULL) is set to NULL so the caller
 * never reads an unset pointer.
 *
 * @param btl           BTL module (unused)
 * @param endpoint      destination endpoint
 * @param convertor     convertor used to pack the payload
 * @param header        header bytes to copy in front of the payload
 * @param header_size   size of header in bytes
 * @param payload_size  number of payload bytes to pack
 * @param order         unused
 * @param flags         must not contain MCA_BTL_DES_SEND_ALWAYS_CALLBACK
 * @param tag           tag stored in the fragment header
 * @param descriptor    out (may be NULL): set to NULL on failure
 *
 * @returns OPAL_SUCCESS when the data was written,
 *          OPAL_ERR_RESOURCE_BUSY when the endpoint is not connected,
 *          OPAL_ERR_OUT_OF_RESOURCE when no buffer space is available.
 */
int mca_btl_scif_sendi (struct mca_btl_base_module_t *btl,
                        struct mca_btl_base_endpoint_t *endpoint,
                        struct opal_convertor_t *convertor,
                        void *header, size_t header_size,
                        size_t payload_size, uint8_t order,
                        uint32_t flags, mca_btl_base_tag_t tag,
                        mca_btl_base_descriptor_t **descriptor)
{
    size_t length = (header_size + payload_size);
    unsigned char * restrict base;
    mca_btl_scif_frag_hdr_t hdr;
    size_t max_data = 0;
    int rc;
#if defined(SCIF_TIMING)
    struct timespec ts;
#endif

    assert (length < mca_btl_scif_module.super.btl_eager_limit);
    assert (0 == (flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK));

    if (OPAL_UNLIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) {
        /* only the resulting endpoint state is examined, not the return code */
        (void) mca_btl_scif_ep_connect (endpoint);

        if (OPAL_UNLIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) {
            /* report "no descriptor" exactly like the out-of-resource path */
            if (NULL != descriptor) {
                *descriptor = NULL;
            }

            return OPAL_ERR_RESOURCE_BUSY;
        }
    }

    rc = mca_btl_scif_send_get_buffer (endpoint, length, &base);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        if (NULL != descriptor) {
            *descriptor = NULL;
        }

        return OPAL_ERR_OUT_OF_RESOURCE;
    }

#if defined(SCIF_TIMING)
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
#endif

    /* fill in the fragment header; it is written to the buffer last */
    hdr.size = length;
    hdr.tag = tag;
#if defined(SCIF_USE_SEQ)
    hdr.seq = endpoint->seq_next++;
#endif

    /* write the match header (with MPI comm/tag/etc. info) */
    memcpy (base + sizeof (hdr), header, header_size);

    if (payload_size) {
        uint32_t iov_count = 1;
        struct iovec iov[1];

        iov[0].iov_base = base + sizeof (hdr) + header_size;
        iov[0].iov_len = payload_size;

        /* move the data */
        opal_convertor_pack (convertor, iov, &iov_count, &max_data);

        assert (max_data == payload_size);
    }

#if defined(SCIF_USE_SEQ)
    /* signal the remote side that this fragment is available */
    ((uint64_t *)base)[0] = *((uint64_t *) &hdr);
#else
    ((uint32_t *)base)[0] = *((uint32_t *) &hdr);
#endif

    opal_atomic_wmb ();

    mark_buffer (endpoint);

#if defined(SCIF_TIMING)
    SCIF_UPDATE_TIMER(mca_btl_scif_component.sendi_time, mca_btl_scif_component.sendi_time_max, ts);
#endif

    return OPAL_SUCCESS;
}
/**
 * Try to drain the queue of fragments waiting on an endpoint.
 *
 * Fragments are taken from the front of frag_wait_list and sent one by one.
 * The loop stops at the first fragment that cannot be sent: if the send
 * buffer is out of space the fragment is put back at the front of the list,
 * any other error completes the fragment with that error code.  The end
 * value of the send buffer is published afterwards in every case.
 *
 * @param endpoint  endpoint whose wait list is processed
 *
 * @returns OPAL_SUCCESS unconditionally.
 */
int mca_btl_scif_progress_send_wait_list (mca_btl_base_endpoint_t *endpoint)
{
    for (;;) {
        mca_btl_scif_base_frag_t *pending =
            (mca_btl_scif_base_frag_t *) opal_list_remove_first (&endpoint->frag_wait_list);
        int status;

        if (NULL == pending) {
            break;
        }

        status = mca_btl_scif_send_frag (endpoint, pending);
        if (OPAL_LIKELY(status >= OPAL_SUCCESS)) {
            continue;
        }

        if (OPAL_LIKELY(OPAL_ERR_OUT_OF_RESOURCE == status)) {
            /* no room yet: keep the fragment first in line for the next try */
            opal_list_prepend (&endpoint->frag_wait_list, (opal_list_item_t *) pending);
        } else {
            mca_btl_scif_frag_complete (pending, status);
        }

        break;
    }

    mark_buffer (endpoint);

    return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,47 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2015 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_opal_btl_scif_CONFIG(action-if-can-compile, action-if-cant-compile)
# ------------------------------------------------------------------------
# Decide whether the scif BTL can be built.  Honors --with-scif, probes for
# scif.h and scif_open in libscif, aborts configure when SCIF was requested
# but could not be found, and records the outcome in the configure summary.
AC_DEFUN([MCA_opal_btl_scif_CONFIG],[
    OPAL_VAR_SCOPE_PUSH([opal_btl_scif_happy])
    AC_CONFIG_FILES([opal/mca/btl/scif/Makefile])

    AC_ARG_WITH([scif], [AC_HELP_STRING([--with-scif(=DIR)],
                [Build with SCIF, searching for headers in DIR])])
    OPAL_CHECK_WITHDIR([scif], [$with_scif], [include/scif.h])

    opal_btl_scif_happy="no"

    if test "$with_scif" != "no" ; then
        # only an explicit directory argument narrows the search path
        if test -n "$with_scif" && test "$with_scif" != "yes" ; then
            opal_check_scif_dir=$with_scif
        fi

        OPAL_CHECK_PACKAGE([btl_scif], [scif.h], [scif], [scif_open], [],
                           [$opal_check_scif_dir], [], [opal_btl_scif_happy="yes"], [])

        # an explicit request that cannot be satisfied is a hard error
        if test "$opal_btl_scif_happy" != "yes" && test -n "$with_scif" ; then
            AC_MSG_ERROR([SCIF support requested but not found.  Aborting])
        fi
    fi

    AS_IF([test "$opal_btl_scif_happy" = "yes"], [$1], [$2])

    OPAL_SUMMARY_ADD([[Transports]],[[Intel SCIF]],[[btl_scif]],[$opal_btl_scif_happy])

    # substitute in the things needed to build scif
    AC_SUBST([btl_scif_CPPFLAGS])
    AC_SUBST([btl_scif_LDFLAGS])
    AC_SUBST([btl_scif_LIBS])

    OPAL_VAR_SCOPE_POP
])dnl

Просмотреть файл

@ -1,7 +0,0 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner:LANL
status: maintenance