1
1

Merge pull request #5737 from hppritcha/topic/remove_scif_support

SCIF: remove it
Этот коммит содержится в:
Howard Pritchard 2018-09-19 11:38:27 -06:00 коммит произвёл GitHub
родитель 441727fcb0 b9ac3d8931
Коммит dccf780546
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
20 изменённых файлов: 4 добавлений и 2501 удалений

4
README
Просмотреть файл

@ -605,7 +605,6 @@ Network Support
- Loopback (send-to-self) - Loopback (send-to-self)
- Shared memory - Shared memory
- TCP - TCP
- Intel Phi SCIF
- SMCUDA - SMCUDA
- Cisco usNIC - Cisco usNIC
- uGNI (Cray Gemini, Aries) - uGNI (Cray Gemini, Aries)
@ -1000,9 +999,6 @@ NETWORKING SUPPORT / OPTIONS
covers most cases. This option is only needed for special covers most cases. This option is only needed for special
configurations. configurations.
--with-scif=<dir>
Look in directory for Intel SCIF support libraries
--with-verbs=<directory> --with-verbs=<directory>
Specify the directory where the verbs (also known as OpenFabrics Specify the directory where the verbs (also known as OpenFabrics
verbs, or Linux verbs, and previously known as OpenIB) libraries and verbs, or Linux verbs, and previously known as OpenIB) libraries and

Просмотреть файл

@ -88,12 +88,8 @@ EXTRA_DIST = \
platform/lanl/darwin/mic-common \ platform/lanl/darwin/mic-common \
platform/lanl/darwin/debug \ platform/lanl/darwin/debug \
platform/lanl/darwin/debug.conf \ platform/lanl/darwin/debug.conf \
platform/lanl/darwin/debug-mic \
platform/lanl/darwin/debug-mic.conf \
platform/lanl/darwin/optimized \ platform/lanl/darwin/optimized \
platform/lanl/darwin/optimized.conf \ platform/lanl/darwin/optimized.conf \
platform/lanl/darwin/optimized-mic \
platform/lanl/darwin/optimized-mic.conf \
platform/snl/portals4-m5 \ platform/snl/portals4-m5 \
platform/snl/portals4-orte \ platform/snl/portals4-orte \
platform/ibm/debug-ppc32-gcc \ platform/ibm/debug-ppc32-gcc \

Просмотреть файл

@ -1,100 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the default system-wide MCA parameters defaults file.
# Specifically, the MCA parameter "mca_param_files" defaults to a
# value of
# "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf"
# (this file is the latter of the two). So if the default value of
# mca_param_files is not changed, this file is used to set system-wide
# MCA parameters. This file can therefore be used to set system-wide
# default MCA parameters for all users. Of course, users can override
# these values if they want, but this file is an excellent location
# for setting system-specific MCA parameters for those users who don't
# know / care enough to investigate the proper values for them.
# Note that this file is only applicable where it is visible (in a
# filesystem sense). Specifically, MPI processes each read this file
# during their startup to determine what default values for MCA
# parameters should be used. mpirun does not bundle up the values in
# this file from the node where it was run and send them to all nodes;
# the default value decisions are effectively distributed. Hence,
# these values are only applicable on nodes that "see" this file. If
# $sysconf is a directory on a local disk, it is likely that changes
# to this file will need to be propagated to other nodes. If $sysconf
# is a directory that is shared via a networked filesystem, changes to
# this file will be visible to all nodes that share this $sysconf.
# The format is straightforward: one per line, mca_param_name =
# rvalue. Quoting is ignored (so if you use quotes or escape
# characters, they'll be included as part of the value). For example:
# Disable run-time MPI parameter checking
# mpi_param_check = 0
# Note that the value "~/" will be expanded to the current user's home
# directory. For example:
# Change component loading path
# component_path = /usr/local/lib/openmpi:~/my_openmpi_components
# See "ompi_info --param all all" for a full listing of Open MPI MCA
# parameters available and their default values.
#
# Basic behavior to smooth startup
mca_base_component_show_load_errors = 0
opal_set_max_sys_limits = 1
orte_report_launch_progress = 1
# Define timeout for daemons to report back during launch
orte_startup_timeout = 10000
## Protect the shared file systems
orte_no_session_dirs = /panfs,/scratch,/users,/usr/projects
orte_tmpdir_base = /tmp
## Require an allocation to run - protects the frontend
## from inadvertent job executions
orte_allocation_required = 1
## Add the interface for out-of-band communication
## and set it up
oob_tcp_if_include=mic0
oob_tcp_peer_retries = 1000
oob_tcp_sndbuf = 32768
oob_tcp_rcvbuf = 32768
## Define the MPI interconnects
btl = sm,scif,openib,self
## Setup OpenIB - just in case
btl_openib_want_fork_support = 0
btl_openib_receive_queues = S,4096,1024:S,12288,512:S,65536,512
## Enable cpu affinity
hwloc_base_binding_policy = core
## Setup MPI options
mpi_show_handle_leaks = 1
mpi_warn_on_fork = 1
#mpi_abort_print_stack = 1

Просмотреть файл

@ -10,7 +10,7 @@
# Copyright (c) 2004-2005 The Regents of the University of California. # Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved. # All rights reserved.
# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011-2013 Los Alamos National Security, LLC. # Copyright (c) 2011-2018 Los Alamos National Security, LLC.
# All rights reserved. # All rights reserved.
# $COPYRIGHT$ # $COPYRIGHT$
# #
@ -84,7 +84,7 @@ oob_tcp_sndbuf = 32768
oob_tcp_rcvbuf = 32768 oob_tcp_rcvbuf = 32768
## Define the MPI interconnects ## Define the MPI interconnects
btl = sm,scif,openib,self btl = sm,openib,self
## Setup OpenIB - just in case ## Setup OpenIB - just in case
btl_openib_want_fork_support = 0 btl_openib_want_fork_support = 0

Просмотреть файл

@ -1,100 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the default system-wide MCA parameters defaults file.
# Specifically, the MCA parameter "mca_param_files" defaults to a
# value of
# "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf"
# (this file is the latter of the two). So if the default value of
# mca_param_files is not changed, this file is used to set system-wide
# MCA parameters. This file can therefore be used to set system-wide
# default MCA parameters for all users. Of course, users can override
# these values if they want, but this file is an excellent location
# for setting system-specific MCA parameters for those users who don't
# know / care enough to investigate the proper values for them.
# Note that this file is only applicable where it is visible (in a
# filesystem sense). Specifically, MPI processes each read this file
# during their startup to determine what default values for MCA
# parameters should be used. mpirun does not bundle up the values in
# this file from the node where it was run and send them to all nodes;
# the default value decisions are effectively distributed. Hence,
# these values are only applicable on nodes that "see" this file. If
# $sysconf is a directory on a local disk, it is likely that changes
# to this file will need to be propagated to other nodes. If $sysconf
# is a directory that is shared via a networked filesystem, changes to
# this file will be visible to all nodes that share this $sysconf.
# The format is straightforward: one per line, mca_param_name =
# rvalue. Quoting is ignored (so if you use quotes or escape
# characters, they'll be included as part of the value). For example:
# Disable run-time MPI parameter checking
# mpi_param_check = 0
# Note that the value "~/" will be expanded to the current user's home
# directory. For example:
# Change component loading path
# component_path = /usr/local/lib/openmpi:~/my_openmpi_components
# See "ompi_info --param all all" for a full listing of Open MPI MCA
# parameters available and their default values.
#
# Basic behavior to smooth startup
mca_base_component_show_load_errors = 0
opal_set_max_sys_limits = 1
orte_report_launch_progress = 1
# Define timeout for daemons to report back during launch
orte_startup_timeout = 10000
## Protect the shared file systems
orte_no_session_dirs = /panfs,/scratch,/users,/usr/projects
orte_tmpdir_base = /tmp
## Require an allocation to run - protects the frontend
## from inadvertent job executions
orte_allocation_required = 1
## Add the interface for out-of-band communication
## and set it up
oob_tcp_if_include = mic0
oob_tcp_peer_retries = 1000
oob_tcp_sndbuf = 32768
oob_tcp_rcvbuf = 32768
## Define the MPI interconnects
btl = sm,scif,openib,self
## Setup OpenIB - just in case
btl_openib_want_fork_support = 0
btl_openib_receive_queues = S,4096,1024:S,12288,512:S,65536,512
## Enable cpu affinity
hwloc_base_binding_policy = core
## Setup MPI options
mpi_show_handle_leaks = 0
mpi_warn_on_fork = 1
#mpi_abort_print_stack = 0

Просмотреть файл

@ -10,7 +10,7 @@
# Copyright (c) 2004-2005 The Regents of the University of California. # Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved. # All rights reserved.
# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights # Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights
# reserved. # reserved.
# $COPYRIGHT$ # $COPYRIGHT$
# #
@ -84,7 +84,7 @@ oob_tcp_sndbuf = 32768
oob_tcp_rcvbuf = 32768 oob_tcp_rcvbuf = 32768
## Define the MPI interconnects ## Define the MPI interconnects
btl = sm,scif,openib,self btl = sm,openib,self
## Setup OpenIB - just in case ## Setup OpenIB - just in case
btl_openib_want_fork_support = 0 btl_openib_want_fork_support = 0

Просмотреть файл

@ -1,50 +0,0 @@
# -*- indent-tabs-mode:nil -*-
#
# Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
# reserved.
#
# Additional copyrights may follow
#
# $HEADER$
#
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
AM_CPPFLAGS = $(btl_scif_CPPFLAGS)
if MCA_BUILD_opal_btl_scif_DSO
component_noinst =
component_install = mca_btl_scif.la
else
component_noinst = libmca_btl_scif.la
component_install =
endif
scif_SOURCES = \
btl_scif_component.c \
btl_scif_module.c \
btl_scif_add_procs.c \
btl_scif_endpoint.h \
btl_scif_endpoint.c \
btl_scif_frag.c \
btl_scif_frag.h \
btl_scif_send.c \
btl_scif_put.c \
btl_scif_get.c \
btl_scif.h
mcacomponentdir = $(opallibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_btl_scif_la_SOURCES = $(scif_SOURCES)
nodist_mca_btl_scif_la_SOURCES = $(scif_nodist_SOURCES)
mca_btl_scif_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la \
$(btl_scif_LIBS)
mca_btl_scif_la_LDFLAGS = -module -avoid-version $(btl_scif_LDFLAGS)
noinst_LTLIBRARIES = $(component_noinst)
libmca_btl_scif_la_SOURCES = $(scif_SOURCES)
nodist_libmca_btl_scif_la_SOURCES = $(scif_nodist_SOURCES)
libmca_btl_scif_la_LIBADD = $(btl_scif_LIBS)
libmca_btl_scif_la_LDFLAGS = -module -avoid-version $(btl_scif_LDFLAGS)

Просмотреть файл

@ -1,249 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_SCIF_H
#define MCA_BTL_SCIF_H
#include "opal_config.h"
#include "opal/util/output.h"
#include "opal_stdint.h"
#include "opal/util/proc.h"
#include "opal/mca/btl/btl.h"
#include "opal/mca/btl/base/base.h"
#include "opal/mca/btl/base/btl_base_error.h"
#include "opal/mca/rcache/rcache.h"
#include "opal/mca/rcache/base/base.h"
#include <scif.h>
#include <errno.h>
#include <stdint.h>
#include <sys/types.h>
#include <assert.h>
#include <sys/time.h>
/* Turn on timers for debug builds */
#if OPAL_ENABLE_DEBUG
/* #define SCIF_TIMING */
#endif
#if defined(SCIF_TIMING)
#include <sys/time.h>
#include <math.h>
/**
 * Subtract two timestamps.
 *
 * Stores (*end - *start) in *diff. When the nanosecond difference is
 * negative one second is borrowed so the nanosecond field is brought
 * back up by 1e9.
 *
 * @param end   (IN)  Timestamp to subtract from
 * @param start (IN)  Timestamp to subtract
 * @param diff  (OUT) Receives the difference; may alias end or start
 */
static inline void timerspecsub (struct timespec *end, struct timespec *start,
                                 struct timespec *diff) {
    time_t seconds = end->tv_sec - start->tv_sec;
    long nanoseconds = end->tv_nsec - start->tv_nsec;

    if (nanoseconds < 0) {
        /* borrow one second to cover the nanosecond deficit */
        seconds -= 1;
        nanoseconds += 1000000000;
    }

    diff->tv_nsec = nanoseconds;
    diff->tv_sec = seconds;
}
/*
 * Timing helper. Reads the process CPU-time clock, uses timerspecsub() to
 * compute the time elapsed since `start` (a struct timespec whose address is
 * taken), converts it to seconds as a double, adds it to `agg` and stores the
 * larger of `max` and the elapsed time back into `max`.
 *
 * `max` is evaluated twice, so pass side-effect-free lvalues.
 */
#define SCIF_UPDATE_TIMER(agg, max, start) \
do { \
struct timespec _te, _diff; \
double _tmpd; \
clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &_te); \
timerspecsub(&_te, &(start), &_diff); \
_tmpd = (double) _diff.tv_sec + (double) _diff.tv_nsec / 1000000000.0; \
(agg) += _tmpd; \
(max) = fmax ((max), _tmpd); \
} while (0)
#endif
/* Data published through the modex: mca_btl_scif_modex_send() fills port_id
 * from mca_btl_scif_module.port_id before sending this structure. */
typedef struct mca_btl_scif_modex_t {
struct scif_portID port_id;
} mca_btl_scif_modex_t;
/* State of the scif BTL module (a single global instance,
 * mca_btl_scif_module, is declared at the end of this header). */
typedef struct mca_btl_scif_module_t {
/* base BTL module; the module is passed around as a pointer to this member's type */
mca_btl_base_module_t super;
/* listening endpoint */
scif_epd_t scif_fd;
/* listening port */
struct scif_portID port_id;
/* number of entries in endpoints; set by mca_btl_scif_add_procs() */
size_t endpoint_count;
/* endpoint array allocated by mca_btl_scif_add_procs() */
struct mca_btl_base_endpoint_t *endpoints;
/* NOTE(review): presumably fragments whose send failed and await retry -- confirm against users */
opal_list_t failed_frags;
/* fragments for DMA */
opal_free_list_t dma_frags;
/* fragments for eager send */
opal_free_list_t eager_frags;
/* thread running the connection-accept loop, created by mca_btl_scif_add_procs() */
pthread_t listen_thread;
/* checked by the accept loop when a connection arrives; when set the loop terminates */
volatile bool exiting;
/* set to true once the accept thread has been created */
bool listening;
/* registration cache created ("grdma") during rcache setup */
mca_rcache_base_module_t *rcache;
} mca_btl_scif_module_t;
/* Component-wide settings for the scif BTL. The tunables below are given
 * their defaults and registered as MCA variables in
 * btl_scif_component_register(). */
typedef struct mca_btl_scif_component_t {
/* base BTL component */
mca_btl_base_component_3_0_0_t super;
/* DMA free list settings */
/* initial fragment free list size (default 8) */
int scif_free_list_num;
/* maximum fragment free list size (default 16384) */
int scif_free_list_max;
/* fragment free list size increment (default 64) */
int scif_free_list_inc;
/* size of the memory segment allocated for each remote process (default 8k) */
unsigned int segment_size;
/* use CPU instead of DMA for RMA copies (default false) */
bool rma_use_cpu;
/* use synchronous RMA instead of an RMA fence (default true) */
bool rma_sync;
#if defined(SCIF_TIMING)
/* performance timers */
/* NOTE(review): the *_time / *_time_max members look like (aggregate, max)
 * pairs intended for SCIF_UPDATE_TIMER -- confirm against the call sites.
 * Only the aggregate times and the counters are registered as performance
 * variables in btl_scif_component_register(). */
double aquire_buffer_time;
double aquire_buffer_time_max;
double send_time;
double send_time_max;
double sendi_time;
double sendi_time_max;
double get_time;
double get_time_max;
/* number of times btl_scif_get was called */
unsigned long get_count;
double put_time;
double put_time_max;
/* number of times btl_scif_put was called */
unsigned long put_count;
#endif
} mca_btl_scif_component_t;
int mca_btl_scif_module_init (void);
/**
* BML->BTL notification of change in the process list.
*
* location: btl_scif_add_procs.c
*
* @param btl (IN) BTL module
* @param nprocs (IN) Number of processes
* @param procs (IN) Array of processes
* @param peers (OUT) Array of mca_btl_base_endpoint_t structures by BTL.
* @param reachable (OUT) Bitmask indicating set of peer processes that are reachable by this BTL.
* @return OPAL_SUCCESS or error status on failure.
*/
int
mca_btl_scif_add_procs (struct mca_btl_base_module_t* btl,
size_t nprocs,
struct opal_proc_t **procs,
struct mca_btl_base_endpoint_t **peers,
opal_bitmap_t *reachable);
/**
* Notification of change to the process list.
*
* location: btl_scif_add_procs.c
*
* @param btl (IN) BTL module
* @param nprocs (IN) Number of processes
* @param procs (IN) Set of processes
* @param peers (IN) Set of peer addressing information.
* @return Status indicating if cleanup was successful
*/
int
mca_btl_scif_del_procs (struct mca_btl_base_module_t *btl,
size_t nprocs,
struct opal_proc_t **procs,
struct mca_btl_base_endpoint_t **peers);
/**
* Initiate an asynchronous send.
*
* location: btl_scif_send.c
*
* @param btl (IN) BTL module
* @param btl_peer (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
* @param tag (IN) The tag value used to notify the peer.
*/
int
mca_btl_scif_send (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *btl_peer,
struct mca_btl_base_descriptor_t *descriptor,
mca_btl_base_tag_t tag);
int mca_btl_scif_sendi (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
struct opal_convertor_t *convertor,
void *header, size_t header_size,
size_t payload_size, uint8_t order,
uint32_t flags, mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t **descriptor);
/**
* Initiate a get operation.
*
* location: btl_scif_get.c
*/
int mca_btl_scif_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
/**
* Initiate a put operation.
*
* location: btl_scif_put.c
*/
int mca_btl_scif_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
mca_btl_base_descriptor_t *
mca_btl_scif_alloc(struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
uint8_t order, size_t size, uint32_t flags);
int mca_btl_scif_progress_send_wait_list (struct mca_btl_base_endpoint_t *endpoint);
/* forward declaration: the per-endpoint handle below points back at its registration */
struct mca_btl_scif_reg_t;
/* Registration handle of the scif BTL; its size is reported through
 * btl_registration_handle_size in btl_scif_component_register(). */
struct mca_btl_base_registration_handle_t {
/** scif offset (value returned by scif_register(); -1 while not registered) */
off_t scif_offset;
/** base address of this scif region */
uintptr_t scif_base;
};
/* Per-endpoint handle: the BTL-visible handle plus a pointer to the owning
 * rcache registration. */
struct mca_btl_scif_registration_handle_t {
mca_btl_base_registration_handle_t btl_handle;
/* registration this handle belongs to (set in scif_reg_mem()) */
struct mca_btl_scif_reg_t *reg;
};
typedef struct mca_btl_scif_registration_handle_t mca_btl_scif_registration_handle_t;
/* rcache registration extended with one handle per endpoint. */
typedef struct mca_btl_scif_reg_t {
mca_rcache_base_registration_t base;
/** per-endpoint btl handles for this registration (array of
 * mca_btl_scif_module.endpoint_count entries, allocated in scif_reg_mem()) */
mca_btl_scif_registration_handle_t *handles;
} mca_btl_scif_reg_t;
/* Global structures */
OPAL_MODULE_DECLSPEC extern mca_btl_scif_component_t mca_btl_scif_component;
OPAL_MODULE_DECLSPEC extern mca_btl_scif_module_t mca_btl_scif_module;
#endif

Просмотреть файл

@ -1,259 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/util/sys_limits.h"
#include "btl_scif.h"
#include "btl_scif_frag.h"
static int mca_btl_scif_setup_rcache (mca_btl_scif_module_t *scif_module);
static void *mca_btl_scif_connect_accept (void *arg);
/**
 * BML->BTL notification of change in the process list.
 *
 * Counts the peers that live on the local host (the calling process itself
 * is excluded), allocates the endpoint array and sets up the rcache and
 * fragment free lists on first use, initializes one endpoint per local peer,
 * marks those peers reachable and finally starts the connection-accept
 * thread if it is not running yet.
 *
 * @param btl       (IN)  BTL module (a mca_btl_scif_module_t)
 * @param nprocs    (IN)  Number of entries in procs / peers
 * @param procs     (IN)  Array of processes
 * @param peers     (OUT) peers[i] receives the endpoint for procs[i], or NULL
 *                        for a process that is not on the local host. The
 *                        entry for the calling process is left untouched.
 * @param reachable (OUT) Bit i is set for every peer reachable by this BTL
 *
 * @return OPAL_SUCCESS on success, OPAL_ERR_OUT_OF_RESOURCE if the endpoint
 *         array cannot be allocated, OPAL_ERROR if the accept thread cannot
 *         be created, or the error returned by a failing helper.
 *
 * NOTE(review): when the endpoint array already exists it is reused as-is,
 * even if this call finds more local peers than the first one did -- confirm
 * that add_procs is only expected to be called once.
 */
int mca_btl_scif_add_procs(struct mca_btl_base_module_t* btl,
                           size_t nprocs,
                           struct opal_proc_t **procs,
                           struct mca_btl_base_endpoint_t **peers,
                           opal_bitmap_t *reachable) {
    mca_btl_scif_module_t *scif_module = (mca_btl_scif_module_t *) btl;
    size_t procs_on_board, i, board_proc;
    opal_proc_t *my_proc = opal_proc_local_get();
    int rc;

    /* determine how many procs are on this board */
    for (i = 0, procs_on_board = 0 ; i < nprocs ; ++i) {
        struct opal_proc_t *opal_proc = procs[i];

        /* scif can only be used with other procs on this board */
        if (my_proc == opal_proc ||
            !OPAL_PROC_ON_LOCAL_HOST(opal_proc->proc_flags)) {
            continue;
        }

        procs_on_board++;
    }

    /* allocate space for the detected peers and setup the rcache */
    if (NULL == scif_module->endpoints) {
        scif_module->endpoints = calloc (procs_on_board, sizeof (mca_btl_base_endpoint_t));
        if (OPAL_UNLIKELY(NULL == scif_module->endpoints)) {
            return OPAL_ERR_OUT_OF_RESOURCE;
        }

        rc = mca_btl_scif_setup_rcache (scif_module);
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
            BTL_ERROR(("btl/scif error setting up rcache or free lists"));
            /* drop the array so a later call does not skip the setup above */
            free (scif_module->endpoints);
            scif_module->endpoints = NULL;
            return rc;
        }
    }

    for (i = 0, board_proc = 0 ; i < nprocs ; ++i) {
        struct opal_proc_t *opal_proc = procs[i];

        if (my_proc == opal_proc) {
            /* no endpoint to ourselves; peers[i] is intentionally not written */
            continue;
        }

        if (!OPAL_PROC_ON_LOCAL_HOST(opal_proc->proc_flags)) {
            /* scif can only be used with procs on this board */
            peers[i] = NULL;
            continue;
        }

        /* Initialize endpoints */
        rc = mca_btl_scif_ep_init (scif_module->endpoints + board_proc, (mca_btl_scif_module_t *) btl, opal_proc);
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
            BTL_ERROR(("btl/scif error initializing endpoint"));
            return rc;
        }

        scif_module->endpoints[board_proc].id = board_proc;

        /* Set the reachable bit */
        rc = opal_bitmap_set_bit (reachable, i);
        if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
            return rc;
        }

        /* Store a reference to this peer */
        peers[i] = scif_module->endpoints + board_proc;

        board_proc++;
    }

    BTL_VERBOSE(("%lu procs on board\n", (unsigned long) procs_on_board));

    scif_module->endpoint_count = procs_on_board;

    if (!mca_btl_scif_module.listening) {
        /* start listening thread */
        rc = pthread_create (&mca_btl_scif_module.listen_thread, NULL, mca_btl_scif_connect_accept, NULL);
        /* pthread_create reports failure with a positive error number */
        if (0 != rc) {
            return OPAL_ERROR;
        }
        mca_btl_scif_module.listening = true;
    }

    return OPAL_SUCCESS;
}
/**
 * Thread body: wait for incoming scif connections on the listening endpoint.
 *
 * Polls mca_btl_scif_module.scif_fd without a timeout. Each incoming
 * connection is handed to mca_btl_scif_ep_connect_start_passive(). The loop
 * ends when the poll fails, reports anything other than SCIF_POLLIN, or when
 * the module's exiting flag is found set on an incoming connection. In that
 * last case the pending connection is accepted and closed right away so the
 * peer's scif_connect() does not time out.
 *
 * @param arg (IN) Unused
 * @return Always NULL
 */
static void *mca_btl_scif_connect_accept (void *arg)
{
    struct scif_pollepd pollepd = {.epd = mca_btl_scif_module.scif_fd, .events = SCIF_POLLIN, .revents = 0};
    int rc;

    (void) arg;

    BTL_VERBOSE(("btl/scif: listening for new connections"));

    /* listen for connections */
    while (1) {
        pollepd.revents = 0;

        rc = scif_poll (&pollepd, 1, -1);
        if (1 != rc || SCIF_POLLIN != pollepd.revents) {
            break;
        }

        if (mca_btl_scif_module.exiting) {
            /* accept the connection so scif_connect() does not timeout */
            struct scif_portID peer;
            scif_epd_t newepd;

            /* newepd is only valid (and only needs closing) if the accept succeeded */
            if (0 == scif_accept(mca_btl_scif_module.scif_fd, &peer, &newepd, SCIF_ACCEPT_SYNC)) {
                scif_close(newepd);
            }
            break;
        }

        rc = mca_btl_scif_ep_connect_start_passive ();
        if (OPAL_SUCCESS != rc) {
            /* keep listening; a single failed handshake is not fatal */
            BTL_VERBOSE(("btl/scif: error accepting scif connection"));
        }
    }

    BTL_VERBOSE(("btl/scif: stopped listening for new connections"));

    return NULL;
}
/**
 * Notification that processes are being removed from the process list.
 *
 * Currently a no-op: none of the arguments are examined.
 *
 * @return OPAL_SUCCESS always
 */
int mca_btl_scif_del_procs (struct mca_btl_base_module_t *btl,
                            size_t nprocs, struct opal_proc_t **procs,
                            struct mca_btl_base_endpoint_t **peers) {
    /* nothing to tear down for now; arguments are intentionally unused */
    (void) btl;
    (void) nprocs;
    (void) procs;
    (void) peers;

    return OPAL_SUCCESS;
}
/**
 * rcache deregistration callback.
 *
 * Unregisters the region [reg->base, reg->bound) from every connected
 * endpoint that holds a valid scif offset for it, then releases the
 * per-endpoint handle array.
 *
 * @param reg_data (IN) Unused
 * @param reg      (IN) Registration to tear down (a mca_btl_scif_reg_t)
 * @return OPAL_SUCCESS always; scif_unregister() results are ignored
 */
static int scif_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg)
{
    mca_btl_scif_reg_t *scif_reg = (mca_btl_scif_reg_t *) reg;
    const uintptr_t region_start = (uintptr_t) reg->base;
    const uintptr_t region_end = (uintptr_t) reg->bound;
    const size_t region_len = (size_t) (region_end - region_start);
    int ep;

    (void) reg_data;

    /* unregister the region from all connected endpoints */
    for (ep = 0 ; ep < (int) mca_btl_scif_module.endpoint_count ; ++ep) {
        if ((off_t) -1 == scif_reg->handles[ep].btl_handle.scif_offset) {
            /* never registered with this endpoint */
            continue;
        }

        if (MCA_BTL_SCIF_EP_STATE_CONNECTED != mca_btl_scif_module.endpoints[ep].state) {
            continue;
        }

        (void) scif_unregister(mca_btl_scif_module.endpoints[ep].scif_epd,
                               scif_reg->handles[ep].btl_handle.scif_offset, region_len);
    }

    free (scif_reg->handles);

    return OPAL_SUCCESS;
}
/**
 * rcache registration callback.
 *
 * Allocates one handle per endpoint for this registration and registers the
 * region [base, base + size) for read/write access with every endpoint that
 * is currently connected. Handles of endpoints that are not connected keep a
 * scif offset of -1.
 *
 * @param reg_data (IN)     Passed through to scif_dereg_mem() on failure
 * @param base     (IN)     Start of the region to register
 * @param size     (IN)     Length of the region in bytes
 * @param reg      (IN/OUT) Registration being set up (a mca_btl_scif_reg_t)
 *
 * @return OPAL_SUCCESS, or OPAL_ERR_OUT_OF_RESOURCE when the handle array
 *         cannot be allocated or a scif_register() call fails. On failure
 *         no handle array is left attached to the registration.
 */
static int scif_reg_mem (void *reg_data, void *base, size_t size,
                         mca_rcache_base_registration_t *reg)
{
    mca_btl_scif_reg_t *scif_reg = (mca_btl_scif_reg_t *)reg;
    int rc = OPAL_SUCCESS;
    unsigned int i;

    scif_reg->handles = calloc (mca_btl_scif_module.endpoint_count, sizeof (scif_reg->handles[0]));
    /* calloc may legitimately return NULL for a zero-element request */
    if (OPAL_UNLIKELY(NULL == scif_reg->handles && 0 != mca_btl_scif_module.endpoint_count)) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* initialize all scif offsets to -1 and initialize the pointer back to the rcache registration */
    for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) {
        scif_reg->handles[i].btl_handle.scif_offset = -1;
        scif_reg->handles[i].btl_handle.scif_base = (uintptr_t) base;
        scif_reg->handles[i].reg = scif_reg;
    }

    /* register the pointer with all connected endpoints */
    for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) {
        if (MCA_BTL_SCIF_EP_STATE_CONNECTED != mca_btl_scif_module.endpoints[i].state) {
            continue;
        }

        scif_reg->handles[i].btl_handle.scif_offset = scif_register (mca_btl_scif_module.endpoints[i].scif_epd,
                                                                     base, size, 0, SCIF_PROT_READ |
                                                                     SCIF_PROT_WRITE, 0);
        if (SCIF_REGISTER_FAILED == scif_reg->handles[i].btl_handle.scif_offset) {
            /* cleanup: undo the registrations made so far and free the handles */
            scif_dereg_mem (reg_data, reg);
            /* scif_dereg_mem() freed the array; do not leave a dangling pointer */
            scif_reg->handles = NULL;
            rc = OPAL_ERR_OUT_OF_RESOURCE;
            break;
        }
    }

    return rc;
}
/**
 * Create the registration cache and the fragment free lists of the module.
 *
 * Creates a "grdma" rcache that uses scif_reg_mem() / scif_dereg_mem() as
 * its (de)registration callbacks, then initializes the DMA and eager
 * fragment free lists using the component's free list settings.
 *
 * @param scif_module (IN/OUT) Module whose rcache, dma_frags and eager_frags
 *                             members are initialized
 *
 * @return OPAL_SUCCESS, OPAL_ERROR if the rcache cannot be created, or the
 *         error returned by opal_free_list_init()
 */
static int mca_btl_scif_setup_rcache (mca_btl_scif_module_t *scif_module)
{
    /* members not named here are zero-initialized rather than left indeterminate */
    mca_rcache_base_resources_t rcache_resources = {
        .cache_name     = "scif",
        .reg_data       = (void *) scif_module,
        .sizeof_reg     = sizeof (mca_btl_scif_reg_t),
        .register_mem   = scif_reg_mem,
        .deregister_mem = scif_dereg_mem,
    };
    int rc;

    /* initialize the grdma rcache */
    scif_module->rcache = mca_rcache_base_module_create ("grdma", scif_module, &rcache_resources);
    if (NULL == scif_module->rcache) {
        BTL_ERROR(("error creating grdma rcache"));
        return OPAL_ERROR;
    }

    /* setup free lists for fragments. dma fragments will be used for
     * rma operations and in-place sends. eager frags will be used for
     * buffered sends. */
    rc = opal_free_list_init (&scif_module->dma_frags,
                              sizeof (mca_btl_scif_dma_frag_t), 64,
                              OBJ_CLASS(mca_btl_scif_dma_frag_t),
                              128, opal_getpagesize (),
                              mca_btl_scif_component.scif_free_list_num,
                              mca_btl_scif_component.scif_free_list_max,
                              mca_btl_scif_component.scif_free_list_inc,
                              NULL, 0, NULL, NULL, NULL);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        /* report this failure the same way as the eager list failure below */
        BTL_ERROR(("error creating dma fragment free list"));
        return rc;
    }

    rc = opal_free_list_init (&scif_module->eager_frags,
                              sizeof (mca_btl_scif_eager_frag_t), 8,
                              OBJ_CLASS(mca_btl_scif_eager_frag_t),
                              128 + scif_module->super.btl_eager_limit, 64,
                              mca_btl_scif_component.scif_free_list_num,
                              mca_btl_scif_component.scif_free_list_max,
                              mca_btl_scif_component.scif_free_list_inc,
                              NULL, 0, NULL, NULL, NULL);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        BTL_ERROR(("error creating eager receive fragment free list"));
        return rc;
    }

    return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,386 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "btl_scif.h"
#include "btl_scif_frag.h"
#include "opal/runtime/opal_params.h"
#include "opal/include/opal/align.h"
#include "opal/memoryhooks/memory.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/mca/base/mca_base_pvar.h"
#include <scif.h>
static int btl_scif_component_register(void);
static int btl_scif_component_open(void);
static int btl_scif_component_close(void);
static mca_btl_base_module_t **mca_btl_scif_component_init(int *, bool, bool);
static int mca_btl_scif_component_progress(void);
/* The scif BTL component instance. Only the embedded base component (the
 * `super` member) is initialized here; the remaining settings receive their
 * defaults in btl_scif_component_register(). */
mca_btl_scif_component_t mca_btl_scif_component = {
{
/* First, the mca_base_component_t struct containing meta information
about the component itself */
.btl_version = {
MCA_BTL_DEFAULT_VERSION("scif"),
.mca_open_component = btl_scif_component_open,
.mca_close_component = btl_scif_component_close,
.mca_register_component_params = btl_scif_component_register,
},
.btl_data = {
.param_field = MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* component entry points: module creation and progress */
.btl_init = mca_btl_scif_component_init,
.btl_progress = mca_btl_scif_component_progress,
}
};
/**
 * Register the MCA variables of the scif component.
 *
 * Each tunable is first assigned its default and then registered, so the
 * value stored in mca_btl_scif_component is the default unless the MCA
 * variable system overrides it. When SCIF_TIMING is defined the timing
 * accumulators and call counters are additionally registered as read-only,
 * continuous performance variables. Finally the module's base BTL
 * parameters are given defaults and handed to the BTL base for registration.
 *
 * Results of the individual registration calls are deliberately discarded.
 *
 * @return OPAL_SUCCESS always
 */
static int btl_scif_component_register(void)
{
(void) mca_base_var_group_component_register(&mca_btl_scif_component.super.btl_version,
"SCIF byte transport layer");
/* fragment free list sizing */
mca_btl_scif_component.scif_free_list_num = 8;
(void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version,
"free_list_num", "Initial fragment free list size",
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_scif_component.scif_free_list_num);
mca_btl_scif_component.scif_free_list_max = 16384;
(void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version,
"free_list_max", "Maximum fragment free list size",
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_scif_component.scif_free_list_max);
mca_btl_scif_component.scif_free_list_inc = 64;
(void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version,
"free_list_inc", "Fragment free list size increment",
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_scif_component.scif_free_list_inc);
/* per-peer memory segment size */
mca_btl_scif_component.segment_size = 8 * 1024;
(void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version,
"segment_size", "Size of memory segment to "
"allocate for each remote process (default: "
"8k)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_scif_component.segment_size);
/* RMA behavior switches */
mca_btl_scif_component.rma_use_cpu = false;
(void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version,
"rma_use_cpu", "Use CPU instead of DMA "
"for RMA copies (default: false)", MCA_BASE_VAR_TYPE_BOOL,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_scif_component.rma_use_cpu);
mca_btl_scif_component.rma_sync = true;
(void) mca_base_component_var_register(&mca_btl_scif_component.super.btl_version,
"rma_sync", "Use synchronous RMA instead of "
"an RMA fence (default: true)", MCA_BASE_VAR_TYPE_BOOL,
NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_scif_component.rma_sync);
#if defined(SCIF_TIMING)
/* performance variables: aggregate times and call counters.
 * NOTE(review): the matching *_time_max members of the component are not
 * registered here -- confirm whether that is intentional. */
mca_btl_scif_component.aquire_buffer_time = 0.0;
(void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
"aquire_buffer_time", "Aggregate time spent "
"aquiring send buffers", OPAL_INFO_LVL_9,
MCA_BASE_PVAR_CLASS_AGGREGATE, MCA_BASE_VAR_TYPE_DOUBLE,
NULL, MCA_BASE_VAR_BIND_NO_OBJECT, MCA_BASE_PVAR_FLAG_READONLY |
MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL, NULL, NULL,
&mca_btl_scif_component.aquire_buffer_time);
mca_btl_scif_component.send_time = 0.0;
(void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
"send_time", "Aggregate time spent writing to "
"send buffers", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE,
MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
NULL, NULL, NULL, &mca_btl_scif_component.send_time);
mca_btl_scif_component.sendi_time = 0.0;
(void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
"sendi_time", "Aggregate time spent writing to "
"send buffers in sendi", OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE,
MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
NULL, NULL, NULL, &mca_btl_scif_component.sendi_time);
mca_btl_scif_component.get_time = 0.0;
(void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
"get_time", "Aggregate time spent in DMA read (scif_readfrom)",
OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE,
MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
NULL, NULL, NULL, &mca_btl_scif_component.get_time);
mca_btl_scif_component.get_count = 0;
(void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
"get_count", "Number of times btl_scif_get was called",
OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_COUNTER,
MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
NULL, NULL, NULL, &mca_btl_scif_component.get_count);
mca_btl_scif_component.put_time = 0.0;
(void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
"put_time", "Aggregate time spent in DMA write (scif_writeto)",
OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_AGGREGATE,
MCA_BASE_VAR_TYPE_DOUBLE, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
NULL, NULL, NULL, &mca_btl_scif_component.put_time);
mca_btl_scif_component.put_count = 0;
(void) mca_base_component_pvar_register(&mca_btl_scif_component.super.btl_version,
"put_count", "Number of times btl_scif_put was called",
OPAL_INFO_LVL_9, MCA_BASE_PVAR_CLASS_COUNTER,
MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
NULL, NULL, NULL, &mca_btl_scif_component.put_count);
#endif
/* defaults for the base BTL module parameters; set before the base
 * registration call at the end of this function */
mca_btl_scif_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH - 1;
mca_btl_scif_module.super.btl_eager_limit = 1 * 1024;
mca_btl_scif_module.super.btl_rndv_eager_limit = 1 * 1024;
mca_btl_scif_module.super.btl_rdma_pipeline_frag_size = 4 * 1024 * 1024;
mca_btl_scif_module.super.btl_max_send_size = 1 * 1024;
mca_btl_scif_module.super.btl_rdma_pipeline_send_length = 1 * 1024;
/* threshold for put */
mca_btl_scif_module.super.btl_min_rdma_pipeline_size = 1 * 1024;
mca_btl_scif_module.super.btl_flags = MCA_BTL_FLAGS_SEND |
MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_SEND_INPLACE;
mca_btl_scif_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
mca_btl_scif_module.super.btl_bandwidth = 50000; /* Mbs */
mca_btl_scif_module.super.btl_latency = 2; /* Microsecs */
/* Call the BTL base to register its MCA params */
mca_btl_base_param_register(&mca_btl_scif_component.super.btl_version,
&mca_btl_scif_module.super);
return OPAL_SUCCESS;
}
/**
 * Component open hook.
 *
 * Nothing needs to be set up at open time, so this always reports success.
 *
 * @return OPAL_SUCCESS
 */
static int btl_scif_component_open(void)
{
    const int status = OPAL_SUCCESS;

    return status;
}
/**
 * Component close hook.
 *
 * Nothing needs to be torn down at close time, so this always reports success.
 *
 * @return OPAL_SUCCESS
 */
static int btl_scif_component_close(void)
{
    const int status = OPAL_SUCCESS;

    return status;
}
/**
 * Choose the leave-pinned settings from the available memory hooks.
 *
 * When both the free and munmap hooks are supported and opal_leave_pinned is
 * still unset (-1), it becomes the logical negation of
 * opal_leave_pinned_pipeline. When either hook is missing, both
 * opal_leave_pinned and opal_leave_pinned_pipeline are forced to 0.
 */
static void mca_btl_scif_autoset_leave_pinned (void)
{
    const int hooks = opal_mem_hooks_support_level ();
    const int needed = OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT;

    if ((hooks & needed) != needed) {
        /* at least one required hook is missing: disable both modes */
        opal_leave_pinned = 0;
        opal_leave_pinned_pipeline = 0;
        return;
    }

    /* only pick a value when leave pinned has not been set explicitly */
    if (-1 == opal_leave_pinned) {
        opal_leave_pinned = !opal_leave_pinned_pipeline;
    }
}
/**
 * Publish this process's scif port id through the modex.
 *
 * The modex structure is zeroed before the port id is copied in, so any
 * bytes that are not part of the port id are sent as zero.
 *
 * @return the status produced by OPAL_MODEX_SEND
 */
static int mca_btl_scif_modex_send (void)
{
    mca_btl_scif_modex_t local_info;
    int status;

    memset (&local_info, 0, sizeof (local_info));
    local_info.port_id = mca_btl_scif_module.port_id;

    OPAL_MODEX_SEND(status, OPAL_PMIX_LOCAL,
                    &mca_btl_scif_component.super.btl_version,
                    &local_info, sizeof (local_info));

    return status;
}
/**
 * Initialize the scif component and return its single BTL module.
 *
 * Clamps the eager limit, sizes the per-peer segment, initializes the module
 * and publishes the local port id through the modex.
 *
 * @param num_btl_modules         (OUT) set to 1 on success; untouched on failure
 * @param enable_progress_threads unused
 * @param enable_mpi_threads      unused
 *
 * @return a calloc'ed one-element array holding the scif module, or NULL if
 *         allocation, module initialization or the modex send fails
 */
static mca_btl_base_module_t **mca_btl_scif_component_init (int *num_btl_modules,
                                                            bool enable_progress_threads,
                                                            bool enable_mpi_threads)
{
    struct mca_btl_base_module_t **modules;
    int status;

    BTL_VERBOSE(("btl/scif initializing"));

    /* NOTE(review): resets the SIGSEGV disposition to the default; the reason
     * is not visible in this file */
    signal (SIGSEGV, SIG_DFL);

    /* the memory hooks are currently needed to determine when registrations
     * are no longer valid. */
    mca_btl_scif_autoset_leave_pinned ();

    /* cap the eager limit at 32k */
    if (mca_btl_scif_module.super.btl_eager_limit > 32768) {
        mca_btl_scif_module.super.btl_eager_limit = 32768;
    }

    /* grow the segment so it is at least four eager limits in size */
    if (mca_btl_scif_component.segment_size < 4 * mca_btl_scif_module.super.btl_eager_limit) {
        mca_btl_scif_component.segment_size = 4 * mca_btl_scif_module.super.btl_eager_limit;
    }

    /* round up to a multiple of 4096 */
    mca_btl_scif_component.segment_size = (mca_btl_scif_component.segment_size + 0xfff) & ~0xfff;

    modules = calloc (1, sizeof (*modules));
    if (OPAL_UNLIKELY(NULL == modules)) {
        BTL_ERROR(("Malloc failed : %s:%d", __FILE__, __LINE__));
        return NULL;
    }

    /* initialize the module */
    status = mca_btl_scif_module_init ();
    if (OPAL_SUCCESS != status) {
        BTL_VERBOSE(("btl/scif error initializing module"));
        free (modules);
        return NULL;
    }

    modules[0] = &mca_btl_scif_module.super;

    mca_btl_scif_module.exiting = false;
    mca_btl_scif_module.listening = false;

    status = mca_btl_scif_modex_send ();
    if (OPAL_SUCCESS != status) {
        BTL_VERBOSE(("btl/scif error sending modex"));
        free (modules);
        return NULL;
    }

    *num_btl_modules = 1;

    BTL_VERBOSE(("btl/scif done initializing modules"));

    return modules;
}
/**
 * Process fragments waiting in an endpoint's receive buffer.
 *
 * Starting at the endpoint's current start offset, each fragment header is
 * read, the active-message callback registered for its tag is invoked (a tag
 * of 0xff only skips the rest of the buffer), and the start offset is
 * advanced to the next 64-byte boundary. The loop stops when the start
 * offset reaches the end offset read on entry or after a fixed number of
 * fragments. The final start value is written to recv_buffer.startp[0] so
 * the sender can see where processing stopped.
 *
 * @param ep  endpoint whose receive buffer is polled
 *
 * @return number of loop iterations completed (0 when the buffer is empty)
 */
static int mca_btl_scif_progress_recvs (mca_btl_base_endpoint_t *ep)
{
    /* bit 31 of the start/end values is a flag rather than part of the byte
     * offset. it is built from an unsigned constant because shifting a signed
     * 1 into the sign bit is undefined behavior. */
    const unsigned int flag_bit = 1u << 31;
    /* upper bound on fragments handled per call. changing this value does not
     * appear to have a significant impact on performance */
    const int max_frags = 5;
    const mca_btl_active_message_callback_t *reg;
    unsigned int start = ep->recv_buffer.start;
    unsigned int end = ep->recv_buffer.endp[0];
    mca_btl_scif_base_frag_t frag;
    mca_btl_scif_frag_hdr_t *hdr;
    int frags_per_loop = max_frags;

    /* compared with the flag bit included: nothing new has been written */
    if (end == start) {
        return 0;
    }

    /* strip the flag bit to obtain plain byte offsets */
    end &= ~flag_bit;
    start &= ~flag_bit;

    /* force all prior reads to complete before continuing */
    opal_atomic_rmb ();

    do {
        hdr = (mca_btl_scif_frag_hdr_t *) (ep->recv_buffer.buffer + start);

        /* force all prior reads to complete before continuing */
        MB();

        BTL_VERBOSE(("got frag with header {.tag = %d, .size = %d} from offset %u",
                     hdr->tag, hdr->size, start));
#if defined(SCIF_USE_SEQ)
        if (hdr->seq != ep->seq_expected) {
            break;
        }

        ep->seq_expected++;
#endif

        /* a tag of 0xff is a message to skip the rest of the buffer */
        if (0xff != hdr->tag) {
            reg = mca_btl_base_active_message_trigger + hdr->tag;

            /* fragment fits entirely in the remaining buffer space. some
             * btl users do not handle fragmented data so we can't split
             * the fragment without introducing another copy here. this
             * limitation has not appeared to cause any performance
             * problems. */
            frag.base.des_segment_count = 1;
            frag.segments[0].seg_len = hdr->size;
            frag.segments[0].seg_addr.pval = (void *) (hdr + 1);
            frag.base.des_segments = frag.segments;

            /* call the registered callback function */
            reg->cbfunc(&mca_btl_scif_module.super, hdr->tag, &frag.base, reg->cbdata);
        }

        /* advance past header and payload, rounded up to 64 bytes */
        start = (start + hdr->size + sizeof (*hdr) + 63) & ~63;

        /* skip unusable space at the end of the buffer */
        if (mca_btl_scif_component.segment_size == start) {
            /* wrap to the first usable offset and toggle the flag bit */
            start = 64;
            ep->recv_buffer.start = ((ep->recv_buffer.start & flag_bit) ^ flag_bit) | 64;
        } else {
            ep->recv_buffer.start = (ep->recv_buffer.start & flag_bit) | start;
        }
    } while (start != end && --frags_per_loop);

    /* let the sender know where we stopped */
    ep->recv_buffer.startp[0] = ep->recv_buffer.start;

    /* return the number of fragments processed */
    return max_frags - frags_per_loop;
}
/**
 * Retry fragments that were queued on an endpoint's wait list.
 *
 * @param ep  endpoint to progress
 *
 * @return 0 when the wait list is empty, otherwise the value returned by
 *         mca_btl_scif_progress_send_wait_list()
 */
static int mca_btl_scif_progress_sends (mca_btl_base_endpoint_t *ep)
{
    /* common case: nothing is waiting */
    if (OPAL_LIKELY(0 == opal_list_get_size (&ep->frag_wait_list))) {
        return 0;
    }

    return mca_btl_scif_progress_send_wait_list (ep);
}
/**
 * Component progress function.
 *
 * Walks the module's endpoint array and, for every endpoint in the connected
 * state, polls its receive buffer and then retries its waiting sends.
 *
 * @return sum of the values returned by the per-endpoint progress calls
 */
static int mca_btl_scif_component_progress (void)
{
    unsigned int idx = 0;
    int events = 0;

    while (idx < mca_btl_scif_module.endpoint_count) {
        if (MCA_BTL_SCIF_EP_STATE_CONNECTED == mca_btl_scif_module.endpoints[idx].state) {
            /* poll the endpoint for incoming fragments */
            events += mca_btl_scif_progress_recvs (&mca_btl_scif_module.endpoints[idx]);

            /* if any fragments are waiting try to send them now */
            events += mca_btl_scif_progress_sends (&mca_btl_scif_module.endpoints[idx]);
        }

        ++idx;
    }

    return events;
}

Просмотреть файл

@ -1,301 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "btl_scif.h"
#include "btl_scif_endpoint.h"
#include "opal/mca/memchecker/base/base.h"
#include "opal/util/sys_limits.h"
/**
 * Endpoint constructor.
 *
 * Zeroes every byte of the endpoint that follows the embedded parent object
 * (super), then constructs the lock and the fragment wait list.
 */
static void mca_btl_scif_ep_construct (mca_btl_base_endpoint_t *ep)
{
    const size_t parent_len = sizeof (ep->super);
    char *own_fields = (char *) ep + parent_len;

    /* leave the parent object alone; clear only this class's fields */
    memset (own_fields, 0, sizeof (*ep) - parent_len);

    OBJ_CONSTRUCT(&ep->lock, opal_mutex_t);
    OBJ_CONSTRUCT(&ep->frag_wait_list, opal_list_t);
}
/**
 * Endpoint destructor.
 *
 * Unmaps the send buffer, unregisters and frees the receive buffer, closes
 * the scif endpoint descriptor when it is non-zero, and destructs the lock
 * and the fragment wait list.
 */
static void mca_btl_scif_ep_destruct (mca_btl_base_endpoint_t *ep)
{
    /* the send buffer is a mapping of the peer's segment */
    if (NULL != ep->send_buffer.buffer) {
        scif_munmap (ep->send_buffer.buffer, mca_btl_scif_component.segment_size);
    }

    /* the receive buffer is local memory registered with scif */
    if (NULL != ep->recv_buffer.buffer) {
        scif_unregister (ep->scif_epd, ep->recv_buffer.scif_offset,
                         mca_btl_scif_component.segment_size);
        free (ep->recv_buffer.buffer);
    }

    if (0 != ep->scif_epd) {
        scif_close (ep->scif_epd);
    }

    OBJ_DESTRUCT(&ep->lock);
    OBJ_DESTRUCT(&ep->frag_wait_list);
}
/* Define the endpoint class: its parent is opal_list_item_t and it uses the
 * constructor/destructor pair defined above. */
OBJ_CLASS_INSTANCE(mca_btl_scif_endpoint_t, opal_list_item_t,
mca_btl_scif_ep_construct, mca_btl_scif_ep_destruct);
/**
 * Release an endpoint's receive buffer, if it has one.
 *
 * The buffer is unregistered from scif and freed; the buffer pointer is reset
 * to NULL and the stored scif offset to (off_t) -1.
 */
static void mca_btl_scif_ep_free_buffer (mca_btl_base_endpoint_t *ep)
{
    if (NULL == ep->recv_buffer.buffer) {
        /* nothing was allocated */
        return;
    }

    scif_unregister (ep->scif_epd, ep->recv_buffer.scif_offset,
                     mca_btl_scif_component.segment_size);
    free (ep->recv_buffer.buffer);

    ep->recv_buffer.buffer = NULL;
    ep->recv_buffer.scif_offset = (off_t) -1;
}
/**
 * Allocate and register the receive buffer of an endpoint.
 *
 * A page-aligned buffer of segment_size bytes is allocated, zeroed and
 * registered with scif for reading and writing. The first two 32-bit words
 * of the buffer become the shared start/end indices (startp/endp) and are
 * both initialized to 64.
 *
 * @param ep  endpoint to set up; ep->scif_epd must already be usable
 *
 * @return OPAL_SUCCESS on success, OPAL_ERR_OUT_OF_RESOURCE when the
 *         allocation fails, OPAL_ERROR when scif_register fails (the buffer
 *         is freed again in that case)
 */
static inline int mca_btl_scif_ep_get_buffer (mca_btl_base_endpoint_t *ep) {
    void *segment = NULL;
    int rc;

    /* posix_memalign reports failure with a positive error number and never
     * returns a negative value, so any non-zero result is an error. the
     * endpoint's buffer pointer is only updated on success. */
    rc = posix_memalign (&segment, opal_getpagesize(), mca_btl_scif_component.segment_size);
    if (0 != rc) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    ep->recv_buffer.buffer = segment;

    memset (ep->recv_buffer.buffer, 0, mca_btl_scif_component.segment_size);

    ep->recv_buffer.scif_offset = scif_register (ep->scif_epd, ep->recv_buffer.buffer,
                                                 mca_btl_scif_component.segment_size, 0,
                                                 SCIF_PROT_READ | SCIF_PROT_WRITE, 0);
    if (SCIF_REGISTER_FAILED == ep->recv_buffer.scif_offset) {
        BTL_VERBOSE(("failed to register a scif buffer of size %d. errno = %d",
                     mca_btl_scif_component.segment_size, errno));
        free (ep->recv_buffer.buffer);
        ep->recv_buffer.buffer = NULL;
        return OPAL_ERROR;
    }

    /* the start and end indices live in the first two words of the buffer;
     * offset 64 is the first usable byte */
    ep->recv_buffer.startp = (uint32_t *) ep->recv_buffer.buffer;
    ep->recv_buffer.endp = ep->recv_buffer.startp + 1;

    ep->recv_buffer.startp[0] = ep->recv_buffer.endp[0] = 64;

    BTL_VERBOSE(("allocated buffer of size %d bytes. with scif registration %lu",
                 mca_btl_scif_component.segment_size, (unsigned long) ep->recv_buffer.scif_offset));

    return OPAL_SUCCESS;
}
/**
 * Complete a connection once the scif endpoint descriptor is connected.
 *
 * Allocates and registers the local receive buffer, exchanges registration
 * offsets with the peer (the passive side receives first, the active side
 * sends first), maps the peer's buffer as the local send buffer and marks
 * the endpoint connected.
 *
 * Must be called with the endpoint lock held.
 *
 * @param ep       endpoint being connected
 * @param passive  true when this side accepted the connection
 *
 * @return OPAL_SUCCESS, the error from mca_btl_scif_ep_get_buffer(), or
 *         OPAL_ERROR when the exchange or the mapping fails. On failure the
 *         receive buffer is released and send_buffer.buffer is left NULL.
 */
static int mca_btl_scif_ep_connect_finish (mca_btl_base_endpoint_t *ep, bool passive) {
    void *mapping;
    int rc;

    rc = mca_btl_scif_ep_get_buffer (ep);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        BTL_VERBOSE(("error allocating buffer for scif peer"));
        return rc;
    }

    /* the two sides perform the blocking exchange in opposite order */
    if (passive) {
        rc = scif_recv (ep->scif_epd, &ep->send_buffer.scif_offset,
                        sizeof (ep->send_buffer.scif_offset), SCIF_RECV_BLOCK);
        if (OPAL_LIKELY(-1 != rc)) {
            rc = scif_send (ep->scif_epd, &ep->recv_buffer.scif_offset,
                            sizeof (ep->recv_buffer.scif_offset), SCIF_SEND_BLOCK);
        }
    } else {
        rc = scif_send (ep->scif_epd, &ep->recv_buffer.scif_offset,
                        sizeof (ep->recv_buffer.scif_offset), SCIF_SEND_BLOCK);
        if (OPAL_LIKELY(-1 != rc)) {
            rc = scif_recv (ep->scif_epd, &ep->send_buffer.scif_offset,
                            sizeof (ep->send_buffer.scif_offset), SCIF_RECV_BLOCK);
        }
    }

    if (OPAL_UNLIKELY(-1 == rc)) {
        BTL_VERBOSE(("error exchanging connection data with peer %d", ep->peer_proc->proc_name.vpid));
        mca_btl_scif_ep_free_buffer (ep);
        return OPAL_ERROR;
    }

    BTL_VERBOSE(("remote peer %d has scif offset %lu", ep->peer_proc->proc_name.vpid,
                 (unsigned long) ep->send_buffer.scif_offset));

    mapping = scif_mmap (0, mca_btl_scif_component.segment_size,
                         SCIF_PROT_READ | SCIF_PROT_WRITE,
                         0, ep->scif_epd, ep->send_buffer.scif_offset);
    /* scif_mmap signals failure with SCIF_MMAP_FAILED rather than NULL. the
     * endpoint's pointer is kept NULL on failure so the destructor does not
     * try to unmap an invalid address. */
    if (OPAL_UNLIKELY(SCIF_MMAP_FAILED == mapping || NULL == mapping)) {
        BTL_VERBOSE(("error in scif_mmap"));
        ep->send_buffer.buffer = NULL;
        mca_btl_scif_ep_free_buffer (ep);
        return OPAL_ERROR;
    }

    ep->send_buffer.buffer = mapping;

    opal_memchecker_base_mem_defined (ep->send_buffer.buffer, mca_btl_scif_component.segment_size);

    BTL_VERBOSE(("remote peer %d buffer mapped to local pointer %p", ep->peer_proc->proc_name.vpid,
                 ep->send_buffer.buffer));

    /* setup the circular send buffers */
    ep->send_buffer.start = ep->send_buffer.end = 64;

    ep->send_buffer.startp = (uint32_t *) ep->send_buffer.buffer;
    ep->send_buffer.endp = ep->send_buffer.startp + 1;

    ep->recv_buffer.start = 64;

    /* connection complete */
    ep->state = MCA_BTL_SCIF_EP_STATE_CONNECTED;

    BTL_VERBOSE(("btl/scif connection to remote peer %d established", ep->peer_proc->proc_name.vpid));

    return OPAL_SUCCESS;
}
/**
 * Accept one incoming connection request on the module's listening endpoint.
 *
 * Reads the peer's process name from the new connection, looks up the
 * matching endpoint by vpid and, unless the endpoint is already connected or
 * a competing active connection takes precedence, finishes the connection on
 * the passive side.
 *
 * @return OPAL_SUCCESS when the connection was established or deliberately
 *         rejected, OPAL_ERROR on accept/receive failure or unknown peer,
 *         otherwise the error from mca_btl_scif_ep_connect_finish()
 */
int mca_btl_scif_ep_connect_start_passive (void) {
mca_btl_base_endpoint_t *ep = NULL;
opal_process_name_t remote_name;
struct scif_portID port_id;
unsigned int i;
scif_epd_t epd;
int rc;
/* accept the connection request. if the endpoint is already connecting we
 * may close this endpoint and allow mca_btl_scif_ep_connect_start_active
 * to finish the connection. */
rc = scif_accept (mca_btl_scif_module.scif_fd, &port_id, &epd, SCIF_ACCEPT_SYNC);
if (OPAL_UNLIKELY(0 > rc)) {
BTL_VERBOSE(("error accepting connecton from scif peer. %d", errno));
return OPAL_ERROR;
}
/* determine which peer sent the connection request */
rc = scif_recv (epd, &remote_name, sizeof (remote_name), SCIF_RECV_BLOCK);
if (OPAL_UNLIKELY(-1 == rc)) {
BTL_VERBOSE(("error in scif_recv"));
scif_close (epd);
return OPAL_ERROR;
}
BTL_VERBOSE(("got connection request from vpid %d on port %u on node %u",
remote_name.vpid, port_id.port, port_id.node));
/* linear search for the endpoint whose peer has the sender's vpid */
for (i = 0 ; i < mca_btl_scif_module.endpoint_count ; ++i) {
if (mca_btl_scif_module.endpoints[i].peer_proc->proc_name.vpid ==
remote_name.vpid) {
ep = mca_btl_scif_module.endpoints + i;
break;
}
}
/* peer not found */
if (i == mca_btl_scif_module.endpoint_count) {
BTL_VERBOSE(("remote peer %d unknown", remote_name.vpid));
scif_close (epd);
return OPAL_ERROR;
}
/* simultaneous connections (active side): the request is dropped when the
 * endpoint is already connected, or when it is connecting and the peer's
 * port number is lower than ours. NOTE: this check reads ep->state before
 * the endpoint lock is taken. */
if ((MCA_BTL_SCIF_EP_STATE_CONNECTING == ep->state &&
ep->port_id.port < mca_btl_scif_module.port_id.port) ||
MCA_BTL_SCIF_EP_STATE_CONNECTED == ep->state) {
BTL_VERBOSE(("active connection in progress. connection request from peer %d rejected", remote_name.vpid));
scif_close (epd);
return OPAL_SUCCESS;
}
opal_mutex_lock (&ep->lock);
/* re-check under the lock: the connection may have completed meanwhile */
if (MCA_BTL_SCIF_EP_STATE_CONNECTED == ep->state) {
opal_mutex_unlock (&ep->lock);
scif_close (epd);
return OPAL_SUCCESS;
}
BTL_VERBOSE(("accepted connection from port %d", ep->port_id.port));
ep->state = MCA_BTL_SCIF_EP_STATE_CONNECTING;
ep->scif_epd = epd;
rc = mca_btl_scif_ep_connect_finish (ep, true);
if (OPAL_SUCCESS != rc) {
/* roll the endpoint back so a later attempt can start from scratch */
scif_close (ep->scif_epd);
ep->scif_epd = -1;
ep->state = MCA_BTL_SCIF_EP_STATE_INIT;
}
opal_mutex_unlock (&ep->lock);
return rc;
}
/**
 * Initiate a connection to a peer (active side).
 *
 * Under the endpoint lock: opens a scif endpoint, connects it to the peer's
 * port, sends the local process name and then finishes the connection. If
 * the endpoint is no longer in the INIT state nothing is done and success is
 * returned.
 *
 * @param ep  endpoint to connect
 *
 * @return OPAL_SUCCESS, OPAL_ERR_RESOURCE_BUSY when scif_connect fails,
 *         OPAL_ERROR when scif_open or scif_send fails, or the error from
 *         mca_btl_scif_ep_connect_finish()
 */
static inline int mca_btl_scif_ep_connect_start_active (mca_btl_base_endpoint_t *ep) {
int rc = OPAL_SUCCESS;
BTL_VERBOSE(("initiaiting connection to remote peer %d with port: %u on local scif node: %u",
ep->peer_proc->proc_name.vpid, ep->port_id.port, ep->port_id.node));
opal_mutex_lock (&ep->lock);
/* single-pass loop: "break" jumps to the common cleanup below */
do {
if (MCA_BTL_SCIF_EP_STATE_INIT != ep->state) {
/* the accept thread has already finished this connection */
rc = OPAL_SUCCESS;
break;
}
ep->state = MCA_BTL_SCIF_EP_STATE_CONNECTING;
ep->scif_epd = scif_open ();
if (OPAL_UNLIKELY(SCIF_OPEN_FAILED == ep->scif_epd)) {
BTL_VERBOSE(("error creating new scif endpoint"));
rc = OPAL_ERROR;
break;
}
rc = scif_connect (ep->scif_epd, &ep->port_id);
if (OPAL_UNLIKELY(-1 == rc)) {
/* the connection attempt failed. this could mean the peer is currently
 * processing connections. we will try again later. */
BTL_VERBOSE(("error connecting to scif peer. %d", errno));
rc = OPAL_ERR_RESOURCE_BUSY;
break;
}
/* tell the peer who is connecting */
rc = scif_send (ep->scif_epd, &OPAL_PROC_MY_NAME, sizeof (OPAL_PROC_MY_NAME), SCIF_SEND_BLOCK);
if (OPAL_UNLIKELY(-1 == rc)) {
BTL_VERBOSE(("error in scif_send"));
rc = OPAL_ERROR;
break;
}
/* build connection data */
rc = mca_btl_scif_ep_connect_finish (ep, false);
} while (0);
if (OPAL_SUCCESS != rc) {
/* common failure path: roll the endpoint back to INIT. NOTE: this also
 * runs when scif_open failed, in which case scif_close is called on
 * SCIF_OPEN_FAILED. */
scif_close (ep->scif_epd);
ep->scif_epd = -1;
ep->state = MCA_BTL_SCIF_EP_STATE_INIT;
}
opal_mutex_unlock (&ep->lock);
return rc;
}
/**
 * Make sure a connection to the peer exists or is being established.
 *
 * @param ep  endpoint to connect
 *
 * @return OPAL_SUCCESS when the endpoint is connected,
 *         OPAL_ERR_RESOURCE_BUSY while a connection attempt is in progress,
 *         otherwise the result of starting an active connection
 */
int mca_btl_scif_ep_connect (mca_btl_base_endpoint_t *ep)
{
    /* fast path: already connected */
    if (OPAL_LIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED == ep->state)) {
        return OPAL_SUCCESS;
    }

    /* someone else is already working on this connection */
    if (MCA_BTL_SCIF_EP_STATE_CONNECTING == ep->state) {
        return OPAL_ERR_RESOURCE_BUSY;
    }

    if (MCA_BTL_SCIF_EP_STATE_INIT != ep->state) {
        return OPAL_SUCCESS;
    }

    /* not yet started: this side initiates the connection */
    return mca_btl_scif_ep_connect_start_active (ep);
}

Просмотреть файл

@ -1,110 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_SCIF_ENDPOINT_H
#define MCA_BTL_SCIF_ENDPOINT_H
#include "btl_scif.h"
#include "opal/mca/pmix/pmix.h"
/* Connection state of an endpoint. */
typedef enum mca_btl_scif_endpoint_state_t {
/* no connection attempt is in progress */
MCA_BTL_SCIF_EP_STATE_INIT,
/* a connection attempt (active or passive) is in progress */
MCA_BTL_SCIF_EP_STATE_CONNECTING,
/* buffers have been exchanged and the endpoint can be used */
MCA_BTL_SCIF_EP_STATE_CONNECTED
} mca_btl_scif_endpoint_state_t;
/* One direction (send or receive) of an endpoint's circular buffer. */
typedef struct mca_btl_scif_endpoint_buffer_t {
/* base address of the buffer */
unsigned char *buffer;
/* scif registration offset associated with the buffer */
off_t scif_offset;
/* local copies of the start and end indices */
unsigned int start, end;
/* start and end indices stored in the first two 32-bit words of the buffer */
uint32_t *startp, *endp;
} mca_btl_scif_endpoint_buffer_t;
/* State kept for one peer process. */
typedef struct mca_btl_base_endpoint_t {
/* parent object; must stay first because the constructor zeroes everything
 * that follows it */
opal_list_item_t super;
/* module this endpoint belongs to */
mca_btl_scif_module_t *btl;
/* location in the module endpoints array */
int id;
/* protects the connection setup */
opal_mutex_t lock;
/* scif endpoint */
scif_epd_t scif_epd;
/* connection information */
struct scif_portID port_id;
/* buffer information */
mca_btl_scif_endpoint_buffer_t send_buffer;
mca_btl_scif_endpoint_buffer_t recv_buffer;
/* current connect state */
mca_btl_scif_endpoint_state_t state;
/* frags waiting for resources */
opal_list_t frag_wait_list;
/* associated process */
opal_proc_t *peer_proc;
#if defined(SCIF_USE_SEQ)
/* sequence numbers; only present when SCIF_USE_SEQ is defined */
uint32_t seq_next;
uint32_t seq_expected;
#endif
} mca_btl_base_endpoint_t;
/* the scif endpoint type is the BTL base endpoint type */
typedef mca_btl_base_endpoint_t mca_btl_scif_endpoint_t;
OBJ_CLASS_DECLARATION(mca_btl_scif_endpoint_t);
/* Ensure a connection to the peer exists or is in progress. */
int mca_btl_scif_ep_connect (mca_btl_scif_endpoint_t *ep);
/* Accept one incoming connection request on the listening endpoint. */
int mca_btl_scif_ep_connect_start_passive (void);
/**
 * Construct an endpoint and fill it from the peer's modex data.
 *
 * @param endpoint   storage for the endpoint; constructed by this call
 * @param btl        module the endpoint will belong to
 * @param peer_proc  process the endpoint talks to
 *
 * @return OPAL_SUCCESS, or the status of OPAL_MODEX_RECV when the peer's
 *         data cannot be retrieved (the endpoint stays constructed and in
 *         the INIT state in that case)
 */
static inline int mca_btl_scif_ep_init (mca_btl_scif_endpoint_t *endpoint,
                                        mca_btl_scif_module_t *btl,
                                        opal_proc_t *peer_proc)
{
    mca_btl_scif_modex_t *peer_info;
    size_t info_len;
    int status;

    OBJ_CONSTRUCT(endpoint, mca_btl_scif_endpoint_t);
    endpoint->state = MCA_BTL_SCIF_EP_STATE_INIT;

    /* fetch the port id the peer published */
    OPAL_MODEX_RECV(status, &mca_btl_scif_component.super.btl_version,
                    &peer_proc->proc_name, (void **) &peer_info, &info_len);
    if (OPAL_SUCCESS != status) {
        return status;
    }

    assert (info_len == sizeof (endpoint->port_id));

    endpoint->port_id = peer_info->port_id;
    endpoint->peer_proc = peer_proc;
    endpoint->btl = btl;

#if defined(SCIF_USE_SEQ)
    endpoint->seq_next = 0x00001010;
    endpoint->seq_expected = 0x00001010;
#endif

    /* the modex buffer is owned by the caller of OPAL_MODEX_RECV */
    free (peer_info);

    return OPAL_SUCCESS;
}
/**
 * Destruct an endpoint.
 *
 * @param ep  endpoint to destruct; its storage is not freed here
 *
 * @return OPAL_SUCCESS
 */
static inline int mca_btl_scif_ep_release (mca_btl_scif_endpoint_t *ep)
{
    const int status = OPAL_SUCCESS;

    OBJ_DESTRUCT(ep);

    return status;
}
#endif /* MCA_BTL_SCIF_ENDPOINT_H */

Просмотреть файл

@ -1,31 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "btl_scif.h"
#include "btl_scif_frag.h"
/**
 * Fragment constructor.
 *
 * Zeroes every byte that follows the embedded base descriptor and points the
 * first segment at the fragment's data pointer (base.super.ptr).
 */
static inline void mca_btl_scif_base_frag_constructor (mca_btl_scif_base_frag_t *frag)
{
    const size_t base_len = sizeof (frag->base);
    char *own_fields = (char *) frag + base_len;

    memset (own_fields, 0, sizeof (*frag) - base_len);

    frag->segments[0].seg_addr.pval = frag->base.super.ptr;
}
/**
 * Eager fragment constructor.
 *
 * Performs the same initialization as the base fragment constructor: zeroes
 * everything after the base descriptor and points the first segment at the
 * fragment's data pointer. NOTE(review): the class instances in this file
 * register the base constructor, not this function.
 */
static inline void mca_btl_scif_eager_frag_constructor (mca_btl_scif_base_frag_t *frag)
{
    const size_t base_len = sizeof (frag->base);
    char *own_fields = (char *) frag + base_len;

    memset (own_fields, 0, sizeof (*frag) - base_len);

    frag->segments[0].seg_addr.pval = frag->base.super.ptr;
}
/* Both fragment classes derive from mca_btl_base_descriptor_t, share the base
 * fragment constructor and have no destructor. */
OBJ_CLASS_INSTANCE(mca_btl_scif_eager_frag_t, mca_btl_base_descriptor_t,
mca_btl_scif_base_frag_constructor, NULL);
OBJ_CLASS_INSTANCE(mca_btl_scif_dma_frag_t, mca_btl_base_descriptor_t,
mca_btl_scif_base_frag_constructor, NULL);

Просмотреть файл

@ -1,95 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#if !defined(MCA_BTL_SCIF_FRAG_H)
#define MCA_BTL_SCIF_FRAG_H
#include "btl_scif.h"
#include "btl_scif_endpoint.h"
/* Header that precedes every fragment in a receive buffer. */
typedef struct mca_btl_scif_frag_hdr_t {
#if defined(SCIF_USE_SEQ)
/* sequence number; only present when SCIF_USE_SEQ is defined */
uint32_t seq;
#endif
/* active-message tag; the receiver treats 0xff as "skip the rest of the
 * buffer" */
uint8_t tag;
uint8_t flags;
/* number of payload bytes that follow the header */
uint16_t size;
} mca_btl_scif_frag_hdr_t;
struct mca_btl_scif_base_frag_t;
/* Callback signature taking a fragment and an integer argument. */
typedef void (*frag_cb_t) (struct mca_btl_scif_base_frag_t *, int);
/* Fragment descriptor shared by the eager and DMA fragment classes. */
typedef struct mca_btl_scif_base_frag_t {
/* base descriptor; must stay first because the constructor zeroes
 * everything that follows it */
mca_btl_base_descriptor_t base;
mca_btl_scif_frag_hdr_t hdr;
/* segment 0 refers to the fragment's own buffer; segment 1 is used for
 * in-place user data */
mca_btl_base_segment_t segments[2];
/* endpoint the fragment was allocated for */
mca_btl_base_endpoint_t *endpoint;
/* rcache registration released when the fragment is returned, or NULL */
mca_btl_scif_reg_t *registration;
/* free list the fragment is returned to */
opal_free_list_t *my_list;
} mca_btl_scif_base_frag_t;
/* The DMA and eager fragment types are aliases of the base fragment type;
 * they differ only in the class (and free list) they are allocated from. */
typedef mca_btl_scif_base_frag_t mca_btl_scif_dma_frag_t;
typedef mca_btl_scif_base_frag_t mca_btl_scif_eager_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_scif_dma_frag_t);
OBJ_CLASS_DECLARATION(mca_btl_scif_eager_frag_t);
/**
 * Take a fragment from a free list and bind it to an endpoint.
 *
 * @param ep    endpoint the fragment will be used with
 * @param list  free list to allocate from; remembered for the later return
 * @param frag  (OUT) the fragment, or NULL when the list is exhausted
 *
 * @return OPAL_SUCCESS or OPAL_ERR_OUT_OF_RESOURCE
 */
static inline int mca_btl_scif_frag_alloc (mca_btl_base_endpoint_t *ep,
                                           opal_free_list_t *list,
                                           mca_btl_scif_base_frag_t **frag)
{
    mca_btl_scif_base_frag_t *item = (mca_btl_scif_base_frag_t *) opal_free_list_get (list);

    *frag = item;

    if (OPAL_UNLIKELY(NULL == item)) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    item->my_list = list;
    item->endpoint = ep;

    return OPAL_SUCCESS;
}
/**
 * Give a fragment back to the free list it was allocated from.
 *
 * Any rcache registration attached to the fragment is released first, and
 * the segments are reset to their constructed state.
 *
 * @param frag  fragment to return
 *
 * @return OPAL_SUCCESS
 */
static inline int mca_btl_scif_frag_return (mca_btl_scif_base_frag_t *frag)
{
    if (NULL != frag->registration) {
        mca_btl_scif_module_t *module = frag->endpoint->btl;

        module->rcache->rcache_deregister (module->rcache, &frag->registration->base);
        frag->registration = NULL;
    }

    /* point segment 0 back at the fragment's own buffer and empty both segments */
    frag->segments[0].seg_addr.pval = frag->base.super.ptr;
    frag->segments[0].seg_len = 0;
    frag->segments[1].seg_len = 0;

    opal_free_list_return (frag->my_list, (opal_free_list_item_t *) frag);

    return OPAL_SUCCESS;
}
/**
 * Complete a fragment.
 *
 * Invokes the descriptor's callback when MCA_BTL_DES_SEND_ALWAYS_CALLBACK is
 * set, then returns the fragment to its free list when the BTL owns it
 * (MCA_BTL_DES_FLAGS_BTL_OWNERSHIP). The flags are read again after the
 * callback has run.
 *
 * @param frag  fragment that finished
 * @param rc    status handed to the callback
 */
static inline void mca_btl_scif_frag_complete (mca_btl_scif_base_frag_t *frag, int rc)
{
    BTL_VERBOSE(("frag complete. flags = %d", frag->base.des_flags));

    /* call callback if specified */
    if (0 != (frag->base.des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK)) {
        mca_btl_base_endpoint_t *peer = frag->endpoint;

        frag->base.des_cbfunc (&peer->btl->super, peer, &frag->base, rc);
    }

    if (0 != (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) {
        mca_btl_scif_frag_return (frag);
    }
}
/* Allocate a fragment for endpoint ep from its module's eager free list.
 * frag must be an lvalue; it is set to NULL when the list is exhausted. */
#define MCA_BTL_SCIF_FRAG_ALLOC_EAGER(ep, frag) \
mca_btl_scif_frag_alloc((ep), &(ep)->btl->eager_frags, &(frag))
/* Same as above, but allocating from the module's DMA free list. */
#define MCA_BTL_SCIF_FRAG_ALLOC_DMA(ep, frag) \
mca_btl_scif_frag_alloc((ep), &(ep)->btl->dma_frags, &(frag))
#endif /* MCA_BTL_SCIF_FRAG_H */

Просмотреть файл

@ -1,75 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "btl_scif_frag.h"
#include <sys/time.h>
/* Smaller of a and b. Each argument may be evaluated twice, so do not pass
 * expressions with side effects. */
#define lmin(a,b) ((a) < (b) ? (a) : (b))
/**
 * Initiate a get operation.
 *
 * Reads size bytes from the peer's registered memory into local registered
 * memory with scif_readfrom and waits for the transfer to finish before
 * invoking the completion callback, so the operation is complete when this
 * function returns.
 *
 * @param btl            BTL module, passed through to the callback
 * @param endpoint       connected endpoint to read from
 * @param local_address  destination address inside the local registration
 * @param remote_address source address inside the remote registration
 * @param local_handle   registration handle covering local_address
 * @param remote_handle  registration handle covering remote_address
 * @param size           number of bytes to transfer
 * @param flags          unused
 * @param order          unused
 * @param cbfunc         completion callback; called with OPAL_SUCCESS
 * @param cbcontext      passed through to cbfunc
 * @param cbdata         passed through to cbfunc
 *
 * @return OPAL_SUCCESS after the callback has run, or OPAL_ERROR when the
 *         read or the completion fence fails (the callback is not called)
 */
int mca_btl_scif_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
                      uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
                      mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
                      int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
    int rc, mark = 0, scif_flags = 0;
    off_t roffset, loffset;
#if defined(SCIF_TIMING)
    struct timespec ts;

    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);

    mca_btl_scif_component.get_count++;
#endif

    BTL_VERBOSE(("Using DMA Get from remote address %" PRIx64 " to local address %p",
                 remote_address, local_address));

    /* translate both addresses into offsets within their scif registrations */
    roffset = remote_handle->scif_offset + (off_t)(remote_address - remote_handle->scif_base);
    loffset = local_handle->scif_offset + (off_t)((intptr_t)local_address - local_handle->scif_base);

    if (mca_btl_scif_component.rma_use_cpu) {
        scif_flags = SCIF_RMA_USECPU;
    }

    if (mca_btl_scif_component.rma_sync) {
        scif_flags |= SCIF_RMA_SYNC;
    }

    /* start the read */
    rc = scif_readfrom (endpoint->scif_epd, loffset, size, roffset, scif_flags);
    if (OPAL_UNLIKELY(-1 == rc)) {
        return OPAL_ERROR;
    }

    if (!(scif_flags & SCIF_RMA_SYNC)) {
        /* according to the scif documentation it is better to use a fence rather
         * than using the SCIF_RMA_SYNC flag with scif_readfrom */
        rc = scif_fence_mark (endpoint->scif_epd, SCIF_FENCE_INIT_SELF, &mark);
        if (OPAL_LIKELY(-1 != rc)) {
            /* only wait on a mark that was actually produced */
            rc = scif_fence_wait (endpoint->scif_epd, mark);
        }

        if (OPAL_UNLIKELY(-1 == rc)) {
            /* completion could not be confirmed: do not report success */
            return OPAL_ERROR;
        }
    }

#if defined(SCIF_TIMING)
    SCIF_UPDATE_TIMER(mca_btl_scif_component.get_time,
                      mca_btl_scif_component.get_time_max, ts);
#endif

    /* always call the callback function */
    cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);

    return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,308 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2016 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "btl_scif.h"
#include "btl_scif_frag.h"
#include "btl_scif_endpoint.h"
/* Forward declarations of the file-local functions referenced by the module
 * structure below. */

/* return a descriptor to its free list */
static int
mca_btl_scif_free (struct mca_btl_base_module_t *btl,
mca_btl_base_descriptor_t *des);
/* tear the module down */
static int
mca_btl_scif_module_finalize (struct mca_btl_base_module_t* btl);
/* register a memory region for RDMA with one endpoint */
static mca_btl_base_registration_handle_t *mca_btl_scif_register_mem (struct mca_btl_base_module_t *btl,
mca_btl_base_endpoint_t *endpoint,
void *base, size_t size, uint32_t flags);
/* release a registration obtained from mca_btl_scif_register_mem */
static int mca_btl_scif_deregister_mem (struct mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle);
/* build a send descriptor from a convertor */
static struct mca_btl_base_descriptor_t *
mca_btl_scif_prepare_src (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
struct opal_convertor_t *convertor,
uint8_t order, size_t reserve, size_t *size,
uint32_t flags);
/* The single scif BTL module instance. Only the function table of the base
 * module is initialized statically; every other member starts out zeroed. */
mca_btl_scif_module_t mca_btl_scif_module = {
.super = {
.btl_component = &mca_btl_scif_component.super,
.btl_add_procs = mca_btl_scif_add_procs,
.btl_del_procs = mca_btl_scif_del_procs,
.btl_finalize = mca_btl_scif_module_finalize,
.btl_alloc = mca_btl_scif_alloc,
.btl_free = mca_btl_scif_free,
.btl_prepare_src = mca_btl_scif_prepare_src,
.btl_send = mca_btl_scif_send,
.btl_sendi = mca_btl_scif_sendi,
.btl_put = mca_btl_scif_put,
.btl_get = mca_btl_scif_get,
.btl_register_mem = mca_btl_scif_register_mem,
.btl_deregister_mem = mca_btl_scif_deregister_mem,
}
};
/**
 * Initialize the scif module.
 *
 * Opens the listening scif endpoint, binds it to a port chosen by scif,
 * looks up the local node id, starts listening and constructs the two
 * fragment free lists.
 *
 * @return OPAL_SUCCESS, or OPAL_ERROR when any scif call fails. On failure
 *         the listening endpoint is closed and scif_fd is reset to -1.
 */
int mca_btl_scif_module_init (void)
{
    int rc, port;

    /* create an endpoint to listen for connections */
    mca_btl_scif_module.scif_fd = scif_open ();
    if (-1 == mca_btl_scif_module.scif_fd) {
        BTL_VERBOSE(("scif_open failed. errno = %d", errno));
        return OPAL_ERROR;
    }

    /* bind the endpoint to a port. the result is checked in an int before it
     * is stored: the port member of struct scif_portID is a 16-bit unsigned
     * field, so a stored -1 would no longer compare equal to -1. */
    port = scif_bind (mca_btl_scif_module.scif_fd, 0);
    if (port < 0) {
        BTL_VERBOSE(("scif_bind failed. errno = %d", errno));
        goto fail;
    }

    mca_btl_scif_module.port_id.port = port;

    /* determine this processes node id */
    rc = scif_get_nodeIDs (NULL, 0, &mca_btl_scif_module.port_id.node);
    if (-1 == rc) {
        BTL_VERBOSE(("btl/scif error getting node id of this node"));
        goto fail;
    }

    /* Listen for connections */
    /* TODO - base the maximum backlog off something */
    rc = scif_listen (mca_btl_scif_module.scif_fd, 64);
    if (-1 == rc) {
        BTL_VERBOSE(("scif_listen failed. errno = %d", errno));
        goto fail;
    }

    BTL_VERBOSE(("btl/scif: listening @ port %u on node %u\n",
                 mca_btl_scif_module.port_id.port, mca_btl_scif_module.port_id.node));

    OBJ_CONSTRUCT(&mca_btl_scif_module.dma_frags, opal_free_list_t);
    OBJ_CONSTRUCT(&mca_btl_scif_module.eager_frags, opal_free_list_t);

    return OPAL_SUCCESS;

fail:
    /* common failure path: do not leak the listening endpoint */
    scif_close (mca_btl_scif_module.scif_fd);
    mca_btl_scif_module.scif_fd = -1;

    return OPAL_ERROR;
}
/**
 * Tear down the scif module.
 *
 * Destructs the fragment free lists, releases every endpoint, destroys the
 * registration cache and shuts down the listening endpoint.
 *
 * @param btl  the scif module (cast to mca_btl_scif_module_t)
 *
 * @return OPAL_SUCCESS
 */
static int
mca_btl_scif_module_finalize (struct mca_btl_base_module_t *btl)
{
mca_btl_scif_module_t *scif_module = (mca_btl_scif_module_t *) btl;
unsigned int i;
OBJ_DESTRUCT(&mca_btl_scif_module.dma_frags);
OBJ_DESTRUCT(&mca_btl_scif_module.eager_frags);
/* set before the listen thread is woken up below.
 * NOTE(review): presumably the listen thread checks this flag; the thread
 * body is not visible here. */
mca_btl_scif_module.exiting = true;
/* close all open connections and release endpoints */
if (NULL != scif_module->endpoints) {
for (i = 0 ; i < scif_module->endpoint_count ; ++i) {
mca_btl_scif_ep_release (scif_module->endpoints + i);
}
free (scif_module->endpoints);
scif_module->endpoint_count = 0;
scif_module->endpoints = NULL;
}
if (NULL != scif_module->rcache) {
mca_rcache_base_module_destroy (scif_module->rcache);
scif_module->rcache = NULL;
}
/* close the listening endpoint */
if (mca_btl_scif_module.listening && -1 != mca_btl_scif_module.scif_fd) {
/* wake up the scif thread by connecting to our own listening port, then
 * wait for it to exit. NOTE: the results of scif_open and scif_connect
 * are not checked. */
scif_epd_t tmpfd;
tmpfd = scif_open();
scif_connect (tmpfd, &mca_btl_scif_module.port_id);
pthread_join(mca_btl_scif_module.listen_thread, NULL);
scif_close(tmpfd);
scif_close (mca_btl_scif_module.scif_fd);
}
mca_btl_scif_module.scif_fd = -1;
return OPAL_SUCCESS;
}
/**
 * Allocate a send descriptor with a single segment of the requested size.
 *
 * @param btl       unused
 * @param endpoint  endpoint the descriptor will be sent to
 * @param order     stored in the descriptor
 * @param size      requested segment length; must not exceed the eager limit
 * @param flags     stored in the descriptor
 *
 * @return the descriptor, or NULL when size is larger than the eager limit or
 *         no eager fragment is available
 */
mca_btl_base_descriptor_t *
mca_btl_scif_alloc(struct mca_btl_base_module_t *btl,
                   struct mca_btl_base_endpoint_t *endpoint,
                   uint8_t order, size_t size, uint32_t flags)
{
    mca_btl_scif_base_frag_t *desc = NULL;

    BTL_VERBOSE(("allocating fragment of size: %u", (unsigned int)size));

    /* only eager fragments are handed out by this function */
    if (size > mca_btl_scif_module.super.btl_eager_limit) {
        return NULL;
    }

    (void) MCA_BTL_SCIF_FRAG_ALLOC_EAGER(endpoint, desc);
    if (OPAL_UNLIKELY(NULL == desc)) {
        return NULL;
    }

    BTL_VERBOSE(("btl/scif_module allocated frag of size: %u, flags: %x. frag = %p",
                 (unsigned int)size, flags, (void *) desc));

    desc->segments[0].seg_len = size;

    desc->base.des_segments = desc->segments;
    desc->base.des_segment_count = 1;
    desc->base.des_flags = flags;
    desc->base.order = order;

    return &desc->base;
}
/**
 * Return a descriptor obtained from this BTL to its free list.
 *
 * @param btl  unused
 * @param des  descriptor to release; must be a scif fragment
 *
 * @return the result of mca_btl_scif_frag_return()
 */
static int
mca_btl_scif_free (struct mca_btl_base_module_t *btl,
                   mca_btl_base_descriptor_t *des)
{
    mca_btl_scif_base_frag_t *frag = (mca_btl_scif_base_frag_t *) des;

    return mca_btl_scif_frag_return (frag);
}
/**
 * Register a memory region for RDMA with one endpoint.
 *
 * The region is registered with the registration cache and, if that has not
 * happened yet for this endpoint, with the endpoint's scif descriptor. The
 * endpoint is connected first when necessary.
 *
 * @param btl       unused; the global scif module is used
 * @param endpoint  endpoint the memory will be used with; registering for
 *                  MCA_BTL_ENDPOINT_ANY is not supported
 * @param base      start of the region
 * @param size      length of the region in bytes
 * @param flags     access flags; only the MCA_BTL_REG_FLAG_ACCESS_ANY bits
 *                  are passed on to the registration cache
 *
 * @return the registration handle for this endpoint, or NULL when the
 *         endpoint is not (yet) connected or a registration step fails
 */
static mca_btl_base_registration_handle_t *mca_btl_scif_register_mem (struct mca_btl_base_module_t *btl,
                                                                      mca_btl_base_endpoint_t *endpoint,
                                                                      void *base, size_t size, uint32_t flags)
{
    mca_btl_scif_module_t *scif_module = &mca_btl_scif_module;
    mca_btl_base_registration_handle_t *handle;
    mca_btl_scif_reg_t *scif_reg;
    int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
    int rc;

    if (MCA_BTL_ENDPOINT_ANY == endpoint) {
        /* it probably isn't possible to support registering memory to use with any endpoint so
         * return NULL */
        return NULL;
    }

    if (MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state) {
        /* the endpoint needs to be connected before the fragment can be
         * registered. the result of the attempt is judged by the endpoint
         * state rather than by the return code. */
        (void) mca_btl_scif_ep_connect (endpoint);
        if (MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state) {
            /* not yet connected */
            return NULL;
        }
    }

    rc = scif_module->rcache->rcache_register (scif_module->rcache, base, size, 0, access_flags,
                                               (mca_rcache_base_registration_t **) &scif_reg);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return NULL;
    }

    handle = &scif_reg->handles[endpoint->id].btl_handle;

    /* register the memory location with this peer if it isn't already */
    if ((off_t) -1 == handle->scif_offset) {
        size_t seg_size = (size_t)((uintptr_t) scif_reg->base.bound - (uintptr_t) scif_reg->base.base) + 1;

        /* NTH: until we determine a way to pass permissions to the rcache just make all segments
         * read/write */
        handle->scif_offset = scif_register (endpoint->scif_epd, scif_reg->base.base, seg_size, 0,
                                             SCIF_PROT_READ | SCIF_PROT_WRITE, 0);
        if (OPAL_UNLIKELY((off_t) -1 == handle->scif_offset)) {
            /* scif_register failed: drop the cache reference taken above and
             * do not hand out a handle with an invalid offset */
            BTL_VERBOSE(("failed to register fragment for scif DMA transaction. errno = %d", errno));
            scif_module->rcache->rcache_deregister (scif_module->rcache, &scif_reg->base);
            return NULL;
        }

        BTL_VERBOSE(("registered fragment for scif DMA transaction. offset = %lu",
                     (unsigned long) handle->scif_offset));
    }

    return handle;
}
/**
 * Release a registration obtained from mca_btl_scif_register_mem().
 *
 * @param btl     unused; the global scif module is used
 * @param handle  handle returned by the register call
 *
 * @return OPAL_SUCCESS
 */
static int mca_btl_scif_deregister_mem (struct mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle)
{
    /* the base handle is embedded in the scif handle, which points back at
     * the registration that owns it */
    mca_btl_scif_reg_t *owner = ((mca_btl_scif_registration_handle_t *) handle)->reg;

    mca_btl_scif_module.rcache->rcache_deregister (mca_btl_scif_module.rcache, &owner->base);

    return OPAL_SUCCESS;
}
/**
 * Build a send descriptor from a convertor.
 *
 * When in-place sends are enabled, the convertor needs no intermediate
 * buffers and reserve is at most 128 bytes, a DMA fragment with two segments
 * is produced: segment 0 covers the reserved bytes and segment 1 points at
 * the user's data. Otherwise the data is packed behind the reserved bytes of
 * an eager fragment with a single segment.
 *
 * @param btl        unused
 * @param endpoint   endpoint the descriptor will be sent to
 * @param convertor  source of the user data
 * @param order      stored in the descriptor
 * @param reserve    bytes reserved at the start of segment 0
 * @param size       (IN/OUT) bytes requested; updated to the number packed on
 *                   the buffered path
 * @param flags      stored in the descriptor
 *
 * @return the descriptor, or NULL when no fragment is available or packing
 *         fails
 */
static inline struct mca_btl_base_descriptor_t *
mca_btl_scif_prepare_src_send (struct mca_btl_base_module_t *btl,
                               mca_btl_base_endpoint_t *endpoint,
                               struct opal_convertor_t *convertor,
                               uint8_t order, size_t reserve, size_t *size,
                               uint32_t flags)
{
    mca_btl_scif_base_frag_t *desc = NULL;
    size_t packed = *size;
    uint32_t vec_count = 1;
    struct iovec vec;
    int status;

    if (OPAL_LIKELY((mca_btl_scif_module.super.btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) &&
                    !opal_convertor_need_buffers (convertor) &&
                    reserve <= 128)) {
        /* inplace send: reference the user's buffer directly */
        void *user_buf;

        opal_convertor_get_current_pointer (convertor, &user_buf);

        (void) MCA_BTL_SCIF_FRAG_ALLOC_DMA(endpoint, desc);
        if (OPAL_UNLIKELY(NULL == desc)) {
            return NULL;
        }

        desc->segments[0].seg_len = reserve;

        desc->segments[1].seg_addr.pval = user_buf;
        desc->segments[1].seg_len = *size;

        desc->base.des_segment_count = 2;
    } else {
        /* buffered send: pack the data behind the reserved bytes */
        (void) MCA_BTL_SCIF_FRAG_ALLOC_EAGER(endpoint, desc);
        if (OPAL_UNLIKELY(NULL == desc)) {
            return NULL;
        }

        if (*size) {
            vec.iov_len = *size;
            vec.iov_base = (IOVBASE_TYPE *) ((uintptr_t) desc->segments[0].seg_addr.pval + reserve);

            status = opal_convertor_pack (convertor, &vec, &vec_count, &packed);
            if (OPAL_UNLIKELY(status < 0)) {
                mca_btl_scif_frag_return (desc);
                return NULL;
            }

            /* report how much was actually packed */
            *size = packed;
        }

        desc->segments[0].seg_len = reserve + *size;
        desc->base.des_segment_count = 1;
    }

    desc->base.des_segments = desc->segments;
    desc->base.order = order;
    desc->base.des_flags = flags;

    return &desc->base;
}
/**
 * Prepare a source descriptor.
 *
 * Forwards every argument unchanged to mca_btl_scif_prepare_src_send() and
 * returns whatever that call produces (a descriptor or NULL).
 */
static mca_btl_base_descriptor_t *mca_btl_scif_prepare_src (struct mca_btl_base_module_t *btl,
                                                            mca_btl_base_endpoint_t *endpoint,
                                                            struct opal_convertor_t *convertor,
                                                            uint8_t order, size_t reserve, size_t *size,
                                                            uint32_t flags)
{
    mca_btl_base_descriptor_t *des =
        mca_btl_scif_prepare_src_send (btl, endpoint, convertor, order, reserve, size, flags);

    return des;
}

Просмотреть файл

@ -1,72 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "btl_scif_frag.h"
/* smaller of a and b; the arguments may be evaluated more than once, so
 * avoid passing expressions with side effects */
#define lmin(a,b) ((a) < (b) ? (a) : (b))
/**
 * Initiate a put operation.
 *
 * Translates the local and remote addresses into offsets inside their
 * registered windows and writes size bytes from the local window to the
 * remote one with scif_writeto(). Unless SCIF_RMA_SYNC was requested through
 * the component's rma_sync setting, a fence is marked and waited on before
 * the completion callback is invoked.
 *
 * @param btl             BTL module, passed through to the callback
 * @param endpoint        endpoint whose scif_epd is used for the transfer
 * @param local_address   start of the local source data
 * @param remote_address  start of the remote destination
 * @param local_handle    registration handle covering local_address
 * @param remote_handle   registration handle covering remote_address
 * @param size            number of bytes to write
 * @param flags           not used by this function
 * @param order           not used by this function
 * @param cbfunc          completion callback; called with OPAL_SUCCESS
 *                        before this function returns successfully
 * @param cbcontext       passed through to cbfunc
 * @param cbdata          passed through to cbfunc
 *
 * @return OPAL_SUCCESS once the callback has been called, OPAL_ERROR if the
 *         write or the fence failed (the callback is not called then)
 */
int mca_btl_scif_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
                      uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
                      mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
                      int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
    int rc, mark, scif_flags = 0;
    off_t roffset, loffset;
#if defined(SCIF_TIMING)
    struct timespec ts;

    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);

    /* NOTE(review): puts are accounted under the get_* counters here and
     * below; confirm whether dedicated put counters were intended */
    mca_btl_scif_component.get_count++;
#endif

    BTL_VERBOSE(("Using DMA Put from local address %p to remote address %" PRIx64,
                 local_address, remote_address));

    /* offset inside a registered window = window offset + distance from the
     * window's base address */
    roffset = remote_handle->scif_offset + (off_t)(remote_address - remote_handle->scif_base);
    loffset = local_handle->scif_offset + (off_t)((intptr_t) local_address - local_handle->scif_base);

    if (mca_btl_scif_component.rma_use_cpu) {
        scif_flags = SCIF_RMA_USECPU;
    }

    if (mca_btl_scif_component.rma_sync) {
        scif_flags |= SCIF_RMA_SYNC;
    }

    /* start the write. a put must only write: following it with a
     * scif_readfrom on the same offsets would copy the remote data back over
     * the local buffer and hide the result of the write */
    rc = scif_writeto (endpoint->scif_epd, loffset, size, roffset, scif_flags);
    if (OPAL_UNLIKELY(-1 == rc)) {
        return OPAL_ERROR;
    }

    if (!(scif_flags & SCIF_RMA_SYNC)) {
        /* according to the scif documentation it is better to use a fence
         * rather than using the SCIF_RMA_SYNC flag with the RMA call */
        rc = scif_fence_mark (endpoint->scif_epd, SCIF_FENCE_INIT_SELF, &mark);
        if (OPAL_UNLIKELY(0 != rc)) {
            /* mark was not set; waiting on it would use an indeterminate value */
            return OPAL_ERROR;
        }

        rc = scif_fence_wait (endpoint->scif_epd, mark);
        if (OPAL_UNLIKELY(0 != rc)) {
            return OPAL_ERROR;
        }
    }

#if defined(SCIF_TIMING)
    SCIF_UPDATE_TIMER(mca_btl_scif_component.get_time,
                      mca_btl_scif_component.get_time_max, ts);
#endif

    /* always call the callback function */
    cbfunc (btl, endpoint, local_address, local_handle, cbcontext, cbdata, OPAL_SUCCESS);

    return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,299 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "btl_scif.h"
#include "btl_scif_frag.h"
/*
 * Contiguous space available at offset e of the send buffer.
 *
 * Yields (s) - (e) when s is above e, or when s equals e and hbm is false
 * (which gives 0); otherwise yields the distance from e to
 * mca_btl_scif_component.segment_size. Callers in this file pass the start
 * offset, the end offset and whether their high bits match. Every argument is
 * parenthesized so that expression arguments group as intended; s and e may
 * be evaluated more than once.
 */
#define BUFFER_FREE(s,e,hbm) (((s) > (e) || ((s) == (e) && !(hbm))) ? (s) - (e) : (mca_btl_scif_component.segment_size - (e)))
/**
 * Attempt to reserve a contiguous segment from the remote endpoint.
 *
 * The cached start and end offsets of the endpoint's send buffer each carry a
 * toggle in bit 31; comparing the two toggles tells an empty buffer from a
 * full one when the offsets are equal. If the cached view has no room for
 * size bytes plus a fragment header, the start offset is re-read through
 * send_buffer.startp. If the space left before the end of the segment is
 * still too small, a header with tag 0xff spanning that space is written at
 * the current end offset and the end offset restarts at 64 with its toggle
 * flipped.
 *
 * On success the end offset is advanced past the reservation, rounded up to a
 * multiple of 64; reaching segment_size wraps it to 64 and flips the toggle.
 *
 * @param endpoint  endpoint whose send buffer is used
 * @param size      payload size in bytes (the header size is added here)
 * @param dst       (OUT) start of the reserved space; only set on success
 *
 * @return OPAL_SUCCESS, or OPAL_ERR_OUT_OF_RESOURCE when there is not enough
 *         free space
 */
static inline int mca_btl_scif_send_get_buffer (mca_btl_base_endpoint_t *endpoint, size_t size, unsigned char * restrict *dst)
{
    /* built from an unsigned 1: shifting a signed 1 into bit 31 of a 32-bit
     * int is undefined behavior */
    const unsigned int high_bit = 1u << 31;
    /* the high bit helps determine if the buffer is empty or full */
    bool hbm = (endpoint->send_buffer.start >> 31) == (endpoint->send_buffer.end >> 31);
    const unsigned int segment_size = mca_btl_scif_component.segment_size;
    unsigned int start = endpoint->send_buffer.start & ~high_bit;
    unsigned int end = endpoint->send_buffer.end & ~high_bit;
    unsigned int buffer_free = BUFFER_FREE(start, end, hbm);
#if defined(SCIF_TIMING)
    struct timespec ts;

    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
#endif

    /* need space for the fragment + the header */
    size += sizeof (mca_btl_scif_frag_hdr_t);

    /* check if we need to free up space for this fragment */
    if (OPAL_UNLIKELY(buffer_free < size)) {
        BTL_VERBOSE(("not enough room for a fragment of size %u. in use buffer segment: {start: %x, end: %x, high bit matches: %d}\n",
                     (unsigned) size, start, end, (int) hbm));

        /* read the current start pointer from the remote peer */
        start = endpoint->send_buffer.start = endpoint->send_buffer.startp[0];
        start &= ~high_bit;
        hbm = (endpoint->send_buffer.start >> 31) == (endpoint->send_buffer.end >> 31);
        buffer_free = BUFFER_FREE(start, end, hbm);

        opal_atomic_rmb ();

        /* if this is the end of the buffer. does the fragment fit? */
        if (OPAL_UNLIKELY(buffer_free > 0 && buffer_free < size && start <= end)) {
            /* it does not: write a header with tag 0xff that spans the rest
             * of the segment and continue from the front of the buffer */
            mca_btl_scif_frag_hdr_t hdr;

            hdr.size = buffer_free - sizeof (mca_btl_scif_frag_hdr_t);
            hdr.tag = 0xff;
#if defined(SCIF_USE_SEQ)
            hdr.seq = endpoint->seq_next++;
            ((uint64_t *) (endpoint->send_buffer.buffer + end))[0] = *((uint64_t *) &hdr);
#else
            ((uint32_t *) (endpoint->send_buffer.buffer + end))[0] = *((uint32_t *) &hdr);
#endif

            /* toggle the high bit */
            end = 64;
            endpoint->send_buffer.end = ((endpoint->send_buffer.end & high_bit) ^ high_bit) | end;
            hbm = (endpoint->send_buffer.start >> 31) == (endpoint->send_buffer.end >> 31);
            buffer_free = BUFFER_FREE(start, end, hbm);
        }

        if (OPAL_UNLIKELY(buffer_free < size)) {
#if defined(SCIF_TIMING)
            SCIF_UPDATE_TIMER(mca_btl_scif_component.aquire_buffer_time, mca_btl_scif_component.aquire_buffer_time_max, ts);
#endif
            return OPAL_ERR_OUT_OF_RESOURCE;
        }
    }

    BTL_VERBOSE(("writing fragment of size %u to offset %u {start: %x, end: %x} of peer's buffer. free = %u",
                 (unsigned int) size, end, start, end, buffer_free));

    *dst = endpoint->send_buffer.buffer + end;

    /* align the buffer on a 64 byte boundary */
    end = (end + size + 63) & ~63;

    if (OPAL_UNLIKELY(segment_size == end)) {
        /* reached the end of the segment: wrap to offset 64, flip the toggle */
        endpoint->send_buffer.end = ((endpoint->send_buffer.end & high_bit) ^ high_bit) | 64;
    } else {
        endpoint->send_buffer.end = (endpoint->send_buffer.end & high_bit) | end;
    }

#if defined(SCIF_TIMING)
    SCIF_UPDATE_TIMER(mca_btl_scif_component.aquire_buffer_time, mca_btl_scif_component.aquire_buffer_time_max, ts);
#endif

    return OPAL_SUCCESS;
}
/**
 * Publish the cached end offset of the endpoint's send buffer through
 * send_buffer.endp.
 *
 * When the endpoint's node id equals the local module's, a memory barrier is
 * issued before the store. Otherwise a read through send_buffer.startp
 * precedes the store, and the cached start offset is refreshed from
 * send_buffer.startp afterwards.
 *
 * @param endpoint  endpoint whose end offset is published
 */
static void mark_buffer (struct mca_btl_base_endpoint_t *endpoint)
{
    if (endpoint->port_id.node == mca_btl_scif_module.port_id.node) {
        /* same node id: order earlier stores before publishing the offset */
        MB();
        endpoint->send_buffer.endp[0] = endpoint->send_buffer.end;
        return;
    }

    /* force the PCIe bus to flush by reading from the remote node */
    volatile uint32_t flush_read = endpoint->send_buffer.startp[0];
    (void) flush_read;

    endpoint->send_buffer.endp[0] = endpoint->send_buffer.end;
    endpoint->send_buffer.start = endpoint->send_buffer.startp[0];
}
/**
 * Copy one fragment into the endpoint's send buffer.
 *
 * Space for frag->hdr.size bytes is reserved with
 * mca_btl_scif_send_get_buffer(). Segment 0, and segment 1 when its length is
 * non-zero, are copied behind the header slot; the header itself is stored
 * last as a single 32-bit (64-bit with SCIF_USE_SEQ) word. The fragment is
 * then completed with OPAL_SUCCESS.
 *
 * @param endpoint  destination endpoint
 * @param frag      fragment to send; hdr.size and hdr.tag must already be set
 *
 * @return 1 if the fragment was written and completed,
 *         OPAL_ERR_OUT_OF_RESOURCE if no buffer space could be reserved
 */
static int mca_btl_scif_send_frag (struct mca_btl_base_endpoint_t *endpoint,
                                   mca_btl_scif_base_frag_t *frag)
{
    const size_t hdr_len = sizeof (frag->hdr);
    const size_t frag_size = frag->hdr.size;
    unsigned char * restrict dst;
#if defined(SCIF_TIMING)
    struct timespec ts;
#endif

    BTL_VERBOSE(("btl/scif sending descriptor %p from %d -> %d. length = %" PRIu64, (void *) frag,
                 OPAL_PROC_MY_NAME.vpid, endpoint->peer_proc->proc_name.vpid, frag->segments[0].seg_len));

    if (OPAL_UNLIKELY(OPAL_SUCCESS != mca_btl_scif_send_get_buffer (endpoint, frag_size, &dst))) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

#if defined(SCIF_TIMING)
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
#endif

    /* payload first: segment 0, then the optional segment 1 */
    memcpy (dst + hdr_len, frag->segments[0].seg_addr.pval, frag->segments[0].seg_len);

    if (0 != frag->segments[1].seg_len) {
        memcpy (dst + hdr_len + frag->segments[0].seg_len,
                frag->segments[1].seg_addr.pval,
                frag->segments[1].seg_len);
    }

#if defined(SCIF_USE_SEQ)
    frag->hdr.seq = endpoint->seq_next++;
    /* write the tag to signal the fragment is available */
    ((uint64_t *) dst)[0] = *((uint64_t *) &frag->hdr);
#else
    ((uint32_t *) dst)[0] = *((uint32_t *) &frag->hdr);
#endif

    opal_atomic_wmb ();

#if defined(SCIF_TIMING)
    SCIF_UPDATE_TIMER(mca_btl_scif_component.send_time, mca_btl_scif_component.send_time_max, ts);
#endif

    /* fragment is gone */
    mca_btl_scif_frag_complete (frag, OPAL_SUCCESS);

    return 1;
}
/**
 * Send a prepared descriptor to an endpoint.
 *
 * The fragment header is filled with the tag and the combined length of
 * segments 0 and 1. If the endpoint is not connected a connection attempt is
 * made. When the endpoint is connected and the fragment could be written, the
 * send buffer is marked and 1 is returned. In every other case the
 * descriptor is flagged MCA_BTL_DES_SEND_ALWAYS_CALLBACK and appended to the
 * endpoint's wait list.
 *
 * @param btl         BTL module (not used)
 * @param endpoint    destination endpoint
 * @param descriptor  descriptor to send
 * @param tag         tag stored in the fragment header
 *
 * @return 1 if the fragment was sent immediately, OPAL_SUCCESS if it was
 *         queued
 */
int mca_btl_scif_send (struct mca_btl_base_module_t *btl,
                       struct mca_btl_base_endpoint_t *endpoint,
                       struct mca_btl_base_descriptor_t *descriptor,
                       mca_btl_base_tag_t tag)
{
    mca_btl_scif_base_frag_t *frag = (mca_btl_scif_base_frag_t *) descriptor;
    const size_t frag_len = frag->segments[0].seg_len + frag->segments[1].seg_len;
    bool ready;

    frag->hdr.tag = tag;
    frag->hdr.size = frag_len;

    ready = (MCA_BTL_SCIF_EP_STATE_CONNECTED == endpoint->state);
    if (OPAL_UNLIKELY(!ready)) {
        /* the outcome is judged by the endpoint state, not the return code */
        (void) mca_btl_scif_ep_connect (endpoint);
        ready = (MCA_BTL_SCIF_EP_STATE_CONNECTED == endpoint->state);
    }

    if (OPAL_LIKELY(ready && 1 == mca_btl_scif_send_frag (endpoint, frag))) {
        mark_buffer (endpoint);
        return 1;
    }

    /* the receiver was not ready to handle the fragment. queue up the fragment. */
    descriptor->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
    opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) descriptor);

    return OPAL_SUCCESS;
}
/**
 * Send a header plus optional payload immediately, without a descriptor.
 *
 * Space for header_size + payload_size bytes is reserved in the endpoint's
 * send buffer. The caller's header is copied in, the payload (if any) is
 * packed behind it by the convertor, and the fragment header is stored last
 * as a single word before the buffer is marked.
 *
 * @param btl           BTL module (not used)
 * @param endpoint      destination endpoint
 * @param convertor     convertor describing the payload
 * @param header        caller's header, copied right behind the fragment header
 * @param header_size   size of header in bytes
 * @param payload_size  number of payload bytes to pack
 * @param order         not used by this function
 * @param flags         must not contain MCA_BTL_DES_SEND_ALWAYS_CALLBACK
 * @param tag           tag stored in the fragment header
 * @param descriptor    (OUT) may be NULL; otherwise set to NULL on every
 *                      failure path, since no descriptor is allocated here
 *
 * @return OPAL_SUCCESS if the data was written, OPAL_ERR_RESOURCE_BUSY if the
 *         endpoint is not connected, OPAL_ERR_OUT_OF_RESOURCE if no buffer
 *         space could be reserved
 */
int mca_btl_scif_sendi (struct mca_btl_base_module_t *btl,
                        struct mca_btl_base_endpoint_t *endpoint,
                        struct opal_convertor_t *convertor,
                        void *header, size_t header_size,
                        size_t payload_size, uint8_t order,
                        uint32_t flags, mca_btl_base_tag_t tag,
                        mca_btl_base_descriptor_t **descriptor)
{
    size_t length = (header_size + payload_size);
    unsigned char * restrict base;
    mca_btl_scif_frag_hdr_t hdr;
    /* start from a defined value so the pack call never sees an
     * indeterminate size */
    size_t max_data = payload_size;
    int rc;
#if defined(SCIF_TIMING)
    struct timespec ts;
#endif

    assert (length < mca_btl_scif_module.super.btl_eager_limit);
    assert (0 == (flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK));

    if (OPAL_UNLIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) {
        rc = mca_btl_scif_ep_connect (endpoint);

        if (OPAL_UNLIKELY(MCA_BTL_SCIF_EP_STATE_CONNECTED != endpoint->state)) {
            /* same as the out-of-resource path below: tell the caller that
             * no descriptor is being handed back */
            if (NULL != descriptor) {
                *descriptor = NULL;
            }
            return OPAL_ERR_RESOURCE_BUSY;
        }
    }

    rc = mca_btl_scif_send_get_buffer (endpoint, length, &base);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        if (NULL != descriptor) {
            *descriptor = NULL;
        }
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

#if defined(SCIF_TIMING)
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
#endif

    /* fill in the fragment header; it is stored to the buffer last */
    hdr.size = length;
    hdr.tag = tag;
#if defined(SCIF_USE_SEQ)
    hdr.seq = endpoint->seq_next++;
#endif

    /* write the match header (with MPI comm/tag/etc. info) */
    memcpy (base + sizeof (hdr), header, header_size);

    if (payload_size) {
        uint32_t iov_count = 1;
        struct iovec iov[1];

        iov[0].iov_base = base + sizeof (hdr) + header_size;
        iov[0].iov_len = payload_size;

        /* move the data */
        opal_convertor_pack (convertor, iov, &iov_count, &max_data);

        assert (max_data == payload_size);
    }

#if defined(SCIF_USE_SEQ)
    /* signal the remote side that this fragment is available */
    ((uint64_t *)base)[0] = *((uint64_t *) &hdr);
#else
    ((uint32_t *)base)[0] = *((uint32_t *) &hdr);
#endif

    opal_atomic_wmb ();

    mark_buffer (endpoint);

#if defined(SCIF_TIMING)
    SCIF_UPDATE_TIMER(mca_btl_scif_component.sendi_time, mca_btl_scif_component.sendi_time_max, ts);
#endif

    return OPAL_SUCCESS;
}
/**
 * Try to send the fragments queued on an endpoint's wait list.
 *
 * Fragments are taken from the front of the list one at a time. Processing
 * stops at the first fragment that cannot be sent: on
 * OPAL_ERR_OUT_OF_RESOURCE it is put back at the front of the list, on any
 * other error it is completed with that error. The send buffer is marked
 * afterwards in all cases.
 *
 * @param endpoint  endpoint whose wait list is processed
 *
 * @return OPAL_SUCCESS always
 */
int mca_btl_scif_progress_send_wait_list (mca_btl_base_endpoint_t *endpoint)
{
    for (;;) {
        mca_btl_scif_base_frag_t *pending =
            (mca_btl_scif_base_frag_t *) opal_list_remove_first (&endpoint->frag_wait_list);
        if (NULL == pending) {
            break;
        }

        const int rc = mca_btl_scif_send_frag (endpoint, pending);
        if (OPAL_LIKELY(rc >= OPAL_SUCCESS)) {
            continue;
        }

        if (OPAL_LIKELY(OPAL_ERR_OUT_OF_RESOURCE == rc)) {
            /* still no room: keep the fragment first in line */
            opal_list_prepend (&endpoint->frag_wait_list, (opal_list_item_t *) pending);
        } else {
            mca_btl_scif_frag_complete (pending, rc);
        }

        break;
    }

    mark_buffer (endpoint);

    return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,47 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2015 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_opal_btl_scif_CONFIG([action-if-found], [action-if-not-found])
# ------------------------------------------------------------------
# Decide whether the scif BTL can be built. Unless --without-scif is given,
# scif.h and scif_open in libscif are looked for (in DIR when --with-scif=DIR
# is used). Configure aborts if --with-scif was given but SCIF was not found.
# $1 is expanded when SCIF is usable, $2 otherwise.
AC_DEFUN([MCA_opal_btl_scif_CONFIG],[
    OPAL_VAR_SCOPE_PUSH([opal_btl_scif_happy])
    AC_CONFIG_FILES([opal/mca/btl/scif/Makefile])

    # the help string takes two arguments, the option and its description;
    # both must be inside the same macro call
    AC_ARG_WITH([scif], [AS_HELP_STRING([--with-scif(=DIR)],
                [Build with SCIF, searching for headers in DIR])])
    OPAL_CHECK_WITHDIR([scif], [$with_scif], [include/scif.h])

    opal_btl_scif_happy="no"

    if test "$with_scif" != "no" ; then
        # only an explicit directory is used as the search location
        if test -n "$with_scif" && test "$with_scif" != "yes" ; then
            opal_check_scif_dir=$with_scif
        fi

        OPAL_CHECK_PACKAGE([btl_scif], [scif.h], [scif], [scif_open], [],
                           [$opal_check_scif_dir], [], [opal_btl_scif_happy="yes"], [])

        # an explicit request that cannot be satisfied is fatal
        if test "$opal_btl_scif_happy" != "yes" && test -n "$with_scif" ; then
            AC_MSG_ERROR([SCIF support requested but not found.  Aborting])
        fi
    fi

    AS_IF([test "$opal_btl_scif_happy" = "yes"], [$1], [$2])

    OPAL_SUMMARY_ADD([[Transports]],[[Intel SCIF]],[[btl_scif]],[$opal_btl_scif_happy])

    # substitute in the things needed to build scif
    AC_SUBST([btl_scif_CPPFLAGS])
    AC_SUBST([btl_scif_LDFLAGS])
    AC_SUBST([btl_scif_LIBS])
    OPAL_VAR_SCOPE_POP
])dnl

Просмотреть файл

@ -1,7 +0,0 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner:LANL
status: maintenance