1
1

Add the PCIE BTL. This won't actually work yet - still need to work through issues with system header files, generalize specification of resources, etc. - but it won't build unless specifically directed to do so. Meantime, any more changes that impact these areas of the code base can be reflected here rather than having to be dealt with later.

This commit was SVN r20734.
Этот коммит содержится в:
Ralph Castain 2009-03-05 02:40:25 +00:00
родитель 9215100ac4
Коммит 20b81ff634
29 изменённых файлов: 5544 добавлений и 2 удалений

56
config/ompi_check_pcie.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,56 @@
# -*- shell-script -*-
#
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# OMPI_CHECK_PCIE(prefix, [action-if-found], [action-if-not-found])
# --------------------------------------------------------
# Decide whether PCIE BTL support can be built.
#
# Declares the --with-pcie and --with-pcie-libdir configure options.
# Unless --with-pcie=no was given, the header axon_ioctl.h is probed
# through OMPI_CHECK_PACKAGE, passing prefix as its first argument and
# the directories taken from the two options.  CPPFLAGS, LDFLAGS and LIBS
# are saved on entry and restored once the probe has finished.
#
# Support is switched off, with a warning, when enable_progress_threads
# is "yes".
#
# When support is available, action-if-found is run.  Otherwise configure
# aborts if --with-pcie was given with any value other than "no";
# if it was not, action-if-not-found is run.
#
# NOTE(review): OMPI_CHECK_PACKAGE is called with seven arguments here;
# its definition is not part of this file, so confirm that this matches
# its parameter list.
# NOTE(review): the option help strings mention "QLogic InfiniPath" while
# the header being probed is axon_ioctl.h; confirm the intended wording.
AC_DEFUN([OMPI_CHECK_PCIE],[
AC_ARG_WITH([pcie],
[AC_HELP_STRING([--with-pcie(=DIR)],
[Build PCIE (QLogic InfiniPath PCIE) support, searching for libraries in DIR])])
AC_ARG_WITH([pcie-libdir],
[AC_HELP_STRING([--with-pcie-libdir=DIR],
[Search for PCIE (QLogic InfiniPath PCIE) libraries in DIR])])
ompi_check_pcie_$1_save_CPPFLAGS="$CPPFLAGS"
ompi_check_pcie_$1_save_LDFLAGS="$LDFLAGS"
ompi_check_pcie_$1_save_LIBS="$LIBS"
ompi_check_pcie_happy="yes"
AS_IF([test "$with_pcie" != "no"],
[AS_IF([test ! -z "$with_pcie" -a "$with_pcie" != "yes"],
[ompi_check_pcie_dir="$with_pcie"])
AS_IF([test ! -z "$with_pcie_libdir" -a "$with_pcie_libdir" != "yes"],
[ompi_check_pcie_libdir="$with_pcie_libdir"])
OMPI_CHECK_PACKAGE([$1],
[axon_ioctl.h],
[],
[$ompi_check_pcie_dir],
[$ompi_check_pcie_libdir],
[ompi_check_pcie_happy="yes"],
[ompi_check_pcie_happy="no"])],
[ompi_check_pcie_happy="no"])
CPPFLAGS="$ompi_check_pcie_$1_save_CPPFLAGS"
LDFLAGS="$ompi_check_pcie_$1_save_LDFLAGS"
LIBS="$ompi_check_pcie_$1_save_LIBS"
AS_IF([test "$ompi_check_pcie_happy" = "yes" -a "$enable_progress_threads" = "yes"],
[AC_MSG_WARN([PCIE driver does not currently support progress threads. Disabling BTL.])
ompi_check_pcie_happy="no"])
AS_IF([test "$ompi_check_pcie_happy" = "yes"],
[$2],
[AS_IF([test ! -z "$with_pcie" -a "$with_pcie" != "no"],
[AC_MSG_ERROR([PCIe support requested but not found. Aborting])])
$3])
])

Просмотреть файл

@ -638,9 +638,9 @@ ompi_show_title "Header file tests"
AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h dirent.h \
dlfcn.h execinfo.h err.h fcntl.h grp.h inttypes.h libgen.h \
libutil.h netdb.h netinet/in.h netinet/tcp.h \
libutil.h memory.h netdb.h netinet/in.h netinet/tcp.h \
poll.h pthread.h pty.h pwd.h sched.h stdint.h \
string.h strings.h stropts.h sys/fcntl.h sys/ipc.h \
stdlib.h string.h strings.h stropts.h sys/fcntl.h sys/ipc.h \
sys/ioctl.h sys/mman.h sys/param.h sys/queue.h \
sys/resource.h sys/select.h sys/socket.h sys/sockio.h \
stdarg.h sys/stat.h sys/statvfs.h sys/time.h sys/tree.h \

75
ompi/mca/btl/pcie/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,75 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Preprocessor flags for every source in this directory.  btl_pcie_CPPFLAGS
# is not assigned in this file (presumably substituted by configure - confirm).
AM_CPPFLAGS = $(btl_pcie_CPPFLAGS)
# Help text shown by the component; installed in pkgdatadir and distributed.
dist_pkgdata_DATA = \
help-mpi-btl-pcie.txt
# All source and header files that make up the component.
sources = \
btl_pcie.c \
btl_pcie.h \
btl_pcie_component.c \
btl_pcie_endpoint.c \
btl_pcie_endpoint.h \
btl_pcie_fifo.c \
btl_pcie_fifo.h \
btl_pcie_frag.c \
btl_pcie_frag.h \
btl_pcie_proc.c \
btl_pcie_proc.h \
btl_pcie_lex.c \
btl_pcie_lex.h \
btl_pcie_cfg.c \
btl_pcie_ddriver.h \
btl_pcie_ddriver.c
# The lex input is distributed in addition to btl_pcie_lex.c listed above.
EXTRA_DIST = btl_pcie_lex.l
# Choose the build product: with OMPI_BUILD_btl_pcie_DSO the sources are
# compiled into mca_btl_pcie.la, otherwise into libmca_btl_pcie.la.
# Exactly one of the two variable pairs below is non-empty.
if OMPI_BUILD_btl_pcie_DSO
lib =
lib_sources =
component = mca_btl_pcie.la
component_sources = $(sources)
else
lib = libmca_btl_pcie.la
lib_sources = $(sources)
component =
component_sources =
endif
# mca_btl_pcie.la is installed into pkglibdir.
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component)
mca_btl_pcie_la_SOURCES = $(component_sources)
mca_btl_pcie_la_LDFLAGS = -module -avoid-version $(btl_pcie_LDFLAGS)
mca_btl_pcie_la_LIBADD = $(btl_pcie_LIBS)
# libmca_btl_pcie.la is built but not installed (noinst).
noinst_LTLIBRARIES = $(lib)
libmca_btl_pcie_la_SOURCES = $(lib_sources)
libmca_btl_pcie_la_LDFLAGS= -module -avoid-version $(btl_pcie_LDFLAGS)
libmca_btl_pcie_la_LIBADD = $(btl_pcie_LIBS)
# Resource configuration files, installed into OMPI_SYSCONFDIR.
# NOTE(review): there is no dist_ prefix, so Automake does not add these two
# files to the distribution tarball by default - confirm this is intended.
ompi_sysconfdir = $(OMPI_SYSCONFDIR)
ompi_sysconf_DATA = \
mca-btl-pcie-local-resources.cfg \
mca-btl-pcie-remote-resources.cfg

572
ompi/mca/btl/pcie/btl_pcie.c Обычный файл
Просмотреть файл

@ -0,0 +1,572 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include <sched.h>
#include "opal/types.h"
#include "opal/util/output.h"
#include "opal/util/if.h"
#include "opal/sys/atomic.h"
#include "opal/mca/paffinity/paffinity.h"
#include "ompi/datatype/convertor.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/pml/pml.h"
#include "btl_pcie.h"
#include "btl_pcie_frag.h"
#include "btl_pcie_proc.h"
#include "btl_pcie_endpoint.h"
/*
 * Statically initialized PCIE BTL module.
 *
 * Only the embedded base module is initialized here, positionally: a pointer
 * to the component, ten numeric fields that all start at 0, and then the
 * table of entry points.  Slots holding NULL provide no function.
 * NOTE(review): the numeric fields are presumably filled in later by the
 * component; that code is not visible here - confirm.
 */
mca_btl_pcie_module_t mca_btl_pcie_module = {
{
&mca_btl_pcie_component.super,
0, /* max size of first fragment */
0, /* Threshold below which BTL should not fragment */
0, /* max send fragment size */
0, /* pipeline protocol length */
0, /* max rdma fragment size */
0, /* min packet size for pipeline protocol */
0, /* exclusivity */
0, /* latency */
0, /* bandwidth */
0, /* flags */
mca_btl_pcie_add_procs,
mca_btl_pcie_del_procs,
mca_btl_pcie_register,
mca_btl_pcie_finalize,
mca_btl_pcie_alloc,
mca_btl_pcie_free,
mca_btl_pcie_prepare_src,
mca_btl_pcie_prepare_dst,
mca_btl_pcie_send,
/* NOTE(review): unlabelled slot between send and put - presumably the
   immediate-send entry; confirm against mca_btl_base_module_t */
NULL,
mca_btl_pcie_put, /* put */
NULL, /* get */
NULL, /* dump */
NULL, /* mpool */
NULL, /* register error cb */
NULL /* ft event */
}
};
/**
 * PML->BTL notification that a set of processes has been added.
 *
 * Processes on the local node are skipped.  Every other process is passed
 * to mca_btl_pcie_proc_create(); if that call fails its status is returned
 * immediately.  When it succeeds and hands back a proc structure, the
 * process is flagged in the reachable bitmap and its slot in peers receives
 * the endpoint stored in that proc structure.
 *
 * @param btl        (IN)     BTL module
 * @param nprocs     (IN)     Number of entries in ompi_procs
 * @param ompi_procs (IN)     Processes to examine
 * @param peers      (OUT)    Endpoint for each reachable process
 * @param reachable  (IN/OUT) Bitmap of processes reachable via this BTL
 * @return OMPI_SUCCESS, or the error returned by mca_btl_pcie_proc_create()
 */
int mca_btl_pcie_add_procs(
    struct mca_btl_base_module_t* btl,
    size_t nprocs,
    struct ompi_proc_t **ompi_procs,
    struct mca_btl_base_endpoint_t** peers,
    opal_bitmap_t* reachable)
{
    mca_btl_pcie_module_t* module = (mca_btl_pcie_module_t*) btl;
    const int count = (int) nprocs;
    int idx;

    for (idx = 0; idx < count; ++idx) {
        struct ompi_proc_t* candidate = ompi_procs[idx];
        mca_btl_pcie_proc_t* created;
        int status;

        /* The PCIe link never connects processes sharing our node (or
           ourselves), and the mapper sometimes gets confused by that. */
        if (OPAL_PROC_ON_LOCAL_NODE(candidate->proc_flags)) {
            continue;
        }

        status = mca_btl_pcie_proc_create(candidate, module, &created);
        if (OMPI_SUCCESS != status) {
            return status;
        }
        if (created) {
            opal_bitmap_set_bit(reachable, idx);
            peers[idx] = created->endpoint_proc;
        }
    }

    return OMPI_SUCCESS;
}
/**
 * PML->BTL notification that a set of processes is going away.
 *
 * Currently a stub: no argument is examined and success is always reported.
 *
 * @return OMPI_SUCCESS
 */
int mca_btl_pcie_del_procs(struct mca_btl_base_module_t* btl,
                           size_t nprocs,
                           struct ompi_proc_t **procs,
                           struct mca_btl_base_endpoint_t ** peers)
{
    /* TODO: release whatever add_procs set up for these peers */
    (void) btl;
    (void) nprocs;
    (void) procs;
    (void) peers;

    return OMPI_SUCCESS;
}
/**
 * Record the receive callback for one tag (send/recv semantics).
 *
 * The callback and its user data are stored in the module's pcie_reg table
 * at index tag; any previous registration for that tag is overwritten.
 *
 * @param btl    (IN) BTL module
 * @param tag    (IN) Tag the callback is registered for
 * @param cbfunc (IN) Function stored for this tag
 * @param cbdata (IN) Opaque pointer stored alongside cbfunc
 * @return OMPI_SUCCESS
 */
int mca_btl_pcie_register(
    struct mca_btl_base_module_t* btl,
    mca_btl_base_tag_t tag,
    mca_btl_base_module_recv_cb_fn_t cbfunc,
    void* cbdata)
{
    mca_btl_base_recv_reg_t* slot =
        &((mca_btl_pcie_module_t*) btl)->pcie_reg[tag];

    slot->cbfunc = cbfunc;
    slot->cbdata = cbdata;

    return OMPI_SUCCESS;
}
/**
 * Allocate a descriptor with a segment of the requested size.
 *
 * A fragment is taken from the eager free list when size fits within the
 * eager limit; if that yields nothing (or size is larger) and size fits
 * within the max send size, the max free list is tried.  On success the
 * segment length and header length are set to size and the descriptor flags
 * are cleared.  The endpoint, order and flags arguments are not used.
 *
 * @param btl      (IN) BTL module
 * @param endpoint (IN) BTL peer addressing (unused)
 * @param order    (IN) Requested ordering (unused)
 * @param size     (IN) Requested segment size
 * @param flags    (IN) Descriptor flags (unused)
 * @return The descriptor, or NULL if no fragment could be obtained
 */
mca_btl_base_descriptor_t* mca_btl_pcie_alloc(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    uint8_t order,
    size_t size,
    uint32_t flags)
{
    mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl;
    mca_btl_pcie_frag_t* frag = NULL;
    int rc;

    if (size <= btl->btl_eager_limit) {
        MCA_BTL_PCIE_FRAG_ALLOC_EAGER(pcie_btl, frag, rc);
    }
    if (NULL == frag && size <= btl->btl_max_send_size) {
        MCA_BTL_PCIE_FRAG_ALLOC_MAX(pcie_btl, frag, rc);
    }

    /* Both pools are set up the same way, so do it once. */
    if (NULL != frag) {
        frag->segment.seg_len = size;
        frag->base.des_flags = 0;
        frag->hdr->length = size;
    }

    /* size_t and pointers are cast to types that match the conversions */
    BTL_VERBOSE(("btl_pcie_alloc called for %lu bytes, returning %p",
                 (unsigned long) size, (void*) frag));

    return (mca_btl_base_descriptor_t*) frag;
}
/**
 * Return a descriptor obtained from this BTL.
 *
 * If the fragment carries a registration, it is released through the RDMA
 * mpool and the fragment's registration pointer is cleared before the
 * fragment is handed back via MCA_BTL_PCIE_FRAG_RETURN.
 *
 * @param btl (IN) BTL module
 * @param des (IN) Descriptor to release
 * @return The status produced by MCA_BTL_PCIE_FRAG_RETURN
 */
int mca_btl_pcie_free(
    struct mca_btl_base_module_t* btl,
    mca_btl_base_descriptor_t* des)
{
    mca_btl_pcie_frag_t* frag = (mca_btl_pcie_frag_t*) des;
    mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl;
    int ret;

    /* %p with a void* argument; the previous 0x%lx did not match a pointer */
    BTL_VERBOSE(("btl_pcie_free returning %p", (void*) frag));

    if (frag->registration != NULL) {
        pcie_btl->rdma_mpool->mpool_deregister(pcie_btl->rdma_mpool,
                                               (mca_mpool_base_registration_t*)
                                               frag->registration);
        frag->registration = NULL;
    }

    MCA_BTL_PCIE_FRAG_RETURN(pcie_btl, frag, ret);
    return ret;
}
/**
 * Pack data and return a descriptor that can be used for send/put.
 *
 * Two paths exist:
 *  - If the convertor needs no intermediate buffer and reserve is 0, a DMA
 *    fragment is set up to point directly at the user buffer.  When the
 *    caller supplied no registration, the buffer is registered with the
 *    RDMA mpool and that registration is remembered in the fragment.
 *  - Otherwise the data is packed behind reserve bytes into a fragment from
 *    the eager pool (if max_data + reserve fits the eager limit) or from the
 *    max pool, in which case the amount packed is capped at the fragment
 *    size.
 *
 * @param btl          (IN)     BTL module
 * @param endpoint     (IN)     BTL peer addressing (unused)
 * @param registration (IN)     Existing registration, or NULL
 * @param convertor    (IN)     Data type convertor
 * @param order        (IN)     Requested ordering (unused)
 * @param reserve      (IN)     Bytes the upper layer wants before the data
 * @param size         (IN/OUT) Bytes to prepare / bytes actually prepared
 * @param flags        (IN)     Descriptor flags (unused)
 * @return The descriptor, or NULL if allocation, registration or packing
 *         failed
 */
mca_btl_base_descriptor_t* mca_btl_pcie_prepare_src(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    struct mca_mpool_base_registration_t* registration,
    struct ompi_convertor_t* convertor,
    uint8_t order,
    size_t reserve,
    size_t* size,
    uint32_t flags
)
{
    mca_btl_pcie_frag_t* frag = NULL;
    mca_btl_pcie_reg_t* pcie_reg;
    mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl;
    struct iovec iov;
    uint32_t iov_count = 1;
    size_t max_data = *size;
    int rc;

    BTL_VERBOSE(("btl_pcie_prepare_src called with reserve %lu",
                 (unsigned long) reserve));

    /* check and see if the data is contiguous */
    if (ompi_convertor_need_buffers(convertor) == false && 0 == reserve) {
        MCA_BTL_PCIE_FRAG_ALLOC_DMA(pcie_btl, frag, rc);
        if (NULL == frag) {
            return NULL;
        }

        /* a NULL iov_base makes the pack call report the user buffer's
           address instead of copying */
        iov.iov_len = max_data;
        iov.iov_base = NULL;
        ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
        *size = max_data;

        if (NULL == registration) {
            rc = pcie_btl->rdma_mpool->mpool_register(pcie_btl->rdma_mpool,
                                                      iov.iov_base, max_data,
                                                      0, &registration);
            if (OMPI_SUCCESS != rc || NULL == registration) {
                MCA_BTL_PCIE_FRAG_RETURN(pcie_btl, frag, rc);
                return NULL;
            }
            /* remember it so mca_btl_pcie_free() can deregister */
            frag->registration = (mca_btl_pcie_reg_t*) registration;
        }
        pcie_reg = (mca_btl_pcie_reg_t*) registration;

        frag->base.des_src = &frag->segment;
        frag->base.des_src_cnt = 1;
        frag->base.des_dst = NULL;
        frag->base.des_dst_cnt = 0;
        frag->base.des_flags = 0;
        frag->segment.seg_len = max_data;
        frag->segment.seg_addr.pval = iov.iov_base;
        frag->segment.seg_key.key64 = (uint64_t) pcie_reg->handle;

        BTL_VERBOSE(("prepare_src: frag->segment.seg_len = %lu .seg_addr.pval= %p "
                     "frag->segment.seg_key.key64 = %llu",
                     (unsigned long) frag->segment.seg_len,
                     (void*) frag->segment.seg_addr.pval,
                     (unsigned long long) frag->segment.seg_key.key64));

        return &frag->base;
    }

    /*
     * We are not pinning the data: pack into a bounce fragment.  Use the
     * eager pool when everything fits within the eager limit, otherwise
     * pack as much as fits into a fragment of the max send size.
     */
    if (max_data + reserve <= btl->btl_eager_limit) {
        MCA_BTL_PCIE_FRAG_ALLOC_EAGER(pcie_btl, frag, rc);
        if (NULL == frag) {
            return NULL;
        }
    } else {
        MCA_BTL_PCIE_FRAG_ALLOC_MAX(pcie_btl, frag, rc);
        if (NULL == frag) {
            return NULL;
        }
        if (max_data + reserve > frag->size) {
            max_data = frag->size - reserve;
        }
    }

    iov.iov_len = max_data;
    iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve;
    rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
    *size = max_data;
    if (rc < 0) {
        MCA_BTL_PCIE_FRAG_RETURN(pcie_btl, frag, rc);
        return NULL;
    }

    frag->segment.seg_len = max_data + reserve;
    frag->hdr->length = *size + reserve;
    frag->base.des_src = &frag->segment;
    frag->base.des_src_cnt = 1;
    frag->base.des_dst = NULL;
    frag->base.des_dst_cnt = 0;
    frag->base.des_flags = 0;

    return &frag->base;
}
/**
 * Prepare a descriptor that describes the destination of an RDMA transfer.
 *
 * A DMA fragment is allocated and pointed at the user buffer, computed as
 * the convertor's base buffer plus the datatype's lower bound plus the
 * number of bytes already converted.  When the caller supplied no
 * registration, the buffer is registered with the RDMA mpool and that
 * registration is remembered in the fragment.
 *
 * @param btl          (IN)     BTL module
 * @param endpoint     (IN)     BTL peer addressing (unused)
 * @param registration (IN)     Existing registration, or NULL
 * @param convertor    (IN)     Data type convertor
 * @param order        (IN)     Requested ordering (unused)
 * @param reserve      (IN)     Additional bytes requested by upper layer (unused)
 * @param size         (IN)     Number of bytes to prepare; not modified here
 * @param flags        (IN)     Descriptor flags (unused)
 * @return The descriptor, or NULL if allocation or registration failed
 */
mca_btl_base_descriptor_t* mca_btl_pcie_prepare_dst(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    struct mca_mpool_base_registration_t* registration,
    struct ompi_convertor_t* convertor,
    uint8_t order,
    size_t reserve,
    size_t* size,
    uint32_t flags)
{
    mca_btl_pcie_frag_t* frag = NULL;
    mca_btl_pcie_reg_t* pcie_reg;
    mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl;
    int rc;
    ptrdiff_t lb;

    MCA_BTL_PCIE_FRAG_ALLOC_DMA(pcie_btl, frag, rc);
    if (NULL == frag) {
        return NULL;
    }

    ompi_ddt_type_lb(convertor->pDesc, &lb);
    frag->segment.seg_addr.pval = convertor->pBaseBuf + lb +
        convertor->bConverted;

    if (NULL == registration) {
        rc = pcie_btl->rdma_mpool->mpool_register(pcie_btl->rdma_mpool,
                                                  frag->segment.seg_addr.pval,
                                                  *size, 0,
                                                  &registration);
        if (OMPI_SUCCESS != rc || NULL == registration) {
            MCA_BTL_PCIE_FRAG_RETURN(pcie_btl, frag, rc);
            return NULL;
        }
        /* remember it so mca_btl_pcie_free() can deregister */
        frag->registration = (mca_btl_pcie_reg_t*) registration;
    }
    pcie_reg = (mca_btl_pcie_reg_t*) registration;

    frag->segment.seg_len = *size;
    frag->segment.seg_key.key64 = (uint64_t) pcie_reg->handle;
    frag->base.des_dst = &frag->segment;
    frag->base.des_dst_cnt = 1;
    frag->base.des_src = NULL;
    frag->base.des_src_cnt = 0;
    frag->base.des_flags = 0;

    /* arguments are cast so that they match the conversions used */
    BTL_VERBOSE(("prepare_dst: frag->segment.seg_len = %lu .seg_addr.pval= %p "
                 "frag->segment.seg_key.key64 = %llu",
                 (unsigned long) frag->segment.seg_len,
                 (void*) frag->segment.seg_addr.pval,
                 (unsigned long long) frag->segment.seg_key.key64));

    return &frag->base;
}
/**
 * Initiate a send.
 *
 * The fragment header is stamped with the tag and length, an SMA buffer is
 * taken from the eager or max pool according to the fragment type, the
 * header is converted with OMPI_BTL_PCIE_HEADER_HTON, and header plus
 * payload are copied into the SMA buffer.  The buffer's offset from the
 * endpoint's rem_frag_base, marked as a SEND entry and byte-swapped, is
 * then posted to the endpoint's send FIFO.
 *
 * @param btl        (IN) BTL module
 * @param endpoint   (IN) BTL addressing information
 * @param descriptor (IN) Description of the data to be transferred
 * @param tag        (IN) The tag value used to notify the peer
 * @return OMPI_SUCCESS, OMPI_ERR_OUT_OF_RESOURCE when no SMA buffer is
 *         available, or the error reported by the FIFO.  A busy FIFO
 *         currently aborts the process.
 */
int mca_btl_pcie_send(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    struct mca_btl_base_descriptor_t* descriptor,
    mca_btl_base_tag_t tag)
{
    mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl;
    mca_btl_pcie_frag_t* frag = (mca_btl_pcie_frag_t*) descriptor;
    mca_btl_pcie_sma_buf_t *buf = NULL;
    int rc;
    btl_pcie_fifo_entry_t idx;

    /* setup these fields so they get pulled over in the memcpy */
    frag->hdr->tag = tag;
    frag->hdr->length = frag->segment.seg_len;

    if (frag->type == MCA_BTL_PCIE_TYPE_EAGER) {
        MCA_BTL_PCIE_SMA_BUF_ALLOC_EAGER(pcie_btl, buf, rc);
    } else {
        MCA_BTL_PCIE_SMA_BUF_ALLOC_MAX(pcie_btl, buf, rc);
    }
    /* It is the SMA buffer that may be missing here; frag has already been
       dereferenced above.  Testing frag would let a failed allocation run
       into the memcpy below with a NULL buf. */
    if (NULL == buf) {
        BTL_ERROR(("can't alloc buf for frag of type %d", frag->type));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    frag->endpoint = endpoint;
    frag->sma_buf = buf;

    /* Put fragment into network byte order before copy to save work
       done in sma region */
    OMPI_BTL_PCIE_HEADER_HTON(*frag->hdr);

    /* BWB - FIX ME - both pointers are 16 byte aligned and the
       buffers behind them are a multiple of 16 in length (but
       frag->segment.seg_len might not be).  There might be a more
       optimized memcpy option given that behavior. */
    memcpy(buf->pcie_data.pval, frag->hdr,
           sizeof(mca_btl_pcie_header_t) +
           frag->segment.seg_len);

    /* send the fragment pointer to the receiver,
       who will later ACK it back so that we can return it */
    idx = ((char*) buf->pcie_data.pval) - ((char*) endpoint->rem_frag_base);
    idx |= BTL_PCIE_FIFO_TYPE_SEND;

    /* make sure the top bit is zero */
    assert((idx & BTL_PCIE_FIFO_TYPE_MASK) == BTL_PCIE_FIFO_TYPE_SEND);

    /* need to barrier prior to writing remote completion */
    opal_atomic_wmb();

    /* rc printed here is still the status left by the buffer allocation;
       the FIFO write happens below */
    BTL_VERBOSE(("sent frag %p (offset %llx), tag %d, length %lu, rc = %d",
                 (void*) frag, (unsigned long long) idx, frag->hdr->tag,
                 (unsigned long) frag->segment.seg_len, rc));

    idx = opal_swap_bytes8(idx);
    rc = ompi_btl_pcie_fifo_set_msg(&endpoint->send_fifo, idx);
    if (OMPI_ERR_RESOURCE_BUSY == rc) {
        /* BWB - FIX ME - queue for later */
        abort();
    }

    /* OMPI_SUCCESS or whatever other error the FIFO reported */
    return rc;
}
/**
 * Perform a put (RDMA write) described by a descriptor.
 *
 * Builds a single AXON DMA request of type AXON_DMATYPE_PUT whose local
 * side is taken from the descriptor's source segment and whose remote side
 * is taken from the destination segment, submits it with dd_dma_request()
 * and then spins until the local DMA status word becomes non-zero.  The
 * descriptor's completion callback is invoked with OMPI_SUCCESS before
 * returning, so the call is synchronous as written.
 *
 * A non-zero return from dd_dma_request() aborts the process.
 *
 * @param btl (IN) BTL module
 * @param endpoint (IN) BTL addressing information
 * @param descriptor (IN) Description of the data to be transferred
 * @return OMPI_SUCCESS
 */
int mca_btl_pcie_put(
mca_btl_base_module_t* btl,
mca_btl_base_endpoint_t* endpoint,
mca_btl_base_descriptor_t* descriptor)
{
mca_btl_pcie_frag_t* frag = (mca_btl_pcie_frag_t*) descriptor;
struct AXON_dma_request dma_req;
int dma_reqs_started;
int rc;
volatile uint64_t *dma_status_addr;
uint64_t dma_status;
frag->endpoint = endpoint;
/* start from an all-zero request and fill in only what is needed */
memset(&dma_req,0x00,sizeof(dma_req));
dma_req.dma_type = AXON_DMATYPE_PUT;
/* local side: address and region handle of the source segment */
dma_req.local_descriptor[0].src_address = frag->base.des_src->seg_addr.lval;
dma_req.local_descriptor[0].src_memory_region_handle = frag->base.des_src->seg_key.key64;
/* remote side: address and handle of the destination segment, each passed
   through opal_swap_bytes8() */
dma_req.remote_descriptor[0].src_address =
opal_swap_bytes8(frag->base.des_dst->seg_addr.lval);
dma_req.remote_descriptor[0].src_memory_region_handle =
opal_swap_bytes8(frag->base.des_dst->seg_key.key64);
/* all three size fields carry the source segment length */
dma_req.transfer_size =
dma_req.remote_descriptor[0].transfer_size =
dma_req.local_descriptor[0].transfer_size = frag->base.des_src->seg_len;
/* the status word is addressed as an offset from lcl_sma_ptr */
dma_req.localDmaStatusOffset = endpoint->lcl_dma_status - (char*) endpoint->lcl_sma_ptr;
dma_req.remoteDmaStatusOffset = 0;
dma_req.local_descriptor_count = 1;
dma_req.remote_descriptor_count = 1;
/* clear the status word before submitting so the wait loop below only
   sees a value written after this point */
dma_status_addr = (uint64_t*) endpoint->lcl_dma_status;
*dma_status_addr = 0;
rc = dd_dma_request(&endpoint->pcie_adapter,
&dma_req,
1,
&dma_reqs_started);
/* NOTE(review): dma_reqs_started is never examined */
if (0 != rc) abort();
/* wait for completion, for now anyway */
/* busy-wait; the value read into dma_status is not inspected further */
while (0 == (dma_status = *dma_status_addr)) {
/* sched_yield(); */
}
/* NOTE(review): des_cbfunc is called without a NULL check - confirm callers
   always set it */
frag->base.des_cbfunc(btl, endpoint, &(frag->base), OMPI_SUCCESS);
return OMPI_SUCCESS;
}
/**
 * Get (RDMA read) entry point.
 *
 * Not supported by this BTL: no argument is examined and the call always
 * fails.
 *
 * @param btl        (IN) BTL module
 * @param endpoint   (IN) BTL addressing information
 * @param descriptor (IN) Description of the data to be transferred
 * @return OMPI_ERR_NOT_IMPLEMENTED
 */
int mca_btl_pcie_get(
    mca_btl_base_module_t* btl,
    mca_btl_base_endpoint_t* endpoint,
    mca_btl_base_descriptor_t* descriptor)
{
    (void) btl;
    (void) endpoint;
    (void) descriptor;

    return OMPI_ERR_NOT_IMPLEMENTED;
}
/*
 * Tear down a module: run the destructors of the module lock, the two SMA
 * buffer free lists, the three fragment free lists and the receive
 * fragment, in that order.  Always reports OMPI_SUCCESS.
 */
int mca_btl_pcie_finalize(struct mca_btl_base_module_t* btl)
{
    mca_btl_pcie_module_t* module = (mca_btl_pcie_module_t*) btl;

    /* module lock */
    OBJ_DESTRUCT(&module->pcie_lock);

    /* buffers living in the SMA region */
    OBJ_DESTRUCT(&module->pcie_sma_buf_eager);
    OBJ_DESTRUCT(&module->pcie_sma_buf_max);

    /* fragment free lists */
    OBJ_DESTRUCT(&module->pcie_frag_eager);
    OBJ_DESTRUCT(&module->pcie_frag_max);
    OBJ_DESTRUCT(&module->pcie_frag_dma);

    /* the single receive fragment */
    OBJ_DESTRUCT(&module->pcie_recv_frag);

    return OMPI_SUCCESS;
}

368
ompi/mca/btl/pcie/btl_pcie.h Обычный файл
Просмотреть файл

@ -0,0 +1,368 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_PCIE_H
#define MCA_BTL_PCIE_H
#include "ompi_config.h"
#include <sys/types.h>
#include <string.h>
#include "opal/align.h"
#include "opal/event/event.h"
#include "opal/util/output.h"
#include "opal/class/opal_bitmap.h"
#include "orte/util/proc_info.h"
#include "ompi/class/ompi_free_list.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/mca/pml/pml.h"
#include "btl_pcie_ddriver.h"
#include "btl_pcie_frag.h"
#include "btl_pcie_fifo.h"
BEGIN_C_DECLS
#define MCA_BTL_HAS_MPOOL 1
/**
 * PCIE BTL component.
 *
 * Holds the base component, the configuration values of the component and
 * the component-wide state (module array, proc list, lock).  A single
 * instance, mca_btl_pcie_component, is declared below.
 */
struct mca_btl_pcie_component_t {
/** BTL base component */
mca_btl_base_component_1_0_1_t super;
/* ***** Configuration information ***** */
/** initial size of free lists */
int pcie_free_list_num;
/** maximum size of free lists */
int pcie_free_list_max;
/** number of elements to alloc when growing free lists */
int pcie_free_list_inc;
/** name of send/recv memory pool */
char* pcie_send_mpool_name;
/** name of put/get memory pool */
char *pcie_dma_mpool_name;
/** Number of entries in the send/recv queue structure */
int pcie_recv_queue_len;
/* **** Component data ***** */
/** array of available modules */
struct mca_btl_pcie_module_t *pcie_btls;
/** Number of initialized pcie_btl modules */
uint32_t pcie_num_btls;
/** list of pcie proc structures, created during add_procs */
opal_list_t pcie_procs;
/** lock for accessing component state */
opal_mutex_t pcie_lock;
};
typedef struct mca_btl_pcie_component_t mca_btl_pcie_component_t;
/** The one component instance; it is declared here and defined elsewhere */
OMPI_MODULE_DECLSPEC extern mca_btl_pcie_component_t mca_btl_pcie_component;
/**
 * BTL Module Interface
 *
 * Per-device state of the PCIE BTL: the base module, the receive callback
 * table, the free lists of buffers and fragments, and the memory pools.
 */
struct mca_btl_pcie_module_t {
mca_btl_base_module_t super; /**< base BTL interface */
/* NOTE(review): presumably set once the device is usable - confirm */
bool active;
/** receive callbacks, indexed by tag */
mca_btl_base_recv_reg_t pcie_reg[MCA_BTL_TAG_MAX];
/** name of the pcie device */
char *lcl_dev_name;
/** Free list of communication buffers in the SMA region */
ompi_free_list_t pcie_sma_buf_eager;
ompi_free_list_t pcie_sma_buf_max;
/** Free list of bounce fragments, normal user memory */
ompi_free_list_t pcie_frag_eager;
ompi_free_list_t pcie_frag_max;
/* free list of DMA fragments */
ompi_free_list_t pcie_frag_dma;
/* single receive fragment to handle upcalls on message reception.
This will need to be a free list if multiple receive callbacks
could be triggered at the same time, which will happen if the
code goes MT hot. */
mca_btl_pcie_frag_recv_t pcie_recv_frag;
/* lock for accessing module state */
opal_mutex_t pcie_lock;
/* mpool for allocating the members of pcie_sma_buf* */
struct mca_mpool_base_module_t* pcie_mpool;
/* mpool for RDMA pinning */
struct mca_mpool_base_module_t* rdma_mpool;
/* Endpoint associated with this module (there's a one-to-one
mapping of modules and endpoints, since a device can only
handle one endpoint at a time) */
struct mca_btl_base_endpoint_t* endpoint;
};
typedef struct mca_btl_pcie_module_t mca_btl_pcie_module_t;
/** Module instance; it is declared here and defined elsewhere */
extern mca_btl_pcie_module_t mca_btl_pcie_module;
/**
 * Memory registration used by this BTL: the generic mpool registration
 * followed by the AXON memory region handle.  Placing the base structure
 * first allows casting between the two pointer types.
 */
struct mca_btl_pcie_reg_t {
mca_mpool_base_registration_t base;
AXON_memory_region_handle handle;
};
typedef struct mca_btl_pcie_reg_t mca_btl_pcie_reg_t;
/**
 * Host name and device name pair.
 * NOTE(review): judging by its name this is what gets published through the
 * modex; the code doing so is not visible here - confirm.
 */
struct mca_btl_pcie_modex_info_t {
char hostname[ORTE_MAX_HOSTNAME_SIZE];
char devicename[OMPI_PATH_MAX];
};
typedef struct mca_btl_pcie_modex_info_t mca_btl_pcie_modex_info_t;
/* Byte-order conversion hooks for the structure above.  Both expand to
   nothing; the structure consists of character arrays only. */
#define MCA_BTL_PCIE_MODEX_INFO_HTON(h)
#define MCA_BTL_PCIE_MODEX_INFO_NTOH(h)
/**
 * Register PCIE component parameters with the MCA framework
 */
extern int mca_btl_pcie_component_open(void);
/**
 * Any final cleanup before being unloaded.
 */
extern int mca_btl_pcie_component_close(void);
/**
 * PCIE component initialization.
 *
 * @param num_btl_modules (OUT) Number of BTLs returned in BTL array.
 * @param allow_multi_user_threads (IN) Flag indicating whether multiple user threads are allowed
 * @param have_hidden_threads (IN) Flag indicating whether hidden threads are in use
 *
 * NOTE(review): the two flags were previously documented as OUT, but both
 * are passed by value; confirm the intended meaning against the BTL
 * framework documentation.
 */
extern mca_btl_base_module_t** mca_btl_pcie_component_init(
int *num_btl_modules,
bool allow_multi_user_threads,
bool have_hidden_threads
);
/**
 * PCIE component progress.
 */
extern int mca_btl_pcie_component_progress(void);
/**
 * Cleanup any resources held by the BTL.
 *
 * @param btl BTL instance.
 * @return OMPI_SUCCESS or error status on failure.
 */
extern int mca_btl_pcie_finalize(
struct mca_btl_base_module_t* btl
);
/**
 * PML->BTL notification of change in the process list.
 *
 * @param btl (IN) BTL instance
 * @param nprocs (IN) Number of processes
 * @param procs (IN) Set of processes
 * @param peers (OUT) Set of (optional) peer addressing info.
 * @param reachable (IN/OUT) Bitmap of the processes that are reachable via this BTL.
 * @return OMPI_SUCCESS or error status on failure.
 *
 */
extern int mca_btl_pcie_add_procs(
struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_btl_base_endpoint_t** peers,
opal_bitmap_t* reachable
);
/**
 * PML->BTL notification of change in the process list.
 *
 * @param btl (IN) BTL instance
 * @param nprocs (IN) Number of processes.
 * @param procs (IN) Set of processes.
 * @param peers (IN) Set of peer data structures.
 * @return Status indicating if cleanup was successful
 *
 */
extern int mca_btl_pcie_del_procs(
struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_btl_base_endpoint_t** peers
);
/**
 * Initiate an asynchronous send.
 *
 * @param btl (IN) BTL module
 * @param btl_peer (IN) BTL addressing information
 * @param descriptor (IN) Description of the data to be transferred
 * @param tag (IN) The tag value used to notify the peer.
 */
extern int mca_btl_pcie_send(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* descriptor,
mca_btl_base_tag_t tag
);
/**
 * Initiate an asynchronous put.
 *
 * @param btl (IN) BTL module
 * @param btl_peer (IN) BTL addressing information
 * @param decriptor (IN) Description of the data to be transferred
 *                       (the name is spelled as in the prototype below)
 */
extern int mca_btl_pcie_put(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* decriptor
);
/**
 * Initiate an asynchronous get.
 *
 * @param btl (IN) BTL module
 * @param btl_peer (IN) BTL addressing information
 * @param decriptor (IN) Description of the data to be transferred
 *                       (the name is spelled as in the prototype below)
 */
extern int mca_btl_pcie_get(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* decriptor
);
/**
 * Register a callback function that is called on receipt
 * of a fragment.
 *
 * @param btl (IN) BTL module
 * @param tag (IN) Tag the callback is registered for
 * @param cbfunc (IN) Callback function
 * @param cbdata (IN) Opaque pointer kept together with the callback
 * @return Status indicating if registration was successful
 *
 */
extern int mca_btl_pcie_register(
struct mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_module_recv_cb_fn_t cbfunc,
void* cbdata);
/**
 * Allocate a descriptor with a segment of the requested size.
 * Note that the BTL layer may choose to return a smaller size
 * if it cannot support the request.
 *
 * @param btl (IN) BTL module
 * @param endpoint (IN) BTL peer addressing
 * @param order (IN) Requested ordering
 * @param size (IN) Request segment size.
 * @param flags (IN) Descriptor flags
 */
extern mca_btl_base_descriptor_t* mca_btl_pcie_alloc(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
uint8_t order,
size_t size,
uint32_t flags);
/**
 * Return a segment allocated by this BTL.
 *
 * @param btl (IN) BTL module
 * @param des (IN) Allocated descriptor.
 */
extern int mca_btl_pcie_free(
struct mca_btl_base_module_t* btl,
mca_btl_base_descriptor_t* des);
/**
 * Prepare a descriptor for send/rdma using the supplied
 * convertor. If the convertor references data that is contiguous,
 * the descriptor may simply point to the user buffer. Otherwise,
 * this routine is responsible for allocating buffer space and
 * packing if required.
 *
 * @param btl (IN) BTL module
 * @param endpoint (IN) BTL peer addressing
 * @param registration (IN) Existing memory registration, or NULL
 * @param convertor (IN) Data type convertor
 * @param order (IN) Requested ordering
 * @param reserve (IN) Additional bytes requested by upper layer to precede user data
 * @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT)
 * @param flags (IN) Descriptor flags
 */
mca_btl_base_descriptor_t* mca_btl_pcie_prepare_src(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_mpool_base_registration_t* registration,
struct ompi_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags
);
/**
 * Prepare a descriptor describing the destination buffer of an RDMA
 * operation.  Takes the same parameters as mca_btl_pcie_prepare_src().
 */
extern mca_btl_base_descriptor_t* mca_btl_pcie_prepare_dst(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_mpool_base_registration_t* registration,
struct ompi_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags);
/**
 * Fault Tolerance Event Notification Function
 * @param state Checkpoint State
 * @return OMPI_SUCCESS or failure status
 */
int mca_btl_pcie_ft_event(int state);
/**
 * Look up the local device configured for a host name and core number.
 *
 * @param hostname (IN) Host name to look up
 * @param core (IN) Core number to look up
 * @return Device name string, or NULL.
 *         NOTE(review): presumably heap-allocated and owned by the caller -
 *         confirm against the definition.
 */
char* ompi_btl_pcie_cfg_get_local_device(char* hostname, int core);
/**
 * Look up the device matching a remote host name and remote device.
 *
 * @param remote_hostname (IN) Host name of the peer
 * @param remote_device (IN) Device name on the peer
 * @return Device name string, or NULL.
 *         NOTE(review): presumably heap-allocated and owned by the caller -
 *         confirm against the definition.
 */
char* ompi_btl_pcie_cfg_get_matching_device(char* remote_hostname,
char* remote_device);
END_C_DECLS
#endif /* #ifndef MCA_BTL_PCIE_H */

196
ompi/mca/btl/pcie/btl_pcie_cfg.c Обычный файл
Просмотреть файл

@ -0,0 +1,196 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include "opal/util/output.h"
#include "opal/util/os_path.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/installdirs/installdirs.h"
#include "orte/util/proc_info.h"
#include "orte/util/show_help.h"
#include "btl_pcie.h"
#include "btl_pcie_lex.h"
/* Name of the file most recently handed to parse_file(); the pointer is
   borrowed from the caller, not copied. */
static char *cfg_filename;
/* Scratch buffer released at the end of parse_file().
   NOTE(review): nothing visible in this file ever allocates it - confirm
   whether it is still needed. */
static char *key_buffer = NULL;
static size_t key_buffer_len = 0;
/*
 * Local functions
 */
static char* parse_file(char *filename, bool local, char *key);
/**************************************************************************/
/*
 * Look up the local device configured for "hostname:core".
 *
 * The key is searched in mca-btl-pcie-local-resources.cfg inside the
 * installation's sysconfdir.  Returns whatever parse_file() returns for
 * that key (a strdup()ed string the caller must free, or NULL), and NULL
 * if the file name or the key could not be built.
 */
char *
ompi_btl_pcie_cfg_get_local_device(char* hostname, int core)
{
    char *key = NULL;
    char *ret;
    char *file;

    file = opal_os_path(false,
                        opal_install_dirs.sysconfdir,
                        "mca-btl-pcie-local-resources.cfg",
                        NULL);
    if (NULL == file) {
        return NULL;
    }

    /* on failure asprintf leaves key undefined, so it must not be used */
    if (asprintf(&key, "%s:%d", hostname, core) < 0) {
        free(file);
        return NULL;
    }

    ret = parse_file(file, true, key);

    free(key);
    free(file);
    return ret;
}
/*
 * Look up the local device that is paired with a remote host/device.
 *
 * The key "remote_hostname:remote_device" is searched in
 * mca-btl-pcie-remote-resources.cfg inside the installation's sysconfdir.
 * The entry found must have the form "host:device" with host equal to this
 * process's node name; in that case a freshly allocated copy of the device
 * part is returned (caller frees).  In every other case NULL is returned.
 */
char *
ompi_btl_pcie_cfg_get_matching_device(char* remote_hostname,
                                      char* remote_device)
{
    char *key = NULL;
    char *entry;
    char *sep;
    char *file;
    char *device = NULL;

    file = opal_os_path(false,
                        opal_install_dirs.sysconfdir,
                        "mca-btl-pcie-remote-resources.cfg",
                        NULL);
    if (NULL == file) {
        return NULL;
    }

    /* on failure asprintf leaves key undefined, so it must not be used */
    if (asprintf(&key, "%s:%s", remote_hostname, remote_device) < 0) {
        free(file);
        return NULL;
    }

    entry = parse_file(file, false, key);
    free(file);
    free(key);
    if (NULL == entry) {
        return NULL;
    }

    sep = strchr(entry, ':');
    if (NULL != sep) {
        /* split "host:device" in place and make sure this is my hostname */
        *sep = '\0';
        if (0 == strcmp(orte_process_info.nodename, entry)) {
            device = strdup(sep + 1);
        }
    }

    free(entry);
    return device;
}
/*
 * Parse a single resource file and return the value associated with key.
 *
 * For a local file, entries are HOSTNAME_CORE tokens followed by a DEVICE
 * token; the device of the first entry whose first token equals key is
 * returned.  For a remote file, entries are pairs of HOSTNAME_DEVICE
 * tokens; if either token of a pair equals key, the other one is returned.
 *
 * Scanning stops at the first match, at a token of the wrong kind for the
 * requested file type, at an unknown token, or when the lexer reports that
 * it is done.  An entry whose second token has the wrong type aborts the
 * process.
 *
 * The file is closed on every path out of the scanning loop.
 *
 * Returns a strdup()ed string owned by the caller, or NULL.
 */
static char* parse_file(char *filename, bool local, char* key)
{
    int val;
    bool me;
    bool finished = false;
    char *tmp = NULL;
    char *result = NULL;

    /* Open the file */
    cfg_filename = filename;
    btl_pcie_cfg_yyin = fopen(filename, "r");
    if (NULL == btl_pcie_cfg_yyin) {
        orte_show_help("help-mpi-btl-pcie.txt", "ini file:file not found",
                       true, filename);
        goto cleanup;
    }

    /* Do the parsing */
    btl_pcie_cfg_parse_done = false;
    btl_pcie_cfg_yynewlines = 1;
    btl_pcie_cfg_init_buffer(btl_pcie_cfg_yyin);
    while (!finished && !btl_pcie_cfg_parse_done) {
        val = btl_pcie_cfg_yylex();
        switch (val) {
        case BTL_PCIE_CFG_PARSE_DONE:
            /* This will also set btl_pcie_cfg_parse_done to true, so just
               break here */
            break;

        case BTL_PCIE_CFG_PARSE_NEWLINE:
            /* blank line! ignore it */
            break;

        case BTL_PCIE_CFG_PARSE_HOSTNAME_CORE:
            if (!local) {
                /* wrong kind of entry for a remote file: give up */
                finished = true;
                break;
            }
            me = (0 == strcmp(key, btl_pcie_cfg_yytext));
            val = btl_pcie_cfg_yylex();
            if (BTL_PCIE_CFG_PARSE_DEVICE != val) {
                abort();
            }
            if (me) {
                result = strdup(btl_pcie_cfg_yytext);
                finished = true;
            }
            break;

        case BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE:
            if (local) {
                /* wrong kind of entry for a local file: give up */
                finished = true;
                break;
            }
            me = (0 == strcmp(key, btl_pcie_cfg_yytext));
            if (!me) {
                /* keep the first half in case the second half is the key */
                tmp = strdup(btl_pcie_cfg_yytext);
            }
            val = btl_pcie_cfg_yylex();
            if (BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE != val) {
                abort();
            }
            if (me) {
                result = strdup(btl_pcie_cfg_yytext);
                finished = true;
            } else if (0 == strcmp(key, btl_pcie_cfg_yytext)) {
                result = tmp;
                tmp = NULL;
                finished = true;
            } else {
                free(tmp);
                tmp = NULL;
            }
            break;

        default:
            /* unknown token: give up */
            finished = true;
            break;
        }
    }

    /* reached on every exit from the loop, so the stream is never leaked */
    fclose(btl_pcie_cfg_yyin);

cleanup:
    if (NULL != key_buffer) {
        free(key_buffer);
        key_buffer = NULL;
        key_buffer_len = 0;
    }

    return result;
}

487
ompi/mca/btl/pcie/btl_pcie_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,487 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <sched.h>
#include <ctype.h>
#include "opal/event/event.h"
#include "opal/util/argv.h"
#include "opal/util/if.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/paffinity/paffinity.h"
#include "opal/mca/paffinity/base/base.h"
#include "orte/util/proc_info.h"
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
#include "ompi/constants.h"
#include "ompi/datatype/convertor.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "btl_pcie.h"
#include "btl_pcie_frag.h"
#include "btl_pcie_endpoint.h"
#include "btl_pcie_ddriver.h"
static int pcie_reg_mr(void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg);
static int pcie_dereg_mr(void* reg_data, mca_mpool_base_registration_t *reg);
/*
 * The PCIE BTL component descriptor: name, version, and the
 * open/close/init/progress entry points defined in this file.
 */
mca_btl_pcie_component_t mca_btl_pcie_component = {
{
/* First, the mca_base_component_t struct containing meta information
about the component itself */
{
/* Indicate that we are a BTL v2.0.0 component (which also implies a
specific MCA version) */
MCA_BTL_BASE_VERSION_2_0_0,
"pcie", /* MCA component name */
OMPI_MAJOR_VERSION, /* MCA component major version */
OMPI_MINOR_VERSION, /* MCA component minor version */
OMPI_RELEASE_VERSION, /* MCA component release version */
mca_btl_pcie_component_open, /* component open */
mca_btl_pcie_component_close /* component close */
},
/* Next the MCA v2.0.0 component meta data */
{
/* NOTE(review): meaning of this flag is defined by the MCA metadata
struct, which is not visible here - confirm */
false
},
mca_btl_pcie_component_init, /* component init */
mca_btl_pcie_component_progress, /* component progress */
}
};
/*
 * utility routines for parameter registration
 */

/*
 * Register a string MCA parameter for the PCIE BTL component and
 * return the value reported back by mca_base_param_reg_string().
 *
 * param_name    - parameter name
 * param_desc    - help text for the parameter
 * default_value - value registered as the default
 *
 * Returns the string stored by the registration call, or NULL if the
 * call did not store anything (previously the result was an
 * indeterminate pointer in that case).
 */
static char*
mca_btl_pcie_param_register_string(const char* param_name,
                                   const char* param_desc,
                                   const char* default_value)
{
    /* defined fallback in case the registration call leaves it untouched */
    char *value = NULL;

    mca_base_param_reg_string(&mca_btl_pcie_component.super.btl_version,
                              param_name, param_desc, false, false,
                              default_value, &value);
    return value;
}
/*
 * Register an integer MCA parameter for the PCIE BTL component and
 * return the value reported back by mca_base_param_reg_int().
 *
 * param_name    - parameter name
 * param_desc    - help text for the parameter
 * default_value - value registered as the default
 *
 * If the registration call does not store a value, default_value is
 * returned (previously the result was indeterminate in that case).
 */
static int
mca_btl_pcie_param_register_int(const char* param_name,
                                const char* param_desc,
                                int default_value)
{
    /* fall back to the default if the registration call leaves it untouched */
    int value = default_value;

    mca_base_param_reg_int(&mca_btl_pcie_component.super.btl_version,
                           param_name, param_desc, false, false,
                           default_value, &value);
    return value;
}
/*
 * Register PCIE device found in local config file.  The MCA framework
 * will make this available to all peers.
 *
 * Builds one mca_btl_pcie_modex_info_t per local module (node name and
 * local device name, both truncated and NUL-terminated), converts each
 * record with MCA_BTL_PCIE_MODEX_INFO_HTON, and hands the array to the
 * modex send routine.
 *
 * Returns the status of the modex send, or
 * OMPI_ERR_TEMP_OUT_OF_RESOURCE if the temporary array cannot be
 * allocated.  The temporary array is released before returning; it was
 * previously leaked.
 * NOTE(review): freeing here relies on the modex send copying the
 * buffer during the call - confirm for the 1.2 code path.
 */
static int
btl_pcie_modex_send(void)
{
    size_t size;
    unsigned int i;
    int rc;
    mca_btl_pcie_modex_info_t *info;

    size = mca_btl_pcie_component.pcie_num_btls *
        sizeof(mca_btl_pcie_modex_info_t);
    info = malloc(size);
    if (NULL == info) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;

    for (i = 0 ; i < mca_btl_pcie_component.pcie_num_btls ; ++i) {
        /* strncpy does not terminate on truncation, so terminate by hand */
        strncpy(info[i].hostname,
                orte_process_info.nodename,
                ORTE_MAX_HOSTNAME_SIZE - 1);
        info[i].hostname[ORTE_MAX_HOSTNAME_SIZE - 1] = '\0';
        strncpy(info[i].devicename,
                mca_btl_pcie_component.pcie_btls[i].lcl_dev_name,
                OMPI_PATH_MAX - 1);
        info[i].devicename[OMPI_PATH_MAX - 1] = '\0';
        MCA_BTL_PCIE_MODEX_INFO_HTON(info[i]);
    }

#if (OMPI_MAJOR_VERSION <= 1) && (OMPI_MINOR_VERSION <= 2)
    rc = mca_pml_base_modex_send(&mca_btl_pcie_component.super.btl_version, info, size);
#else
    rc = ompi_modex_send(&mca_btl_pcie_component.super.btl_version, info, size);
#endif

    free(info);
    return rc;
}
/*
* Called by MCA framework to open the component, registers
* component parameters.
*
* Resets the component's module bookkeeping, constructs the proc list,
* and registers every tunable.  Free-list, mpool-name and queue-length
* settings are stored on the component; exclusivity, size limits and
* flags are stored on the mca_btl_pcie_module template.
*
* Always returns OMPI_SUCCESS.
*/
int
mca_btl_pcie_component_open(void)
{
/* initialize state */
mca_btl_pcie_component.pcie_num_btls = 0;
mca_btl_pcie_component.pcie_btls = NULL;
/* initialize objects */
OBJ_CONSTRUCT(&mca_btl_pcie_component.pcie_procs, opal_list_t);
/* component parameters */
mca_btl_pcie_component.pcie_free_list_num =
mca_btl_pcie_param_register_int ("free_list_num",
"Initial size of free lists (must be >= 1)",
16);
/* BWB - FIX ME - The need to limit the free list max size is an
artifact of the lack of flow control in the BTL. Since we're
already using bounce fragments, it should be possible to make
this unlimited, and then properly handle the case where an SMA
region isn't available when send is called on a given frag.
Something similar to what Open IB does when we don't have send
credits would work really well here. See comment in
btl_pcie_send() for more information. */
mca_btl_pcie_component.pcie_free_list_max =
mca_btl_pcie_param_register_int ("free_list_max",
"Max size of free lists. "
"free_list_max * (first_frag_size + max_send_size) "
"must be less than (SMA memory size - (recv_queue_len * 4) - 8)",
32);
mca_btl_pcie_component.pcie_free_list_inc =
mca_btl_pcie_param_register_int ("free_list_inc",
"Increment size of free lists (must be >= 1)",
8);
mca_btl_pcie_component.pcie_send_mpool_name =
mca_btl_pcie_param_register_string("send_mpool",
"Name of the memory pool to be used for send messages. "
"(it is unlikely that you will ever want to change this)",
"pcie");
mca_btl_pcie_component.pcie_dma_mpool_name =
mca_btl_pcie_param_register_string("dma_mpool",
"Name of the memory pool to be used for rdma messages. "
"(it is unlikely that you will ever want to change this)",
"rdma");
/* NOTE(review): the help text asks for 4 * free_list_max, but the
defaults here are 256 and 32 (4 * 32 = 128) - confirm which is intended */
mca_btl_pcie_component.pcie_recv_queue_len =
mca_btl_pcie_param_register_int("recv_queue_len",
"Length of receive fifo. Must be 4 * free_list_max",
256);
mca_btl_pcie_module.super.btl_exclusivity =
mca_btl_pcie_param_register_int ("exclusivity",
"Priority of PCIe BTL. (must be > 0)",
MCA_BTL_EXCLUSIVITY_DEFAULT + 1);
/* The three size limits below are registered as total fragment sizes;
the header size is subtracted so the stored limit is payload only.
NOTE(review): a user value smaller than the header size would wrap,
since the subtraction involves sizeof - no range check is done here */
mca_btl_pcie_module.super.btl_eager_limit =
mca_btl_pcie_param_register_int ("first_frag_size",
"Size (in bytes) of the first fragment sent of any "
"message. It is the maximum size of \"short\" messages "
"and the maximum size of the \"phase 1\" fragment sent "
"for all large messages (must be >= 1).",
1*1024) - sizeof(mca_btl_pcie_header_t);
/* NOTE(review): unlike its siblings this parameter name carries a
"btl_" prefix, and its help text describes striping rather than a
rendezvous eager limit - looks like a copy/paste; confirm before
renaming, since the name is user-visible */
mca_btl_pcie_module.super.btl_rndv_eager_limit =
mca_btl_pcie_param_register_int ("btl_rndv_eager_limit",
"Minimum message size (in bytes) that will be striped "
"across multiple network devices when using "
"send/receive semantics. Messages shorter than this "
"size will be sent across a single network (must be >= "
"1)",
2*1024) - sizeof(mca_btl_pcie_header_t);
mca_btl_pcie_module.super.btl_max_send_size =
mca_btl_pcie_param_register_int ("max_send_size",
"Maximum size (in bytes) of a single \"phase 2\" fragment "
"of a long message when using the pipeline protocol "
"(must be >= 1)",
4*1024) - sizeof(mca_btl_pcie_header_t);
mca_btl_pcie_module.super.btl_rdma_pipeline_send_length =
mca_btl_pcie_param_register_int("rdma_pipeline_send_length",
"Length of the \"phase 2\" portion of a large message (in "
"bytes) when using the pipeline protocol. This part of "
"the message will be split into fragments of size "
"max_send_size and sent using send/receive semantics "
"(must be >= 0; only relevant when the PUT flag is "
"set)",
12*1024);
mca_btl_pcie_module.super.btl_rdma_pipeline_frag_size =
mca_btl_pcie_param_register_int("rdma_pipeline_frag_size",
"Maximum size (in bytes) of a single \"phase 3\" fragment "
"from a long message when using the pipeline protocol. "
"These fragments will be sent using RDMA semantics "
"(must be >= 1; only relevant when the PUT flag is "
"set)",
2*1024*1024);
mca_btl_pcie_module.super.btl_min_rdma_pipeline_size =
mca_btl_pcie_param_register_int("min_rdma_pipeline_size",
"Messages smaller than this size (in bytes) will not "
"use the RDMA pipeline protocol. Instead, they will be "
"split into fragments of max_send_size and sent using "
"send/receive semantics (must be >=0, and is "
"automatically adjusted up to at least "
"(eager_limit+btl_rdma_pipeline_send_length); only "
"relevant when the PUT flag is set)",
16 * 1024);
/* HETEROGENEOUS_RDMA is only part of the default when the macro exists */
mca_btl_pcie_module.super.btl_flags =
mca_btl_pcie_param_register_int("flags",
"BTL control flags. Defaults to (SEND|PUT|HETEROGENEOUS_RDMA)",
#ifdef MCA_BTL_FLAGS_HETEROGENEOUS_RDMA
MCA_BTL_FLAGS_HETEROGENEOUS_RDMA |
#endif
MCA_BTL_FLAGS_SEND |
MCA_BTL_FLAGS_PUT);
return OMPI_SUCCESS;
}
/*
 * Called by the MCA framework to close the component.
 *
 * Nothing is torn down here; the function only reports success.
 */
int
mca_btl_pcie_component_close(void)
{
    const int status = OMPI_SUCCESS;

    return status;
}
mca_btl_base_module_t**
mca_btl_pcie_component_init(int *num_btl_modules,
bool enable_progress_threads,
bool enable_mpi_threads)
{
cpu_set_t cpu_set;
unsigned int i;
int num_cpus, *cpus;
struct stat stat_buf;
struct mca_mpool_base_resources_t mpool_resources;
mca_btl_base_module_t **btl_array;
*num_btl_modules = 0;
/* find all cpus we're bound to */
cpus = malloc(CPU_SETSIZE * sizeof(int));
memset(cpus, 0, CPU_SETSIZE * sizeof(int));
num_cpus = 0;
CPU_ZERO(&cpu_set);
sched_getaffinity(0, sizeof(cpu_set), &cpu_set);
for (i = 0 ; i < CPU_SETSIZE ; ++i) {
if (CPU_ISSET(i, &cpu_set)) cpus[num_cpus++] = i;
}
#if defined(__PPC__)
if (num_cpus > 1) {
orte_show_help("help-mpi-btl-pcie.txt", "initialization:more-than-one-cpu",
true, num_cpus);
return NULL;
}
#endif /* #ifdef __PPC__ */
if (0 == num_cpus) {
orte_show_help("help-mpi-btl-pcie.txt", "initialization:no-cpus",
true);
return NULL;
}
/* Create the module storage space */
mca_btl_pcie_component.pcie_num_btls = num_cpus;
mca_btl_pcie_component.pcie_btls = malloc(mca_btl_pcie_component.pcie_num_btls *
sizeof(struct mca_btl_pcie_module_t));
btl_array = malloc(mca_btl_pcie_component.pcie_num_btls *
sizeof(mca_btl_base_module_t*));
/* initialize the modules */
for (i = 0 ; i < mca_btl_pcie_component.pcie_num_btls ; ++i) {
mca_btl_pcie_module_t *btl = &(mca_btl_pcie_component.pcie_btls[i]);
btl_array[i] = (mca_btl_base_module_t*) btl;
memcpy(btl, &mca_btl_pcie_module, sizeof(mca_btl_pcie_module_t));
/* check if we have a device listed in our local config file */
btl->lcl_dev_name =
ompi_btl_pcie_cfg_get_local_device(orte_process_info.nodename, cpus[i]);
BTL_VERBOSE(("Local device for %s:%d = %s", orte_process_info.nodename, cpus[i],
btl->lcl_dev_name));
/* make sure said device is sane */
if(stat(btl->lcl_dev_name, &stat_buf)) {
BTL_ERROR(("Error %s opening device %s\n", strerror(errno),
btl->lcl_dev_name));
return NULL;
}
OBJ_CONSTRUCT(&btl->pcie_sma_buf_eager, ompi_free_list_t);
OBJ_CONSTRUCT(&btl->pcie_sma_buf_max, ompi_free_list_t);
OBJ_CONSTRUCT(&btl->pcie_frag_eager, ompi_free_list_t);
OBJ_CONSTRUCT(&btl->pcie_frag_max, ompi_free_list_t);
OBJ_CONSTRUCT(&btl->pcie_frag_dma, ompi_free_list_t);
OBJ_CONSTRUCT(&btl->pcie_lock, opal_mutex_t);
/* time to setup DMA mpool */
mpool_resources.reg_data = (void*) btl;
mpool_resources.sizeof_reg = sizeof(mca_btl_pcie_reg_t);
mpool_resources.register_mem = pcie_reg_mr;
mpool_resources.deregister_mem = pcie_dereg_mr;
btl->rdma_mpool =
mca_mpool_base_module_create("rdma",
&btl->super,
&mpool_resources);
btl->super.btl_mpool = btl->rdma_mpool;
btl->active = false;
}
/* push our address info to everyone */
btl_pcie_modex_send();
*num_btl_modules = mca_btl_pcie_component.pcie_num_btls;
return btl_array;;
}
/*
 * Component progress: poll the receive fifo of every active module once
 * and handle at most one entry per module.
 *
 * An entry is either an ack for a fragment this process sent earlier, or
 * an incoming send located in the local fragment area.
 *
 * Returns the number of entries handled.
 */
int
mca_btl_pcie_component_progress()
{
unsigned int i;
btl_pcie_fifo_entry_t msg_idx;
int count = 0;
for (i = 0 ; i < mca_btl_pcie_component.pcie_num_btls ; ++i) {
mca_btl_pcie_module_t *pcie_btl =
&(mca_btl_pcie_component.pcie_btls[i]);
/* only the pointer is read here; it is not dereferenced until the
module is known to be active */
mca_btl_base_endpoint_t *endpoint = pcie_btl->endpoint;
if (!pcie_btl->active) continue;
/* 0 means the fifo slot is empty */
msg_idx = ompi_btl_pcie_fifo_get_msg(&endpoint->recv_fifo);
/* Potential optimization is to drain every time we enter progress */
if (msg_idx) {
int rc;
/* the type bits select ack vs. send; the rest is the payload */
int ack = ((msg_idx & BTL_PCIE_FIFO_TYPE_MASK) == BTL_PCIE_FIFO_TYPE_ACK) ? 1 : 0;
msg_idx &= BTL_PCIE_FIFO_DATA_MASK;
if (ack) {
/* we have a send frag ack */
/* the payload is reinterpreted as the address of the local fragment */
mca_btl_pcie_frag_t *frag = (mca_btl_pcie_frag_t*) msg_idx;
mca_btl_pcie_sma_buf_t *buf = frag->sma_buf;
BTL_VERBOSE(("received ack for frag %lx (0x%lx)", msg_idx, frag));
/* Done with buffer, can return now */
/* NOTE(review): rc is passed to the macro but never checked */
MCA_BTL_PCIE_SMA_BUF_RETURN(pcie_btl, buf, rc);
frag->base.des_cbfunc(&pcie_btl->super, endpoint,
&(frag->base),
OMPI_SUCCESS);
/* return the send credit */
ompi_btl_pcie_fifo_complete_msg(&endpoint->send_fifo, 1);
count++;
} else {
/* we have a send frag (incoming data) */
/* the payload is an offset from the local fragment base */
mca_btl_pcie_frag_t *recv_frag = &pcie_btl->pcie_recv_frag;
mca_btl_pcie_header_t *hdr = (mca_btl_pcie_header_t*) (endpoint->lcl_frag_base + msg_idx);
recv_frag->hdr = hdr;
/* converts the header in place */
OMPI_BTL_PCIE_HEADER_NTOH((*recv_frag->hdr));
/* payload starts right after the header */
recv_frag->segment.seg_addr.pval = ((unsigned char*) recv_frag->hdr) + sizeof(mca_btl_pcie_header_t);
recv_frag->segment.seg_len = recv_frag->hdr->length;
BTL_VERBOSE(("received tag %d, base 0x%lx", recv_frag->hdr->tag, &recv_frag->base));
/* dispatch to the callback stored for this tag */
pcie_btl->pcie_reg[recv_frag->hdr->tag].cbfunc(&pcie_btl->super,
recv_frag->hdr->tag, &recv_frag->base,
pcie_btl->pcie_reg[recv_frag->hdr->tag].cbdata);
/* queue the ack: hand the sender's fragment value from the header back */
/* NOTE(review): rc is not checked; a full fifo would drop the ack */
rc = ompi_btl_pcie_fifo_set_msg(&endpoint->send_fifo, hdr->send_frag.lval);
/* BWB - FIX ME - this is only safe if the number of
queue entries is twice the free list size */
/* undo the outstanding count just taken by set_msg for the ack */
ompi_btl_pcie_fifo_complete_msg(&endpoint->send_fifo, 1);
count++;
}
}
}
return count;
}
/*
 * mpool registration callback: register [base, base + size) with the
 * PCIE adapter of the module passed as reg_data.
 *
 * reg_data - the mca_btl_pcie_module_t that owns the mpool
 * base     - start of the region
 * size     - length of the region in bytes
 * reg      - registration record; treated as a mca_btl_pcie_reg_t whose
 *            handle field receives the driver handle
 *
 * The region is registered with local read/write and remote
 * access/read/write permissions.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR if the driver call fails.
 */
static int
pcie_reg_mr(void *reg_data, void *base, size_t size,
            mca_mpool_base_registration_t *reg)
{
    mca_btl_pcie_module_t * pcie_btl = (mca_btl_pcie_module_t*) reg_data;
    mca_btl_pcie_endpoint_t * endpoint = pcie_btl->endpoint;
    mca_btl_pcie_reg_t * pcie_reg = (mca_btl_pcie_reg_t*) reg;

    if(dd_register_memory_region(&endpoint->pcie_adapter,
                                 &pcie_reg->handle,
                                 base,
                                 size,
                                 DD_ALLOW_LOCAL_READ |
                                 DD_ALLOW_LOCAL_WRITE |
                                 DD_ALLOW_REMOTE_ACCESS |
                                 DD_ALLOW_REMOTE_READ |
                                 DD_ALLOW_REMOTE_WRITE )) {
        /* message used to say "deregistering" (copied from pcie_dereg_mr) */
        BTL_ERROR(("error registering memory!\n"));
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;
}
/*
 * mpool deregistration callback: release the driver registration held
 * in reg (treated as a mca_btl_pcie_reg_t) on the adapter of the module
 * passed as reg_data.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR if the handle is negative or the
 * driver call fails.
 */
static int
pcie_dereg_mr(void* reg_data, mca_mpool_base_registration_t *reg)
{
    mca_btl_pcie_module_t *module = (mca_btl_pcie_module_t*) reg_data;
    mca_btl_pcie_endpoint_t *ep = module->endpoint;
    mca_btl_pcie_reg_t *registration = (mca_btl_pcie_reg_t*) reg;

    /* nothing valid to release */
    if (!(registration->handle >= 0)) {
        return OMPI_ERROR;
    }

    if (dd_deregister_memory_region(&ep->pcie_adapter,
                                    &registration->handle)) {
        BTL_ERROR(("error deregistering memory!\n"));
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}

274
ompi/mca/btl/pcie/btl_pcie_endpoint.c Обычный файл
Просмотреть файл

@ -0,0 +1,274 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <sys/time.h>
#include <time.h>
#include "opal/align.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
#include "ompi/types.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/mpool/pcie/mpool_pcie.h"
#include "btl_pcie.h"
#include "btl_pcie_endpoint.h"
#include "btl_pcie_proc.h"
#include "btl_pcie_frag.h"
/*
 * Endpoint constructor.
 *
 * Clears the back-pointers to the owning BTL module and to the proc;
 * no other field is touched.
 */
static void mca_btl_pcie_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
{
    endpoint->endpoint_proc = NULL;
    endpoint->endpoint_btl = NULL;
}
/*
* Destroy an endpoint
*
* Currently a no-op: nothing held by the endpoint is released here.
*/
static void mca_btl_pcie_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
{
}
/* Class instance for the endpoint type: parent class opal_list_item_t,
with the constructor and destructor defined in this file. */
OBJ_CLASS_INSTANCE(
mca_btl_pcie_endpoint_t,
opal_list_item_t,
mca_btl_pcie_endpoint_construct,
mca_btl_pcie_endpoint_destruct);
/*
* Initialize an endpoint
*
* Opens the endpoint's local device, carves the local and remote SMA
* regions into a DMA status area, a fifo area, and a fragment area,
* creates the send mpool over the remote fragment area, initializes the
* owning module's free lists and receive fragment, and finally marks the
* module active with this endpoint attached.
*
* Returns OMPI_SUCCESS, OMPI_ERROR if the device cannot be opened or an
* SMA address is NULL, or the error code from fifo initialization.
*
* NOTE(review): the error returns after a successful dd_open() do not
* release the adapter, and the results of mca_mpool_base_module_create()
* and the ompi_free_list_init*() calls are not checked.
*/
int mca_btl_pcie_endpoint_init(mca_btl_base_endpoint_t* endpoint)
{
int rc;
mca_btl_pcie_module_t* pcie_btl =
endpoint->endpoint_btl;
mca_mpool_base_resources_t mpool_resources;
size_t fifo_buffer_len, current_offset = 0;
/* Open our device */
rc = dd_open(endpoint->lcl_dev_name,
&endpoint->pcie_adapter);
if( 0 != rc) {
BTL_ERROR(("Failed to open pcie device dd_open says : %d\n", rc));
return OMPI_ERROR;
}
/* fill in endpoint data for beginning of resources */
endpoint->lcl_sma_ptr = endpoint->pcie_adapter.local_sma_address;
if(NULL == endpoint->lcl_sma_ptr) {
BTL_ERROR(("Error: local sma address is null\n"));
return OMPI_ERROR;
}
endpoint->rem_sma_ptr = endpoint->pcie_adapter.remote_sma_address;
if(NULL == endpoint->rem_sma_ptr) {
BTL_ERROR(("Error: remote sma address is null\n"));
return OMPI_ERROR;
}
BTL_VERBOSE(("SMA for device %s: local=0x%lx,%d remote=0x%lx,%d",
endpoint->lcl_dev_name,
endpoint->lcl_sma_ptr,
endpoint->pcie_adapter.local_sma_size,
endpoint->rem_sma_ptr,
endpoint->pcie_adapter.remote_sma_size));
/* 16 bytes of the buffer reserved for the 8 byte local DMA completion */
endpoint->lcl_dma_status = ((char*) endpoint->lcl_sma_ptr) + current_offset;
current_offset += 16;
/* fifo_buffer_len bytes reserved for fifos */
fifo_buffer_len = sizeof(btl_pcie_fifo_entry_t) * mca_btl_pcie_component.pcie_recv_queue_len;
/* the send fifo writes into the remote SMA region ... */
rc = ompi_btl_pcie_fifo_init_send(&(endpoint->send_fifo),
mca_btl_pcie_component.pcie_recv_queue_len,
((char*) endpoint->rem_sma_ptr) + current_offset);
if (OMPI_SUCCESS != rc) {
BTL_ERROR(("Error: Failed to init send fifo: %d", rc));
return rc;
}
/* ... and the recv fifo reads from the same offset in the local region */
rc = ompi_btl_pcie_fifo_init_recv(&(endpoint->recv_fifo),
mca_btl_pcie_component.pcie_recv_queue_len,
((char*) endpoint->lcl_sma_ptr) + current_offset,
fifo_buffer_len);
if (OMPI_SUCCESS != rc) {
BTL_ERROR(("Error: Failed to init recv fifo: %d", rc));
return rc;
}
current_offset += fifo_buffer_len;
/* reserve rest of the space for the mpool */
endpoint->rem_frag_base =
((char*) endpoint->rem_sma_ptr) + current_offset;
endpoint->lcl_frag_base =
((char*) endpoint->lcl_sma_ptr) + current_offset;
/* don't need to align this one as the free list */
/* will take care of it. */
mpool_resources.base = endpoint->rem_frag_base;
/* NOTE(review): no check that remote_sma_size exceeds current_offset */
mpool_resources.len = endpoint->pcie_adapter.remote_sma_size -
current_offset;
/* setup my pcie mpool */
pcie_btl->pcie_mpool =
mca_mpool_base_module_create(mca_btl_pcie_component.pcie_send_mpool_name,
pcie_btl,
&mpool_resources);
/* setup the modules free lists and such as we now */
/* have enough info to setup the mpool */
/* eager SMA communication buffers */
/* Two call shapes follow, selected by the Open MPI version at build time */
#if (OMPI_MAJOR_VERSION <= 1) && (OMPI_MINOR_VERSION <= 2)
ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_eager),
sizeof(mca_btl_pcie_sma_buf_eager_t) +
mca_btl_pcie_module.super.btl_eager_limit,
sizeof(mca_btl_pcie_sma_buf_eager_t),
MCA_BTL_PCIE_FRAG_ALIGN,
OBJ_CLASS(mca_btl_pcie_sma_buf_eager_t),
mca_btl_pcie_component.pcie_free_list_num,
mca_btl_pcie_component.pcie_free_list_max,
mca_btl_pcie_component.pcie_free_list_inc,
pcie_btl->pcie_mpool);
/* max size SMA communication buffers */
ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_max),
sizeof(mca_btl_pcie_sma_buf_max_t) +
mca_btl_pcie_module.super.btl_max_send_size,
sizeof(mca_btl_pcie_sma_buf_max_t),
MCA_BTL_PCIE_FRAG_ALIGN,
OBJ_CLASS(mca_btl_pcie_sma_buf_max_t),
mca_btl_pcie_component.pcie_free_list_num,
mca_btl_pcie_component.pcie_free_list_max,
mca_btl_pcie_component.pcie_free_list_inc,
pcie_btl->pcie_mpool);
/* User eager fragment buffer */
ompi_free_list_init_ex(&(pcie_btl->pcie_frag_eager),
sizeof(mca_btl_pcie_frag_eager_t) +
mca_btl_pcie_module.super.btl_eager_limit,
sizeof(mca_btl_pcie_frag_eager_t),
MCA_BTL_PCIE_FRAG_ALIGN,
OBJ_CLASS(mca_btl_pcie_frag_eager_t),
mca_btl_pcie_component.pcie_free_list_num,
mca_btl_pcie_component.pcie_free_list_max,
mca_btl_pcie_component.pcie_free_list_inc,
NULL);
/* User max size fragment buffer */
ompi_free_list_init_ex(&(pcie_btl->pcie_frag_max),
sizeof(mca_btl_pcie_frag_max_t) +
mca_btl_pcie_module.super.btl_max_send_size,
sizeof(mca_btl_pcie_frag_max_t),
MCA_BTL_PCIE_FRAG_ALIGN,
OBJ_CLASS(mca_btl_pcie_frag_max_t),
mca_btl_pcie_component.pcie_free_list_num,
mca_btl_pcie_component.pcie_free_list_max,
mca_btl_pcie_component.pcie_free_list_inc,
NULL);
#else
/* NOTE(review): this branch passes only the payload limit as the size,
whereas the branch above adds sizeof the element struct - confirm the
newer ompi_free_list_init_ex() expects the size without the struct */
ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_eager),
mca_btl_pcie_module.super.btl_eager_limit,
MCA_BTL_PCIE_FRAG_ALIGN,
OBJ_CLASS(mca_btl_pcie_sma_buf_eager_t),
mca_btl_pcie_component.pcie_free_list_num,
mca_btl_pcie_component.pcie_free_list_max,
mca_btl_pcie_component.pcie_free_list_inc,
pcie_btl->pcie_mpool,
NULL,
NULL);
/* max size SMA communication buffers */
ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_max),
mca_btl_pcie_module.super.btl_max_send_size,
MCA_BTL_PCIE_FRAG_ALIGN,
OBJ_CLASS(mca_btl_pcie_sma_buf_max_t),
mca_btl_pcie_component.pcie_free_list_num,
mca_btl_pcie_component.pcie_free_list_max,
mca_btl_pcie_component.pcie_free_list_inc,
pcie_btl->pcie_mpool,
NULL,
NULL);
/* User eager fragment buffer */
ompi_free_list_init_ex(&(pcie_btl->pcie_frag_eager),
mca_btl_pcie_module.super.btl_eager_limit,
MCA_BTL_PCIE_FRAG_ALIGN,
OBJ_CLASS(mca_btl_pcie_frag_eager_t),
mca_btl_pcie_component.pcie_free_list_num,
mca_btl_pcie_component.pcie_free_list_max,
mca_btl_pcie_component.pcie_free_list_inc,
NULL,
NULL,
NULL);
/* User max size fragment buffer */
ompi_free_list_init_ex(&(pcie_btl->pcie_frag_max),
mca_btl_pcie_module.super.btl_max_send_size,
MCA_BTL_PCIE_FRAG_ALIGN,
OBJ_CLASS(mca_btl_pcie_frag_max_t),
mca_btl_pcie_component.pcie_free_list_num,
mca_btl_pcie_component.pcie_free_list_max,
mca_btl_pcie_component.pcie_free_list_inc,
NULL,
NULL,
NULL);
#endif
/* dma frags. note that we can only have 16 outstanding memory
handles so we cannot currently support leave_pinned and we must
limit the number of outstanding DMAs via the free list of DMA
frags */
ompi_free_list_init(&(pcie_btl->pcie_frag_dma),
sizeof(mca_btl_pcie_frag_dma_t),
OBJ_CLASS(mca_btl_pcie_frag_dma_t),
16,
16,
0,
NULL);
/* recv frag */
OBJ_CONSTRUCT(&(pcie_btl->pcie_recv_frag),
mca_btl_pcie_frag_recv_t);
/* attach the endpoint and let the progress loop start polling it */
pcie_btl->endpoint = endpoint;
pcie_btl->active = true;
return OMPI_SUCCESS;
}
/*
 * Finalize an endpoint
 *
 * Placeholder: performs no teardown and always reports success.
 */
int mca_btl_pcie_endpoint_fini(mca_btl_base_endpoint_t* endpoint)
{
    const int status = OMPI_SUCCESS;

    return status;
}

92
ompi/mca/btl/pcie/btl_pcie_endpoint.h Обычный файл
Просмотреть файл

@ -0,0 +1,92 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_PCIE_ENDPOINT_H
#define MCA_BTL_PCIE_ENDPOINT_H
#include "ompi_config.h"
#include "opal/class/opal_list.h"
#include "opal/event/event.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/pml/pml.h"
#include "btl_pcie_ddriver.h"
#include "btl_pcie_frag.h"
#include "btl_pcie.h"
#include "btl_pcie_fifo.h"
BEGIN_C_DECLS
/**
* An abstraction that represents a connection to an endpoint process.
* An instance of mca_btl_base_endpoint_t is associated w/ each process
* and BTL pair at startup. However, connections to the endpoint
* are established dynamically on an as-needed basis:
*/
struct mca_btl_base_endpoint_t {
opal_list_item_t super;
struct mca_btl_pcie_module_t* endpoint_btl;
/**< BTL instance that created this connection */
struct mca_btl_pcie_proc_t* endpoint_proc;
/**< proc structure corresponding to endpoint */
/** the name of the remote PCIE device */
char* rem_dev_name;
/** the name of the local PCIE device */
char* lcl_dev_name;
/** the pcie adapter - returned by dd_open */
DD_adapter_handle pcie_adapter;
/** local pcie SMA memory for this endpoint */
char *lcl_sma_ptr;
/** remote pcie SMA memory for this endpoint */
char *rem_sma_ptr;
/** remote fragment starting point (in which to
* deliver data via "rdma" write
*/
char *rem_frag_base;
/** local fragment starting point; incoming fifo entries are
* offsets from this address
*/
char *lcl_frag_base;
/** start of the local SMA region, reserved for DMA completion status */
char *lcl_dma_status;
/** fifo read by this process (lives in local SMA memory) */
btl_pcie_fifo_t recv_fifo;
/** fifo written by this process (lives in remote SMA memory) */
btl_pcie_fifo_t send_fifo;
};
typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
typedef mca_btl_base_endpoint_t mca_btl_pcie_endpoint_t;
OBJ_CLASS_DECLARATION(mca_btl_pcie_endpoint_t);
/*
* Initialize an endpoint
*/
int mca_btl_pcie_endpoint_init(mca_btl_base_endpoint_t* endpoint);
/*
* Finalize an endpoint
*/
int mca_btl_pcie_endpoint_fini(mca_btl_base_endpoint_t* endpoint);
END_C_DECLS
#endif /* #ifndef MCA_BTL_PCIE_ENDPOINT_H */

97
ompi/mca/btl/pcie/btl_pcie_fifo.c Обычный файл
Просмотреть файл

@ -0,0 +1,97 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include "opal/threads/mutex.h"
#include "opal/types.h"
#include "ompi/constants.h"
#include "btl_pcie_fifo.h"
/*
 * Compute the index mask for a fifo of len entries.
 *
 * Returns len - 1 when len is a power of two, so that
 * "index & mask" wraps an index into [0, len).  Returns 0 when len is
 * zero or not a power of two; callers treat a zero mask as an error,
 * which also rejects a one-entry fifo.
 *
 * The previous implementation shifted its working copy with
 * "tmp >> 1;" (no assignment), so it never terminated for a non-zero
 * len, and its bit counting could not reproduce len even when fixed.
 */
static uint32_t
get_mask(unsigned int len)
{
    /* a power of two has exactly one bit set, so len & (len - 1) is 0 */
    if (0 == len || 0 != (len & (len - 1))) {
        return 0;
    }

    return (uint32_t) (len - 1);
}
/*
 * Set up the sender's view of a fifo over queue_space.
 *
 * All fields are filled in even when the length is rejected; the write
 * index starts at 0 and nothing is outstanding.
 *
 * Returns OMPI_ERROR when get_mask() yields 0 for fifo_len, otherwise
 * OMPI_SUCCESS.
 */
int
ompi_btl_pcie_fifo_init_send(btl_pcie_fifo_t *fifo,
                             unsigned int fifo_len,
                             void *queue_space)
{
    const uint32_t index_mask = get_mask(fifo_len);

    fifo->queue = queue_space;
    fifo->mask = index_mask;
    fifo->num_outstanding = 0;
    fifo->current_index = 0;
    fifo->fifo_len = fifo_len;

    return (0 == index_mask) ? OMPI_ERROR : OMPI_SUCCESS;
}
/*
 * Set up the receiver's view of a fifo over queue_space.
 *
 * All fields are filled in first; the read index starts at 1.  The
 * queue is zeroed (marked empty) only after both checks pass.
 *
 * Returns OMPI_ERROR when get_mask() yields 0 for fifo_len or when
 * queue_space_len is too small for fifo_len entries, otherwise
 * OMPI_SUCCESS.
 */
int
ompi_btl_pcie_fifo_init_recv(btl_pcie_fifo_t *fifo,
                             unsigned int fifo_len,
                             void *queue_space,
                             size_t queue_space_len)
{
    const size_t bytes_needed = fifo_len * sizeof(btl_pcie_fifo_entry_t);

    fifo->queue = queue_space;
    fifo->mask = get_mask(fifo_len);
    fifo->num_outstanding = 0;
    fifo->current_index = 1;
    fifo->fifo_len = fifo_len;

    if (0 == fifo->mask) {
        return OMPI_ERROR;
    }
    if (bytes_needed > queue_space_len) {
        return OMPI_ERROR;
    }

    /* initialize the queue to empty */
    memset(fifo->queue, 0, bytes_needed);

    return OMPI_SUCCESS;
}
/*
 * Finalize a fifo.  Placeholder: nothing is released and the call
 * always reports success.
 */
int
ompi_btl_pcie_fifo_finalize(btl_pcie_fifo_t *fifo)
{
    const int status = OMPI_SUCCESS;

    return status;
}

171
ompi/mca/btl/pcie/btl_pcie_fifo.h Обычный файл
Просмотреть файл

@ -0,0 +1,171 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef BTL_PCIE_FIFO_H
#define BTL_PCIE_FIFO_H
#include "ompi_config.h"
#include "ompi/constants.h"
#include "opal/threads/mutex.h"
#include "opal/types.h"
#include "ompi/mca/btl/base/btl_base_error.h"
BEGIN_C_DECLS
/* One fifo slot.  A value of 0 marks an empty slot. */
typedef uint64_t btl_pcie_fifo_entry_t;
/* The top bit of an entry is its type, the low 63 bits its data. */
#define BTL_PCIE_FIFO_TYPE_MASK 0x8000000000000000
#define BTL_PCIE_FIFO_DATA_MASK 0x7FFFFFFFFFFFFFFF
/* type bit clear: acknowledgement of an earlier send */
#define BTL_PCIE_FIFO_TYPE_ACK 0x0000000000000000
/* type bit set: a send */
#define BTL_PCIE_FIFO_TYPE_SEND 0x8000000000000000
/* State for one side (sender or receiver) of a fifo. */
struct btl_pcie_fifo_t {
/* number of entries in queue */
uint32_t fifo_len;
/* for sender: next place to write
* for receiver: next place to read */
uint32_t current_index;
/* for sender: number of entries "in flight". Must always be less
than or equal to fifo_len */
uint32_t num_outstanding;
/* bit mask applied to an index to wrap it within the queue;
0 is treated as "invalid length" by the init functions */
uint32_t mask;
/* the actual buffer */
btl_pcie_fifo_entry_t* queue;
};
typedef struct btl_pcie_fifo_t btl_pcie_fifo_t;
/**
* Initialize fifo structure
*
* Initialize send/recv fifo structure. The fifo structure does
* double duty of maintaining both the sender and receiver. This
* function initializes the send view of the fifo structure, for
* use to send messages. fifo_get_msg() should not be called on
* this fifo.
*
* @note fifo_len must match the value given to the matching
* fifo_init_recv(), although there are no checks to verify this.
*
* @param[in] fifo A pointer to a fifo structure to be
* initialized
* @param[in] fifo_len Requested length of the fifo queue
* @param[in] queue_space Space for the receive queue (remote pointer)
*
* @retval OMPI_SUCCESS Everything worked
* @retval OMPI_ERROR Good luck!
*/
int ompi_btl_pcie_fifo_init_send(btl_pcie_fifo_t *fifo,
unsigned int fifo_len,
void *queue_space);
/**
* Initialize fifo structure
*
* Initialize send/recv fifo structure. The fifo structure does
* double duty of maintaining both the sender and receiver. This
* function initializes the receive view of the fifo structure, for
* use to receive messages. fifo_set_msg() should not be called on
* this fifo.
*
* @note fifo_len must match the value given to the matching
* fifo_init_send(), although there are no checks to verify this.
*
* @param[in] fifo A pointer to a fifo structure to be
* initialized
* @param[in] fifo_len Requested length of the fifo queue
* @param[in] queue_space Space for the receive queue (local pointer)
* @param[in] queue_space_len Length of queue_space
*
* @retval OMPI_SUCCESS Everything worked
* @retval OMPI_ERROR Good luck!
*/
int ompi_btl_pcie_fifo_init_recv(btl_pcie_fifo_t *fifo,
unsigned int fifo_len,
void *queue_space,
size_t queue_space_len);
int ompi_btl_pcie_fifo_finalize(btl_pcie_fifo_t *fifo);
/**
 * Read a message from the queue
 *
 * Looks at the slot at the current read index.  A non-zero entry is
 * consumed: the slot is cleared and the read index advances, wrapped
 * with the fifo mask.  An empty (zero) slot leaves the fifo untouched.
 *
 * @param[in] fifo    The receive view of the fifo
 *
 * @return            A non-zero message or 0 if no new messages are
 *                    available.
 */
static inline btl_pcie_fifo_entry_t
ompi_btl_pcie_fifo_get_msg(btl_pcie_fifo_t *fifo)
{
    /* BWB - TODO - if we ever want to be multi-threaded, we'll
       need to fix this */
    btl_pcie_fifo_entry_t *slot = &fifo->queue[fifo->current_index];
    const btl_pcie_fifo_entry_t entry = *slot;

    if (0 == entry) {
        return 0;
    }

    *slot = 0;
    fifo->current_index = (fifo->current_index + 1) & fifo->mask;

    return entry;
}
/**
 * Write a message pointer into the queue
 *
 * Reserves a slot by bumping the outstanding count; if that exceeds the
 * fifo length the reservation is undone and the call fails.  Otherwise
 * the write index is advanced and the message is stored at the new
 * index, wrapped with the fifo mask.
 *
 * @param[in] fifo    The send view of the fifo
 * @param[in] msg     The index to the payload to deliver
 *
 * @retval OMPI_SUCCESS             Fifo successfully updated
 * @retval OMPI_ERR_RESOURCE_BUSY   There was no space in the fifo
 */
static inline int
ompi_btl_pcie_fifo_set_msg(btl_pcie_fifo_t *fifo, btl_pcie_fifo_entry_t msg)
{
    uint32_t in_flight;
    uint32_t slot;

    /* see if we have a slot */
    in_flight = OPAL_THREAD_ADD32(&fifo->num_outstanding, 1);
    if (in_flight > fifo->fifo_len) {
        OPAL_THREAD_ADD32(&fifo->num_outstanding, -1);
        return OMPI_ERR_RESOURCE_BUSY;
    }

    /* the index is allowed to grow without bound; only the masked
       bits select the slot */
    slot = OPAL_THREAD_ADD32(&fifo->current_index, 1);
    slot &= fifo->mask;
    fifo->queue[slot] = msg;

    return OMPI_SUCCESS;
}
/**
 * Return send credits to the fifo
 *
 * Lowers the outstanding count of the send view by num_msgs.
 *
 * @param[in] fifo      The send view of the fifo
 * @param[in] num_msgs  Number of completed messages
 *
 * @retval OMPI_SUCCESS Always
 */
static inline int
ompi_btl_pcie_fifo_complete_msg(btl_pcie_fifo_t *fifo,
                                unsigned int num_msgs)
{
    (void) OPAL_THREAD_ADD32(&fifo->num_outstanding, -num_msgs);

    return OMPI_SUCCESS;
}
END_C_DECLS
#endif /* BTL_PCIE_FIFO_H */

139
ompi/mca/btl/pcie/btl_pcie_frag.c Обычный файл
Просмотреть файл

@ -0,0 +1,139 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "btl_pcie_frag.h"
#include "btl_pcie.h"
/*
 * Constructor for eager-sized SMA buffers: tags the buffer as EAGER and
 * points its data at the memory immediately following the descriptor.
 */
static void
mca_btl_pcie_sma_buf_eager_constructor(mca_btl_pcie_sma_buf_t* buf)
{
    buf->type = MCA_BTL_PCIE_TYPE_EAGER;
    buf->pcie_data.pval = &buf[1];
}
/*
 * Constructor for max-sized SMA buffers: tags the buffer as MAX and
 * points its data at the memory immediately following the descriptor.
 */
static void
mca_btl_pcie_sma_buf_max_constructor(mca_btl_pcie_sma_buf_t* buf)
{
    buf->type = MCA_BTL_PCIE_TYPE_MAX;
    buf->pcie_data.pval = &buf[1];
}
/* Class instances for the two SMA buffer flavours.  Both derive from
ompi_free_list_item_t, use the matching constructor above, and have no
destructor. */
OBJ_CLASS_INSTANCE(mca_btl_pcie_sma_buf_eager_t,
ompi_free_list_item_t,
mca_btl_pcie_sma_buf_eager_constructor,
NULL);
OBJ_CLASS_INSTANCE(mca_btl_pcie_sma_buf_max_t,
ompi_free_list_item_t,
mca_btl_pcie_sma_buf_max_constructor,
NULL);
/*
 * Constructor for DMA fragments: no source or destination segments, an
 * empty segment, no header/endpoint/registration/SMA buffer, size 0,
 * and type RDMA.
 */
static void
mca_btl_pcie_frag_dma_constructor(mca_btl_pcie_frag_t* frag)
{
    frag->type = MCA_BTL_PCIE_TYPE_RDMA;
    frag->size = 0;

    /* descriptor carries no segments yet */
    frag->base.des_src_cnt = 0;
    frag->base.des_src = NULL;
    frag->base.des_dst_cnt = 0;
    frag->base.des_dst = NULL;

    frag->segment.seg_len = 0;
    frag->segment.seg_addr.pval = NULL;

    frag->hdr = NULL;
    frag->endpoint = NULL;
    frag->registration = NULL;
    frag->sma_buf = NULL;
}
/*
 * Shared setup for send fragments (eager and max).
 *
 * The header sits directly after the fragment struct and the payload
 * directly after the header.  The header records the fragment's own
 * address in send_frag.  The single source segment covers the payload
 * with length frag->size, which the caller must set beforehand.
 */
static void
mca_btl_pcie_frag_common_constructor(mca_btl_pcie_frag_t* frag)
{
    mca_btl_pcie_header_t *header = (mca_btl_pcie_header_t*) (frag + 1);
    unsigned char *payload = ((unsigned char*) header) + sizeof(mca_btl_pcie_header_t);

    frag->hdr = header;
    header->send_frag.pval = frag;

    frag->segment.seg_addr.pval = payload;
    frag->segment.seg_len = frag->size;

    /* one source segment, no destination segments */
    frag->base.des_src = &frag->segment;
    frag->base.des_src_cnt = 1;
    frag->base.des_dst = NULL;
    frag->base.des_dst_cnt = 0;

    frag->endpoint = NULL;
    frag->registration = NULL;
    frag->sma_buf = NULL;
}
/*
 * Constructor for eager send fragments: payload size is the module's
 * eager limit, type is EAGER.
 */
static void
mca_btl_pcie_frag_eager_constructor(mca_btl_pcie_frag_t* frag)
{
    frag->type = MCA_BTL_PCIE_TYPE_EAGER;

    /* size must be in place before the common setup reads it */
    frag->size = mca_btl_pcie_module.super.btl_eager_limit;
    mca_btl_pcie_frag_common_constructor(frag);
}
/*
 * Constructor for max-size send fragments: payload size is the module's
 * max send size, type is MAX.
 */
static void mca_btl_pcie_frag_max_constructor(mca_btl_pcie_frag_t* frag)
{
    frag->type = MCA_BTL_PCIE_TYPE_MAX;

    /* size must be in place before the common setup reads it */
    frag->size = mca_btl_pcie_module.super.btl_max_send_size;
    mca_btl_pcie_frag_common_constructor(frag);
}
/*
 * Constructor for receive fragments.  The embedded (initially empty)
 * segment is exposed as the single destination segment; there is no
 * source segment, header, endpoint, registration or SMA buffer.
 */
static void mca_btl_pcie_frag_recv_constructor(mca_btl_pcie_frag_t *frag)
{
    frag->type = MCA_BTL_PCIE_TYPE_RECV;
    frag->size = 0;

    /* embedded segment starts out empty */
    frag->segment.seg_len = 0;
    frag->segment.seg_addr.pval = NULL;

    /* receive side: one destination segment, no source */
    frag->base.des_dst = &frag->segment;
    frag->base.des_dst_cnt = 1;
    frag->base.des_src = NULL;
    frag->base.des_src_cnt = 0;

    frag->hdr = NULL;
    frag->endpoint = NULL;
    frag->registration = NULL;
    frag->sma_buf = NULL;
}
/*
 * Class instances for the four fragment flavors.  All use
 * mca_btl_base_descriptor_t as parent class and register no destructor;
 * each one selects the constructor that sets the matching fragment type.
 */
OBJ_CLASS_INSTANCE(
mca_btl_pcie_frag_eager_t,
mca_btl_base_descriptor_t,
mca_btl_pcie_frag_eager_constructor,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_pcie_frag_max_t,
mca_btl_base_descriptor_t,
mca_btl_pcie_frag_max_constructor,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_pcie_frag_recv_t,
mca_btl_base_descriptor_t,
mca_btl_pcie_frag_recv_constructor,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_pcie_frag_dma_t,
mca_btl_base_descriptor_t,
mca_btl_pcie_frag_dma_constructor,
NULL);

179
ompi/mca/btl/pcie/btl_pcie_frag.h Обычный файл
Просмотреть файл

@ -0,0 +1,179 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_PCIE_FRAG_H
#define MCA_BTL_PCIE_FRAG_H
#include "ompi_config.h"
#include "ompi/mca/btl/btl.h"
BEGIN_C_DECLS
/* NOTE(review): presumably the byte alignment requested for fragment
   allocations; not used in this header -- confirm against the callers. */
#define MCA_BTL_PCIE_FRAG_ALIGN (16)
/* Header that sits at top of any send message.  The send-fragment
   constructors place it directly after the fragment structure and
   store a back-pointer to the owning fragment in send_frag. */
struct mca_btl_pcie_header_t {
/* tag of the message */
mca_btl_base_tag_t tag;
/* explicit padding bytes */
uint8_t pad[3];
/* message length; the only field byte-swapped by the HTON/NTOH macros */
uint32_t length;
/* back-pointer to the sending fragment */
ompi_ptr_t send_frag;
};
typedef struct mca_btl_pcie_header_t mca_btl_pcie_header_t;
/* Convert a header (passed as an lvalue, not a pointer) to network byte
   order in place.  Only the length field is converted. */
#define OMPI_BTL_PCIE_HEADER_HTON(header) \
do { \
(header).length = htonl((header).length); \
} while (0)
/* Convert a header (passed as an lvalue, not a pointer) to host byte
   order in place.  Only the length field is converted. */
#define OMPI_BTL_PCIE_HEADER_NTOH(header) \
do { \
(header).length = ntohl((header).length); \
} while (0)
struct mca_btl_pcie_frag_t;
/** Type description for fragments / buffers.  The value is stamped by
    the object constructors and later used by the *_RETURN macros to
    pick the free list an item goes back to. */
enum mca_btl_pcie_frag_type_t {
MCA_BTL_PCIE_TYPE_UNKNOWN,
MCA_BTL_PCIE_TYPE_EAGER,
MCA_BTL_PCIE_TYPE_MAX,
MCA_BTL_PCIE_TYPE_RDMA,
MCA_BTL_PCIE_TYPE_RECV
};
typedef enum mca_btl_pcie_frag_type_t mca_btl_pcie_frag_type_t;
/** SMA transfer buffer.  Managed as a free-list item; the eager and max
    aliases below share this layout and differ only in their class
    (and therefore constructor). */
struct mca_btl_pcie_sma_buf_t {
/** free-list linkage; must stay first so the item can be cast */
ompi_free_list_item_t super;
/** Pointer to the SMA space available for this copy. An
ompi_ptr_t because in v1.2, this sits in the sma region,
and we need to not have different sizes on each endpoint. */
ompi_ptr_t pcie_data;
/** type of buffer */
mca_btl_pcie_frag_type_t type;
};
typedef struct mca_btl_pcie_sma_buf_t mca_btl_pcie_sma_buf_t;
/* eager-class alias */
typedef mca_btl_pcie_sma_buf_t mca_btl_pcie_sma_buf_eager_t;
OBJ_CLASS_DECLARATION(mca_btl_pcie_sma_buf_eager_t);
/* max-class alias */
typedef mca_btl_pcie_sma_buf_t mca_btl_pcie_sma_buf_max_t;
OBJ_CLASS_DECLARATION(mca_btl_pcie_sma_buf_max_t);
/**
 * Take one eager SMA buffer from the module's pcie_sma_buf_eager free list.
 *
 * @param btl  pointer to the BTL module (cast to mca_btl_pcie_module_t*)
 * @param buf  lvalue receiving the mca_btl_pcie_sma_buf_t* result
 * @param rc   lvalue receiving the status set by OMPI_FREE_LIST_GET
 *
 * Macro arguments are parenthesized so that non-trivial expressions
 * expand correctly.
 */
#define MCA_BTL_PCIE_SMA_BUF_ALLOC_EAGER(btl, buf, rc)                                 \
{                                                                                      \
    ompi_free_list_item_t *item;                                                       \
    OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)(btl))->pcie_sma_buf_eager, item, rc); \
    (buf) = (mca_btl_pcie_sma_buf_t*) item;                                            \
}
/**
 * Take one max-size SMA buffer from the module's pcie_sma_buf_max free list.
 *
 * @param btl  pointer to the BTL module (cast to mca_btl_pcie_module_t*)
 * @param buf  lvalue receiving the mca_btl_pcie_sma_buf_t* result
 * @param rc   lvalue receiving the status set by OMPI_FREE_LIST_GET
 *
 * Macro arguments are parenthesized so that non-trivial expressions
 * expand correctly.
 */
#define MCA_BTL_PCIE_SMA_BUF_ALLOC_MAX(btl, buf, rc)                                 \
{                                                                                    \
    ompi_free_list_item_t *item;                                                     \
    OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)(btl))->pcie_sma_buf_max, item, rc); \
    (buf) = (mca_btl_pcie_sma_buf_t*) item;                                          \
}
/**
 * Give an SMA buffer back to the free list matching its type.
 *
 * @param btl  pointer to the BTL module (cast to mca_btl_pcie_module_t*)
 * @param buf  pointer to the mca_btl_pcie_sma_buf_t being returned
 * @param ret  lvalue receiving OMPI_SUCCESS, or OMPI_ERR_BAD_PARAM when
 *             the buffer type is neither EAGER nor MAX
 *
 * The diagnostic prints the buffer address with %p and an explicit
 * void* cast; handing a pointer to %lx is a format/argument mismatch.
 */
#define MCA_BTL_PCIE_SMA_BUF_RETURN(btl, buf, ret)                                    \
{                                                                                     \
    (ret) = OMPI_SUCCESS;                                                             \
    switch ((buf)->type) {                                                            \
    case MCA_BTL_PCIE_TYPE_EAGER:                                                     \
        OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)(btl))->pcie_sma_buf_eager,   \
                              (ompi_free_list_item_t*)(buf));                         \
        break;                                                                        \
    case MCA_BTL_PCIE_TYPE_MAX:                                                       \
        OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)(btl))->pcie_sma_buf_max,     \
                              (ompi_free_list_item_t*)(buf));                         \
        break;                                                                        \
    default:                                                                          \
        BTL_ERROR(("Invalid return type (%d) for frag %p in SMA_BUF_RETURN",          \
                   (int) (buf)->type, (void*) (buf)));                                \
        (ret) = OMPI_ERR_BAD_PARAM;                                                   \
        break;                                                                        \
    }                                                                                 \
}
/** Fragment description -- used for send/rdma fragments (and, with the
    recv class below, for receive fragments).  The four aliases share
    this layout and differ only in their class / constructor. */
struct mca_btl_pcie_frag_t {
/** base descriptor; must stay first so the fragment can be cast */
mca_btl_base_descriptor_t base;
/** single embedded segment referenced from base.des_src or base.des_dst */
mca_btl_base_segment_t segment;
/** endpoint the fragment is bound to (NULL after construction) */
struct mca_btl_base_endpoint_t *endpoint;
/** message header; set for send fragments, NULL for rdma/recv */
mca_btl_pcie_header_t *hdr;
/** payload capacity; eager limit or max send size for send fragments */
size_t size;
/** registration attached to the fragment (NULL after construction) */
struct mca_btl_pcie_reg_t *registration;
/** fragment flavor; selects the free list in MCA_BTL_PCIE_FRAG_RETURN */
mca_btl_pcie_frag_type_t type;
/** SMA buffer attached to the fragment (NULL after construction) */
mca_btl_pcie_sma_buf_t *sma_buf;
};
typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_t;
typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_eager_t;
OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_eager_t);
typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_max_t;
OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_max_t);
typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_recv_t;
OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_recv_t);
typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_dma_t;
OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_dma_t);
/**
 * Take one eager send fragment from the module's pcie_frag_eager free list.
 *
 * @param btl   pointer to the BTL module (cast to mca_btl_pcie_module_t*)
 * @param frag  lvalue receiving the mca_btl_pcie_frag_t* result
 * @param rc    lvalue receiving the status set by OMPI_FREE_LIST_GET
 *
 * Macro arguments are parenthesized so that non-trivial expressions
 * expand correctly.
 */
#define MCA_BTL_PCIE_FRAG_ALLOC_EAGER(btl, frag, rc)                                 \
{                                                                                    \
    ompi_free_list_item_t *item;                                                     \
    OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)(btl))->pcie_frag_eager, item, rc); \
    (frag) = (mca_btl_pcie_frag_t*) item;                                            \
}
/**
 * Take one max-size send fragment from the module's pcie_frag_max free list.
 *
 * @param btl   pointer to the BTL module (cast to mca_btl_pcie_module_t*)
 * @param frag  lvalue receiving the mca_btl_pcie_frag_t* result
 * @param rc    lvalue receiving the status set by OMPI_FREE_LIST_GET
 *
 * Macro arguments are parenthesized so that non-trivial expressions
 * expand correctly.
 */
#define MCA_BTL_PCIE_FRAG_ALLOC_MAX(btl, frag, rc)                                 \
{                                                                                  \
    ompi_free_list_item_t *item;                                                   \
    OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)(btl))->pcie_frag_max, item, rc); \
    (frag) = (mca_btl_pcie_frag_t*) item;                                          \
}
/**
 * Take one RDMA fragment from the module's pcie_frag_dma free list.
 *
 * @param btl   pointer to the BTL module (cast to mca_btl_pcie_module_t*)
 * @param frag  lvalue receiving the mca_btl_pcie_frag_t* result
 * @param rc    lvalue receiving the status set by OMPI_FREE_LIST_GET
 *
 * Macro arguments are parenthesized so that non-trivial expressions
 * expand correctly.
 */
#define MCA_BTL_PCIE_FRAG_ALLOC_DMA(btl, frag, rc)                                 \
{                                                                                  \
    ompi_free_list_item_t *item;                                                   \
    OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)(btl))->pcie_frag_dma, item, rc); \
    (frag) = (mca_btl_pcie_frag_t*) item;                                          \
}
/**
 * Give a fragment back to the free list matching its type.
 *
 * @param btl   pointer to the BTL module (cast to mca_btl_pcie_module_t*)
 * @param frag  pointer to the mca_btl_pcie_frag_t being returned
 * @param ret   lvalue receiving OMPI_SUCCESS, or OMPI_ERR_BAD_PARAM when
 *              the type is not EAGER, MAX or RDMA (RECV and UNKNOWN
 *              fragments have no free list here and take the error path)
 *
 * The diagnostic prints the fragment address with %p and an explicit
 * void* cast; handing a pointer to %lx is a format/argument mismatch.
 */
#define MCA_BTL_PCIE_FRAG_RETURN(btl, frag, ret)                                    \
{                                                                                   \
    (ret) = OMPI_SUCCESS;                                                           \
    switch ((frag)->type) {                                                         \
    case MCA_BTL_PCIE_TYPE_EAGER:                                                   \
        OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)(btl))->pcie_frag_eager,    \
                              (ompi_free_list_item_t*)(frag));                      \
        break;                                                                      \
    case MCA_BTL_PCIE_TYPE_MAX:                                                     \
        OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)(btl))->pcie_frag_max,      \
                              (ompi_free_list_item_t*)(frag));                      \
        break;                                                                      \
    case MCA_BTL_PCIE_TYPE_RDMA:                                                    \
        OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)(btl))->pcie_frag_dma,      \
                              (ompi_free_list_item_t*)(frag));                      \
        break;                                                                      \
    default:                                                                        \
        BTL_ERROR(("Invalid return type (%d) for frag %p in FRAG_RETURN",           \
                   (int) (frag)->type, (void*) (frag)));                            \
        (ret) = OMPI_ERR_BAD_PARAM;                                                 \
        break;                                                                      \
    }                                                                               \
}
END_C_DECLS
#endif /* #ifndef MCA_BTL_PCIE_FRAG_H */

1698
ompi/mca/btl/pcie/btl_pcie_lex.c Обычный файл

Разница между файлами не показана из-за своего большого размера Загрузить разницу

58
ompi/mca/btl/pcie/btl_pcie_lex.h Обычный файл
Просмотреть файл

@ -0,0 +1,58 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef BTL_PCIE_CFG_LEX_H_
#define BTL_PCIE_CFG_LEX_H_
#include "opal_config.h"
#ifdef malloc
#undef malloc
#endif
#ifdef realloc
#undef realloc
#endif
#ifdef free
#undef free
#endif
#include <stdio.h>
/* Scanner entry point; the rules return BTL_PCIE_CFG_PARSE_* token codes. */
int btl_pcie_cfg_yylex(void);
/* Creates a fresh scanner buffer for file and switches to it; call
   before (re)starting a scan.  Returns 0. */
int btl_pcie_cfg_init_buffer(FILE *file);
/* NOTE(review): presumably the flex input stream and current token text
   under the btl_pcie_cfg_yy prefix -- they are defined by the generated
   scanner, not in the .l source itself. */
extern FILE *btl_pcie_cfg_yyin;
/* set to true by the scanner's yywrap hook once end of input is reached */
extern bool btl_pcie_cfg_parse_done;
extern char *btl_pcie_cfg_yytext;
/* line counter: starts at 1 and is bumped for every newline consumed */
extern int btl_pcie_cfg_yynewlines;
/*
* Make lex-generated files not issue compiler warnings.
* These are feature switches read by the generated scanner.
* NOTE(review): YY_NO_UNPUT and YY_SKIP_YYWRAP presumably suppress the
* unused yyunput() helper and flex's own yywrap declaration -- confirm
* against the flex version in use.
*/
#define YY_STACK_USED 0
#define YY_ALWAYS_INTERACTIVE 0
#define YY_NEVER_INTERACTIVE 0
#define YY_MAIN 0
#define YY_NO_UNPUT 1
#define YY_SKIP_YYWRAP 1
enum {
BTL_PCIE_CFG_PARSE_DONE = 1,
BTL_PCIE_CFG_PARSE_ERROR,
BTL_PCIE_CFG_PARSE_NEWLINE,
BTL_PCIE_CFG_PARSE_HOSTNAME_CORE,
BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE,
BTL_PCIE_CFG_PARSE_DEVICE,
BTL_PCIE_CFG_PARSE_MAX
};
#endif /* #ifndef BTL_PCIE_CFG_LEX_H_ */

125
ompi/mca/btl/pcie/btl_pcie_lex.l Обычный файл
Просмотреть файл

@ -0,0 +1,125 @@
%{ /* -*- C -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include <stdio.h>
#if HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "btl_pcie_lex.h"
/*
* local functions
*/
/* releases the current scanner buffer; invoked through yyterminate() */
static int finish_parsing(void) ;
/* end-of-input hook: flags completion and stops the scan */
static int btl_pcie_cfg_yywrap(void);
/*
* global variables
*/
/* current line number, starting at 1 */
int btl_pcie_cfg_yynewlines = 1;
/* becomes true once the scanner has hit end of input */
bool btl_pcie_cfg_parse_done = false;
/* NOTE(review): not referenced anywhere in this scanner source and not
   declared in btl_pcie_lex.h -- confirm whether it is still needed. */
char *btl_pcie_cfg_string = NULL;
/* route every scanner termination through finish_parsing() so the
   buffer is freed */
#define yyterminate() \
return finish_parsing()
%}
/* Character classes: WHITE is horizontal whitespace (no newline), CHAR
   is a host-name character, NAME_CHAR additionally allows path
   separators so that device paths can be matched. */
WHITE [\f\t\v ]
CHAR [A-Za-z0-9_\-\.]
NAME_CHAR [A-Za-z0-9_\-\.\\\/]
/* Exclusive start conditions.  Only "comment" is used by the rules of
   this scanner; the other three are declared but never entered. */
%x comment
%x section_name
%x section_end
%x value
%%
{WHITE}*\n              { /* blank line */
                          ++btl_pcie_cfg_yynewlines;
                          return BTL_PCIE_CFG_PARSE_NEWLINE; }
#.*\n                   { /* shell-style comment line */
                          ++btl_pcie_cfg_yynewlines;
                          return BTL_PCIE_CFG_PARSE_NEWLINE; }
"//".*\n                { /* C++-style comment line */
                          ++btl_pcie_cfg_yynewlines;
                          return BTL_PCIE_CFG_PARSE_NEWLINE; }
"/*"                    { /* start of a block comment */
                          BEGIN(comment);
                          return BTL_PCIE_CFG_PARSE_NEWLINE; }
<comment>[^*\n]*        ; /* Eat up non '*'s */
<comment>"*"+[^*/\n]*   ; /* Eat '*'s not followed by a '/' */
<comment>\n             { ++btl_pcie_cfg_yynewlines;
                          return BTL_PCIE_CFG_PARSE_NEWLINE; }
<comment>"*"+"/"        { BEGIN(INITIAL); /* Done with block comment */
                          return BTL_PCIE_CFG_PARSE_NEWLINE; }
{CHAR}+":"[0-9]+        { /* The core index may have several digits.  With a
                             single-digit pattern, longest-match would hand
                             "host:12" to the hostname:device rule below.
                             For an all-digit suffix both rules now match
                             the same length and this one wins by order. */
                          return BTL_PCIE_CFG_PARSE_HOSTNAME_CORE; }
{CHAR}+":"{NAME_CHAR}+  { return BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE; }
{NAME_CHAR}+            { return BTL_PCIE_CFG_PARSE_DEVICE; }
{WHITE}+                ; /* whitespace */
%%
/*
 * End-of-parse cleanup.  The whole file is always scanned, so when the
 * scanner terminates the current buffer (if there is one) is deleted
 * and the scanner's current-buffer slot is cleared, which prevents a
 * memory leak.  Always returns YY_NULL.
 */
static int finish_parsing(void)
{
    if (NULL == YY_CURRENT_BUFFER) {
        /* nothing allocated, nothing to free */
        return YY_NULL;
    }

    yy_delete_buffer(YY_CURRENT_BUFFER);
    /* clear the slot through whichever spelling this flex provides */
#if defined(YY_CURRENT_BUFFER_LVALUE)
    YY_CURRENT_BUFFER_LVALUE = NULL;
#else
    YY_CURRENT_BUFFER = NULL;
#endif /* YY_CURRENT_BUFFER_LVALUE */

    return YY_NULL;
}
/*
 * End-of-input hook: records that parsing is complete and returns 1,
 * i.e. there is no further input to continue with.
 */
static int btl_pcie_cfg_yywrap(void)
{
    const int no_more_input = 1;

    btl_pcie_cfg_parse_done = true;
    return no_more_input;
}
/*
 * Give the scanner a valid buffer for the given file.
 *
 * When the scanner is run a second time, finish_parsing() has already
 * freed the previous buffer, but flex still considers its internal
 * state initialized and would assume a buffer exists.  Creating a new
 * buffer and switching to it here avoids that.  Always returns 0.
 */
int btl_pcie_cfg_init_buffer(FILE *file)
{
    yy_switch_to_buffer(yy_create_buffer(file, YY_BUF_SIZE));
    return 0;
}

194
ompi/mca/btl/pcie/btl_pcie_proc.c Обычный файл
Просмотреть файл

@ -0,0 +1,194 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/class/opal_hash_table.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "btl_pcie.h"
#include "btl_pcie_proc.h"
/* Constructor / destructor for mca_btl_pcie_proc_t; both have internal
   linkage through these declarations. */
static void mca_btl_pcie_proc_construct(mca_btl_pcie_proc_t* proc);
static void mca_btl_pcie_proc_destruct(mca_btl_pcie_proc_t* proc);
/* The proc class derives from opal_list_item_t so instances can sit on
   the component-wide pcie_procs list. */
OBJ_CLASS_INSTANCE(mca_btl_pcie_proc_t,
opal_list_item_t, mca_btl_pcie_proc_construct,
mca_btl_pcie_proc_destruct);
/*
 * Construct a PCIE proc instance: reset its bookkeeping fields, set up
 * its lock, and register it on the component's list of all procs
 * (under the component lock).
 */
void mca_btl_pcie_proc_construct(mca_btl_pcie_proc_t* proc)
{
    proc->proc_endpoint_count = 0;
    proc->proc_addr_count = 0;
    proc->proc_ompi = NULL;
    OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t);

    /* publish the new instance on the component-wide proc list */
    OPAL_THREAD_LOCK(&mca_btl_pcie_component.pcie_lock);
    opal_list_append(&mca_btl_pcie_component.pcie_procs, &proc->super);
    OPAL_THREAD_UNLOCK(&mca_btl_pcie_component.pcie_lock);
}
/*
 * Destroy a PCIE proc instance: unregister it from the component's
 * list of all procs (under the component lock) and tear down its lock.
 * The endpoint referenced by the proc is not released here.
 */
void mca_btl_pcie_proc_destruct(mca_btl_pcie_proc_t* proc)
{
    opal_list_item_t *self = &proc->super;

    /* take the instance off the component-wide proc list */
    OPAL_THREAD_LOCK(&mca_btl_pcie_component.pcie_lock);
    opal_list_remove_item(&mca_btl_pcie_component.pcie_procs, self);
    OPAL_THREAD_UNLOCK(&mca_btl_pcie_component.pcie_lock);

    OBJ_DESTRUCT(&proc->proc_lock);
}
/*
 * Look for an existing PCIE proc instance associated with the given
 * ompi_proc_t.  Walks the component's proc list under the component
 * lock and returns the first match, or NULL if there is none.
 */
static mca_btl_pcie_proc_t* mca_btl_pcie_proc_lookup_ompi(ompi_proc_t* ompi_proc)
{
    mca_btl_pcie_proc_t *match = NULL;
    opal_list_item_t *cursor;

    OPAL_THREAD_LOCK(&mca_btl_pcie_component.pcie_lock);
    cursor = opal_list_get_first(&mca_btl_pcie_component.pcie_procs);
    while (cursor != opal_list_get_end(&mca_btl_pcie_component.pcie_procs)) {
        mca_btl_pcie_proc_t *candidate = (mca_btl_pcie_proc_t*) cursor;

        if (candidate->proc_ompi == ompi_proc) {
            match = candidate;
            break;
        }
        cursor = opal_list_get_next(cursor);
    }
    OPAL_THREAD_UNLOCK(&mca_btl_pcie_component.pcie_lock);

    return match;
}
/*
 * Create (or find) the PCIE proc structure for an ompi_proc_t.  There is
 * a one-to-one correspondence between an ompi_proc_t and a
 * mca_btl_pcie_proc_t instance; the proc caches the endpoint used to
 * reach the peer.
 *
 * @param ompi_proc  peer process
 * @param pcie_btl   local BTL module; its lcl_dev_name selects which of
 *                   the peer's published devices is usable
 * @param ret_proc   receives the proc, or NULL when the peer is not
 *                   reachable through this module or on error
 *
 * @return OMPI_SUCCESS when a proc was found/created, and also when the
 *         peer published nothing usable (*ret_proc is NULL then);
 *         OMPI_ERROR when the modex lookup or the endpoint
 *         initialization failed; OMPI_ERR_OUT_OF_RESOURCE when an
 *         object could not be allocated.
 *
 * Error handling notes:
 *  - On modex failure no proc exists yet, so there is nothing to
 *    release (releasing the NULL pointer would crash).
 *  - rem_dev_name points into the modex buffer and is never freed on
 *    its own.
 *  - The proc constructor puts the new proc on the component list, so
 *    every failure after OBJ_NEW releases the proc again; otherwise a
 *    later lookup would return the half-built instance as a success.
 */
int mca_btl_pcie_proc_create(ompi_proc_t* ompi_proc,
                             mca_btl_pcie_module_t* pcie_btl,
                             mca_btl_pcie_proc_t** ret_proc)
{
    mca_btl_pcie_proc_t* pcie_proc = NULL;
    char *rem_dev_name = NULL, *lcl_dev_name = NULL;
    char *rem_hostname = NULL;
    int rc, num_peers, i;
    size_t size = 0;
    mca_btl_pcie_modex_info_t *modex_info = NULL;

    /* Check if already have proc structure for this ompi process */
    pcie_proc = mca_btl_pcie_proc_lookup_ompi(ompi_proc);
    if(pcie_proc != NULL) {
        /* Gotcha! */
        *ret_proc = pcie_proc;
        return OMPI_SUCCESS;
    }

    /* query for the peer's device name info */
    rc = ompi_modex_recv(&mca_btl_pcie_component.super.btl_version,
                         ompi_proc,
                         (void*)&modex_info,
                         &size);
    if (OMPI_SUCCESS != rc) {
        opal_output(mca_btl_base_output, "[%s:%d] ompi_modex_recv failed for peer %s",
                    __FILE__, __LINE__, ORTE_NAME_PRINT(&ompi_proc->proc_name));
        *ret_proc = NULL;
        return OMPI_ERROR;
    }

    /* nothing published, or a payload that is not a whole number of
       entries: treat the peer as unreachable through this BTL */
    if (0 == size || 0 != size % sizeof(mca_btl_pcie_modex_info_t)) {
        *ret_proc = NULL;
        return OMPI_SUCCESS;
    }

    /* NOTE(review): the modex buffer is kept alive on the success path
       because rem_dev_name points into it; whether it must be freed on
       the early-return paths depends on ompi_modex_recv's ownership
       contract -- confirm. */
    num_peers = size / sizeof(mca_btl_pcie_modex_info_t);
    for (i = 0 ; i < num_peers ; ++i) {
        MCA_BTL_PCIE_MODEX_INFO_NTOH(modex_info[i]);
        rem_hostname = modex_info[i].hostname;
        rem_dev_name = modex_info[i].devicename;
        lcl_dev_name = ompi_btl_pcie_cfg_get_matching_device(rem_hostname,
                                                             rem_dev_name);
        if (NULL != lcl_dev_name &&
            0 == strcmp(lcl_dev_name, pcie_btl->lcl_dev_name)) {
            /* we have a match. continue onward */
            break;
        }
    }

    /* the loop only breaks on a match, so running off the end means
       none of the peer's devices pairs with this module's device */
    if (i == num_peers) {
        *ret_proc = NULL;
        return OMPI_SUCCESS;
    }

    BTL_VERBOSE(("Have matching devices: %s:%s <-> %s:%s",
                 orte_process_info.nodename,
                 pcie_btl->lcl_dev_name,
                 rem_hostname,
                 rem_dev_name));

    pcie_proc = OBJ_NEW(mca_btl_pcie_proc_t);
    if(NULL == pcie_proc){
        *ret_proc = NULL;
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    pcie_proc->proc_ompi = ompi_proc;

    /* build a unique identifier (of arbitrary
     * size) to represent the proc */
    pcie_proc->proc_guid = ompi_proc->proc_name;

    /* Initialize number of peer */
    pcie_proc->proc_endpoint_count = 1;
    pcie_proc->endpoint_proc = OBJ_NEW(mca_btl_pcie_endpoint_t);
    if(NULL == pcie_proc->endpoint_proc) {
        /* drop the proc so it does not linger on the component list */
        OBJ_RELEASE(pcie_proc);
        *ret_proc = NULL;
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    pcie_proc->endpoint_proc->lcl_dev_name = lcl_dev_name;
    pcie_proc->endpoint_proc->rem_dev_name = rem_dev_name;
    pcie_proc->endpoint_proc->endpoint_proc = pcie_proc;
    pcie_proc->endpoint_proc->endpoint_btl = pcie_btl;

    if(OMPI_SUCCESS != mca_btl_pcie_endpoint_init(pcie_proc->endpoint_proc)) {
        BTL_ERROR(("Error initializing the PCIE endpoint \n"));
        /* NOTE(review): assumes the endpoint destructor copes with an
           endpoint whose init failed part-way -- confirm. */
        OBJ_RELEASE(pcie_proc->endpoint_proc);
        OBJ_RELEASE(pcie_proc);
        *ret_proc = NULL;
        return OMPI_ERROR;
    }

    *ret_proc = pcie_proc;
    return OMPI_SUCCESS;
}

62
ompi/mca/btl/pcie/btl_pcie_proc.h Обычный файл
Просмотреть файл

@ -0,0 +1,62 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_PCIE_PROC_H
#define MCA_BTL_PCIE_PROC_H
#include "ompi_config.h"
#include "opal/class/opal_list.h"
#include "ompi/proc/proc.h"
#include "btl_pcie.h"
#include "btl_pcie_endpoint.h"
BEGIN_C_DECLS
/**
* Represents the state of a remote process as seen by the PCIE BTL,
* together with the single endpoint created to reach it.  Instances
* add themselves to the component's pcie_procs list on construction
* and remove themselves on destruction.
*/
struct mca_btl_pcie_proc_t {
opal_list_item_t super;
/**< allow proc to be placed on a list */
ompi_proc_t *proc_ompi;
/**< pointer to corresponding ompi_proc_t */
orte_process_name_t proc_guid;
/**< globally unique identifier for the process */
size_t proc_addr_count;
/**< number of addresses published by endpoint */
struct mca_btl_base_endpoint_t *endpoint_proc;
/**< endpoint that has been created to access this proc */
size_t proc_endpoint_count;
/**< number of endpoints */
opal_mutex_t proc_lock;
/**< lock to protect against concurrent access to proc state */
};
typedef struct mca_btl_pcie_proc_t mca_btl_pcie_proc_t;
OBJ_CLASS_DECLARATION(mca_btl_pcie_proc_t);
/**
* Find or create the PCIE proc for a peer.
*
* @param ompi_proc  peer process
* @param pcie_btl   local BTL module whose device must pair with one of
*                   the peer's published devices
* @param ret_proc   receives the proc, or NULL if none is available
*
* @return OMPI_SUCCESS (possibly with *ret_proc == NULL when no device
*         matches), OMPI_ERROR, or OMPI_ERR_OUT_OF_RESOURCE
*/
int mca_btl_pcie_proc_create(ompi_proc_t* ompi_proc,
mca_btl_pcie_module_t* pcie_btl,
mca_btl_pcie_proc_t** ret_proc);
END_C_DECLS
#endif /* #ifndef MCA_BTL_PCIE_PROC_H */

31
ompi/mca/btl/pcie/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,31 @@
# -*- shell-script -*-
#
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_btl_pcie_CONFIG(action-if-can-compile,
# [action-if-cant-compile])
# ------------------------------------------------
# Runs the shared PCIE check with the btl_pcie prefix.  When it succeeds,
# the LDFLAGS and LIBS found for btl_pcie are copied into the matching
# WRAPPER_EXTRA variables and the first action is run; otherwise the
# second action is run.  The btl_pcie CPPFLAGS, LDFLAGS and LIBS are
# substituted in either case.
AC_DEFUN([MCA_btl_pcie_CONFIG],[
OMPI_CHECK_PCIE([btl_pcie],
[btl_pcie_happy="yes"],
[btl_pcie_happy="no"])
AS_IF([test "$btl_pcie_happy" = "yes"],
[btl_pcie_WRAPPER_EXTRA_LDFLAGS="$btl_pcie_LDFLAGS"
btl_pcie_WRAPPER_EXTRA_LIBS="$btl_pcie_LIBS"
$1],
[$2])
# substitute in the things needed to build pcie
AC_SUBST([btl_pcie_CPPFLAGS])
AC_SUBST([btl_pcie_LDFLAGS])
AC_SUBST([btl_pcie_LIBS])
])dnl

24
ompi/mca/btl/pcie/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,24 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module
# NOTE(review): presumably the list of files the build system generates
# for this component directory -- confirm against the MCA configury.
PARAM_CONFIG_FILES="Makefile"

20
ompi/mca/btl/pcie/help-mpi-btl-pcie.txt Обычный файл
Просмотреть файл

@ -0,0 +1,20 @@
# -*- text -*-
# Copyright (c) 2007 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English help file for Open MPI's IBM PCIe support
[initialization:more-than-one-cpu]
The PCIe BTL found that the CPU affinity mask for the current process
includes more than one CPU (%d). When using Open MPI on the Cell
machines with the PCIe driver, the affinity mask must include exactly
one CPU.
[initialization:no-cpus]
The PCIe BTL was unable to find any CPUs in the affinity mask for the
current process. This usually indicates a system issue that must be
resolved by the system administrator.

Просмотреть файл

@ -0,0 +1,159 @@
# hostname:core device
n01-001-0:0 /dev/axon0
n01-001-0:1 /dev/axon1
n01-001-0:2 /dev/axon2
n01-001-0:3 /dev/axon3
n01-001-1:0 /dev/axon0
n01-001-1:1 /dev/axon1
n01-001-2:0 /dev/axon0
n01-001-2:1 /dev/axon1
n01-002-0:0 /dev/axon0
n01-002-0:1 /dev/axon1
n01-002-0:2 /dev/axon2
n01-002-0:3 /dev/axon3
n01-002-1:0 /dev/axon0
n01-002-1:1 /dev/axon1
n01-002-2:0 /dev/axon0
n01-002-2:1 /dev/axon1
n01-003-0:0 /dev/axon0
n01-003-0:1 /dev/axon1
n01-003-0:2 /dev/axon2
n01-003-0:3 /dev/axon3
n01-003-1:0 /dev/axon0
n01-003-1:1 /dev/axon1
n01-004-0:0 /dev/axon0
n01-004-0:1 /dev/axon1
n01-004-0:2 /dev/axon2
n01-004-0:3 /dev/axon3
n01-004-1:0 /dev/axon0
n01-004-1:1 /dev/axon1
n01-005-0:0 /dev/axon0
n01-005-0:1 /dev/axon1
n01-005-0:2 /dev/axon2
n01-005-0:3 /dev/axon3
n01-005-1:0 /dev/axon0
n01-005-1:1 /dev/axon1
n01-005-2:0 /dev/axon0
n01-005-2:1 /dev/axon1
n01-006-0:0 /dev/axon0
n01-006-0:1 /dev/axon1
n01-006-0:2 /dev/axon2
n01-006-0:3 /dev/axon3
n01-006-1:0 /dev/axon0
n01-006-1:1 /dev/axon1
n01-006-2:0 /dev/axon0
n01-006-2:1 /dev/axon1
n01-007-0:0 /dev/axon0
n01-007-0:1 /dev/axon1
n01-007-0:2 /dev/axon2
n01-007-0:3 /dev/axon3
n01-007-1:0 /dev/axon0
n01-007-1:1 /dev/axon1
n01-007-2:0 /dev/axon0
n01-007-2:1 /dev/axon1
n01-008-0:0 /dev/axon0
n01-008-0:1 /dev/axon1
n01-008-0:2 /dev/axon2
n01-008-0:3 /dev/axon3
n01-008-1:0 /dev/axon0
n01-008-1:1 /dev/axon1
n01-008-2:0 /dev/axon0
n01-008-2:1 /dev/axon1
n01-009-0:0 /dev/axon0
n01-009-0:1 /dev/axon1
n01-009-0:2 /dev/axon2
n01-009-0:3 /dev/axon3
n01-009-1:0 /dev/axon0
n01-009-1:1 /dev/axon1
n01-009-2:0 /dev/axon0
n01-009-2:1 /dev/axon1
n01-010-0:0 /dev/axon0
n01-010-0:1 /dev/axon1
n01-010-0:2 /dev/axon2
n01-010-0:3 /dev/axon3
n01-010-1:0 /dev/axon0
n01-010-1:1 /dev/axon1
n01-010-2:0 /dev/axon0
n01-010-2:1 /dev/axon1
n01-011-0:0 /dev/axon0
n01-011-0:1 /dev/axon1
n01-011-0:2 /dev/axon2
n01-011-0:3 /dev/axon3
n01-011-1:0 /dev/axon0
n01-011-1:1 /dev/axon1
n01-011-2:0 /dev/axon0
n01-011-2:1 /dev/axon1
n01-012-0:0 /dev/axon0
n01-012-0:1 /dev/axon1
n01-012-0:2 /dev/axon2
n01-012-0:3 /dev/axon3
n01-012-1:0 /dev/axon0
n01-012-1:1 /dev/axon1
n01-012-2:0 /dev/axon0
n01-012-2:1 /dev/axon1
n01-013-0:0 /dev/axon0
n01-013-0:1 /dev/axon1
n01-013-0:2 /dev/axon2
n01-013-0:3 /dev/axon3
n01-013-1:0 /dev/axon0
n01-013-1:1 /dev/axon1
n01-013-2:0 /dev/axon0
n01-013-2:1 /dev/axon1
n01-014-0:0 /dev/axon0
n01-014-0:1 /dev/axon1
n01-014-0:2 /dev/axon2
n01-014-0:3 /dev/axon3
n01-014-1:0 /dev/axon0
n01-014-1:1 /dev/axon1
n01-014-2:0 /dev/axon0
n01-014-2:1 /dev/axon1
n01-015-0:0 /dev/axon0
n01-015-0:1 /dev/axon1
n01-015-0:2 /dev/axon2
n01-015-0:3 /dev/axon3
n01-015-1:0 /dev/axon0
n01-015-1:1 /dev/axon1
n01-015-2:0 /dev/axon0
n01-015-2:1 /dev/axon1

Просмотреть файл

@ -0,0 +1,82 @@
# opteron_host:device cell_host:device
n01-001-0:/dev/axon0 n01-001-1:/dev/axon0
n01-001-0:/dev/axon1 n01-001-1:/dev/axon1
n01-001-0:/dev/axon2 n01-001-2:/dev/axon0
n01-001-0:/dev/axon3 n01-001-2:/dev/axon1
n01-002-0:/dev/axon0 n01-002-1:/dev/axon0
n01-002-0:/dev/axon1 n01-002-1:/dev/axon1
n01-002-0:/dev/axon2 n01-002-2:/dev/axon0
n01-002-0:/dev/axon3 n01-002-2:/dev/axon1
n01-003-0:/dev/axon0 n01-003-1:/dev/axon0
n01-003-0:/dev/axon1 n01-003-1:/dev/axon1
n01-003-0:/dev/axon2 n01-003-2:/dev/axon0
n01-003-0:/dev/axon3 n01-003-2:/dev/axon1
n01-004-0:/dev/axon0 n01-004-1:/dev/axon0
n01-004-0:/dev/axon1 n01-004-1:/dev/axon1
n01-004-0:/dev/axon2 n01-004-2:/dev/axon0
n01-004-0:/dev/axon3 n01-004-2:/dev/axon1
n01-005-0:/dev/axon0 n01-005-1:/dev/axon0
n01-005-0:/dev/axon1 n01-005-1:/dev/axon1
n01-005-0:/dev/axon2 n01-005-2:/dev/axon0
n01-005-0:/dev/axon3 n01-005-2:/dev/axon1
n01-006-0:/dev/axon0 n01-006-1:/dev/axon0
n01-006-0:/dev/axon1 n01-006-1:/dev/axon1
n01-006-0:/dev/axon2 n01-006-2:/dev/axon0
n01-006-0:/dev/axon3 n01-006-2:/dev/axon1
n01-007-0:/dev/axon0 n01-007-1:/dev/axon0
n01-007-0:/dev/axon1 n01-007-1:/dev/axon1
n01-007-0:/dev/axon2 n01-007-2:/dev/axon0
n01-007-0:/dev/axon3 n01-007-2:/dev/axon1
n01-008-0:/dev/axon0 n01-008-1:/dev/axon0
n01-008-0:/dev/axon1 n01-008-1:/dev/axon1
n01-008-0:/dev/axon2 n01-008-2:/dev/axon0
n01-008-0:/dev/axon3 n01-008-2:/dev/axon1
n01-009-0:/dev/axon0 n01-009-1:/dev/axon0
n01-009-0:/dev/axon1 n01-009-1:/dev/axon1
n01-009-0:/dev/axon2 n01-009-2:/dev/axon0
n01-009-0:/dev/axon3 n01-009-2:/dev/axon1
n01-010-0:/dev/axon0 n01-010-1:/dev/axon0
n01-010-0:/dev/axon1 n01-010-1:/dev/axon1
n01-010-0:/dev/axon2 n01-010-2:/dev/axon0
n01-010-0:/dev/axon3 n01-010-2:/dev/axon1
n01-011-0:/dev/axon0 n01-011-1:/dev/axon0
n01-011-0:/dev/axon1 n01-011-1:/dev/axon1
n01-011-0:/dev/axon2 n01-011-2:/dev/axon0
n01-011-0:/dev/axon3 n01-011-2:/dev/axon1
n01-012-0:/dev/axon0 n01-012-1:/dev/axon0
n01-012-0:/dev/axon1 n01-012-1:/dev/axon1
n01-012-0:/dev/axon2 n01-012-2:/dev/axon0
n01-012-0:/dev/axon3 n01-012-2:/dev/axon1
n01-013-0:/dev/axon0 n01-013-1:/dev/axon0
n01-013-0:/dev/axon1 n01-013-1:/dev/axon1
n01-013-0:/dev/axon2 n01-013-2:/dev/axon0
n01-013-0:/dev/axon3 n01-013-2:/dev/axon1
n01-014-0:/dev/axon0 n01-014-1:/dev/axon0
n01-014-0:/dev/axon1 n01-014-1:/dev/axon1
n01-014-0:/dev/axon2 n01-014-2:/dev/axon0
n01-014-0:/dev/axon3 n01-014-2:/dev/axon1
n01-015-0:/dev/axon0 n01-015-1:/dev/axon0
n01-015-0:/dev/axon1 n01-015-1:/dev/axon1
n01-015-0:/dev/axon2 n01-015-2:/dev/axon0
n01-015-0:/dev/axon3 n01-015-2:/dev/axon1
n01-016-0:/dev/axon0 n01-016-1:/dev/axon0
n01-016-0:/dev/axon1 n01-016-1:/dev/axon1
n01-016-0:/dev/axon2 n01-016-2:/dev/axon0
n01-016-0:/dev/axon3 n01-016-2:/dev/axon1

57
ompi/mca/mpool/pcie/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,57 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Preprocessor flags found by configure for the PCIE headers.
AM_CPPFLAGS = $(mpool_pcie_CPPFLAGS)
# Sources shared by the DSO and the static flavor of the component.
sources = \
mpool_pcie.h \
mpool_pcie_component.c \
mpool_pcie_module.c
# Install the public header only when header installation was requested.
if WANT_INSTALL_HEADERS
ompidir = $(includedir)/openmpi/ompi/mca/mpool/pcie
ompi_HEADERS = mpool_pcie.h
else
ompidir = $(includedir)
ompi_HEADERS =
endif
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_mpool_pcie_DSO
component_noinst =
component_install = mca_mpool_pcie.la
else
component_noinst = libmca_mpool_pcie.la
component_install =
endif
# DSO flavor: installed into the package library directory.
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_mpool_pcie_la_SOURCES = $(sources)
mca_mpool_pcie_la_LDFLAGS = -module -avoid-version
mca_mpool_pcie_la_LIBADD = $(mpool_pcie_LIBS)
# Static flavor: built but not installed.
noinst_LTLIBRARIES = $(component_noinst)
libmca_mpool_pcie_la_SOURCES = $(sources)
libmca_mpool_pcie_la_LDFLAGS = -module -avoid-version
libmca_mpool_pcie_la_LIBADD = $(mpool_pcie_LIBS)

31
ompi/mca/mpool/pcie/configure.m4 Обычный файл
Просмотреть файл

@ -0,0 +1,31 @@
# -*- shell-script -*-
#
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_mpool_pcie_CONFIG(action-if-can-compile,
# [action-if-cant-compile])
# ------------------------------------------------
# Runs the shared PCIE check with the mpool_pcie prefix.  When it
# succeeds, the LDFLAGS and LIBS found for mpool_pcie are copied into the
# matching WRAPPER_EXTRA variables and the first action is run; otherwise
# the second action is run.  The mpool_pcie CPPFLAGS, LDFLAGS and LIBS
# are substituted in either case.
AC_DEFUN([MCA_mpool_pcie_CONFIG],[
OMPI_CHECK_PCIE([mpool_pcie],
[mpool_pcie_happy="yes"],
[mpool_pcie_happy="no"])
AS_IF([test "$mpool_pcie_happy" = "yes"],
[mpool_pcie_WRAPPER_EXTRA_LDFLAGS="$mpool_pcie_LDFLAGS"
mpool_pcie_WRAPPER_EXTRA_LIBS="$mpool_pcie_LIBS"
$1],
[$2])
# substitute in the things needed to build pcie
AC_SUBST([mpool_pcie_CPPFLAGS])
AC_SUBST([mpool_pcie_LDFLAGS])
AC_SUBST([mpool_pcie_LIBS])
])dnl

26
ompi/mca/mpool/pcie/configure.params Обычный файл
Просмотреть файл

@ -0,0 +1,26 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006 Voltaire. All rights reserved.
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module
# NOTE(review): presumably the list of files the build system generates
# for this component directory -- confirm against the MCA configury.
PARAM_CONFIG_FILES="Makefile"

87
ompi/mca/mpool/pcie/mpool_pcie.h Обычный файл
Просмотреть файл

@ -0,0 +1,87 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_MPOOL_PCIE_H
#define MCA_MPOOL_PCIE_H
#include "opal/class/opal_list.h"
#include "opal/event/event.h"
#include "ompi/class/ompi_free_list.h"
#include "ompi/mca/allocator/allocator.h"
#include "ompi/mca/mpool/mpool.h"
BEGIN_C_DECLS
/* Component structure of the PCIE mpool: the base mpool component plus
   component-specific settings. */
struct mca_mpool_pcie_component_t {
/* base mpool component; must stay first */
mca_mpool_base_component_t super;
/* NOTE(review): presumably a verbosity level; it is not set by the
   component source visible here -- confirm. */
int verbose;
};
typedef struct mca_mpool_pcie_component_t mca_mpool_pcie_component_t;
/* the single component instance, defined in mpool_pcie_component.c */
OMPI_MODULE_DECLSPEC extern mca_mpool_pcie_component_t mca_mpool_pcie_component;
/* Module structure of the PCIE mpool: a fixed region [base, base + len)
   handed out sequentially, with offset marking the first unused byte. */
struct mca_mpool_pcie_module_t {
/* base mpool module; must stay first so the module can be cast */
mca_mpool_base_module_t super;
/* start of the managed region (copied from the resources) */
void* base;
/* bytes already handed out; starts at 0 and only grows */
size_t offset;
/* total size of the managed region in bytes */
size_t len;
}; typedef struct mca_mpool_pcie_module_t mca_mpool_pcie_module_t;
/* Resources passed to the component's init function: the memory region
   the new module will manage. */
struct mca_mpool_base_resources_t {
/* start of the region */
void *base;
/* size of the region in bytes */
size_t len;
};
typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t;
/*
* Initializes the mpool module.  The current implementation is empty.
*/
void mca_mpool_pcie_module_init(mca_mpool_pcie_module_t* mpool);
/*
* Returns the base address of the region managed by the module.
*/
void* mca_mpool_pcie_base(mca_mpool_base_module_t*);
/**
* Allocate a block from the module's region.
*
* Blocks are carved sequentially from the region; NULL is returned when
* the request does not fit into the remaining space.  The current
* implementation does not use align, flags or registration.
*/
void* mca_mpool_pcie_alloc(mca_mpool_base_module_t* mpool,
size_t size,
size_t align,
uint32_t flags,
mca_mpool_base_registration_t** registration);
/**
* Reallocate a block (function declaration; the implementation lives in
* mpool_pcie_module.c).
*/
void* mca_mpool_pcie_realloc(mca_mpool_base_module_t* mpool,
void* addr,
size_t size,
mca_mpool_base_registration_t** registration);
/**
* Free a block (function declaration; the implementation lives in
* mpool_pcie_module.c).
*/
void mca_mpool_pcie_free(mca_mpool_base_module_t* mpool,
void * addr,
mca_mpool_base_registration_t* registration);
END_C_DECLS
#endif

112
ompi/mca/mpool/pcie/mpool_pcie_component.c Обычный файл
Просмотреть файл

@ -0,0 +1,112 @@
/*
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#if HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H*/
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif /* HAVE_STDLIB_H */
#include <errno.h>
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/util/proc_info.h"
#include "ompi/proc/proc.h"
#include "ompi/mca/allocator/base/base.h"
#include "mpool_pcie.h"
/*
* Local functions: the component open/close hooks and the module
* factory, all referenced from the component structure below.
*/
static int mca_mpool_pcie_open(void);
static int mca_mpool_pcie_close( void );
static mca_mpool_base_module_t* mca_mpool_pcie_init(
struct mca_mpool_base_resources_t* resources);
/* The PCIE mpool component instance.  Only the embedded base component
   is initialized explicitly; the trailing verbose field is therefore
   zero-initialized. */
mca_mpool_pcie_component_t mca_mpool_pcie_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
/* Indicate that we are a mpool v2.0.0 component (which also
implies a specific MCA version) */
MCA_MPOOL_BASE_VERSION_2_0_0,
"pcie", /* MCA component name */
OMPI_MAJOR_VERSION, /* MCA component major version */
OMPI_MINOR_VERSION, /* MCA component minor version */
OMPI_RELEASE_VERSION, /* MCA component release version */
mca_mpool_pcie_open, /* component open */
mca_mpool_pcie_close
},
/* Next the MCA v2.0.0 component meta data */
{
/* The component is not checkpoint ready */
false
},
/* module factory */
mca_mpool_pcie_init
}
};
/* Component open hook: there is nothing to set up, so it always
   reports success. */
static int mca_mpool_pcie_open(void)
{
    const int status = OMPI_SUCCESS;

    return status;
}
/* Component close hook: there is nothing to tear down, so it always
   reports success. */
static int mca_mpool_pcie_close(void)
{
    const int status = OMPI_SUCCESS;

    return status;
}
/*
 * Module constructor: build a PCIe mpool module on top of the memory
 * region described by `resources`.
 *
 * The module only provides alloc/realloc/free; every registration
 * related entry point (register, find, deregister, release_memory,
 * finalize) and the rcache are left NULL. Allocation starts at offset 0
 * of the region [resources->base, resources->base + resources->len).
 *
 * @param resources  description of the backing region; its `base` and
 *                   `len` fields are copied into the module
 * @return the new module (heap allocated with malloc), or NULL if
 *         `resources` is NULL or the allocation fails
 */
static mca_mpool_base_module_t*
mca_mpool_pcie_init(struct mca_mpool_base_resources_t* resources)
{
    mca_mpool_pcie_module_t* mpool_module;

    /* base/len are read from the descriptor below, so a missing one
       cannot be served; reject it before allocating anything */
    if (NULL == resources) {
        return NULL;
    }

    mpool_module = malloc(sizeof(*mpool_module));
    if (NULL == mpool_module) {
        return NULL;
    }

    mpool_module->super.mpool_component = &mca_mpool_pcie_component.super;
    mpool_module->super.mpool_base = NULL; /* no base .. */

    /* allocation interface implemented by this pool */
    mpool_module->super.mpool_alloc = mca_mpool_pcie_alloc;
    mpool_module->super.mpool_realloc = mca_mpool_pcie_realloc;
    mpool_module->super.mpool_free = mca_mpool_pcie_free;

    /* registration interface is not provided */
    mpool_module->super.mpool_register = NULL;
    mpool_module->super.mpool_find = NULL;
    mpool_module->super.mpool_deregister = NULL;
    mpool_module->super.mpool_release_memory = NULL;
    mpool_module->super.mpool_finalize = NULL;
    mpool_module->super.rcache = NULL;
    mpool_module->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM;

    /* remember the backing region; nothing has been handed out yet */
    mpool_module->base = resources->base;
    mpool_module->len = resources->len;
    mpool_module->offset = 0;

    return (mca_mpool_base_module_t*) mpool_module;
}

70
ompi/mca/mpool/pcie/mpool_pcie_module.c Обычный файл
Просмотреть файл

@ -0,0 +1,70 @@
/*
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include "opal/util/output.h"
#include "mpool_pcie.h"
/*
 * Per-module initialization hook. Currently a placeholder: it performs
 * no work and leaves the module untouched.
 */
void mca_mpool_pcie_module_init(mca_mpool_pcie_module_t* mpool)
{
    (void) mpool;
}
/*
 * Return the start address of the memory region backing the pool,
 * i.e. the `base` field of the PCIe module.
 */
void* mca_mpool_pcie_base(mca_mpool_base_module_t* mpool)
{
    mca_mpool_pcie_module_t* pcie_module = (mca_mpool_pcie_module_t*) mpool;

    return pcie_module->base;
}
/*
 * Carve `size` bytes out of the PCIe memory region.
 *
 * The pool is a simple bump allocator: the block handed out starts at
 * base + offset, and offset is then advanced by `size`. Memory is never
 * returned to the pool (see mca_mpool_pcie_free).
 *
 * @param mpool         pool module; must be a mca_mpool_pcie_module_t
 * @param size          number of bytes requested
 * @param align         not used by this pool; no alignment is applied
 * @param flags         not used
 * @param registration  not used and not written
 * @return start of the new block, or NULL if fewer than `size` bytes
 *         remain in the region
 */
void*
mca_mpool_pcie_alloc(mca_mpool_base_module_t* mpool,
                     size_t size,
                     size_t align,
                     uint32_t flags,
                     mca_mpool_base_registration_t** registration)
{
    mca_mpool_pcie_module_t* mpool_pcie = (mca_mpool_pcie_module_t*) mpool;
    void *addr;

    /* Compare against the space that is left instead of computing
       offset + size: that sum can wrap around for a very large request,
       which would make the check pass and hand out memory beyond the
       end of the region. */
    if (mpool_pcie->offset > mpool_pcie->len ||
        size > mpool_pcie->len - mpool_pcie->offset) {
        return NULL;
    }

    addr = (char*)mpool_pcie->base + mpool_pcie->offset;
    mpool_pcie->offset += size;
    return addr;
}
/*
 * Resizing is not supported by the PCIe pool: all arguments are ignored
 * and NULL is returned unconditionally.
 */
void* mca_mpool_pcie_realloc(mca_mpool_base_module_t* mpool, void* addr,
                             size_t size,
                             mca_mpool_base_registration_t** registration)
{
    (void) mpool;
    (void) addr;
    (void) size;
    (void) registration;

    return NULL;
}
/*
 * Releasing memory is a no-op for the PCIe pool: blocks handed out by
 * mca_mpool_pcie_alloc are never reclaimed. All arguments are ignored.
 */
void mca_mpool_pcie_free(mca_mpool_base_module_t* mpool,
                         void * addr,
                         mca_mpool_base_registration_t* registration)
{
    (void) mpool;
    (void) addr;
    (void) registration;
}