1
1

Remove a stale pcie btl that never got completed

This commit was SVN r22498.
Этот коммит содержится в:
Ralph Castain 2010-01-27 01:16:01 +00:00
родитель 93e930ae13
Коммит b3dd63fd81
22 изменённых файлов: 0 добавлений и 3690 удалений

Просмотреть файл

@ -1,71 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Preprocessor flags for this component, taken from the btl_pcie_CPPFLAGS
# substitution.
AM_CPPFLAGS = $(btl_pcie_CPPFLAGS)
# Help text file distributed and installed as package data.
dist_pkgdata_DATA = \
help-mpi-btl-pcie.txt
# Source files of the component.
# NOTE(review): btl_pcie.c and btl_pcie.h include "btl_pcie_ddriver.h", which
# is not listed here -- confirm whether it should be part of the distribution.
sources = \
btl_pcie.c \
btl_pcie.h \
btl_pcie_component.c \
btl_pcie_endpoint.c \
btl_pcie_endpoint.h \
btl_pcie_fifo.c \
btl_pcie_fifo.h \
btl_pcie_frag.c \
btl_pcie_frag.h \
btl_pcie_proc.c \
btl_pcie_proc.h \
btl_pcie_lex.l \
btl_pcie_lex.h \
btl_pcie_cfg.c
# Exactly one of the two libraries below receives the sources: when
# OMPI_BUILD_btl_pcie_DSO is true they go to mca_btl_pcie.la, otherwise to
# libmca_btl_pcie.la. The other library name and source list stay empty.
if OMPI_BUILD_btl_pcie_DSO
lib =
lib_sources =
component = mca_btl_pcie.la
component_sources = $(sources)
else
lib = libmca_btl_pcie.la
lib_sources = $(sources)
component =
component_sources =
endif
# mca_btl_pcie.la is installed into $(pkglibdir).
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component)
mca_btl_pcie_la_SOURCES = $(component_sources)
mca_btl_pcie_la_LDFLAGS = -module -avoid-version $(btl_pcie_LDFLAGS)
mca_btl_pcie_la_LIBADD = $(btl_pcie_LIBS)
# libmca_btl_pcie.la is a noinst (not installed) libtool library.
noinst_LTLIBRARIES = $(lib)
libmca_btl_pcie_la_SOURCES = $(lib_sources)
libmca_btl_pcie_la_LDFLAGS= -module -avoid-version $(btl_pcie_LDFLAGS)
libmca_btl_pcie_la_LIBADD = $(btl_pcie_LIBS)
# The two resource configuration files are installed into $(sysconfdir).
ompi_sysconfdir = $(sysconfdir)
ompi_sysconf_DATA = \
mca-btl-pcie-local-resources.cfg \
mca-btl-pcie-remote-resources.cfg

Просмотреть файл

@ -1,149 +0,0 @@
/**
* axon_ioctl - provides an io control interface to the axon driver
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright (C) IBM Corporation, 2008
*
* Authors: H Brett Bolen <hbbolen@us.ibm.com>,
* Tim Schimke <tschimke@us.ibm.com>,
* Jesse Arroyo <arroyoj@us.ibm.com>,
* Murali N Iyer <mniyer@us.ibm.com>
*
*/
#ifndef __AXON_IOCTL_H__
#define __AXON_IOCTL_H__
#define AXONIO_IOC_MAGIC 'x'
/* DMA Memory Registration */
#define AXONIO_DMA_REGISTER _IOWR(AXONIO_IOC_MAGIC, 0x7, \
struct AXON_MR_registration)
#define AXONIO_DMA_DEREGISTER _IOWR(AXONIO_IOC_MAGIC, 0x8, \
struct AXON_MR_deregistration)
#define AXONIO_DMA_EXTREGISTER _IOWR(AXONIO_IOC_MAGIC, 0x10, \
struct AXON_MR_ext_registration)
#define AXONIO_ISSUE_DMA_FAST _IOWR(AXONIO_IOC_MAGIC, 0x11, __u32)
/* Wakeup Notification of remote system */
#define AXONIO_NOTIFY _IOR(AXONIO_IOC_MAGIC, 0x13, __u32)
/*
* mmap offsets
*/
#define LOCAL_SMA_OFFSET 0x0
#define REMOTE_SMA_OFFSET 0x0100000
#define DMA_COMMAND_BUFFER_OFFSET 0x0800000
/*
* opaque handles
*/
typedef __u64 AXON_memory_region_handle;
/**
* Supports Memory Registration
* AXONIO_DMA_REGISTER
* AXONIO_DMA_DEREGISTER
*
* permissions bitmask
*
* 0x01 - allow local access ( always true)
* 0x02 - allow local read
* 0x04 - allow local write
* 0x10 - allow remote access
* 0x20 - allow remote read
* 0x40 - allow remote write
*/
enum {
AXON_MR_LOCAL_ACCESS = 0x00000001,
AXON_MR_LOCAL_READ = 0x00000002,
AXON_MR_LOCAL_WRITE = 0x00000004,
AXON_MR_REMOTE_ACCESS = 0x00000010,
AXON_MR_REMOTE_READ = 0x00000020,
AXON_MR_REMOTE_WRITE = 0x00000040,
};
/* Argument structure named by the AXONIO_DMA_REGISTER ioctl definition. */
struct AXON_MR_registration{
/* opaque handle identifying the memory region */
AXON_memory_region_handle memory_region_handle;
/* NOTE(review): presumably the address of the local buffer -- confirm */
__u64 local_dma_memory;
/* NOTE(review): presumably the buffer length in bytes -- confirm */
__u64 local_dma_memory_size;
/* NOTE(review): presumably an OR of the AXON_MR_* permission bits -- confirm */
__u64 permissions;
};
struct AXON_MR_deregistration{
AXON_memory_region_handle memory_region_handle;
};
struct AXON_MR_ext_registration{
AXON_memory_region_handle memory_region_handle;
__u64 permissions;
};
/**
* Supports DMA GET/PUT status queries
*
* NOTE: AXON_dma_request required to be within SMA area
*/
/* One descriptor entry of a DMA request; used for both the local and the
   remote descriptor arrays of struct AXON_dma_request. */
struct AXON_dma_list_entry {
/* handle of the memory region the address belongs to */
AXON_memory_region_handle src_memory_region_handle;
/* address of the data for this entry */
__u64 src_address;
/* size of the transfer described by this entry */
__u64 transfer_size;
/* total size 0x18 */
};
enum {
AXON_DMATYPE_PUT = 0x01, /* dma local to remote */
AXON_DMATYPE_GET = 0x02, /* dma remote to local */
};
enum {
AXON_DMAFLAG_WRITE_REMOTE_STATUS = 0x00000001,
AXON_DMAFLAG_LOCAL_COMPLETION_SIGNAL = 0x00000002,
};
/* A single DMA request: header fields followed by fixed-size arrays of
   local and remote descriptor entries (10 each). */
struct AXON_dma_request {
/* one of the AXON_DMATYPE_* values */
__u32 dma_type;
/* AXON_DMAFLAG_* bits */
__u32 flags;
/* NOTE(review): the BTL computes this as a byte offset of the local status
   word from the start of its local SMA mapping -- confirm against driver */
__u32 localDmaStatusOffset;
__u32 remoteDmaStatusOffset;
__u64 transfer_size; /* bytes */
/* NOTE(review): presumably the number of valid entries in the
   corresponding array below -- confirm */
__u32 local_descriptor_count;
__u32 remote_descriptor_count;
__u64 rsvd1;
struct AXON_dma_list_entry local_descriptor[10];
struct AXON_dma_list_entry remote_descriptor[10];
};
/* Header placed at the start of the DMA command block; the request data
   lives dma_req_offset bytes into the same block. */
struct AXON_dma_command_list_fast {
/* number of requests the caller is offering */
__u32 dma_requests_available;
/* NOTE(review): presumably written by the driver to report how many
   requests were started -- confirm */
__u32 dma_requests_started;
__u32 dma_req_offset; /* offset into command block mmap area */
};
/**
* Wakeup Notification
*/
struct AXON_WAKEUP {
__u32 type;
};
#endif /* __AXON_IOCTL_H__ */

Просмотреть файл

@ -1,612 +0,0 @@
/*
* Copyright (c) 2009 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include <sched.h>
#include "opal/types.h"
#include "opal/util/output.h"
#include "opal/util/if.h"
#include "opal/sys/atomic.h"
#include "opal/mca/paffinity/paffinity.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/pml/pml.h"
#include "btl_pcie.h"
#include "btl_pcie_frag.h"
#include "btl_pcie_proc.h"
#include "btl_pcie_endpoint.h"
#include "btl_pcie_ddriver.h"
/*
 * Statically initialized PCIe BTL module.
 *
 * Only the embedded base module ("super") is initialized explicitly, and
 * it is initialized positionally: the order of the entries below must match
 * the field order of mca_btl_base_module_t, which is declared outside this
 * file. All size/limit/performance values are 0 here.
 * NOTE(review): presumably the real values are filled in by the component
 * at initialization time -- confirm.
 * The remaining members of mca_btl_pcie_module_t are not listed and are
 * therefore zero-initialized.
 */
mca_btl_pcie_module_t mca_btl_pcie_module = {
{
&mca_btl_pcie_component.super,
0, /* max size of first fragment */
0, /* Threshold below which BTL should not fragment */
0, /* max send fragment size */
0, /* pipeline protocol length */
0, /* max rdma fragment size */
0, /* min packet size for pipeline protocol */
0, /* exclusivity */
0, /* latency */
0, /* bandwidth */
0, /* flags */
mca_btl_pcie_add_procs,
mca_btl_pcie_del_procs,
mca_btl_pcie_register,
mca_btl_pcie_finalize,
mca_btl_pcie_alloc,
mca_btl_pcie_free,
mca_btl_pcie_prepare_src,
mca_btl_pcie_prepare_dst,
mca_btl_pcie_send,
NULL, /* send immediate */
mca_btl_pcie_put, /* put */
NULL, /* get: not exported; mca_btl_pcie_get only returns OMPI_ERR_NOT_IMPLEMENTED */
mca_btl_base_dump, /* dump */
NULL, /* mpool */
NULL, /* register error cb */
NULL /* ft event */
}
};
/**
 * Try to create a PCIe endpoint for each process in ompi_procs.
 *
 * Processes on the local node are skipped. For every remote process for
 * which mca_btl_pcie_proc_create() yields a proc structure, the matching
 * bit in reachable is set and peers[i] receives that proc's endpoint.
 *
 * @param btl (IN) BTL module
 * @param nprocs (IN) Number of entries in ompi_procs / peers
 * @param ompi_procs (IN) Processes to examine
 * @param peers (OUT) Endpoint for each reachable process; other entries
 *                    are left untouched
 * @param reachable (IN/OUT) Bitmap; bit i is set when process i is reachable
 * @return OMPI_SUCCESS, or the first error returned by
 *         mca_btl_pcie_proc_create() (processing stops at that entry)
 */
int mca_btl_pcie_add_procs(
    struct mca_btl_base_module_t* btl,
    size_t nprocs,
    struct ompi_proc_t **ompi_procs,
    struct mca_btl_base_endpoint_t** peers,
    opal_bitmap_t* reachable)
{
    mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*)btl;
    int i;

    for(i = 0; i < (int) nprocs; i++) {
        struct ompi_proc_t* ompi_proc = ompi_procs[i];
        /* Start from NULL so the "was a proc created?" test below is well
           defined even if the create call succeeds without storing a
           result. */
        mca_btl_pcie_proc_t* pcie_proc = NULL;
        int rc;

        /* Don't connect to anyone on our local node, including
           ourselves.  The PCIe doesn't work that way, and the mapper
           sometimes gets confused by that fact. */
        if (OPAL_PROC_ON_LOCAL_NODE(ompi_proc->proc_flags)) {
            continue;
        }

        rc = mca_btl_pcie_proc_create(ompi_proc, pcie_btl, &pcie_proc);
        if (OMPI_SUCCESS != rc) {
            return rc;
        }
        if (NULL != pcie_proc) {
            opal_bitmap_set_bit(reachable, i);
            peers[i] = pcie_proc->endpoint_proc;
        }
    }

    return OMPI_SUCCESS;
}
/**
 * Notification that processes are going away.
 *
 * Not implemented yet: nothing is released and success is always reported.
 *
 * @param btl (IN) BTL module (unused)
 * @param nprocs (IN) Number of processes (unused)
 * @param procs (IN) Processes being removed (unused)
 * @param peers (IN) Their endpoints (unused)
 * @return OMPI_SUCCESS
 */
int mca_btl_pcie_del_procs(struct mca_btl_base_module_t* btl,
                           size_t nprocs,
                           struct ompi_proc_t **procs,
                           struct mca_btl_base_endpoint_t ** peers)
{
    (void) btl;
    (void) nprocs;
    (void) procs;
    (void) peers;

    /* TODO */
    return OMPI_SUCCESS;
}
/**
 * Record the receive callback for a tag (send/recv semantics).
 *
 * The callback and its user data are stored in the module's per-tag
 * registration table; the tag is used directly as the table index.
 *
 * @param btl (IN) BTL module
 * @param tag (IN) Tag the callback is registered for
 * @param cbfunc (IN) Function to store for this tag
 * @param cbdata (IN) Opaque pointer stored alongside cbfunc
 * @return OMPI_SUCCESS
 */
int mca_btl_pcie_register(
    struct mca_btl_base_module_t* btl,
    mca_btl_base_tag_t tag,
    mca_btl_base_module_recv_cb_fn_t cbfunc,
    void* cbdata)
{
    mca_btl_pcie_module_t* module = (mca_btl_pcie_module_t*) btl;

    module->pcie_reg[tag].cbdata = cbdata;
    module->pcie_reg[tag].cbfunc = cbfunc;

    return OMPI_SUCCESS;
}
/**
 * Allocate a descriptor with a segment of the requested size.
 *
 * The eager pool is tried first when the size fits the eager limit; if
 * that yields nothing and the size fits the max send size, the max pool
 * is tried. Requests larger than the max send size get NULL.
 *
 * @param btl (IN) BTL module
 * @param endpoint (IN) Peer (not referenced)
 * @param order (IN) Ordering hint (not referenced)
 * @param size (IN) Requested segment size
 * @param flags (IN) Descriptor flags (not referenced; des_flags is set to 0)
 * @return The descriptor, or NULL if no fragment could be obtained
 */
mca_btl_base_descriptor_t* mca_btl_pcie_alloc(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    uint8_t order,
    size_t size,
    uint32_t flags)
{
    mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl;
    mca_btl_pcie_frag_t* frag = NULL;
    int rc;

    /* first choice: a fragment from the eager pool */
    if (size <= btl->btl_eager_limit) {
        MCA_BTL_PCIE_FRAG_ALLOC_EAGER(pcie_btl, frag, rc);
    }

    /* second choice: a fragment from the max-send pool */
    if (NULL == frag && size <= btl->btl_max_send_size) {
        MCA_BTL_PCIE_FRAG_ALLOC_MAX(pcie_btl, frag, rc);
    }

    /* whichever pool it came from, the fragment is set up the same way */
    if (NULL != frag) {
        frag->segment.seg_len = size;
        frag->base.des_flags = 0;
        frag->hdr->length = size;
    }

    BTL_VERBOSE(("btl_pcie_alloc called for %lu bytes, returning 0x%lx", (unsigned long)size, (long)frag));
    return (mca_btl_base_descriptor_t*) frag;
}
/**
 * Give a descriptor back to the BTL.
 *
 * If the fragment carries a memory registration, it is deregistered
 * through the RDMA mpool and cleared before the fragment is returned.
 *
 * @param btl (IN) BTL module
 * @param des (IN) Descriptor previously handed out by this BTL
 * @return The status produced by MCA_BTL_PCIE_FRAG_RETURN
 */
int mca_btl_pcie_free(
    struct mca_btl_base_module_t* btl,
    mca_btl_base_descriptor_t* des)
{
    mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl;
    mca_btl_pcie_frag_t* frag = (mca_btl_pcie_frag_t*) des;
    int ret;

    BTL_VERBOSE(("btl_pcie_free returning 0x%lx", (long)frag));

    if (NULL != frag->registration) {
        pcie_btl->rdma_mpool->mpool_deregister(
            pcie_btl->rdma_mpool,
            (mca_mpool_base_registration_t*) frag->registration);
        frag->registration = NULL;
    }

    MCA_BTL_PCIE_FRAG_RETURN(pcie_btl, frag, ret);
    return ret;
}
/**
 * Build a source descriptor that can be used for send/put.
 *
 * Two strategies are used:
 *  - If the convertor needs no intermediate buffers and no reserve space
 *    was requested, a DMA fragment is set up to point directly at the user
 *    buffer, which is registered with the RDMA mpool unless the caller
 *    already supplied a registration.
 *  - Otherwise the data is packed into a fragment from the eager pool
 *    (if data plus reserve fits the eager limit) or from the max-send
 *    pool (packing at most what fits into the fragment).
 *
 * @param btl (IN) BTL module
 * @param endpoint (IN) BTL peer addressing (not referenced here)
 * @param registration (IN) Existing registration of the user buffer, or NULL
 * @param convertor (IN) Data type convertor describing the user data
 * @param order (IN) Ordering hint (not referenced here)
 * @param reserve (IN) Bytes to leave free in front of the packed data
 * @param size (IN/OUT) Bytes requested (IN), bytes actually prepared (OUT)
 * @param flags (IN) Descriptor flags (not referenced; des_flags is set to 0)
 * @return The descriptor, or NULL on allocation, registration or pack failure
 */
mca_btl_base_descriptor_t* mca_btl_pcie_prepare_src(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags
)
{
mca_btl_pcie_frag_t* frag = NULL;
mca_btl_pcie_reg_t* pcie_reg;
mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl;
struct iovec iov;
uint32_t iov_count = 1;
size_t max_data = *size;
int rc;
BTL_VERBOSE(("btl_pcie_prepare_src called with reserve %lu", (unsigned long)reserve));
/* check and see if the data is contiguous */
if(opal_convertor_need_buffers(convertor) == false && 0 == reserve) {
/* NOTE(review): this and the eager/max allocations below pass btl to the
   macro, while other functions in this file pass pcie_btl -- confirm the
   macros accept either. */
MCA_BTL_PCIE_FRAG_ALLOC_DMA(btl, frag, rc);
if(NULL == frag) {
return NULL;
}
iov.iov_len = max_data;
/* a NULL base is passed in so the convertor fills in the address itself */
iov.iov_base = NULL;
/* get the user buffer's address */
opal_convertor_pack(convertor, &iov, &iov_count, &max_data);
*size = max_data;
if(NULL == registration) {
rc = pcie_btl->rdma_mpool->mpool_register(pcie_btl->rdma_mpool,
iov.iov_base, max_data, 0, &registration);
if(OMPI_SUCCESS != rc || NULL == registration){
MCA_BTL_PCIE_FRAG_RETURN(pcie_btl, frag, rc);
return NULL;
}
/* only a registration created here is stored on the fragment; that is the
   one mca_btl_pcie_free() deregisters */
frag->registration = (mca_btl_pcie_reg_t*) registration;
}
pcie_reg = (mca_btl_pcie_reg_t*) registration;
frag->base.des_flags = 0;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
/* (des_flags was already cleared a few lines above) */
frag->base.des_flags = 0;
frag->segment.seg_len = max_data;
frag->segment.seg_addr.pval = iov.iov_base;
/* the registration handle travels in the segment key */
frag->segment.seg_key.key64 = (uint64_t)pcie_reg->handle;
BTL_VERBOSE(("prepare_src: frag->segment.seg_len = %lu .seg_addr.pval= %lu "
"frag->segment.seg_key.key64 = %lu",
(unsigned long)frag->segment.seg_len, (unsigned long)frag->segment.seg_addr.pval,
(unsigned long)frag->segment.seg_key.key64));
return &frag->base;
} else {
/*
* if we aren't pinning the data and the requested size is less
* than the eager limit pack into a fragment from the eager pool
*/
if (max_data+reserve <= btl->btl_eager_limit) {
MCA_BTL_PCIE_FRAG_ALLOC_EAGER(btl, frag, rc);
if(NULL == frag) {
return NULL;
}
iov.iov_len = max_data;
/* pack behind the space reserved for the caller */
iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve;
rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data );
*size = max_data;
if( rc < 0 ) {
MCA_BTL_PCIE_FRAG_RETURN(btl, frag, rc);
return NULL;
}
frag->segment.seg_len = max_data + reserve;
}
/*
* otherwise pack as much data as we can into a fragment
* that is the max send size.
*/
else {
MCA_BTL_PCIE_FRAG_ALLOC_MAX(btl, frag, rc);
if(NULL == frag) {
return NULL;
}
/* clamp so that reserve plus data fits into the fragment */
if(max_data + reserve > frag->size){
max_data = frag->size - reserve;
}
iov.iov_len = max_data;
iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve;
rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data );
*size = max_data;
if( rc < 0 ) {
MCA_BTL_PCIE_FRAG_RETURN(btl, frag, rc);
return NULL;
}
frag->segment.seg_len = max_data + reserve;
}
/* common descriptor set-up for both packed cases */
frag->hdr->length = *size + reserve;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
return &frag->base;
}
}
/**
 * Build a destination descriptor pointing directly at the user buffer.
 *
 * A DMA fragment is allocated and aimed at the convertor's base buffer
 * plus the datatype lower bound plus the bytes already converted. The
 * memory is registered with the RDMA mpool unless the caller supplied a
 * registration; a registration created here is stored on the fragment.
 *
 * @param btl (IN) BTL module
 * @param endpoint (IN) BTL peer addressing (not referenced)
 * @param registration (IN) Existing registration of the buffer, or NULL
 * @param convertor (IN) Data type convertor
 * @param order (IN) Ordering hint (not referenced)
 * @param reserve (IN) Not referenced
 * @param size (IN) Number of bytes to prepare (never modified here)
 * @param flags (IN) Not referenced; des_flags is set to 0
 * @return The descriptor, or NULL on allocation or registration failure
 */
mca_btl_base_descriptor_t* mca_btl_pcie_prepare_dst(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    struct mca_mpool_base_registration_t* registration,
    struct opal_convertor_t* convertor,
    uint8_t order,
    size_t reserve,
    size_t* size,
    uint32_t flags)
{
    mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl;
    mca_btl_pcie_frag_t* frag;
    mca_btl_pcie_reg_t* pcie_reg;
    ptrdiff_t lb;
    int rc;

    MCA_BTL_PCIE_FRAG_ALLOC_DMA(pcie_btl, frag, rc);
    if (NULL == frag) {
        return NULL;
    }

    /* locate the first unconverted byte of the user buffer */
    ompi_datatype_type_lb((ompi_datatype_t*)convertor->pDesc, &lb);
    frag->segment.seg_addr.pval = convertor->pBaseBuf + lb +
        convertor->bConverted;

    /* pin the memory ourselves when the caller did not */
    if (NULL == registration) {
        rc = pcie_btl->rdma_mpool->mpool_register(pcie_btl->rdma_mpool,
                                                  frag->segment.seg_addr.pval,
                                                  *size, 0,
                                                  &registration);
        if (OMPI_SUCCESS != rc || NULL == registration) {
            MCA_BTL_PCIE_FRAG_RETURN(pcie_btl, frag, rc);
            return NULL;
        }
        frag->registration = (mca_btl_pcie_reg_t*) registration;
    }
    pcie_reg = (mca_btl_pcie_reg_t*) registration;

    /* describe the segment; the registration handle is the segment key */
    frag->segment.seg_key.key64 = (uint64_t) pcie_reg->handle;
    frag->segment.seg_len = *size;

    /* destination-only descriptor */
    frag->base.des_src = NULL;
    frag->base.des_src_cnt = 0;
    frag->base.des_dst = &frag->segment;
    frag->base.des_dst_cnt = 1;
    frag->base.des_flags = 0;

    BTL_VERBOSE(("prepare_dst: frag->segment.seg_len = %lu .seg_addr.pval= %lu "
                 "frag->segment.seg_key.key64 = %lu",
                 (unsigned long)frag->segment.seg_len, (unsigned long)frag->segment.seg_addr.pval,
                 (unsigned long)frag->segment.seg_key.key64));

    return &frag->base;
}
/**
 * Initiate an asynchronous send.
 *
 * The fragment header and payload are copied into a buffer taken from the
 * SMA buffer pool that matches the fragment type, and the offset of that
 * buffer (relative to endpoint->rem_frag_base) is posted to the endpoint's
 * send FIFO.
 *
 * @param btl (IN) BTL module
 * @param endpoint (IN) BTL addressing information
 * @param descriptor (IN) Description of the data to be transferred
 * @param tag (IN) The tag value used to notify the peer.
 * @return OMPI_SUCCESS; OMPI_ERR_OUT_OF_RESOURCE if no SMA buffer is
 *         available; otherwise the error from the FIFO post. A busy FIFO
 *         (OMPI_ERR_RESOURCE_BUSY) currently aborts the process.
 */
int mca_btl_pcie_send(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    struct mca_btl_base_descriptor_t* descriptor,
    mca_btl_base_tag_t tag)
{
    mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl;
    mca_btl_pcie_frag_t* frag = (mca_btl_pcie_frag_t*)descriptor;
    mca_btl_pcie_sma_buf_t *buf = NULL;
    int rc;
    btl_pcie_fifo_entry_t idx;

    /* setup these fields so they get pulled over in the memcpy */
    frag->hdr->tag = tag;
    frag->hdr->length = frag->segment.seg_len;

    if (frag->type == MCA_BTL_PCIE_TYPE_EAGER) {
        MCA_BTL_PCIE_SMA_BUF_ALLOC_EAGER(pcie_btl, buf, rc);
    } else {
        MCA_BTL_PCIE_SMA_BUF_ALLOC_MAX(pcie_btl, buf, rc);
    }
    /* It is the SMA buffer allocation that can fail here; the fragment
       itself has already been dereferenced above. */
    if (NULL == buf) {
        BTL_ERROR(("can't alloc buf for frag of type %d", frag->type));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    frag->endpoint = endpoint;
    frag->sma_buf = buf;

    /* Put fragment into network byte order before copy to save work
       done in sma region */
    OMPI_BTL_PCIE_HEADER_HTON(*frag->hdr);

    /* BWB - FIX ME - both pointers are 16 byte aligned and the
       buffers behind them are a multiple of 16 in length (but
       frag->segment.seg_len might not be).  There might be a more
       optimized memcpy option given that behavior. */
    memcpy(buf->pcie_data.pval, frag->hdr,
           sizeof(mca_btl_pcie_header_t) +
           frag->segment.seg_len);

    /* send the fragment pointer to the receiver,
       who will later ACK it back so that we can return it */
    idx = ((char*) buf->pcie_data.pval) - ((char*) endpoint->rem_frag_base);
    idx |= BTL_PCIE_FIFO_TYPE_SEND;

    /* make sure the top bit is zero */
    assert((idx & BTL_PCIE_FIFO_TYPE_MASK) == BTL_PCIE_FIFO_TYPE_SEND);

    /* need to barrier prior to writing remote completion */
    opal_atomic_wmb();

    BTL_VERBOSE(("sent frag 0x%lx (offset %lx), tag %d, length %d, rc = %d",
                 (long)frag, idx, frag->hdr->tag, frag->segment.seg_len, rc));

    idx = opal_swap_bytes8(idx);
    rc = ompi_btl_pcie_fifo_set_msg(&endpoint->send_fifo, idx);
    if (OMPI_SUCCESS != rc) {
        if (OMPI_ERR_RESOURCE_BUSY == rc) {
            /* BWB - FIX ME - queue for later */
            abort();
        } else {
            /* NOTE(review): buf is not handed back to its pool on this
               path -- confirm whether that is intended. */
            return rc;
        }
    }

    return OMPI_SUCCESS;
}
/**
 * Hand one DMA request to the driver through the adapter's command block.
 *
 * The command block header is filled in, the request is copied right
 * behind the header, and the AXONIO_ISSUE_DMA_FAST ioctl is issued.
 * Note that the caller's request is modified: its flags field is
 * overwritten with AXON_DMAFLAG_WRITE_REMOTE_STATUS before the copy.
 *
 * @param a_handle (IN) Adapter handle providing fd and cmd_block
 * @param dma_req (IN/OUT) Request to issue
 * @param dma_requests_available (IN) Value stored in the command header
 * @param dma_requests_started (OUT) Copied from the command header when
 *                                   the ioctl returns 0; untouched otherwise
 * @return The ioctl return value
 */
static int
dd_dma_request(DD_adapter_handle *a_handle,
               struct AXON_dma_request *dma_req,
               int dma_requests_available,
               int *dma_requests_started)
{
    struct AXON_dma_command_list_fast *cmd = a_handle->cmd_block;
    int rc;

    /* header: the request starts immediately after it */
    cmd->dma_req_offset = sizeof (struct AXON_dma_command_list_fast);
    cmd->dma_requests_available = dma_requests_available;
    cmd->dma_requests_started = 0;

    dma_req->flags = AXON_DMAFLAG_WRITE_REMOTE_STATUS;
    memcpy ((char *) cmd + cmd->dma_req_offset, dma_req, sizeof (*dma_req));

    rc = ioctl (a_handle->fd, AXONIO_ISSUE_DMA_FAST, 0);
    if (0 != rc) {
        return rc;
    }

    *dma_requests_started = cmd->dma_requests_started;
    return rc;
}
/**
 * Perform a put (local to remote DMA).
 *
 * A single-descriptor DMA request is built from the descriptor's source
 * and destination segments and handed to the driver. The call then spins
 * on the endpoint's local DMA status word until it becomes non-zero and
 * invokes the descriptor's completion callback before returning, so in its
 * current form the operation has completed by the time this returns.
 * A failure to issue the request aborts the process.
 *
 * @param btl (IN) BTL module
 * @param endpoint (IN) BTL addressing information
 * @param descriptor (IN) Description of the data to be transferred
 * @return OMPI_SUCCESS
 */
int mca_btl_pcie_put(
    mca_btl_base_module_t* btl,
    mca_btl_base_endpoint_t* endpoint,
    mca_btl_base_descriptor_t* descriptor)
{
    mca_btl_pcie_frag_t* frag = (mca_btl_pcie_frag_t*) descriptor;
    volatile uint64_t *status_word = (uint64_t*) endpoint->lcl_dma_status;
    struct AXON_dma_request request;
    int started;
    int rc;

    frag->endpoint = endpoint;

    memset(&request, 0x00, sizeof(request));
    request.dma_type = AXON_DMATYPE_PUT;

    /* local side: values are used as they are */
    request.local_descriptor[0].src_address = frag->base.des_src->seg_addr.lval;
    request.local_descriptor[0].src_memory_region_handle = frag->base.des_src->seg_key.key64;

    /* remote side: address and handle are byte swapped */
    request.remote_descriptor[0].src_address =
        opal_swap_bytes8(frag->base.des_dst->seg_addr.lval);
    request.remote_descriptor[0].src_memory_region_handle =
        opal_swap_bytes8(frag->base.des_dst->seg_key.key64);

    /* the same length applies to both descriptors and to the request */
    request.local_descriptor[0].transfer_size = frag->base.des_src->seg_len;
    request.remote_descriptor[0].transfer_size =
        request.local_descriptor[0].transfer_size;
    request.transfer_size = request.remote_descriptor[0].transfer_size;

    request.localDmaStatusOffset = endpoint->lcl_dma_status - (char*) endpoint->lcl_sma_ptr;
    request.remoteDmaStatusOffset = 0;
    request.local_descriptor_count = 1;
    request.remote_descriptor_count = 1;

    /* clear the status word before the transfer is started */
    *status_word = 0;

    rc = dd_dma_request(&endpoint->pcie_adapter, &request, 1, &started);
    if (0 != rc) {
        abort();
    }

    /* wait for completion, for now anyway */
    while (0 == *status_word) {
        /* sched_yield(); */
    }

    frag->base.des_cbfunc(btl, endpoint, &(frag->base), OMPI_SUCCESS);

    return OMPI_SUCCESS;
}
/**
 * Get (remote to local transfer) entry point.
 *
 * Not implemented: the arguments are ignored and an error is returned.
 *
 * @param btl (IN) BTL module (unused)
 * @param endpoint (IN) BTL addressing information (unused)
 * @param descriptor (IN) Description of the data to be transferred (unused)
 * @return OMPI_ERR_NOT_IMPLEMENTED
 */
int mca_btl_pcie_get(
    mca_btl_base_module_t* btl,
    mca_btl_base_endpoint_t* endpoint,
    mca_btl_base_descriptor_t* descriptor)
{
    (void) btl;
    (void) endpoint;
    (void) descriptor;

    return OMPI_ERR_NOT_IMPLEMENTED;
}
/**
 * Cleanup/release module resources.
 *
 * Destructs the module lock, the SMA buffer and fragment free lists, and
 * the embedded receive fragment.
 *
 * @param btl (IN) BTL module
 * @return OMPI_SUCCESS
 */
int mca_btl_pcie_finalize(struct mca_btl_base_module_t* btl)
{
    mca_btl_pcie_module_t* module = (mca_btl_pcie_module_t*) btl;

    OBJ_DESTRUCT(&module->pcie_lock);

    /* buffers in the SMA region */
    OBJ_DESTRUCT(&module->pcie_sma_buf_eager);
    OBJ_DESTRUCT(&module->pcie_sma_buf_max);

    /* fragment pools */
    OBJ_DESTRUCT(&module->pcie_frag_eager);
    OBJ_DESTRUCT(&module->pcie_frag_max);
    OBJ_DESTRUCT(&module->pcie_frag_dma);

    OBJ_DESTRUCT(&module->pcie_recv_frag);

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,368 +0,0 @@
/*
* Copyright (c) 2009 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_PCIE_H
#define MCA_BTL_PCIE_H
#include "ompi_config.h"
#include <sys/types.h>
#include <string.h>
#include "opal/align.h"
#include "opal/event/event.h"
#include "opal/util/output.h"
#include "opal/class/opal_bitmap.h"
#include "orte/util/proc_info.h"
#include "ompi/class/ompi_free_list.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/mca/pml/pml.h"
#include "btl_pcie_ddriver.h"
#include "btl_pcie_frag.h"
#include "btl_pcie_fifo.h"
BEGIN_C_DECLS
#define MCA_BTL_HAS_MPOOL 1
/**
* PCIE BTL component.
*/
/*
 * Component-wide state of the PCIe BTL: the base component structure,
 * configuration values, and the data shared by all modules.
 */
struct mca_btl_pcie_component_t {
/** BTL base component */
mca_btl_base_component_1_0_1_t super;
/* ***** Configuration information ***** */
/** initial size of free lists */
int pcie_free_list_num;
/** maximum size of free lists */
int pcie_free_list_max;
/** number of elements to alloc when growing free lists */
int pcie_free_list_inc;
/** name of send/recv memory pool */
char* pcie_send_mpool_name;
/** name of put/get memory pool */
char *pcie_dma_mpool_name;
/** Number of entries in the send/recv queue structure */
int pcie_recv_queue_len;
/* **** Component data ***** */
/** array of available modules */
struct mca_btl_pcie_module_t *pcie_btls;
/** Number of initialized pcie_btl modules */
uint32_t pcie_num_btls;
/** list of pcie proc structures, created during add_procs */
opal_list_t pcie_procs;
/** lock for accessing component state */
opal_mutex_t pcie_lock;
};
typedef struct mca_btl_pcie_component_t mca_btl_pcie_component_t;
OMPI_MODULE_DECLSPEC extern mca_btl_pcie_component_t mca_btl_pcie_component;
/**
* BTL Module Interface
*/
/*
 * Per-device state of the PCIe BTL. The base module is the first member,
 * so a mca_btl_base_module_t pointer can be cast to this type.
 */
struct mca_btl_pcie_module_t {
mca_btl_base_module_t super; /**< base BTL interface */
/* NOTE(review): meaning not visible in this file -- presumably whether the
   module is usable; confirm against the component code */
bool active;
/** receive callback registrations, indexed by tag */
mca_btl_base_recv_reg_t pcie_reg[MCA_BTL_TAG_MAX];
/** name of the pcie device */
char *lcl_dev_name;
/** Free lists of communication buffers in the SMA region
    (eager-sized and max-send-sized) */
ompi_free_list_t pcie_sma_buf_eager;
ompi_free_list_t pcie_sma_buf_max;
/** Free lists of bounce fragments, normal user memory
    (eager-sized and max-send-sized) */
ompi_free_list_t pcie_frag_eager;
ompi_free_list_t pcie_frag_max;
/* free list of DMA fragments */
ompi_free_list_t pcie_frag_dma;
/* single receive fragment to handle upcalls on message reception.
This will need to be a free list if multiple receive callbacks
could be triggered at the same time, which will happen if the
code goes MT hot. */
mca_btl_pcie_frag_recv_t pcie_recv_frag;
/* lock for accessing module state */
opal_mutex_t pcie_lock;
/* mpool for allocating the members of pcie_sma_buf* */
struct mca_mpool_base_module_t* pcie_mpool;
/* mpool for RDMA pinning */
struct mca_mpool_base_module_t* rdma_mpool;
/* Endpoint associated with this module (there's a one-to-one
mapping of modules and endpoints, since a device can only
handle one endpoint at a time) */
struct mca_btl_base_endpoint_t* endpoint;
};
typedef struct mca_btl_pcie_module_t mca_btl_pcie_module_t;
extern mca_btl_pcie_module_t mca_btl_pcie_module;
struct mca_btl_pcie_reg_t {
mca_mpool_base_registration_t base;
AXON_memory_region_handle handle;
};
typedef struct mca_btl_pcie_reg_t mca_btl_pcie_reg_t;
struct mca_btl_pcie_modex_info_t {
char hostname[ORTE_MAX_HOSTNAME_SIZE];
char devicename[PATH_MAX];
};
typedef struct mca_btl_pcie_modex_info_t mca_btl_pcie_modex_info_t;
#define MCA_BTL_PCIE_MODEX_INFO_HTON(h)
#define MCA_BTL_PCIE_MODEX_INFO_NTOH(h)
/**
* Register PCIE component parameters with the MCA framework
*/
extern int mca_btl_pcie_component_open(void);
/**
* Any final cleanup before being unloaded.
*/
extern int mca_btl_pcie_component_close(void);
/**
* PCIE component initialization.
*
* @param num_btl_modules (OUT) Number of BTLs returned in BTL array.
* @param allow_multi_user_threads (IN) Flag indicating whether BTL supports user threads (TRUE)
* @param have_hidden_threads (IN) Flag indicating whether BTL uses threads (TRUE)
*/
extern mca_btl_base_module_t** mca_btl_pcie_component_init(
int *num_btl_modules,
bool allow_multi_user_threads,
bool have_hidden_threads
);
/**
* PCIE component progress.
*/
extern int mca_btl_pcie_component_progress(void);
/**
* Cleanup any resources held by the BTL.
*
* @param btl BTL instance.
* @return OMPI_SUCCESS or error status on failure.
*/
extern int mca_btl_pcie_finalize(
struct mca_btl_base_module_t* btl
);
/**
* PML->BTL notification of change in the process list.
*
* @param btl (IN)
* @param nprocs (IN) Number of processes
* @param procs (IN) Set of processes
* @param peers (OUT) Set of (optional) peer addressing info.
* @param reachable (IN/OUT) Bitmap marking the processes that are reachable via this BTL.
* @return OMPI_SUCCESS or error status on failure.
*
*/
extern int mca_btl_pcie_add_procs(
struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_btl_base_endpoint_t** peers,
opal_bitmap_t* reachable
);
/**
* PML->BTL notification of change in the process list.
*
* @param btl (IN) BTL instance
* @param nproc (IN) Number of processes.
* @param procs (IN) Set of processes.
* @param peers (IN) Set of peer data structures.
* @return Status indicating if cleanup was successful
*
*/
extern int mca_btl_pcie_del_procs(
struct mca_btl_base_module_t* btl,
size_t nprocs,
struct ompi_proc_t **procs,
struct mca_btl_base_endpoint_t** peers
);
/**
* Initiate an asynchronous send.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transfered
* @param tag (IN) The tag value used to notify the peer.
*/
extern int mca_btl_pcie_send(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* descriptor,
mca_btl_base_tag_t tag
);
/**
* Initiate an asynchronous put.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/
extern int mca_btl_pcie_put(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* decriptor
);
/**
* Initiate an asynchronous get.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL addressing information
* @param descriptor (IN) Description of the data to be transferred
*/
extern int mca_btl_pcie_get(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* btl_peer,
struct mca_btl_base_descriptor_t* decriptor
);
/**
* Register a callback function that is called on receipt
* of a fragment.
*
* @param btl (IN) BTL module
* @return Status indicating if registration was successful
*
*/
extern int mca_btl_pcie_register(
struct mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag,
mca_btl_base_module_recv_cb_fn_t cbfunc,
void* cbdata);
/**
* Allocate a descriptor with a segment of the requested size.
* Note that the BTL layer may choose to return a smaller size
* if it cannot support the request.
*
* @param btl (IN) BTL module
* @param size (IN) Request segment size.
*/
extern mca_btl_base_descriptor_t* mca_btl_pcie_alloc(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
uint8_t order,
size_t size,
uint32_t flags);
/**
* Return a segment allocated by this BTL.
*
* @param btl (IN) BTL module
* @param descriptor (IN) Allocated descriptor.
*/
extern int mca_btl_pcie_free(
struct mca_btl_base_module_t* btl,
mca_btl_base_descriptor_t* des);
/**
* Prepare a descriptor for send/rdma using the supplied
* convertor. If the convertor references data that is contigous,
* the descriptor may simply point to the user buffer. Otherwise,
* this routine is responsible for allocating buffer space and
* packing if required.
*
* @param btl (IN) BTL module
* @param endpoint (IN) BTL peer addressing
* @param convertor (IN) Data type convertor
* @param reserve (IN) Additional bytes requested by upper layer to precede user data
* @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT)
*/
mca_btl_base_descriptor_t* mca_btl_pcie_prepare_src(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags
);
extern mca_btl_base_descriptor_t* mca_btl_pcie_prepare_dst(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_mpool_base_registration_t* registration,
struct opal_convertor_t* convertor,
uint8_t order,
size_t reserve,
size_t* size,
uint32_t flags);
/**
* Fault Tolerance Event Notification Function
* @param state Checkpoint State
* @return OMPI_SUCCESS or failure status
*/
int mca_btl_pcie_ft_event(int state);
char* ompi_btl_pcie_cfg_get_local_device(char* hostname, int core);
char* ompi_btl_pcie_cfg_get_matching_device(char* remote_hostname,
char* remote_device);
END_C_DECLS
#endif /* #ifndef MCA_BTL_PCIE_H */

Просмотреть файл

@ -1,196 +0,0 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include "opal/util/output.h"
#include "opal/util/os_path.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/installdirs/installdirs.h"
#include "orte/util/proc_info.h"
#include "orte/util/show_help.h"
#include "btl_pcie.h"
#include "btl_pcie_lex.h"
static char *cfg_filename;
static char *key_buffer = NULL;
static size_t key_buffer_len = 0;
/*
* Local functions
*/
static char* parse_file(char *filename, bool local, char *key);
/**************************************************************************/
/**
 * Look up the local PCIe device assigned to a host/core pair.
 *
 * Builds the key "<hostname>:<core>" and searches the file
 * "mca-btl-pcie-local-resources.cfg" in the installation's sysconfdir.
 *
 * @param hostname Host name part of the key
 * @param core Core number part of the key
 * @return Newly allocated device string (caller frees), or NULL if no
 *         entry matched or the path/key could not be built
 */
char *
ompi_btl_pcie_cfg_get_local_device(char* hostname, int core)
{
    char *key = NULL;
    char *ret;
    char *file;

    file = opal_os_path(false,
                        opal_install_dirs.sysconfdir,
                        "mca-btl-pcie-local-resources.cfg",
                        NULL);
    if (NULL == file) {
        return NULL;
    }

    /* on failure asprintf leaves key undefined, so it must not be used */
    if (asprintf(&key, "%s:%d", hostname, core) < 0) {
        free(file);
        return NULL;
    }

    ret = parse_file(file, true, key);

    free(key);
    free(file);
    return ret;
}
/**
 * Find the local device that is wired to a given remote host/device.
 *
 * Builds the key "<remote_hostname>:<remote_device>" and searches the file
 * "mca-btl-pcie-remote-resources.cfg" in the installation's sysconfdir.
 * The match has the form "<hostname>:<device>"; it only counts if the
 * host name part equals this process's node name.
 *
 * @param remote_hostname Host name of the peer
 * @param remote_device Device name on the peer
 * @return Newly allocated device string (caller frees), or NULL if there
 *         is no match for this node or the path/key could not be built
 */
char *
ompi_btl_pcie_cfg_get_matching_device(char* remote_hostname,
                                      char* remote_device)
{
    char *key = NULL;
    char *ret;
    char *pos;
    char *file;
    char *device = NULL;

    file = opal_os_path(false,
                        opal_install_dirs.sysconfdir,
                        "mca-btl-pcie-remote-resources.cfg",
                        NULL);
    if (NULL == file) {
        return NULL;
    }

    /* on failure asprintf leaves key undefined, so it must not be used */
    if (asprintf(&key, "%s:%s", remote_hostname, remote_device) < 0) {
        free(file);
        return NULL;
    }

    ret = parse_file(file, false, key);
    free(file);
    free(key);

    if (NULL == ret) {
        return NULL;
    }

    pos = strchr(ret, ':');
    if (NULL != pos) {
        /* split "<hostname>:<device>" and make sure this is my hostname */
        *pos = '\0';
        if (0 == strcmp(orte_process_info.nodename, ret)) {
            device = strdup(pos + 1);
        }
    }

    free(ret);
    return device;
}
/*
 * Parse a single resource file and return the value paired with key.
 *
 * For a local file (local == true) the entries are
 * "<hostname:core> <device>" pairs and the device of the entry whose first
 * token equals key is returned. For a remote file the entries are pairs of
 * "<hostname:device>" tokens and the partner of whichever token equals key
 * is returned. Parsing stops with no result at the first token that does
 * not belong to the requested kind of file; a malformed pair aborts the
 * process.
 *
 * The file is closed on every exit path once it has been opened.
 *
 * @param filename Path of the file to parse
 * @param local true for the local resource file, false for the remote one
 * @param key Token to look for
 * @return Newly allocated string (caller frees), or NULL if nothing matched
 *         or the file could not be opened
 */
static char* parse_file(char *filename, bool local, char* key)
{
    int val;
    bool me;
    char *tmp = NULL;
    char *result = NULL;

    /* Open the file */
    cfg_filename = filename;
    btl_pcie_cfg_yyin = fopen(filename, "r");
    if (NULL == btl_pcie_cfg_yyin) {
        orte_show_help("help-mpi-btl-pcie.txt", "ini file:file not found",
                       true, filename);
        goto cleanup;
    }

    /* Do the parsing */
    btl_pcie_cfg_parse_done = false;
    btl_pcie_cfg_yynewlines = 1;
    btl_pcie_cfg_init_buffer(btl_pcie_cfg_yyin);
    while (!btl_pcie_cfg_parse_done) {
        val = btl_pcie_cfg_yylex();
        switch (val) {
        case BTL_PCIE_CFG_PARSE_DONE:
            /* This will also set btl_pcie_cfg_parse_done to true, so just
               break here */
            break;

        case BTL_PCIE_CFG_PARSE_NEWLINE:
            /* blank line!  ignore it */
            break;

        case BTL_PCIE_CFG_PARSE_HOSTNAME_CORE:
            if (!local) {
                goto done;
            }

            me = (0 == strcmp(key, btl_pcie_cfg_yytext));

            val = btl_pcie_cfg_yylex();
            if (BTL_PCIE_CFG_PARSE_DEVICE != val) {
                abort();
            }

            if (me) {
                result = strdup(btl_pcie_cfg_yytext);
                goto done;
            }
            break;

        case BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE:
            if (local) {
                goto done;
            }

            if (0 == strcmp(key, btl_pcie_cfg_yytext)) {
                me = true;
            } else {
                /* remember the first token: it is the answer if the second
                   token turns out to be the key */
                tmp = strdup(btl_pcie_cfg_yytext);
                me = false;
            }

            val = btl_pcie_cfg_yylex();
            if (BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE != val) {
                abort();
            }

            if (me) {
                result = strdup(btl_pcie_cfg_yytext);
                goto done;
            }
            if (0 == strcmp(key, btl_pcie_cfg_yytext)) {
                result = tmp;
                goto done;
            }
            free(tmp);
            tmp = NULL;
            break;

        default:
            goto done;
        }
    }

done:
    /* NOTE(review): the lexer may keep buffered state for the closed stream;
       the next call re-runs btl_pcie_cfg_init_buffer() -- confirm that is
       sufficient. */
    fclose(btl_pcie_cfg_yyin);

cleanup:
    if (NULL != key_buffer) {
        free(key_buffer);
        key_buffer = NULL;
        key_buffer_len = 0;
    }

    return result;
}

Просмотреть файл

@ -1,518 +0,0 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <sched.h>
#include <ctype.h>
#include "opal/event/event.h"
#include "opal/util/argv.h"
#include "opal/util/if.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/paffinity/paffinity.h"
#include "opal/mca/paffinity/base/base.h"
#include "orte/util/proc_info.h"
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
#include "ompi/constants.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/btl/base/base.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "btl_pcie.h"
#include "btl_pcie_frag.h"
#include "btl_pcie_endpoint.h"
#include "btl_pcie_ddriver.h"
/*
 * Register a memory region for DMA through the AXONIO_DMA_REGISTER ioctl.
 *
 * a_handle       adapter whose file descriptor receives the ioctl
 * mr_handle      receives the region handle on success
 * starting_addr  start of the region
 * size           length of the region
 * flags          permission bits; bit 0 is always OR'ed in
 *
 * Returns 0 on success, -1 when the ioctl fails.
 */
static int
dd_register_memory_region (DD_adapter_handle *a_handle,
                           AXON_memory_region_handle *mr_handle,
                           void *starting_addr,
                           int size,
                           int flags)
{
    struct AXON_MR_registration request;

    memset(&request, 0, sizeof(request));
    request.local_dma_memory = (__u64) starting_addr;
    request.local_dma_memory_size = size;
    /* Open question inherited from the original code: validate the
       permission flags here or leave that to the ioctl?  Local access is
       always turned on. */
    request.permissions = flags | 1;

    if (-1 == ioctl(a_handle->fd, AXONIO_DMA_REGISTER, &request)) {
        return -1;
    }

    *mr_handle = (AXON_memory_region_handle) request.memory_region_handle;
    return 0;
}
/*
 * Release a region registered with dd_register_memory_region() through the
 * AXONIO_DMA_DEREGISTER ioctl. Returns the ioctl's result unchanged.
 */
static int
dd_deregister_memory_region (DD_adapter_handle *a_handle,
                             AXON_memory_region_handle *mr_handle)
{
    int status = ioctl(a_handle->fd, AXONIO_DMA_DEREGISTER, mr_handle);

    return status;
}
/* Registration / deregistration callbacks; component init stores them in the
   mpool resources (register_mem / deregister_mem). */
static int pcie_reg_mr(void *reg_data, void *base, size_t size,
mca_mpool_base_registration_t *reg);
static int pcie_dereg_mr(void* reg_data, mca_mpool_base_registration_t *reg);
/*
 * The PCIe BTL component descriptor: version/name metadata plus the
 * open/close/init/progress entry points defined in this file.
 */
mca_btl_pcie_component_t mca_btl_pcie_component = {
{
/* First, the mca_base_component_t struct containing meta information
about the component itself */
{
/* Indicate that we are a btl v2.0.0 component (which also implies a
specific MCA version) */
MCA_BTL_BASE_VERSION_2_0_0,
"pcie", /* MCA component name */
OMPI_MAJOR_VERSION, /* MCA component major version */
OMPI_MINOR_VERSION, /* MCA component minor version */
OMPI_RELEASE_VERSION, /* MCA component release version */
mca_btl_pcie_component_open, /* component open */
mca_btl_pcie_component_close /* component close */
},
/* Next the MCA v2.0.0 component meta data */
{
false
},
mca_btl_pcie_component_init,
mca_btl_pcie_component_progress,
}
};
/*
 * utility routines for parameter registration
 */

/*
 * Register a string MCA parameter for this component and return the value
 * that mca_base_param_reg_string() stored for it.
 */
static char*
mca_btl_pcie_param_register_string(const char* param_name,
                                   const char* param_desc,
                                   const char* default_value)
{
    char *registered;

    mca_base_param_reg_string(&mca_btl_pcie_component.super.btl_version,
                              param_name,
                              param_desc,
                              false,
                              false,
                              default_value,
                              &registered);

    return registered;
}
/*
 * Register an integer MCA parameter for this component and return the value
 * that mca_base_param_reg_int() stored for it.
 */
static int
mca_btl_pcie_param_register_int(const char* param_name,
                                const char* param_desc,
                                int default_value)
{
    int registered;

    mca_base_param_reg_int(&mca_btl_pcie_component.super.btl_version,
                           param_name,
                           param_desc,
                           false,
                           false,
                           default_value,
                           &registered);

    return registered;
}
/*
 * Register PCIE device found in local config file.  The MCA framework
 * will make this available to all peers.
 *
 * Builds one mca_btl_pcie_modex_info_t per module (node name plus local
 * device name, both truncated and NUL-terminated), converts each to network
 * order and hands the array to the modex.
 *
 * Returns the modex send status, or OMPI_ERR_TEMP_OUT_OF_RESOURCE when the
 * temporary array cannot be allocated.
 *
 * The temporary array is released before returning; previously it was leaked
 * on every call.  NOTE(review): this relies on the modex send copying the
 * payload before it returns - confirm against the modex API.
 */
static int
btl_pcie_modex_send(void)
{
    int rc;
    size_t size;
    unsigned int i;
    mca_btl_pcie_modex_info_t *info;

    size = mca_btl_pcie_component.pcie_num_btls *
        sizeof(mca_btl_pcie_modex_info_t);
    info = malloc(size);
    if (NULL == info) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;

    for (i = 0 ; i < mca_btl_pcie_component.pcie_num_btls ; ++i) {
        strncpy(info[i].hostname,
                orte_process_info.nodename,
                ORTE_MAX_HOSTNAME_SIZE - 1);
        info[i].hostname[ORTE_MAX_HOSTNAME_SIZE - 1] = '\0';
        strncpy(info[i].devicename,
                mca_btl_pcie_component.pcie_btls[i].lcl_dev_name, PATH_MAX - 1);
        info[i].devicename[PATH_MAX - 1] = '\0';
        MCA_BTL_PCIE_MODEX_INFO_HTON(info[i]);
    }

#if (OMPI_MAJOR_VERSION <= 1) && (OMPI_MINOR_VERSION <= 2)
    rc = mca_pml_base_modex_send(&mca_btl_pcie_component.super.btl_version, info, size);
#else
    rc = ompi_modex_send(&mca_btl_pcie_component.super.btl_version, info, size);
#endif

    free(info);
    return rc;
}
/*
 * Called by MCA framework to open the component, registers
 * component parameters.
 *
 * Resets the component's module bookkeeping, constructs the proc list and
 * registers every MCA parameter of this BTL. Free-list, mpool and queue
 * settings are stored in mca_btl_pcie_component; protocol limits and flags
 * are stored in the mca_btl_pcie_module template.
 *
 * Always returns OMPI_SUCCESS.
 */
int
mca_btl_pcie_component_open(void)
{
/* initialize state */
mca_btl_pcie_component.pcie_num_btls = 0;
mca_btl_pcie_component.pcie_btls = NULL;
/* initialize objects */
OBJ_CONSTRUCT(&mca_btl_pcie_component.pcie_procs, opal_list_t);
/* component parameters */
mca_btl_pcie_component.pcie_free_list_num =
mca_btl_pcie_param_register_int ("free_list_num",
"Initial size of free lists (must be >= 1)",
16);
/* BWB - FIX ME - The need to limit the free list max size is an
artifact of the lack of flow control in the BTL. Since we're
already using bounce fragments, it should be possible to make
this unlimited, and then properly handle the case where an SMA
region isn't available when send is called on a given frag.
Something similar to what Open IB does when we don't have send
credits would work really well here. See comment in
btl_pcie_send() for more information. */
mca_btl_pcie_component.pcie_free_list_max =
mca_btl_pcie_param_register_int ("free_list_max",
"Max size of free lists. "
"free_list_max * (first_frag_size + max_send_size) "
"must be less than (SMA memory size - (recv_queue_len * 4) - 8)",
32);
mca_btl_pcie_component.pcie_free_list_inc =
mca_btl_pcie_param_register_int ("free_list_inc",
"Increment size of free lists (must be >= 1)",
8);
mca_btl_pcie_component.pcie_send_mpool_name =
mca_btl_pcie_param_register_string("send_mpool",
"Name of the memory pool to be used for send messages. "
"(it is unlikely that you will ever want to change this)",
"pcie");
/* NOTE(review): component init in this file creates its DMA mpool with the
literal name "rdma" rather than this parameter - confirm which is
intended. */
mca_btl_pcie_component.pcie_dma_mpool_name =
mca_btl_pcie_param_register_string("dma_mpool",
"Name of the memory pool to be used for rdma messages. "
"(it is unlikely that you will ever want to change this)",
"rdma");
/* NOTE(review): the help text asks for 4 * free_list_max, but the defaults
are 256 and 32 (4 * 32 = 128) - confirm which is right. */
mca_btl_pcie_component.pcie_recv_queue_len =
mca_btl_pcie_param_register_int("recv_queue_len",
"Length of receive fifo. Must be 4 * free_list_max",
256);
mca_btl_pcie_module.super.btl_exclusivity =
mca_btl_pcie_param_register_int ("exclusivity",
"Priority of PCIe BTL. (must be > 0)",
MCA_BTL_EXCLUSIVITY_DEFAULT + 1);
/* For the next three limits the registered value has the size of the BTL
header subtracted before it is stored in the module. */
mca_btl_pcie_module.super.btl_eager_limit =
mca_btl_pcie_param_register_int ("first_frag_size",
"Size (in bytes) of the first fragment sent of any "
"message. It is the maximum size of \"short\" messages "
"and the maximum size of the \"phase 1\" fragment sent "
"for all large messages (must be >= 1).",
1*1024) - sizeof(mca_btl_pcie_header_t);
/* NOTE(review): unlike its siblings this parameter name carries a "btl_"
prefix, and its help text describes a striping threshold - confirm both
are intentional. */
mca_btl_pcie_module.super.btl_rndv_eager_limit =
mca_btl_pcie_param_register_int ("btl_rndv_eager_limit",
"Minimum message size (in bytes) that will be striped "
"across multiple network devices when using "
"send/receive semantics. Messages shorter than this "
"size will be sent across a single network (must be >= "
"1)",
2*1024) - sizeof(mca_btl_pcie_header_t);
mca_btl_pcie_module.super.btl_max_send_size =
mca_btl_pcie_param_register_int ("max_send_size",
"Maximum size (in bytes) of a single \"phase 2\" fragment "
"of a long message when using the pipeline protocol "
"(must be >= 1)",
4*1024) - sizeof(mca_btl_pcie_header_t);
mca_btl_pcie_module.super.btl_rdma_pipeline_send_length =
mca_btl_pcie_param_register_int("rdma_pipeline_send_length",
"Length of the \"phase 2\" portion of a large message (in "
"bytes) when using the pipeline protocol. This part of "
"the message will be split into fragments of size "
"max_send_size and sent using send/receive semantics "
"(must be >= 0; only relevant when the PUT flag is "
"set)",
12*1024);
mca_btl_pcie_module.super.btl_rdma_pipeline_frag_size =
mca_btl_pcie_param_register_int("rdma_pipeline_frag_size",
"Maximum size (in bytes) of a single \"phase 3\" fragment "
"from a long message when using the pipeline protocol. "
"These fragments will be sent using RDMA semantics "
"(must be >= 1; only relevant when the PUT flag is "
"set)",
2*1024*1024);
mca_btl_pcie_module.super.btl_min_rdma_pipeline_size =
mca_btl_pcie_param_register_int("min_rdma_pipeline_size",
"Messages smaller than this size (in bytes) will not "
"use the RDMA pipeline protocol. Instead, they will be "
"split into fragments of max_send_size and sent using "
"send/receive semantics (must be >=0, and is "
"automatically adjusted up to at least "
"(eager_limit+btl_rdma_pipeline_send_length); only "
"relevant when the PUT flag is set)",
16 * 1024);
/* The HETEROGENEOUS_RDMA bit is part of the default only when the macro
exists in this build. */
mca_btl_pcie_module.super.btl_flags =
mca_btl_pcie_param_register_int("flags",
"BTL control flags. Defaults to (SEND|PUT|HETEROGENEOUS_RDMA)",
#ifdef MCA_BTL_FLAGS_HETEROGENEOUS_RDMA
MCA_BTL_FLAGS_HETEROGENEOUS_RDMA |
#endif
MCA_BTL_FLAGS_SEND |
MCA_BTL_FLAGS_PUT);
return OMPI_SUCCESS;
}
/*
 * Component close hook. Currently a no-op that reports success.
 * NOTE(review): the proc list constructed in component open and the module
 * storage allocated in component init are not released here - confirm
 * whether that happens elsewhere.
 */
int
mca_btl_pcie_component_close(void)
{
return OMPI_SUCCESS;
}
mca_btl_base_module_t**
mca_btl_pcie_component_init(int *num_btl_modules,
bool enable_progress_threads,
bool enable_mpi_threads)
{
cpu_set_t cpu_set;
unsigned int i;
int num_cpus, *cpus;
struct stat stat_buf;
struct mca_mpool_base_resources_t mpool_resources;
mca_btl_base_module_t **btl_array;
*num_btl_modules = 0;
/* find all cpus we're bound to */
cpus = malloc(CPU_SETSIZE * sizeof(int));
memset(cpus, 0, CPU_SETSIZE * sizeof(int));
num_cpus = 0;
CPU_ZERO(&cpu_set);
sched_getaffinity(0, sizeof(cpu_set), &cpu_set);
for (i = 0 ; i < CPU_SETSIZE ; ++i) {
if (CPU_ISSET(i, &cpu_set)) cpus[num_cpus++] = i;
}
#if defined(__PPC__)
if (num_cpus > 1) {
orte_show_help("help-mpi-btl-pcie.txt", "initialization:more-than-one-cpu",
true, num_cpus);
return NULL;
}
#endif /* #ifdef __PPC__ */
if (0 == num_cpus) {
orte_show_help("help-mpi-btl-pcie.txt", "initialization:no-cpus",
true);
return NULL;
}
/* Create the module storage space */
mca_btl_pcie_component.pcie_num_btls = num_cpus;
mca_btl_pcie_component.pcie_btls = malloc(mca_btl_pcie_component.pcie_num_btls *
sizeof(struct mca_btl_pcie_module_t));
btl_array = malloc(mca_btl_pcie_component.pcie_num_btls *
sizeof(mca_btl_base_module_t*));
/* initialize the modules */
for (i = 0 ; i < mca_btl_pcie_component.pcie_num_btls ; ++i) {
mca_btl_pcie_module_t *btl = &(mca_btl_pcie_component.pcie_btls[i]);
btl_array[i] = (mca_btl_base_module_t*) btl;
memcpy(btl, &mca_btl_pcie_module, sizeof(mca_btl_pcie_module_t));
/* check if we have a device listed in our local config file */
btl->lcl_dev_name =
ompi_btl_pcie_cfg_get_local_device(orte_process_info.nodename, cpus[i]);
BTL_VERBOSE(("Local device for %s:%d = %s", orte_process_info.nodename, cpus[i],
btl->lcl_dev_name));
/* make sure said device is sane */
if(stat(btl->lcl_dev_name, &stat_buf)) {
BTL_ERROR(("Error %s opening device %s\n", strerror(errno),
btl->lcl_dev_name));
return NULL;
}
OBJ_CONSTRUCT(&btl->pcie_sma_buf_eager, ompi_free_list_t);
OBJ_CONSTRUCT(&btl->pcie_sma_buf_max, ompi_free_list_t);
OBJ_CONSTRUCT(&btl->pcie_frag_eager, ompi_free_list_t);
OBJ_CONSTRUCT(&btl->pcie_frag_max, ompi_free_list_t);
OBJ_CONSTRUCT(&btl->pcie_frag_dma, ompi_free_list_t);
OBJ_CONSTRUCT(&btl->pcie_lock, opal_mutex_t);
/* time to setup DMA mpool */
mpool_resources.reg_data = (void*) btl;
mpool_resources.sizeof_reg = sizeof(mca_btl_pcie_reg_t);
mpool_resources.register_mem = pcie_reg_mr;
mpool_resources.deregister_mem = pcie_dereg_mr;
btl->rdma_mpool =
mca_mpool_base_module_create("rdma",
&btl->super,
&mpool_resources);
btl->super.btl_mpool = btl->rdma_mpool;
btl->active = false;
}
/* push our address info to everyone */
btl_pcie_modex_send();
*num_btl_modules = mca_btl_pcie_component.pcie_num_btls;
return btl_array;;
}
/*
 * Component progress function.
 *
 * Polls the receive fifo of every active module once and handles at most one
 * entry per module per call:
 *  - an ACK entry carries the address of one of our own send fragments: its
 *    SMA buffer is returned, its completion callback fires with OMPI_SUCCESS
 *    and one send-fifo credit is given back;
 *  - any other entry is an offset into the local fragment area where an
 *    incoming message header sits: the header is converted to host order, the
 *    callback registered for its tag is invoked on the module's single
 *    receive fragment, and hdr->send_frag.lval is written to the send fifo.
 *
 * Returns the number of fifo entries handled.
 */
int
mca_btl_pcie_component_progress()
{
unsigned int i;
btl_pcie_fifo_entry_t msg_idx;
int count = 0;
for (i = 0 ; i < mca_btl_pcie_component.pcie_num_btls ; ++i) {
mca_btl_pcie_module_t *pcie_btl =
&(mca_btl_pcie_component.pcie_btls[i]);
mca_btl_base_endpoint_t *endpoint = pcie_btl->endpoint;
/* inactive modules are skipped before the endpoint pointer is used */
if (!pcie_btl->active) continue;
msg_idx = ompi_btl_pcie_fifo_get_msg(&endpoint->recv_fifo);
/* Potential optimization is to drain every time we enter progress */
if (msg_idx) {
int rc;
/* the type bit distinguishes acks from sends; strip it to get the payload */
int ack = ((msg_idx & BTL_PCIE_FIFO_TYPE_MASK) == BTL_PCIE_FIFO_TYPE_ACK) ? 1 : 0;
msg_idx &= BTL_PCIE_FIFO_DATA_MASK;
if (ack) {
/* we have a send frag ack */
/* the payload is used directly as the local fragment address */
mca_btl_pcie_frag_t *frag = (mca_btl_pcie_frag_t*) msg_idx;
mca_btl_pcie_sma_buf_t *buf = frag->sma_buf;
BTL_VERBOSE(("received ack for frag %lx (0x%lx)", (long)msg_idx, (long)frag));
/* Done with buffer, can return now */
MCA_BTL_PCIE_SMA_BUF_RETURN(pcie_btl, buf, rc);
frag->base.des_cbfunc(&pcie_btl->super, endpoint,
&(frag->base),
OMPI_SUCCESS);
/* return the send credit */
ompi_btl_pcie_fifo_complete_msg(&endpoint->send_fifo, 1);
count++;
} else {
/* we have a send frag (incoming data) */
/* the payload is a byte offset into the local fragment area */
mca_btl_pcie_frag_t *recv_frag = &pcie_btl->pcie_recv_frag;
mca_btl_pcie_header_t *hdr = (mca_btl_pcie_header_t*) (endpoint->lcl_frag_base + msg_idx);
recv_frag->hdr = hdr;
OMPI_BTL_PCIE_HEADER_NTOH((*recv_frag->hdr));
/* the message data starts right after the header */
recv_frag->segment.seg_addr.pval = ((unsigned char*) recv_frag->hdr) + sizeof(mca_btl_pcie_header_t);
recv_frag->segment.seg_len = recv_frag->hdr->length;
BTL_VERBOSE(("received tag %d, base 0x%lx", recv_frag->hdr->tag, (long)&recv_frag->base));
pcie_btl->pcie_reg[recv_frag->hdr->tag].cbfunc(&pcie_btl->super,
recv_frag->hdr->tag, &recv_frag->base,
pcie_btl->pcie_reg[recv_frag->hdr->tag].cbdata);
/* presumably this is the ack that hands the sender its fragment back -
confirm. NOTE(review): rc is not checked, so a full send fifo would
silently drop it. */
rc = ompi_btl_pcie_fifo_set_msg(&endpoint->send_fifo, hdr->send_frag.lval);
/* BWB - FIX ME - this is only safe if the number of
queue entries is twice the free list size */
ompi_btl_pcie_fifo_complete_msg(&endpoint->send_fifo, 1);
count++;
}
}
}
return count;
}
/*
 * mpool registration callback: register [base, base + size) with the PCIe
 * adapter of the module's endpoint, granting local and remote read/write
 * access.
 *
 * reg_data  the mca_btl_pcie_module_t stored in the mpool resources
 * base      start of the region
 * size      length of the region in bytes
 * reg       registration record; treated as mca_btl_pcie_reg_t and receives
 *           the region handle
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR when the driver rejects the request.
 */
static int
pcie_reg_mr(void *reg_data, void *base, size_t size,
            mca_mpool_base_registration_t *reg)
{
    mca_btl_pcie_module_t * pcie_btl = (mca_btl_pcie_module_t*) reg_data;
    mca_btl_pcie_endpoint_t * endpoint = pcie_btl->endpoint;
    mca_btl_pcie_reg_t * pcie_reg = (mca_btl_pcie_reg_t*) reg;

    if(dd_register_memory_region(&endpoint->pcie_adapter,
                                 &pcie_reg->handle,
                                 base,
                                 size,
                                 AXON_MR_LOCAL_READ |
                                 AXON_MR_LOCAL_WRITE |
                                 AXON_MR_REMOTE_ACCESS |
                                 AXON_MR_REMOTE_READ |
                                 AXON_MR_REMOTE_WRITE )) {
        /* this message used to say "deregistering", copied from the
           deregistration path */
        BTL_ERROR(("error registering memory!\n"));
        return OMPI_ERROR;
    }
    return OMPI_SUCCESS;
}
/*
 * mpool deregistration callback: release the region handle stored in the
 * registration record.
 *
 * Returns OMPI_ERROR when the handle is negative or the driver call fails,
 * OMPI_SUCCESS otherwise.
 */
static int
pcie_dereg_mr(void* reg_data, mca_mpool_base_registration_t *reg)
{
    mca_btl_pcie_module_t *module = (mca_btl_pcie_module_t*) reg_data;
    mca_btl_pcie_endpoint_t *peer = module->endpoint;
    mca_btl_pcie_reg_t *record = (mca_btl_pcie_reg_t*) reg;

    /* nothing valid to release */
    if (record->handle < 0) {
        return OMPI_ERROR;
    }

    if (0 != dd_deregister_memory_region(&peer->pcie_adapter,
                                         &record->handle)) {
        BTL_ERROR(("error deregistering memory!\n"));
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,21 +0,0 @@
#ifndef _BTL_PCIE_DDRIVER_H
#define _BTL_PCIE_DDRIVER_H
/*
 * State of one opened PCIe adapter, as filled in by dd_open().
 */
typedef struct DD_adapter_handle
{
/* sizes in bytes of the local and remote SMA mappings */
int local_sma_size;
int remote_sma_size;
/* addresses at which the local and remote SMA regions are mapped */
void* local_sma_address;
void* remote_sma_address;
/* descriptor of the opened device; also the target of the DMA
registration ioctls */
int fd;
/* mapping of the device's DMA command buffer */
void *cmd_block;
} DD_adapter_handle;
#include <asm/types.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <linux/axon_ioctl.h>
#endif

Просмотреть файл

@ -1,318 +0,0 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <sys/time.h>
#include <time.h>
#include "opal/align.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
#include "ompi/types.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "ompi/mca/mpool/mpool.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/mca/mpool/pcie/mpool_pcie.h"
#include "btl_pcie.h"
#include "btl_pcie_endpoint.h"
#include "btl_pcie_proc.h"
#include "btl_pcie_frag.h"
#include "btl_pcie_ddriver.h"
/*
 * Object constructor for an endpoint: a fresh endpoint is not yet attached
 * to any BTL module or proc.
 */
static void mca_btl_pcie_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
{
    endpoint->endpoint_proc = NULL;
    endpoint->endpoint_btl = NULL;
}
/*
 * Destroy an endpoint
 *
 * Object destructor; intentionally empty at present.
 * NOTE(review): the adapter opened and mapped by mca_btl_pcie_endpoint_init()
 * is not released here - confirm where that is meant to happen.
 */
static void mca_btl_pcie_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
{
}
/* Class instance for mca_btl_pcie_endpoint_t: parent opal_list_item_t, with
   the constructor and destructor defined above. */
OBJ_CLASS_INSTANCE(
mca_btl_pcie_endpoint_t,
opal_list_item_t,
mca_btl_pcie_endpoint_construct,
mca_btl_pcie_endpoint_destruct);
/*
 * Open a PCIe adapter device and map its shared regions.
 *
 * Opens `device_name` read/write and maps the local SMA region, the remote
 * SMA region (1 MiB each, shared) and the DMA command buffer (private).
 *
 * device_name  path of the device node
 * a_handle     filled in only when every step succeeded
 *
 * Returns 0 on success, -1 when the device cannot be opened, -3 / -4 / -5
 * when mapping the local SMA / remote SMA / command buffer fails.  Every
 * failure path releases whatever was acquired before it; previously the two
 * SMA mappings were leaked when the command-buffer mapping failed and the
 * handle was left holding a closed descriptor.
 */
static int
dd_open (const char* device_name, DD_adapter_handle* a_handle)
{
    int fd;
    int lSize, rSize;
    void *localPtr, *remotePtr;   /* shared memory pointers */
    void *cmdPtr;                 /* DMA command buffer */
    /* NOTE(review): 64 * 124 is kept from the original; confirm it is not a
       typo for 64 * 1024. */
    const size_t cmdSize = 64 * 124;

    fd = open(device_name, O_RDWR);
    if (-1 == fd) return -1;

    lSize = 1024*1024;
    rSize = 1024*1024;

    localPtr = mmap(0, lSize, PROT_READ|PROT_WRITE,
                    MAP_SHARED, fd, LOCAL_SMA_OFFSET);
    if (MAP_FAILED == localPtr) {
        close(fd);
        return -3;
    }

    remotePtr = mmap(0, rSize, PROT_READ|PROT_WRITE,
                     MAP_SHARED, fd, REMOTE_SMA_OFFSET);
    if (MAP_FAILED == remotePtr) {
        munmap(localPtr, lSize);
        close(fd);
        return -4;
    }

    cmdPtr = mmap(0, cmdSize, PROT_READ|PROT_WRITE, MAP_PRIVATE,
                  fd, DMA_COMMAND_BUFFER_OFFSET);
    if (MAP_FAILED == cmdPtr) {
        munmap(remotePtr, rSize);
        munmap(localPtr, lSize);
        close(fd);
        return -5;
    }

    /* publish the handle only once everything is in place */
    a_handle->fd = fd;
    a_handle->local_sma_size = lSize;
    a_handle->remote_sma_size = rSize;
    a_handle->local_sma_address = localPtr;
    a_handle->remote_sma_address = remotePtr;
    a_handle->cmd_block = cmdPtr;

    return 0;
}
/*
 * Initialize an endpoint
 *
 * Opens the endpoint's local device, lays out the two SMA regions and sets
 * up everything the owning module needs to communicate:
 *
 *   offset 0                 16 bytes reserved for the local DMA status
 *   offset 16                fifo of pcie_recv_queue_len entries (the send
 *                            fifo lives in the remote region, the receive
 *                            fifo at the same offset in the local region)
 *   after the fifo           fragment area, handed to the send mpool
 *
 * It then initializes the module's free lists and its single receive
 * fragment, records the endpoint in the module and marks the module active.
 *
 * Returns OMPI_SUCCESS, OMPI_ERROR when the device cannot be opened or an SMA
 * address is NULL, or the fifo initialization error code.
 *
 * NOTE(review): the error returns after a successful dd_open() do not close
 * or unmap the adapter, and the result of creating the send mpool is not
 * checked - confirm whether callers tolerate that.
 */
int mca_btl_pcie_endpoint_init(mca_btl_base_endpoint_t* endpoint)
{
int rc;
mca_btl_pcie_module_t* pcie_btl =
endpoint->endpoint_btl;
mca_mpool_base_resources_t mpool_resources;
size_t fifo_buffer_len, current_offset = 0;
/* Open our device */
rc = dd_open(endpoint->lcl_dev_name,
&endpoint->pcie_adapter);
if( 0 != rc) {
BTL_ERROR(("Failed to open pcie device dd_open says : %d\n", rc));
return OMPI_ERROR;
}
/* fill in endpoint data for beginning of resources */
endpoint->lcl_sma_ptr = endpoint->pcie_adapter.local_sma_address;
if(NULL == endpoint->lcl_sma_ptr) {
BTL_ERROR(("Error: local sma address is null\n"));
return OMPI_ERROR;
}
endpoint->rem_sma_ptr = endpoint->pcie_adapter.remote_sma_address;
if(NULL == endpoint->rem_sma_ptr) {
BTL_ERROR(("Error: remote sma address is null\n"));
return OMPI_ERROR;
}
BTL_VERBOSE(("SMA for device %s: local=0x%lx,%d remote=0x%lx,%d",
endpoint->lcl_dev_name,
(long)endpoint->lcl_sma_ptr,
endpoint->pcie_adapter.local_sma_size,
(long)endpoint->rem_sma_ptr,
endpoint->pcie_adapter.remote_sma_size));
/* 16 bytes of the buffer reserved for the 8 byte local DMA completion */
endpoint->lcl_dma_status = ((char*) endpoint->lcl_sma_ptr) + current_offset;
current_offset += 16;
/* fifo_buffer_len bytes reserved for fifos */
fifo_buffer_len = sizeof(btl_pcie_fifo_entry_t) * mca_btl_pcie_component.pcie_recv_queue_len;
/* the send fifo writes into the remote region ... */
rc = ompi_btl_pcie_fifo_init_send(&(endpoint->send_fifo),
mca_btl_pcie_component.pcie_recv_queue_len,
((char*) endpoint->rem_sma_ptr) + current_offset);
if (OMPI_SUCCESS != rc) {
BTL_ERROR(("Error: Failed to init send fifo: %d", rc));
return rc;
}
/* ... and the receive fifo reads from the same offset in the local region */
rc = ompi_btl_pcie_fifo_init_recv(&(endpoint->recv_fifo),
mca_btl_pcie_component.pcie_recv_queue_len,
((char*) endpoint->lcl_sma_ptr) + current_offset,
fifo_buffer_len);
if (OMPI_SUCCESS != rc) {
BTL_ERROR(("Error: Failed to init recv fifo: %d", rc));
return rc;
}
current_offset += fifo_buffer_len;
/* reserve rest of the space for the mpool */
endpoint->rem_frag_base =
((char*) endpoint->rem_sma_ptr) + current_offset;
endpoint->lcl_frag_base =
((char*) endpoint->lcl_sma_ptr) + current_offset;
/* don't need to align this one as the free list */
/* will take care of it. */
mpool_resources.base = endpoint->rem_frag_base;
mpool_resources.len = endpoint->pcie_adapter.remote_sma_size -
current_offset;
/* setup my pcie mpool */
pcie_btl->pcie_mpool =
mca_mpool_base_module_create(mca_btl_pcie_component.pcie_send_mpool_name,
pcie_btl,
&mpool_resources);
/* setup the modules free lists and such as we now */
/* have enough info to setup the mpool */
/* The two preprocessor branches below differ only in the argument list
ompi_free_list_init_ex() takes in each Open MPI version. SMA buffer lists
draw from the pcie mpool; the user fragment lists pass NULL for it. */
/* eager SMA communication buffers */
#if (OMPI_MAJOR_VERSION <= 1) && (OMPI_MINOR_VERSION <= 2)
ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_eager),
sizeof(mca_btl_pcie_sma_buf_eager_t) +
mca_btl_pcie_module.super.btl_eager_limit,
sizeof(mca_btl_pcie_sma_buf_eager_t),
MCA_BTL_PCIE_FRAG_ALIGN,
OBJ_CLASS(mca_btl_pcie_sma_buf_eager_t),
mca_btl_pcie_component.pcie_free_list_num,
mca_btl_pcie_component.pcie_free_list_max,
mca_btl_pcie_component.pcie_free_list_inc,
pcie_btl->pcie_mpool);
/* max size SMA communication buffers */
ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_max),
sizeof(mca_btl_pcie_sma_buf_max_t) +
mca_btl_pcie_module.super.btl_max_send_size,
sizeof(mca_btl_pcie_sma_buf_max_t),
MCA_BTL_PCIE_FRAG_ALIGN,
OBJ_CLASS(mca_btl_pcie_sma_buf_max_t),
mca_btl_pcie_component.pcie_free_list_num,
mca_btl_pcie_component.pcie_free_list_max,
mca_btl_pcie_component.pcie_free_list_inc,
pcie_btl->pcie_mpool);
/* User eager fragment buffer */
ompi_free_list_init_ex(&(pcie_btl->pcie_frag_eager),
sizeof(mca_btl_pcie_frag_eager_t) +
mca_btl_pcie_module.super.btl_eager_limit,
sizeof(mca_btl_pcie_frag_eager_t),
MCA_BTL_PCIE_FRAG_ALIGN,
OBJ_CLASS(mca_btl_pcie_frag_eager_t),
mca_btl_pcie_component.pcie_free_list_num,
mca_btl_pcie_component.pcie_free_list_max,
mca_btl_pcie_component.pcie_free_list_inc,
NULL);
/* User max size fragment buffer */
ompi_free_list_init_ex(&(pcie_btl->pcie_frag_max),
sizeof(mca_btl_pcie_frag_max_t) +
mca_btl_pcie_module.super.btl_max_send_size,
sizeof(mca_btl_pcie_frag_max_t),
MCA_BTL_PCIE_FRAG_ALIGN,
OBJ_CLASS(mca_btl_pcie_frag_max_t),
mca_btl_pcie_component.pcie_free_list_num,
mca_btl_pcie_component.pcie_free_list_max,
mca_btl_pcie_component.pcie_free_list_inc,
NULL);
#else
ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_eager),
mca_btl_pcie_module.super.btl_eager_limit,
MCA_BTL_PCIE_FRAG_ALIGN,
OBJ_CLASS(mca_btl_pcie_sma_buf_eager_t),
mca_btl_pcie_component.pcie_free_list_num,
mca_btl_pcie_component.pcie_free_list_max,
mca_btl_pcie_component.pcie_free_list_inc,
pcie_btl->pcie_mpool,
NULL,
NULL);
/* max size SMA communication buffers */
ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_max),
mca_btl_pcie_module.super.btl_max_send_size,
MCA_BTL_PCIE_FRAG_ALIGN,
OBJ_CLASS(mca_btl_pcie_sma_buf_max_t),
mca_btl_pcie_component.pcie_free_list_num,
mca_btl_pcie_component.pcie_free_list_max,
mca_btl_pcie_component.pcie_free_list_inc,
pcie_btl->pcie_mpool,
NULL,
NULL);
/* User eager fragment buffer */
ompi_free_list_init_ex(&(pcie_btl->pcie_frag_eager),
mca_btl_pcie_module.super.btl_eager_limit,
MCA_BTL_PCIE_FRAG_ALIGN,
OBJ_CLASS(mca_btl_pcie_frag_eager_t),
mca_btl_pcie_component.pcie_free_list_num,
mca_btl_pcie_component.pcie_free_list_max,
mca_btl_pcie_component.pcie_free_list_inc,
NULL,
NULL,
NULL);
/* User max size fragment buffer */
ompi_free_list_init_ex(&(pcie_btl->pcie_frag_max),
mca_btl_pcie_module.super.btl_max_send_size,
MCA_BTL_PCIE_FRAG_ALIGN,
OBJ_CLASS(mca_btl_pcie_frag_max_t),
mca_btl_pcie_component.pcie_free_list_num,
mca_btl_pcie_component.pcie_free_list_max,
mca_btl_pcie_component.pcie_free_list_inc,
NULL,
NULL,
NULL);
#endif
/* dma frags. note that we can only have 16 outstanding memory
handles so we cannot currently support leave_pinned and we must
limit the number of outstanding DMAs via the free list of DMA
frags */
ompi_free_list_init(&(pcie_btl->pcie_frag_dma),
sizeof(mca_btl_pcie_frag_dma_t),
OBJ_CLASS(mca_btl_pcie_frag_dma_t),
16,
16,
0,
NULL);
/* recv frag */
OBJ_CONSTRUCT(&(pcie_btl->pcie_recv_frag),
mca_btl_pcie_frag_recv_t);
/* from here on the component progress function will poll this module */
pcie_btl->endpoint = endpoint;
pcie_btl->active = true;
return OMPI_SUCCESS;
}
/*
 * Finalize an endpoint
 *
 * Currently a no-op that reports success.
 * NOTE(review): nothing set up by mca_btl_pcie_endpoint_init() (device
 * descriptor, mappings, mpool, free lists) is torn down here - confirm.
 */
int mca_btl_pcie_endpoint_fini(mca_btl_base_endpoint_t* endpoint)
{
return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,92 +0,0 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_PCIE_ENDPOINT_H
#define MCA_BTL_PCIE_ENDPOINT_H
#include "ompi_config.h"
#include "opal/class/opal_list.h"
#include "opal/event/event.h"
#include "ompi/mca/btl/btl.h"
#include "ompi/mca/pml/pml.h"
#include "btl_pcie_ddriver.h"
#include "btl_pcie_frag.h"
#include "btl_pcie.h"
#include "btl_pcie_fifo.h"
BEGIN_C_DECLS
/**
* An abstraction that represents a connection to an endpoint process.
* An instance of mca_btl_base_endpoint_t is associated w/ each process
* and BTL pair at startup. However, connections to the endpoint
* are established dynamically on an as-needed basis:
*/
struct mca_btl_base_endpoint_t {
opal_list_item_t super;
struct mca_btl_pcie_module_t* endpoint_btl;
/**< BTL instance that created this connection */
struct mca_btl_pcie_proc_t* endpoint_proc;
/**< proc structure corresponding to endpoint */
/** the name of the remote PCIE device */
char* rem_dev_name;
/** the name of the local PCIE device */
char* lcl_dev_name;
/** the pcie adapter - returned by dd_open */
DD_adapter_handle pcie_adapter;
/** local pcie SMA memory for this endpoint */
char *lcl_sma_ptr;
/** remote pcie SMA memory for this endpoint */
char *rem_sma_ptr;
/** remote fragment starting point (in which to
* deliver data via "rdma" write
*/
char *rem_frag_base;
/** local fragment starting point; entries read from the receive fifo
* are offsets from this address
*/
char *lcl_frag_base;
/** start of the local SMA region, reserved for the DMA completion status */
char *lcl_dma_status;
/** fifo polled for incoming entries (lives in local SMA memory) */
btl_pcie_fifo_t recv_fifo;
/** fifo written to reach the peer (lives in remote SMA memory) */
btl_pcie_fifo_t send_fifo;
};
typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
/* the PCIe endpoint type is simply an alias for the base endpoint struct */
typedef mca_btl_base_endpoint_t mca_btl_pcie_endpoint_t;
OBJ_CLASS_DECLARATION(mca_btl_pcie_endpoint_t);
/*
* Initialize an endpoint
*/
int mca_btl_pcie_endpoint_init(mca_btl_base_endpoint_t* endpoint);
/*
* Finalize an endpoint
*/
int mca_btl_pcie_endpoint_fini(mca_btl_base_endpoint_t* endpoint);
END_C_DECLS
#endif /* #ifndef MCA_BTL_PCIE_ENDPOINT_H */

Просмотреть файл

@ -1,97 +0,0 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <string.h>
#include "opal/threads/mutex.h"
#include "opal/types.h"
#include "ompi/constants.h"
#include "btl_pcie_fifo.h"
/*
 * Compute the index mask for a fifo of `len` entries.
 *
 * Returns len - 1 when len is a power of two, otherwise 0.  Callers treat 0
 * as "invalid length", so a length of 0 or 1 is rejected as well (the mask
 * for a one-entry fifo would itself be 0).
 *
 * The previous implementation shifted its working copy with a bare
 * `x >> 1;` expression statement, which discards the result, so its loop
 * never terminated for any non-zero length.
 */
static uint32_t
get_mask(unsigned int len)
{
    /* a power of two has exactly one bit set, so clearing the lowest set
       bit must leave nothing behind */
    if (0 == len || 0 != (len & (len - 1))) {
        return 0;
    }

    return (uint32_t) (len - 1);
}
/*
 * Set up the send view of a fifo on top of `queue_space`.
 *
 * All fields are filled in unconditionally; the call then reports OMPI_ERROR
 * when get_mask() rejected `fifo_len`, OMPI_SUCCESS otherwise.  The queue
 * memory itself is not touched.
 */
int
ompi_btl_pcie_fifo_init_send(btl_pcie_fifo_t *fifo,
                             unsigned int fifo_len,
                             void *queue_space)
{
    const uint32_t index_mask = get_mask(fifo_len);

    fifo->queue = queue_space;
    fifo->mask = index_mask;
    fifo->num_outstanding = 0;
    fifo->current_index = 0;
    fifo->fifo_len = fifo_len;

    return (0 == index_mask) ? OMPI_ERROR : OMPI_SUCCESS;
}
/*
 * Set up the receive view of a fifo on top of `queue_space`.
 *
 * All fields are filled in unconditionally (reading starts at index 1).  The
 * call reports OMPI_ERROR when get_mask() rejected `fifo_len` or when
 * `queue_space_len` cannot hold `fifo_len` entries; otherwise the queue is
 * zeroed, which marks every slot empty, and OMPI_SUCCESS is returned.
 */
int
ompi_btl_pcie_fifo_init_recv(btl_pcie_fifo_t *fifo,
                             unsigned int fifo_len,
                             void *queue_space,
                             size_t queue_space_len)
{
    const size_t bytes_needed = fifo_len * sizeof(btl_pcie_fifo_entry_t);

    fifo->queue = queue_space;
    fifo->mask = get_mask(fifo_len);
    fifo->num_outstanding = 0;
    fifo->current_index = 1;
    fifo->fifo_len = fifo_len;

    if (0 == fifo->mask || bytes_needed > queue_space_len) {
        return OMPI_ERROR;
    }

    /* initialize the queue to empty */
    memset(fifo->queue, 0, bytes_needed);
    return OMPI_SUCCESS;
}
/*
 * Tear down a fifo view. Nothing is released here (the init functions in
 * this file only store a caller-provided queue pointer); always returns
 * OMPI_SUCCESS.
 */
int
ompi_btl_pcie_fifo_finalize(btl_pcie_fifo_t *fifo)
{
return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,171 +0,0 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef BTL_PCIE_FIFO_H
#define BTL_PCIE_FIFO_H
#include "ompi_config.h"
#include "ompi/constants.h"
#include "opal/threads/mutex.h"
#include "opal/types.h"
#include "ompi/mca/btl/base/btl_base_error.h"
BEGIN_C_DECLS
/* One fifo slot. A value of 0 means "empty" (see ompi_btl_pcie_fifo_get_msg);
   otherwise the top bit is a type tag and the low 63 bits are the payload. */
typedef uint64_t btl_pcie_fifo_entry_t;
/* selects the type tag / the payload of an entry */
#define BTL_PCIE_FIFO_TYPE_MASK 0x8000000000000000
#define BTL_PCIE_FIFO_DATA_MASK 0x7FFFFFFFFFFFFFFF
/* tag values: top bit clear = ACK, top bit set = SEND */
#define BTL_PCIE_FIFO_TYPE_ACK 0x0000000000000000
#define BTL_PCIE_FIFO_TYPE_SEND 0x8000000000000000
/* One side's view (sender or receiver) of a fifo of btl_pcie_fifo_entry_t
   slots. */
struct btl_pcie_fifo_t {
/* number of entries in queue */
uint32_t fifo_len;
/* for sender: next place to write
* for receiver: next place to read */
uint32_t current_index;
/* for sender: number of entries "in flight". Must always be less
than or equal to fifo_len */
uint32_t num_outstanding;
/* AND-ed with an index to wrap it into the queue */
uint32_t mask;
/* the actual buffer */
btl_pcie_fifo_entry_t* queue;
};
typedef struct btl_pcie_fifo_t btl_pcie_fifo_t;
/**
* Initialize fifo structure
*
* Initialize send/recv fifo structure. The fifo structure does
* double duty of maintaining both the sender and receiver. This
* function initializes the send view of the fifo structure, for
* use to send messages. fifo_get_msg() should not be called on
* this fifo.
*
* @note fifo_len must match the value given to the matching
* fifo_init_recv(), although there are no checks to verify this.
*
* @param[in] fifo A pointer to a fifo structure to be
* initialized
* @param[in] fifo_len Requested length of the fifo queue
* @param[in] queue_space Space for the receive queue (remote pointer)
*
* @retval OMPI_SUCCESS Everything worked
* @retval OMPI_ERROR Good luck!
*/
int ompi_btl_pcie_fifo_init_send(btl_pcie_fifo_t *fifo,
unsigned int fifo_len,
void *queue_space);
/**
* Initialize fifo structure
*
* Initialize send/recv fifo structure. The fifo structure does
* double duty of maintaining both the sender and receiver. This
* function initializes the receive view of the fifo structure, for
* use to receive messages. fifo_set_msg() should not be called on
* this fifo.
*
* @note fifo_len must match the value given to the matching
* fifo_init_send(), although there are no checks to verify this.
*
* @param[in] fifo A pointer to a fifo structure to be
* initialized
* @param[in] fifo_len Requested length of the fifo queue
* @param[in] queue_space Space for the receive queue (local pointer)
* @param[in] queue_space_len Length of queue_space
*
* @retval OMPI_SUCCESS Everything worked
* @retval OMPI_ERROR Good luck!
*/
int ompi_btl_pcie_fifo_init_recv(btl_pcie_fifo_t *fifo,
unsigned int fifo_len,
void *queue_space,
size_t queue_space_len);
int ompi_btl_pcie_fifo_finalize(btl_pcie_fifo_t *fifo);
/**
 * Pop the next message from the receive view of the fifo
 *
 * Inspects the queue slot addressed by current_index. A zero entry
 * means nothing new has been written there. Otherwise the slot is
 * reset to zero and current_index is advanced, wrapping through the
 * fifo mask.
 *
 * @param[in] fifo The receive view of the fifo
 *
 * @return A non-zero message or 0 if no new messages are
 * available.
 */
static inline btl_pcie_fifo_entry_t
ompi_btl_pcie_fifo_get_msg(btl_pcie_fifo_t *fifo)
{
    /* BWB - TODO - if we ever want to be multi-threaded, we'll
       need to fix this */
    btl_pcie_fifo_entry_t msg = fifo->queue[fifo->current_index];

    if (0 == msg) {
        /* empty slot: leave the index where it is */
        return msg;
    }

    /* mark the slot as consumed before stepping to the next one */
    fifo->queue[fifo->current_index] = 0;
    fifo->current_index++;
    fifo->current_index &= fifo->mask;

    return msg;
}
/**
 * Write a message pointer into the queue
 *
 * Write a message pointer into the send queue view of the fifo.
 * A slot is reserved by incrementing num_outstanding; if the result
 * exceeds fifo_len the increment is undone and the call fails.
 * Otherwise current_index is incremented, masked with fifo->mask,
 * and the message is stored at that queue position.
 *
 * @param[in] fifo The send view of the fifo
 * @param[in] msg The index to the payload to deliver
 *
 * @retval OMPI_SUCCESS Fifo successfully updated
 * @retval OMPI_ERR_RESOURCE_BUSY There was no space in the fifo
 */
static inline int
ompi_btl_pcie_fifo_set_msg(btl_pcie_fifo_t *fifo, btl_pcie_fifo_entry_t msg)
{
uint32_t outstanding;
/* see if we have a slot */
/* NOTE(review): this relies on OPAL_THREAD_ADD32 yielding the
   updated counter value -- confirm against its definition */
outstanding = OPAL_THREAD_ADD32(&fifo->num_outstanding, 1);
if (outstanding > fifo->fifo_len) {
/* over capacity: give the reservation back */
OPAL_THREAD_ADD32(&fifo->num_outstanding, -1);
return OMPI_ERR_RESOURCE_BUSY;
}
/* now that we have a slot, figure out where it is. Allow the
   index to wrap around forever - just mask out the bits we
   don't care about. The local is reused to hold the index. */
outstanding = OPAL_THREAD_ADD32(&fifo->current_index, 1);
outstanding &= fifo->mask;
fifo->queue[outstanding] = msg;
return OMPI_SUCCESS;
}
/**
 * Give back fifo slots for completed messages
 *
 * Subtracts num_msgs from the fifo's num_outstanding counter, which
 * is the counter fifo_set_msg() increments when it reserves a slot.
 *
 * @param[in] fifo The fifo whose outstanding count is reduced
 * @param[in] num_msgs Number of slots to give back
 *
 * @retval OMPI_SUCCESS Always
 */
static inline int
ompi_btl_pcie_fifo_complete_msg(btl_pcie_fifo_t *fifo,
unsigned int num_msgs)
{
OPAL_THREAD_ADD32(&fifo->num_outstanding, -num_msgs);
return OMPI_SUCCESS;
}
END_C_DECLS
#endif /* BTL_PCIE_FIFO_H */

Просмотреть файл

@ -1,139 +0,0 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "btl_pcie_frag.h"
#include "btl_pcie.h"
/**
 * Constructor for eager SMA buffers.
 *
 * Tags the buffer as MCA_BTL_PCIE_TYPE_EAGER and points its data
 * pointer at the memory immediately following the buffer structure.
 */
static void
mca_btl_pcie_sma_buf_eager_constructor(mca_btl_pcie_sma_buf_t* buf)
{
    buf->type = MCA_BTL_PCIE_TYPE_EAGER;
    buf->pcie_data.pval = &buf[1];
}
/**
 * Constructor for max-size SMA buffers.
 *
 * Tags the buffer as MCA_BTL_PCIE_TYPE_MAX and points its data
 * pointer at the memory immediately following the buffer structure.
 */
static void
mca_btl_pcie_sma_buf_max_constructor(mca_btl_pcie_sma_buf_t* buf)
{
    buf->type = MCA_BTL_PCIE_TYPE_MAX;
    buf->pcie_data.pval = &buf[1];
}
/*
 * Class instances for the two SMA buffer flavors. Both use
 * ompi_free_list_item_t as parent class, the matching constructor
 * defined above, and no destructor.
 */
OBJ_CLASS_INSTANCE(mca_btl_pcie_sma_buf_eager_t,
ompi_free_list_item_t,
mca_btl_pcie_sma_buf_eager_constructor,
NULL);
OBJ_CLASS_INSTANCE(mca_btl_pcie_sma_buf_max_t,
ompi_free_list_item_t,
mca_btl_pcie_sma_buf_max_constructor,
NULL);
/**
 * Constructor for RDMA fragments.
 *
 * Produces an empty descriptor: no source or destination segments,
 * no header, zero size, and every pointer member cleared. The
 * fragment is tagged MCA_BTL_PCIE_TYPE_RDMA.
 */
static void
mca_btl_pcie_frag_dma_constructor(mca_btl_pcie_frag_t* frag)
{
    frag->type = MCA_BTL_PCIE_TYPE_RDMA;

    /* descriptor carries no segments yet */
    frag->base.des_src_cnt = 0;
    frag->base.des_src = NULL;
    frag->base.des_dst_cnt = 0;
    frag->base.des_dst = NULL;

    /* embedded segment is empty */
    frag->segment.seg_len = 0;
    frag->segment.seg_addr.pval = NULL;

    /* nothing attached */
    frag->size = 0;
    frag->hdr = NULL;
    frag->endpoint = NULL;
    frag->registration = NULL;
    frag->sma_buf = NULL;
}
/**
 * Shared setup for send fragments (eager and max).
 *
 * The header is placed in the memory right after the fragment
 * structure and the payload right after the header. The header's
 * send_frag pointer is set back to the fragment. frag->size must
 * already be set by the caller; it becomes the segment length.
 */
static void
mca_btl_pcie_frag_common_constructor(mca_btl_pcie_frag_t* frag)
{
    mca_btl_pcie_header_t *header = (mca_btl_pcie_header_t*) (frag + 1);

    /* header sits right behind the fragment and points back at it */
    frag->hdr = header;
    header->send_frag.pval = frag;

    /* payload starts right behind the header */
    frag->segment.seg_addr.pval = (unsigned char*) (header + 1);
    frag->segment.seg_len = frag->size;

    /* one source segment, no destination segments */
    frag->base.des_src = &frag->segment;
    frag->base.des_src_cnt = 1;
    frag->base.des_dst = NULL;
    frag->base.des_dst_cnt = 0;

    frag->endpoint = NULL;
    frag->registration = NULL;
    frag->sma_buf = NULL;
}
/**
 * Constructor for eager send fragments.
 *
 * The size is taken from the module's btl_eager_limit. It has to be
 * assigned before the common constructor runs, because that routine
 * copies frag->size into the segment length.
 */
static void
mca_btl_pcie_frag_eager_constructor(mca_btl_pcie_frag_t* frag)
{
frag->size = mca_btl_pcie_module.super.btl_eager_limit;
mca_btl_pcie_frag_common_constructor(frag);
frag->type = MCA_BTL_PCIE_TYPE_EAGER;
}
/**
 * Constructor for max-size send fragments.
 *
 * The size is taken from the module's btl_max_send_size. It has to
 * be assigned before the common constructor runs, because that
 * routine copies frag->size into the segment length.
 */
static void mca_btl_pcie_frag_max_constructor(mca_btl_pcie_frag_t* frag)
{
frag->size = mca_btl_pcie_module.super.btl_max_send_size;
mca_btl_pcie_frag_common_constructor(frag);
frag->type = MCA_BTL_PCIE_TYPE_MAX;
}
/**
 * Constructor for receive fragments.
 *
 * The descriptor exposes the embedded segment as its single
 * destination segment and has no source segments. The segment itself
 * starts out empty, and all other members are cleared. The fragment
 * is tagged MCA_BTL_PCIE_TYPE_RECV.
 */
static void mca_btl_pcie_frag_recv_constructor(mca_btl_pcie_frag_t *frag)
{
    frag->type = MCA_BTL_PCIE_TYPE_RECV;

    /* one destination segment, no source segments */
    frag->base.des_dst = &frag->segment;
    frag->base.des_dst_cnt = 1;
    frag->base.des_src = NULL;
    frag->base.des_src_cnt = 0;

    /* segment is filled in later */
    frag->segment.seg_len = 0;
    frag->segment.seg_addr.pval = NULL;

    frag->size = 0;
    frag->hdr = NULL;
    frag->endpoint = NULL;
    frag->registration = NULL;
    frag->sma_buf = NULL;
}
/*
 * Class instances for the four fragment flavors. Each one uses
 * mca_btl_base_descriptor_t as parent class, its own constructor
 * from above, and no destructor.
 */
OBJ_CLASS_INSTANCE(
mca_btl_pcie_frag_eager_t,
mca_btl_base_descriptor_t,
mca_btl_pcie_frag_eager_constructor,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_pcie_frag_max_t,
mca_btl_base_descriptor_t,
mca_btl_pcie_frag_max_constructor,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_pcie_frag_recv_t,
mca_btl_base_descriptor_t,
mca_btl_pcie_frag_recv_constructor,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_pcie_frag_dma_t,
mca_btl_base_descriptor_t,
mca_btl_pcie_frag_dma_constructor,
NULL);

Просмотреть файл

@ -1,179 +0,0 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_PCIE_FRAG_H
#define MCA_BTL_PCIE_FRAG_H
#include "ompi_config.h"
#include "ompi/mca/btl/btl.h"
BEGIN_C_DECLS
#define MCA_BTL_PCIE_FRAG_ALIGN (16)
/* Header that sits at top of any send message */
struct mca_btl_pcie_header_t {
/** BTL tag of the message */
mca_btl_base_tag_t tag;
/** explicit padding -- presumably keeps length 4-byte aligned
    when the tag is a single byte; TODO confirm */
uint8_t pad[3];
/** message length; the only field touched by the HTON/NTOH
    macros below */
uint32_t length;
/** pointer back to the fragment that owns this header (set by
    the send fragment constructor) */
ompi_ptr_t send_frag;
};
typedef struct mca_btl_pcie_header_t mca_btl_pcie_header_t;
/*
 * Byte-order conversion for a header (passed as an lvalue, not a
 * pointer). Only the length field is converted, in place, with
 * htonl()/ntohl(); tag, pad and send_frag are left untouched.
 */
#define OMPI_BTL_PCIE_HEADER_HTON(header) \
do { \
(header).length = htonl((header).length); \
} while (0)
#define OMPI_BTL_PCIE_HEADER_NTOH(header) \
do { \
(header).length = ntohl((header).length); \
} while (0)
struct mca_btl_pcie_frag_t;
/** Type description for fragments / buffers. The type recorded in a
    fragment or SMA buffer is what the *_RETURN macros switch on to
    pick the free list an item goes back to. */
enum mca_btl_pcie_frag_type_t {
MCA_BTL_PCIE_TYPE_UNKNOWN, /* first enumerator; not assigned by any constructor */
MCA_BTL_PCIE_TYPE_EAGER,   /* eager send fragment or eager SMA buffer */
MCA_BTL_PCIE_TYPE_MAX,     /* max-size send fragment or max-size SMA buffer */
MCA_BTL_PCIE_TYPE_RDMA,    /* fragment built by the dma constructor */
MCA_BTL_PCIE_TYPE_RECV     /* fragment built by the recv constructor */
};
typedef enum mca_btl_pcie_frag_type_t mca_btl_pcie_frag_type_t;
/** SMA transfer fragment */
struct mca_btl_pcie_sma_buf_t {
/** base class: lets the buffer live on an ompi free list */
ompi_free_list_item_t super;
/** Pointer to the SMA space available for this copy. An
ompi_ptr_t because in v1.2, this sits in the sma region,
and we need to not have different sizes on each endpoint. */
ompi_ptr_t pcie_data;
/** type of buffer */
mca_btl_pcie_frag_type_t type;
};
typedef struct mca_btl_pcie_sma_buf_t mca_btl_pcie_sma_buf_t;
/* The eager and max variants share the structure above; they are
   separate type names only so that each gets its own class
   declaration. */
typedef mca_btl_pcie_sma_buf_t mca_btl_pcie_sma_buf_eager_t;
OBJ_CLASS_DECLARATION(mca_btl_pcie_sma_buf_eager_t);
typedef mca_btl_pcie_sma_buf_t mca_btl_pcie_sma_buf_max_t;
OBJ_CLASS_DECLARATION(mca_btl_pcie_sma_buf_max_t);
/**
 * Take an SMA buffer from the module's eager free list.
 *
 * @param btl BTL module; cast to mca_btl_pcie_module_t*
 * @param buf lvalue that receives the item as mca_btl_pcie_sma_buf_t*
 * @param rc  lvalue handed to OMPI_FREE_LIST_GET for its result code
 *
 * Macro arguments are parenthesized so that arbitrary expressions
 * can be passed safely.
 */
#define MCA_BTL_PCIE_SMA_BUF_ALLOC_EAGER(btl, buf, rc)                                    \
{                                                                                         \
    ompi_free_list_item_t *item;                                                          \
    OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)(btl))->pcie_sma_buf_eager, item, rc);   \
    (buf) = (mca_btl_pcie_sma_buf_t*) item;                                               \
}
/**
 * Take an SMA buffer from the module's max-size free list.
 *
 * Parameters are the same as for MCA_BTL_PCIE_SMA_BUF_ALLOC_EAGER.
 */
#define MCA_BTL_PCIE_SMA_BUF_ALLOC_MAX(btl, buf, rc)                                      \
{                                                                                         \
    ompi_free_list_item_t *item;                                                          \
    OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)(btl))->pcie_sma_buf_max, item, rc);     \
    (buf) = (mca_btl_pcie_sma_buf_t*) item;                                               \
}
/**
 * Return an SMA buffer to the free list selected by its type.
 *
 * @param btl BTL module; cast to mca_btl_pcie_module_t*
 * @param buf buffer to return (mca_btl_pcie_sma_buf_t*)
 * @param ret lvalue set to OMPI_SUCCESS, or to OMPI_ERR_BAD_PARAM
 *            when the buffer type is neither EAGER nor MAX
 *
 * Every use of a macro argument is parenthesized, and the pointer
 * is printed through an unsigned long to match the %lx conversion.
 */
#define MCA_BTL_PCIE_SMA_BUF_RETURN(btl, buf, ret)                                        \
{                                                                                         \
    (ret) = OMPI_SUCCESS;                                                                 \
    switch ((buf)->type) {                                                                \
    case MCA_BTL_PCIE_TYPE_EAGER:                                                         \
        OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)(btl))->pcie_sma_buf_eager,       \
                              (ompi_free_list_item_t*)(buf));                             \
        break;                                                                            \
    case MCA_BTL_PCIE_TYPE_MAX:                                                           \
        OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)(btl))->pcie_sma_buf_max,         \
                              (ompi_free_list_item_t*)(buf));                             \
        break;                                                                            \
    default:                                                                              \
        BTL_ERROR(("Invalid return type (%d) for frag 0x%lx in SMA_BUF_RETURN",           \
                   (buf)->type, (unsigned long)(buf)));                                   \
        (ret) = OMPI_ERR_BAD_PARAM;                                                       \
    }                                                                                     \
}
/** Fragment description -- used for send/rdma fragments */
struct mca_btl_pcie_frag_t {
/** base descriptor; its des_src/des_dst point at the embedded
    segment below, depending on the fragment flavor */
mca_btl_base_descriptor_t base;
/** the single segment embedded in the fragment */
mca_btl_base_segment_t segment;
/** endpoint associated with the fragment; NULL after construction */
struct mca_btl_base_endpoint_t *endpoint;
/** message header; for eager/max send fragments it lives directly
    behind the fragment structure, otherwise NULL after construction */
mca_btl_pcie_header_t *hdr;
/** fragment size; eager/max constructors take it from the module's
    btl_eager_limit / btl_max_send_size, other flavors start at 0 */
size_t size;
/** registration; NULL after construction */
struct mca_btl_pcie_reg_t *registration;
/** fragment flavor; selects the free list in FRAG_RETURN */
mca_btl_pcie_frag_type_t type;
/** SMA buffer attached to the fragment; NULL after construction */
mca_btl_pcie_sma_buf_t *sma_buf;
};
typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_t;
/* The four flavors below share the structure above; each alias
   exists so that it can carry its own class declaration. */
typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_eager_t;
OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_eager_t);
typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_max_t;
OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_max_t);
typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_recv_t;
OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_recv_t);
typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_dma_t;
OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_dma_t);
/**
 * Take a fragment from the module's eager free list.
 *
 * @param btl  BTL module; cast to mca_btl_pcie_module_t*
 * @param frag lvalue that receives the item as mca_btl_pcie_frag_t*
 * @param rc   lvalue handed to OMPI_FREE_LIST_GET for its result code
 *
 * Macro arguments are parenthesized so that arbitrary expressions
 * can be passed safely.
 */
#define MCA_BTL_PCIE_FRAG_ALLOC_EAGER(btl, frag, rc)                                      \
{                                                                                         \
    ompi_free_list_item_t *item;                                                          \
    OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)(btl))->pcie_frag_eager, item, rc);      \
    (frag) = (mca_btl_pcie_frag_t*) item;                                                 \
}
/**
 * Take a fragment from the module's max-size free list.
 *
 * Parameters are the same as for MCA_BTL_PCIE_FRAG_ALLOC_EAGER.
 */
#define MCA_BTL_PCIE_FRAG_ALLOC_MAX(btl, frag, rc)                                        \
{                                                                                         \
    ompi_free_list_item_t *item;                                                          \
    OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)(btl))->pcie_frag_max, item, rc);        \
    (frag) = (mca_btl_pcie_frag_t*) item;                                                 \
}
/**
 * Take a fragment from the module's DMA free list.
 *
 * Parameters are the same as for MCA_BTL_PCIE_FRAG_ALLOC_EAGER.
 */
#define MCA_BTL_PCIE_FRAG_ALLOC_DMA(btl, frag, rc)                                        \
{                                                                                         \
    ompi_free_list_item_t *item;                                                          \
    OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)(btl))->pcie_frag_dma, item, rc);        \
    (frag) = (mca_btl_pcie_frag_t*) item;                                                 \
}
/**
 * Return a fragment to the free list selected by its type.
 *
 * @param btl  BTL module; cast to mca_btl_pcie_module_t*
 * @param frag fragment to return (mca_btl_pcie_frag_t*)
 * @param ret  lvalue set to OMPI_SUCCESS, or to OMPI_ERR_BAD_PARAM
 *             when the fragment type is not EAGER, MAX or RDMA
 *
 * Every use of a macro argument is parenthesized, and the pointer
 * is printed through an unsigned long to match the %lx conversion.
 */
#define MCA_BTL_PCIE_FRAG_RETURN(btl, frag, ret)                                          \
{                                                                                         \
    (ret) = OMPI_SUCCESS;                                                                 \
    switch ((frag)->type) {                                                               \
    case MCA_BTL_PCIE_TYPE_EAGER:                                                         \
        OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)(btl))->pcie_frag_eager,          \
                              (ompi_free_list_item_t*)(frag));                            \
        break;                                                                            \
    case MCA_BTL_PCIE_TYPE_MAX:                                                           \
        OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)(btl))->pcie_frag_max,            \
                              (ompi_free_list_item_t*)(frag));                            \
        break;                                                                            \
    case MCA_BTL_PCIE_TYPE_RDMA:                                                          \
        OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)(btl))->pcie_frag_dma,            \
                              (ompi_free_list_item_t*)(frag));                            \
        break;                                                                            \
    default:                                                                              \
        BTL_ERROR(("Invalid return type (%d) for frag 0x%lx in FRAG_RETURN",              \
                   (frag)->type, (unsigned long)(frag)));                                 \
        (ret) = OMPI_ERR_BAD_PARAM;                                                       \
    }                                                                                     \
}
END_C_DECLS
#endif /* #ifndef MCA_BTL_PCIE_FRAG_H */

Просмотреть файл

@ -1,58 +0,0 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All righs reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef BTL_PCIE_CFG_LEX_H_
#define BTL_PCIE_CFG_LEX_H_
#include "opal_config.h"
#ifdef malloc
#undef malloc
#endif
#ifdef realloc
#undef realloc
#endif
#ifdef free
#undef free
#endif
#include <stdio.h>
/* Scanner entry point; the scanner rules return the
   BTL_PCIE_CFG_PARSE_* values declared at the end of this header. */
int btl_pcie_cfg_yylex(void);
/* Create a scanner buffer for the given file and make it current. */
int btl_pcie_cfg_init_buffer(FILE *file);
/* NOTE(review): presumably the flex input stream for this scanner
   prefix -- confirm against the generated scanner. */
extern FILE *btl_pcie_cfg_yyin;
/* Set to true by the scanner's yywrap routine. */
extern bool btl_pcie_cfg_parse_done;
/* NOTE(review): presumably the text of the current token, as
   maintained by flex -- confirm against the generated scanner. */
extern char *btl_pcie_cfg_yytext;
/* Line counter; starts at 1 and is bumped for each newline consumed. */
extern int btl_pcie_cfg_yynewlines;
/*
* Make lex-generated files not issue compiler warnings
*/
#define YY_STACK_USED 0
#define YY_ALWAYS_INTERACTIVE 0
#define YY_NEVER_INTERACTIVE 0
#define YY_MAIN 0
#define YY_NO_UNPUT 1
#define YY_SKIP_YYWRAP 1
enum {
BTL_PCIE_CFG_PARSE_DONE = 1,
BTL_PCIE_CFG_PARSE_ERROR,
BTL_PCIE_CFG_PARSE_NEWLINE,
BTL_PCIE_CFG_PARSE_HOSTNAME_CORE,
BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE,
BTL_PCIE_CFG_PARSE_DEVICE,
BTL_PCIE_CFG_PARSE_MAX
};
#endif /* #ifndef BTL_PCIE_CFG_LEX_H_ */

Просмотреть файл

@ -1,129 +0,0 @@
%{ /* -*- C -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include <stdio.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "btl_pcie_lex.h"
/*
* local functions
*/
BEGIN_C_DECLS
/* frees the current scanner buffer; reached through yyterminate() */
static int finish_parsing(void) ;
/* end-of-input hook; records that parsing is done */
static int btl_pcie_cfg_yywrap(void);
END_C_DECLS
/*
* global variables
*/
/* line counter: starts at 1 and is incremented by every rule that
   consumes a newline */
int btl_pcie_cfg_yynewlines = 1;
/* becomes true once btl_pcie_cfg_yywrap() has been called */
bool btl_pcie_cfg_parse_done = false;
/* NOTE(review): not referenced anywhere else in this scanner file --
   confirm whether it is still needed */
char *btl_pcie_cfg_string = NULL;
/* route scanner termination through finish_parsing() so the current
   buffer is released */
#define yyterminate() \
return finish_parsing()
%}
WHITE [\f\t\v ]
CHAR [A-Za-z0-9_\-\.]
NAME_CHAR [A-Za-z0-9_\-\.\\\/]
%x comment
%x section_name
%x section_end
%x value
%%
{WHITE}*\n              { /* blank line or end of line */
                          ++btl_pcie_cfg_yynewlines;
                          return BTL_PCIE_CFG_PARSE_NEWLINE; }
#.*\n                   { /* '#' comment through end of line */
                          ++btl_pcie_cfg_yynewlines;
                          return BTL_PCIE_CFG_PARSE_NEWLINE; }
"//".*\n                { /* '//' comment through end of line */
                          ++btl_pcie_cfg_yynewlines;
                          return BTL_PCIE_CFG_PARSE_NEWLINE; }
"/*"                    { BEGIN(comment);
                          return BTL_PCIE_CFG_PARSE_NEWLINE; }
<comment>[^*\n]*        ; /* Eat up non '*'s */
<comment>"*"+[^*/\n]*   ; /* Eat '*'s not followed by a '/' */
<comment>\n             { ++btl_pcie_cfg_yynewlines;
                          return BTL_PCIE_CFG_PARSE_NEWLINE; }
<comment>"*"+"/"        { BEGIN(INITIAL); /* Done with block comment */
                          return BTL_PCIE_CFG_PARSE_NEWLINE; }
{CHAR}+":"[0-9]+        { /* hostname:core.  One or more digits are accepted
                             so that core ids of 10 and above are not
                             swallowed by the longer-matching
                             hostname:device rule below.  On a tie in
                             length this rule wins because it is listed
                             first. */
                          return BTL_PCIE_CFG_PARSE_HOSTNAME_CORE; }
{CHAR}+":"{NAME_CHAR}+  { /* hostname:device */
                          return BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE; }
{NAME_CHAR}+            { /* bare device name */
                          return BTL_PCIE_CFG_PARSE_DEVICE; }
{WHITE}+                ; /* whitespace */
%%
/*
 * End-of-parse cleanup. The whole file is always scanned, so when
 * the scanner terminates its current buffer is deleted here and the
 * current-buffer slot is reset to NULL, which prevents a memory leak.
 * Always returns YY_NULL.
 */
static int finish_parsing(void)
{
    if (NULL == YY_CURRENT_BUFFER) {
        /* nothing allocated, nothing to free */
        return YY_NULL;
    }

    yy_delete_buffer(YY_CURRENT_BUFFER);
    /* the macro that names the assignable slot differs between
       flex versions */
#if defined(YY_CURRENT_BUFFER_LVALUE)
    YY_CURRENT_BUFFER_LVALUE = NULL;
#else
    YY_CURRENT_BUFFER = NULL;
#endif /* YY_CURRENT_BUFFER_LVALUE */

    return YY_NULL;
}
/*
 * End-of-input hook for the scanner. Records that parsing is
 * complete and returns 1 (for flex, a non-zero yywrap() result
 * means there is no further input to scan).
 */
static int btl_pcie_cfg_yywrap(void)
{
btl_pcie_cfg_parse_done = true;
return 1;
}
/*
 * Give the scanner a valid buffer to read from. When the scanner is
 * run a second time, finish_parsing() (above) has already freed the
 * previous buffer, but flex still believes its internal state is
 * initialized and expects a usable buffer. A fresh buffer is
 * therefore created for the given file and made current.
 *
 * Always returns 0.
 */
int btl_pcie_cfg_init_buffer(FILE *file)
{
    yy_switch_to_buffer(yy_create_buffer(file, YY_BUF_SIZE));

    return 0;
}

Просмотреть файл

@ -1,194 +0,0 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/class/opal_hash_table.h"
#include "ompi/mca/btl/base/btl_base_error.h"
#include "ompi/mca/mpool/base/base.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "btl_pcie.h"
#include "btl_pcie_proc.h"
/* Constructor / destructor for mca_btl_pcie_proc_t, defined below. */
static void mca_btl_pcie_proc_construct(mca_btl_pcie_proc_t* proc);
static void mca_btl_pcie_proc_destruct(mca_btl_pcie_proc_t* proc);
/* Class instance: parent class is opal_list_item_t, which is what
   lets a proc be placed on the component's pcie_procs list. */
OBJ_CLASS_INSTANCE(mca_btl_pcie_proc_t,
opal_list_item_t, mca_btl_pcie_proc_construct,
mca_btl_pcie_proc_destruct);
/*
 * Constructor for a PCIe proc instance.
 *
 * Clears every pointer and counter member (including endpoint_proc,
 * so that a freshly constructed proc never carries an indeterminate
 * endpoint pointer), constructs the per-proc lock, and appends the
 * proc to the component-wide pcie_procs list under the component
 * lock.
 */
void mca_btl_pcie_proc_construct(mca_btl_pcie_proc_t* proc)
{
    proc->proc_ompi = NULL;
    proc->proc_addr_count = 0;
    proc->endpoint_proc = NULL;
    proc->proc_endpoint_count = 0;
    OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t);

    /* add to list of all proc instances */
    OPAL_THREAD_LOCK(&mca_btl_pcie_component.pcie_lock);
    opal_list_append(&mca_btl_pcie_component.pcie_procs, &proc->super);
    OPAL_THREAD_UNLOCK(&mca_btl_pcie_component.pcie_lock);
}
/*
 * Destructor for a PCIe proc instance: takes the proc off the
 * component-wide pcie_procs list (under the component lock) and
 * destructs the per-proc lock.
 *
 * NOTE(review): endpoint_proc is not released here -- confirm who
 * owns the endpoint.
 */
void mca_btl_pcie_proc_destruct(mca_btl_pcie_proc_t* proc)
{
/* remove from list of all proc instances */
OPAL_THREAD_LOCK(&mca_btl_pcie_component.pcie_lock);
opal_list_remove_item(&mca_btl_pcie_component.pcie_procs, &proc->super);
OPAL_THREAD_UNLOCK(&mca_btl_pcie_component.pcie_lock);
OBJ_DESTRUCT(&proc->proc_lock);
}
/*
 * Search the component's pcie_procs list for the PCIe proc instance
 * that belongs to the given ompi_proc_t. The list is walked with the
 * component lock held.
 *
 * Returns the matching proc, or NULL when there is none.
 */
static mca_btl_pcie_proc_t* mca_btl_pcie_proc_lookup_ompi(ompi_proc_t* ompi_proc)
{
    mca_btl_pcie_proc_t *match = NULL;
    mca_btl_pcie_proc_t *cursor;

    OPAL_THREAD_LOCK(&mca_btl_pcie_component.pcie_lock);

    cursor = (mca_btl_pcie_proc_t*)
        opal_list_get_first(&mca_btl_pcie_component.pcie_procs);
    while (cursor != (mca_btl_pcie_proc_t*)
           opal_list_get_end(&mca_btl_pcie_component.pcie_procs)) {
        if (cursor->proc_ompi == ompi_proc) {
            match = cursor;
            break;
        }
        cursor = (mca_btl_pcie_proc_t*) opal_list_get_next(cursor);
    }

    OPAL_THREAD_UNLOCK(&mca_btl_pcie_component.pcie_lock);

    return match;
}
/*
 * Find or create the PCIe proc structure for a peer. There is a
 * one-to-one correspondence between an ompi_proc_t and a
 * mca_btl_pcie_proc_t instance. Additional data (the endpoint and
 * the device names) associated with a given destination is cached
 * on this structure.
 *
 * ompi_proc  peer process
 * pcie_btl   local BTL module; only peers whose device maps onto
 *            this module's lcl_dev_name are accepted
 * ret_proc   receives the proc, or NULL
 *
 * Returns:
 *   OMPI_SUCCESS with *ret_proc != NULL  existing or newly created proc
 *   OMPI_SUCCESS with *ret_proc == NULL  peer published no usable modex
 *                                        data, or none of its devices
 *                                        matches this module
 *   OMPI_ERROR                           modex receive or endpoint
 *                                        initialization failed
 *   OMPI_ERR_OUT_OF_RESOURCE             object allocation failed
 */
int mca_btl_pcie_proc_create(ompi_proc_t* ompi_proc,
                             mca_btl_pcie_module_t* pcie_btl,
                             mca_btl_pcie_proc_t** ret_proc)
{
    mca_btl_pcie_proc_t* pcie_proc = NULL;
    char *rem_dev_name = NULL, *lcl_dev_name = NULL;
    char *rem_hostname = NULL;
    int rc, num_peers, i;
    size_t size;
    mca_btl_pcie_modex_info_t *modex_info;

    /* Check if already have proc structure for this ompi process */
    pcie_proc = mca_btl_pcie_proc_lookup_ompi(ompi_proc);
    if (NULL != pcie_proc) {
        /* Gotcha! */
        *ret_proc = pcie_proc;
        return OMPI_SUCCESS;
    }

    /* query for the peer's device name info */
    rc = ompi_modex_recv(&mca_btl_pcie_component.super.btl_version,
                         ompi_proc,
                         (void*)&modex_info,
                         &size);
    if (OMPI_SUCCESS != rc) {
        opal_output(mca_btl_base_output, "[%s:%d] ompi_modex_recv failed for peer %s",
                    __FILE__, __LINE__, ORTE_NAME_PRINT(&ompi_proc->proc_name));
        /* pcie_proc is NULL on this path (the lookup above failed),
           so there is no object to release */
        *ret_proc = NULL;
        return OMPI_ERROR;
    }

    /* the payload must be a non-empty whole number of modex records */
    if (0 == size || 0 != size % sizeof(mca_btl_pcie_modex_info_t)) {
        *ret_proc = NULL;
        return OMPI_SUCCESS;
    }

    /* look for a peer device whose local counterpart is this module's
       device */
    num_peers = size / sizeof(mca_btl_pcie_modex_info_t);
    for (i = 0 ; i < num_peers ; ++i) {
        MCA_BTL_PCIE_MODEX_INFO_NTOH(modex_info[i]);
        rem_hostname = modex_info[i].hostname;
        rem_dev_name = modex_info[i].devicename;
        lcl_dev_name = ompi_btl_pcie_cfg_get_matching_device(rem_hostname,
                                                             rem_dev_name);
        if (NULL != lcl_dev_name &&
            0 == strcmp(lcl_dev_name, pcie_btl->lcl_dev_name)) {
            /* we have a match. continue onward */
            break;
        }
    }

    /* make sure the local device names match (also covers the case
       where the loop ran to completion without a match) */
    if (NULL == lcl_dev_name ||
        0 != strcmp(lcl_dev_name, pcie_btl->lcl_dev_name)) {
        *ret_proc = NULL;
        return OMPI_SUCCESS;
    }

    BTL_VERBOSE(("Have matching devices: %s:%s <-> %s:%s",
                 orte_process_info.nodename,
                 pcie_btl->lcl_dev_name,
                 rem_hostname,
                 rem_dev_name));

    pcie_proc = OBJ_NEW(mca_btl_pcie_proc_t);
    if (NULL == pcie_proc) {
        *ret_proc = NULL;
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    pcie_proc->proc_ompi = ompi_proc;

    /* build a unique identifier (of arbitrary
     * size) to represent the proc */
    pcie_proc->proc_guid = ompi_proc->proc_name;

    /* Initialize number of peer */
    pcie_proc->proc_endpoint_count = 1;
    pcie_proc->endpoint_proc = OBJ_NEW(mca_btl_pcie_endpoint_t);
    if (NULL == pcie_proc->endpoint_proc) {
        /* rem_dev_name points into the modex buffer and must not be
           passed to free().  Release the proc instead so that it does
           not stay on the component's proc list half-initialized. */
        OBJ_RELEASE(pcie_proc);
        *ret_proc = NULL;
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    pcie_proc->endpoint_proc->lcl_dev_name = lcl_dev_name;
    pcie_proc->endpoint_proc->rem_dev_name = rem_dev_name;
    pcie_proc->endpoint_proc->endpoint_proc = pcie_proc;
    pcie_proc->endpoint_proc->endpoint_btl = pcie_btl;

    if (OMPI_SUCCESS != mca_btl_pcie_endpoint_init(pcie_proc->endpoint_proc)) {
        BTL_ERROR(("Error initializing the PCIE endpoint \n"));
        /* Drop the proc so later lookups do not hand out a proc whose
           endpoint never came up.
           NOTE(review): the endpoint object itself is not released
           here because its destructor is not visible from this file;
           confirm whether it is safe to release after a failed init. */
        OBJ_RELEASE(pcie_proc);
        *ret_proc = NULL;
        return OMPI_ERROR;
    }

    *ret_proc = pcie_proc;
    return OMPI_SUCCESS;
}

Просмотреть файл

@ -1,62 +0,0 @@
/*
* Copyright (c) 2007 Los Alamos National Security, LLC.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_BTL_PCIE_PROC_H
#define MCA_BTL_PCIE_PROC_H
#include "ompi_config.h"
#include "opal/class/opal_list.h"
#include "ompi/proc/proc.h"
#include "btl_pcie.h"
#include "btl_pcie_endpoint.h"
BEGIN_C_DECLS
/**
 * Represents the state of a remote process and the set of addresses
 * that it exports. Also caches the mca_btl_base_endpoint_t instance
 * created to reach that process.
 */
struct mca_btl_pcie_proc_t {
opal_list_item_t super;
/**< allow proc to be placed on a list */
ompi_proc_t *proc_ompi;
/**< pointer to corresponding ompi_proc_t */
orte_process_name_t proc_guid;
/**< globally unique identifier for the process */
size_t proc_addr_count;
/**< number of addresses published by endpoint */
struct mca_btl_base_endpoint_t *endpoint_proc;
/**< endpoint that has been created to access this proc */
size_t proc_endpoint_count;
/**< number of endpoints */
opal_mutex_t proc_lock;
/**< lock to protect against concurrent access to proc state */
};
typedef struct mca_btl_pcie_proc_t mca_btl_pcie_proc_t;
OBJ_CLASS_DECLARATION(mca_btl_pcie_proc_t);
/**
 * Find or create the PCIe proc structure for a peer process.
 *
 * @param[in]  ompi_proc The peer process
 * @param[in]  pcie_btl  The local PCIe BTL module; the peer must
 *                       publish a device that maps onto this
 *                       module's local device name
 * @param[out] ret_proc  The proc structure. Set to NULL on error,
 *                       and also when the peer has no usable modex
 *                       data or no device matching this module
 *                       (the return value is OMPI_SUCCESS in the
 *                       latter cases)
 *
 * @retval OMPI_SUCCESS             See ret_proc above
 * @retval OMPI_ERROR               Modex receive or endpoint
 *                                  initialization failed
 * @retval OMPI_ERR_OUT_OF_RESOURCE Object allocation failed
 */
int mca_btl_pcie_proc_create(ompi_proc_t* ompi_proc,
mca_btl_pcie_module_t* pcie_btl,
mca_btl_pcie_proc_t** ret_proc);
END_C_DECLS
#endif /* #ifndef MCA_BTL_PCIE_PROC_H */

Просмотреть файл

@ -1,31 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_btl_pcie_CONFIG(action-if-can-compile,
# [action-if-cant-compile])
# ------------------------------------------------
# Runs OMPI_CHECK_PCIE with the btl_pcie prefix and records the
# outcome in btl_pcie_happy. When the check succeeds, the wrapper
# compiler extra LDFLAGS and LIBS are seeded from btl_pcie_LDFLAGS
# and btl_pcie_LIBS and the first action is expanded; otherwise the
# second action is expanded. The CPPFLAGS, LDFLAGS and LIBS
# variables are substituted in either case.
AC_DEFUN([MCA_btl_pcie_CONFIG],[
OMPI_CHECK_PCIE([btl_pcie],
[btl_pcie_happy="yes"],
[btl_pcie_happy="no"])
AS_IF([test "$btl_pcie_happy" = "yes"],
[btl_pcie_WRAPPER_EXTRA_LDFLAGS="$btl_pcie_LDFLAGS"
btl_pcie_WRAPPER_EXTRA_LIBS="$btl_pcie_LIBS"
$1],
[$2])
# substitute in the things needed to build pcie
AC_SUBST([btl_pcie_CPPFLAGS])
AC_SUBST([btl_pcie_LDFLAGS])
AC_SUBST([btl_pcie_LIBS])
])dnl

Просмотреть файл

@ -1,24 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module
PARAM_CONFIG_FILES="Makefile"

Просмотреть файл

@ -1,20 +0,0 @@
# -*- text -*-
# Copyright (c) 2007 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English help file for Open MPI's IBM PCIe support
[initialization:more-than-one-cpu]
The PCIe BTL found that the CPU affinity mask for the current process
includes more than one CPU (%d). When using Open MPI on the Cell
machines with the PCIe driver, the affinity mask must include exactly
one CPU.
[initialization:no-cpus]
The PCIe BTL was unable to find any CPUs in the affinity mask for the
current process. This usually indicates a system issue that must be
resolved by the system administrator.

Просмотреть файл

@ -1,159 +0,0 @@
# hostname:core device
n01-001-0:0 /dev/axon0
n01-001-0:1 /dev/axon1
n01-001-0:2 /dev/axon2
n01-001-0:3 /dev/axon3
n01-001-1:0 /dev/axon0
n01-001-1:1 /dev/axon1
n01-001-2:0 /dev/axon0
n01-001-2:1 /dev/axon1
n01-002-0:0 /dev/axon0
n01-002-0:1 /dev/axon1
n01-002-0:2 /dev/axon2
n01-002-0:3 /dev/axon3
n01-002-1:0 /dev/axon0
n01-002-1:1 /dev/axon1
n01-002-2:0 /dev/axon0
n01-002-2:1 /dev/axon1
n01-003-0:0 /dev/axon0
n01-003-0:1 /dev/axon1
n01-003-0:2 /dev/axon2
n01-003-0:3 /dev/axon3
n01-003-1:0 /dev/axon0
n01-003-1:1 /dev/axon1
n01-004-0:0 /dev/axon0
n01-004-0:1 /dev/axon1
n01-004-0:2 /dev/axon2
n01-004-0:3 /dev/axon3
n01-004-1:0 /dev/axon0
n01-004-1:1 /dev/axon1
n01-005-0:0 /dev/axon0
n01-005-0:1 /dev/axon1
n01-005-0:2 /dev/axon2
n01-005-0:3 /dev/axon3
n01-005-1:0 /dev/axon0
n01-005-1:1 /dev/axon1
n01-005-2:0 /dev/axon0
n01-005-2:1 /dev/axon1
n01-006-0:0 /dev/axon0
n01-006-0:1 /dev/axon1
n01-006-0:2 /dev/axon2
n01-006-0:3 /dev/axon3
n01-006-1:0 /dev/axon0
n01-006-1:1 /dev/axon1
n01-006-2:0 /dev/axon0
n01-006-2:1 /dev/axon1
n01-007-0:0 /dev/axon0
n01-007-0:1 /dev/axon1
n01-007-0:2 /dev/axon2
n01-007-0:3 /dev/axon3
n01-007-1:0 /dev/axon0
n01-007-1:1 /dev/axon1
n01-007-2:0 /dev/axon0
n01-007-2:1 /dev/axon1
n01-008-0:0 /dev/axon0
n01-008-0:1 /dev/axon1
n01-008-0:2 /dev/axon2
n01-008-0:3 /dev/axon3
n01-008-1:0 /dev/axon0
n01-008-1:1 /dev/axon1
n01-008-2:0 /dev/axon0
n01-008-2:1 /dev/axon1
n01-009-0:0 /dev/axon0
n01-009-0:1 /dev/axon1
n01-009-0:2 /dev/axon2
n01-009-0:3 /dev/axon3
n01-009-1:0 /dev/axon0
n01-009-1:1 /dev/axon1
n01-009-2:0 /dev/axon0
n01-009-2:1 /dev/axon1
n01-010-0:0 /dev/axon0
n01-010-0:1 /dev/axon1
n01-010-0:2 /dev/axon2
n01-010-0:3 /dev/axon3
n01-010-1:0 /dev/axon0
n01-010-1:1 /dev/axon1
n01-010-2:0 /dev/axon0
n01-010-2:1 /dev/axon1
n01-011-0:0 /dev/axon0
n01-011-0:1 /dev/axon1
n01-011-0:2 /dev/axon2
n01-011-0:3 /dev/axon3
n01-011-1:0 /dev/axon0
n01-011-1:1 /dev/axon1
n01-011-2:0 /dev/axon0
n01-011-2:1 /dev/axon1
n01-012-0:0 /dev/axon0
n01-012-0:1 /dev/axon1
n01-012-0:2 /dev/axon2
n01-012-0:3 /dev/axon3
n01-012-1:0 /dev/axon0
n01-012-1:1 /dev/axon1
n01-012-2:0 /dev/axon0
n01-012-2:1 /dev/axon1
n01-013-0:0 /dev/axon0
n01-013-0:1 /dev/axon1
n01-013-0:2 /dev/axon2
n01-013-0:3 /dev/axon3
n01-013-1:0 /dev/axon0
n01-013-1:1 /dev/axon1
n01-013-2:0 /dev/axon0
n01-013-2:1 /dev/axon1
n01-014-0:0 /dev/axon0
n01-014-0:1 /dev/axon1
n01-014-0:2 /dev/axon2
n01-014-0:3 /dev/axon3
n01-014-1:0 /dev/axon0
n01-014-1:1 /dev/axon1
n01-014-2:0 /dev/axon0
n01-014-2:1 /dev/axon1
n01-015-0:0 /dev/axon0
n01-015-0:1 /dev/axon1
n01-015-0:2 /dev/axon2
n01-015-0:3 /dev/axon3
n01-015-1:0 /dev/axon0
n01-015-1:1 /dev/axon1
n01-015-2:0 /dev/axon0
n01-015-2:1 /dev/axon1

Просмотреть файл

@ -1,82 +0,0 @@
# opteron_host:device cell_host:device
n01-001-0:/dev/axon0 n01-001-1:/dev/axon0
n01-001-0:/dev/axon1 n01-001-1:/dev/axon1
n01-001-0:/dev/axon2 n01-001-2:/dev/axon0
n01-001-0:/dev/axon3 n01-001-2:/dev/axon1
n01-002-0:/dev/axon0 n01-002-1:/dev/axon0
n01-002-0:/dev/axon1 n01-002-1:/dev/axon1
n01-002-0:/dev/axon2 n01-002-2:/dev/axon0
n01-002-0:/dev/axon3 n01-002-2:/dev/axon1
n01-003-0:/dev/axon0 n01-003-1:/dev/axon0
n01-003-0:/dev/axon1 n01-003-1:/dev/axon1
n01-003-0:/dev/axon2 n01-003-2:/dev/axon0
n01-003-0:/dev/axon3 n01-003-2:/dev/axon1
n01-004-0:/dev/axon0 n01-004-1:/dev/axon0
n01-004-0:/dev/axon1 n01-004-1:/dev/axon1
n01-004-0:/dev/axon2 n01-004-2:/dev/axon0
n01-004-0:/dev/axon3 n01-004-2:/dev/axon1
n01-005-0:/dev/axon0 n01-005-1:/dev/axon0
n01-005-0:/dev/axon1 n01-005-1:/dev/axon1
n01-005-0:/dev/axon2 n01-005-2:/dev/axon0
n01-005-0:/dev/axon3 n01-005-2:/dev/axon1
n01-006-0:/dev/axon0 n01-006-1:/dev/axon0
n01-006-0:/dev/axon1 n01-006-1:/dev/axon1
n01-006-0:/dev/axon2 n01-006-2:/dev/axon0
n01-006-0:/dev/axon3 n01-006-2:/dev/axon1
n01-007-0:/dev/axon0 n01-007-1:/dev/axon0
n01-007-0:/dev/axon1 n01-007-1:/dev/axon1
n01-007-0:/dev/axon2 n01-007-2:/dev/axon0
n01-007-0:/dev/axon3 n01-007-2:/dev/axon1
n01-008-0:/dev/axon0 n01-008-1:/dev/axon0
n01-008-0:/dev/axon1 n01-008-1:/dev/axon1
n01-008-0:/dev/axon2 n01-008-2:/dev/axon0
n01-008-0:/dev/axon3 n01-008-2:/dev/axon1
n01-009-0:/dev/axon0 n01-009-1:/dev/axon0
n01-009-0:/dev/axon1 n01-009-1:/dev/axon1
n01-009-0:/dev/axon2 n01-009-2:/dev/axon0
n01-009-0:/dev/axon3 n01-009-2:/dev/axon1
n01-010-0:/dev/axon0 n01-010-1:/dev/axon0
n01-010-0:/dev/axon1 n01-010-1:/dev/axon1
n01-010-0:/dev/axon2 n01-010-2:/dev/axon0
n01-010-0:/dev/axon3 n01-010-2:/dev/axon1
n01-011-0:/dev/axon0 n01-011-1:/dev/axon0
n01-011-0:/dev/axon1 n01-011-1:/dev/axon1
n01-011-0:/dev/axon2 n01-011-2:/dev/axon0
n01-011-0:/dev/axon3 n01-011-2:/dev/axon1
n01-012-0:/dev/axon0 n01-012-1:/dev/axon0
n01-012-0:/dev/axon1 n01-012-1:/dev/axon1
n01-012-0:/dev/axon2 n01-012-2:/dev/axon0
n01-012-0:/dev/axon3 n01-012-2:/dev/axon1
n01-013-0:/dev/axon0 n01-013-1:/dev/axon0
n01-013-0:/dev/axon1 n01-013-1:/dev/axon1
n01-013-0:/dev/axon2 n01-013-2:/dev/axon0
n01-013-0:/dev/axon3 n01-013-2:/dev/axon1
n01-014-0:/dev/axon0 n01-014-1:/dev/axon0
n01-014-0:/dev/axon1 n01-014-1:/dev/axon1
n01-014-0:/dev/axon2 n01-014-2:/dev/axon0
n01-014-0:/dev/axon3 n01-014-2:/dev/axon1
n01-015-0:/dev/axon0 n01-015-1:/dev/axon0
n01-015-0:/dev/axon1 n01-015-1:/dev/axon1
n01-015-0:/dev/axon2 n01-015-2:/dev/axon0
n01-015-0:/dev/axon3 n01-015-2:/dev/axon1
n01-016-0:/dev/axon0 n01-016-1:/dev/axon0
n01-016-0:/dev/axon1 n01-016-1:/dev/axon1
n01-016-0:/dev/axon2 n01-016-2:/dev/axon0
n01-016-0:/dev/axon3 n01-016-2:/dev/axon1