Remove a stale pcie btl that never got completed
This commit was SVN r22498.
Этот коммит содержится в:
родитель
93e930ae13
Коммит
b3dd63fd81
@ -1,71 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Preprocessor flags for every object in this directory.
AM_CPPFLAGS = $(btl_pcie_CPPFLAGS)

# Help-message catalogue shipped as package data.
dist_pkgdata_DATA = help-mpi-btl-pcie.txt

# All files that make up the component, whichever way it is built.
sources = \
    btl_pcie.c \
    btl_pcie.h \
    btl_pcie_component.c \
    btl_pcie_endpoint.c \
    btl_pcie_endpoint.h \
    btl_pcie_fifo.c \
    btl_pcie_fifo.h \
    btl_pcie_frag.c \
    btl_pcie_frag.h \
    btl_pcie_proc.c \
    btl_pcie_proc.h \
    btl_pcie_lex.l \
    btl_pcie_lex.h \
    btl_pcie_cfg.c

# Exactly one of (component, lib) is non-empty: a loadable module when the
# DSO conditional is set, a non-installed libtool library otherwise.
if OMPI_BUILD_btl_pcie_DSO
lib =
lib_sources =
component = mca_btl_pcie.la
component_sources = $(sources)
else
lib = libmca_btl_pcie.la
lib_sources = $(sources)
component =
component_sources =
endif

# Loadable-module flavour.
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component)
mca_btl_pcie_la_SOURCES = $(component_sources)
mca_btl_pcie_la_LDFLAGS = -module -avoid-version $(btl_pcie_LDFLAGS)
mca_btl_pcie_la_LIBADD = $(btl_pcie_LIBS)

# Non-installed library flavour.
noinst_LTLIBRARIES = $(lib)
libmca_btl_pcie_la_SOURCES = $(lib_sources)
libmca_btl_pcie_la_LDFLAGS = -module -avoid-version $(btl_pcie_LDFLAGS)
libmca_btl_pcie_la_LIBADD = $(btl_pcie_LIBS)

# Resource description files installed into sysconfdir.
ompi_sysconfdir = $(sysconfdir)
ompi_sysconf_DATA = \
    mca-btl-pcie-local-resources.cfg \
    mca-btl-pcie-remote-resources.cfg
|
||||
|
@ -1,149 +0,0 @@
|
||||
/**
|
||||
* axon_ioctl - provides an io control interface to the axon driver
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*
|
||||
* Copyright (C) IBM Corporation, 2008
|
||||
*
|
||||
* Authors: H Brett Bolen <hbbolen@us.ibm.com>,
|
||||
* Tim Schimke <tschimke@us.ibm.com>,
|
||||
* Jesse Arroyo <arroyoj@us.ibm.com>,
|
||||
* Murali N Iyer <mniyer@us.ibm.com>
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __AXON_IOCTL_H__
|
||||
#define __AXON_IOCTL_H__
|
||||
|
||||
/* Magic byte shared by every axon ioctl command number. */
#define AXONIO_IOC_MAGIC 'x'

/* DMA memory registration commands */
#define AXONIO_DMA_REGISTER \
    _IOWR(AXONIO_IOC_MAGIC, 0x7, struct AXON_MR_registration)
#define AXONIO_DMA_DEREGISTER \
    _IOWR(AXONIO_IOC_MAGIC, 0x8, struct AXON_MR_deregistration)
#define AXONIO_DMA_EXTREGISTER \
    _IOWR(AXONIO_IOC_MAGIC, 0x10, struct AXON_MR_ext_registration)
#define AXONIO_ISSUE_DMA_FAST \
    _IOWR(AXONIO_IOC_MAGIC, 0x11, __u32)

/* Wakeup notification of the remote system */
#define AXONIO_NOTIFY \
    _IOR(AXONIO_IOC_MAGIC, 0x13, __u32)
|
||||
|
||||
|
||||
/*
 * Offsets passed to mmap() to select which region of the device is mapped.
 */
#define LOCAL_SMA_OFFSET            0x0
#define REMOTE_SMA_OFFSET           0x0100000
#define DMA_COMMAND_BUFFER_OFFSET   0x0800000
|
||||
|
||||
|
||||
/*
|
||||
* opaque handles
|
||||
*/
|
||||
typedef __u64 AXON_memory_region_handle;
|
||||
|
||||
/**
 * Permission bits used by memory registration
 * (AXON_DMA_REGISTER / AXON_DMA_DEREGISTER).
 *
 * The values form a bitmask:
 *   0x01  local access (always true)
 *   0x02  local read
 *   0x04  local write
 *   0x10  remote access
 *   0x20  remote read
 *   0x40  remote write
 */
enum {
    AXON_MR_LOCAL_ACCESS  = 0x00000001,
    AXON_MR_LOCAL_READ    = 0x00000002,
    AXON_MR_LOCAL_WRITE   = 0x00000004,
    AXON_MR_REMOTE_ACCESS = 0x00000010,
    AXON_MR_REMOTE_READ   = 0x00000020,
    AXON_MR_REMOTE_WRITE  = 0x00000040,
};
|
||||
|
||||
|
||||
struct AXON_MR_registration{
|
||||
AXON_memory_region_handle memory_region_handle;
|
||||
__u64 local_dma_memory;
|
||||
__u64 local_dma_memory_size;
|
||||
__u64 permissions;
|
||||
};
|
||||
|
||||
struct AXON_MR_deregistration{
|
||||
AXON_memory_region_handle memory_region_handle;
|
||||
};
|
||||
|
||||
struct AXON_MR_ext_registration{
|
||||
AXON_memory_region_handle memory_region_handle;
|
||||
__u64 permissions;
|
||||
};
|
||||
|
||||
/**
|
||||
* Supports DMA GET/PUT status queries
|
||||
*
|
||||
* NOTE: AXON_dma_request required to be within SMA area
|
||||
*/
|
||||
|
||||
struct AXON_dma_list_entry {
|
||||
AXON_memory_region_handle src_memory_region_handle;
|
||||
__u64 src_address;
|
||||
__u64 transfer_size;
|
||||
/* total size 0x18 */
|
||||
};
|
||||
|
||||
/* Direction of a DMA request (AXON_dma_request.dma_type). */
enum {
    AXON_DMATYPE_PUT = 0x01,    /* dma local to remote */
    AXON_DMATYPE_GET = 0x02,    /* dma remote to local */
};
|
||||
|
||||
/* Flag bits for AXON_dma_request.flags. */
enum {
    AXON_DMAFLAG_WRITE_REMOTE_STATUS     = 0x00000001,
    AXON_DMAFLAG_LOCAL_COMPLETION_SIGNAL = 0x00000002,
};
|
||||
|
||||
struct AXON_dma_request {
|
||||
__u32 dma_type;
|
||||
__u32 flags;
|
||||
__u32 localDmaStatusOffset;
|
||||
__u32 remoteDmaStatusOffset;
|
||||
__u64 transfer_size; /* bytes */
|
||||
__u32 local_descriptor_count;
|
||||
__u32 remote_descriptor_count;
|
||||
__u64 rsvd1;
|
||||
struct AXON_dma_list_entry local_descriptor[10];
|
||||
struct AXON_dma_list_entry remote_descriptor[10];
|
||||
};
|
||||
struct AXON_dma_command_list_fast {
|
||||
__u32 dma_requests_available;
|
||||
__u32 dma_requests_started;
|
||||
__u32 dma_req_offset; /* offset into command block mmap area */
|
||||
};
|
||||
|
||||
/**
|
||||
* Wakeup Notificaton
|
||||
*/
|
||||
struct AXON_WAKEUP {
|
||||
__u32 type;
|
||||
};
|
||||
|
||||
|
||||
#endif /* __AXON_IOCTL_H__ */
|
@ -1,612 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2009 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <sched.h>
|
||||
|
||||
#include "opal/types.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/if.h"
|
||||
#include "opal/sys/atomic.h"
|
||||
#include "opal/mca/paffinity/paffinity.h"
|
||||
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
|
||||
#include "btl_pcie.h"
|
||||
#include "btl_pcie_frag.h"
|
||||
#include "btl_pcie_proc.h"
|
||||
#include "btl_pcie_endpoint.h"
|
||||
#include "btl_pcie_ddriver.h"
|
||||
|
||||
mca_btl_pcie_module_t mca_btl_pcie_module = {
|
||||
{
|
||||
&mca_btl_pcie_component.super,
|
||||
0, /* max size of first fragment */
|
||||
0, /* Threshold below which BTL should not fragment */
|
||||
0, /* max send fragment size */
|
||||
0, /* pipeline protocol length */
|
||||
0, /* max rdma fragment size */
|
||||
0, /* min packet size for pipeline protocol */
|
||||
0, /* exclusivity */
|
||||
0, /* latency */
|
||||
0, /* bandwidth */
|
||||
0, /* flags */
|
||||
mca_btl_pcie_add_procs,
|
||||
mca_btl_pcie_del_procs,
|
||||
mca_btl_pcie_register,
|
||||
mca_btl_pcie_finalize,
|
||||
mca_btl_pcie_alloc,
|
||||
mca_btl_pcie_free,
|
||||
mca_btl_pcie_prepare_src,
|
||||
mca_btl_pcie_prepare_dst,
|
||||
mca_btl_pcie_send,
|
||||
NULL, /* send immediate */
|
||||
mca_btl_pcie_put, /* put */
|
||||
NULL, /* get */
|
||||
mca_btl_base_dump, /*dump */
|
||||
NULL, /* mpool */
|
||||
NULL, /* register error cb */
|
||||
NULL /* ft event */
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
int mca_btl_pcie_add_procs(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
size_t nprocs,
|
||||
struct ompi_proc_t **ompi_procs,
|
||||
struct mca_btl_base_endpoint_t** peers,
|
||||
opal_bitmap_t* reachable)
|
||||
{
|
||||
mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*)btl;
|
||||
int i;
|
||||
|
||||
for(i = 0; i < (int) nprocs; i++) {
|
||||
struct ompi_proc_t* ompi_proc = ompi_procs[i];
|
||||
mca_btl_pcie_proc_t* pcie_proc;
|
||||
int rc;
|
||||
|
||||
/* Don't connect to anyone on our local node, including
|
||||
ourselves. The PCIe doesn't work that way, and the mapper
|
||||
sometimes gets confused by that fact. */
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(ompi_proc->proc_flags)) continue;
|
||||
|
||||
rc = mca_btl_pcie_proc_create(ompi_proc, pcie_btl, &pcie_proc);
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
return rc;
|
||||
} else if (pcie_proc) {
|
||||
opal_bitmap_set_bit(reachable, i);
|
||||
peers[i] = pcie_proc->endpoint_proc;
|
||||
}
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_btl_pcie_del_procs(struct mca_btl_base_module_t* btl,
|
||||
size_t nprocs,
|
||||
struct ompi_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t ** peers)
|
||||
{
|
||||
/* TODO */
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Register callback function to support send/recv semantics
|
||||
*/
|
||||
|
||||
int mca_btl_pcie_register(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_module_recv_cb_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl;
|
||||
pcie_btl->pcie_reg[tag].cbfunc = cbfunc;
|
||||
pcie_btl->pcie_reg[tag].cbdata = cbdata;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Allocate a segment.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param size (IN) Request segment size.
|
||||
*/
|
||||
|
||||
mca_btl_base_descriptor_t* mca_btl_pcie_alloc(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
uint8_t order,
|
||||
size_t size,
|
||||
uint32_t flags)
|
||||
{
|
||||
mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl;
|
||||
mca_btl_pcie_frag_t* frag = NULL;
|
||||
int rc;
|
||||
|
||||
if (size <= btl->btl_eager_limit) {
|
||||
MCA_BTL_PCIE_FRAG_ALLOC_EAGER(pcie_btl, frag, rc);
|
||||
if (frag) {
|
||||
frag->segment.seg_len = size;
|
||||
frag->base.des_flags = 0;
|
||||
frag->hdr->length = size;
|
||||
}
|
||||
}
|
||||
if (NULL == frag && size <= btl->btl_max_send_size) {
|
||||
MCA_BTL_PCIE_FRAG_ALLOC_MAX(pcie_btl, frag, rc);
|
||||
if (frag) {
|
||||
frag->segment.seg_len = size;
|
||||
frag->base.des_flags = 0;
|
||||
frag->hdr->length = size;
|
||||
}
|
||||
}
|
||||
BTL_VERBOSE(("btl_pcie_alloc called for %lu bytes, returning 0x%lx", (unsigned long)size, (long)frag));
|
||||
|
||||
return (mca_btl_base_descriptor_t*) frag;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return a segment
|
||||
*/
|
||||
|
||||
int mca_btl_pcie_free(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_descriptor_t* des)
|
||||
{
|
||||
mca_btl_pcie_frag_t* frag = (mca_btl_pcie_frag_t*)des;
|
||||
mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl;
|
||||
int ret;
|
||||
|
||||
BTL_VERBOSE(("btl_pcie_free returning 0x%lx", (long)frag));
|
||||
|
||||
if (frag->registration != NULL) {
|
||||
pcie_btl->rdma_mpool->mpool_deregister(pcie_btl->rdma_mpool,
|
||||
(mca_mpool_base_registration_t*)
|
||||
frag->registration);
|
||||
frag->registration = NULL;
|
||||
}
|
||||
|
||||
MCA_BTL_PCIE_FRAG_RETURN(pcie_btl, frag, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Pack data and return a descriptor that can be
|
||||
* used for send/put.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param peer (IN) BTL peer addressing
|
||||
*/
|
||||
mca_btl_base_descriptor_t* mca_btl_pcie_prepare_src(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct mca_mpool_base_registration_t* registration,
|
||||
struct opal_convertor_t* convertor,
|
||||
uint8_t order,
|
||||
size_t reserve,
|
||||
size_t* size,
|
||||
uint32_t flags
|
||||
)
|
||||
{
|
||||
mca_btl_pcie_frag_t* frag = NULL;
|
||||
mca_btl_pcie_reg_t* pcie_reg;
|
||||
mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl;
|
||||
struct iovec iov;
|
||||
uint32_t iov_count = 1;
|
||||
size_t max_data = *size;
|
||||
int rc;
|
||||
|
||||
BTL_VERBOSE(("btl_pcie_prepare_src called with reserve %lu", (unsigned long)reserve));
|
||||
|
||||
/* check and see if the data is contiguous */
|
||||
if(opal_convertor_need_buffers(convertor) == false && 0 == reserve) {
|
||||
MCA_BTL_PCIE_FRAG_ALLOC_DMA(btl, frag, rc);
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = NULL;
|
||||
|
||||
/* get the user buffer's address */
|
||||
opal_convertor_pack(convertor, &iov, &iov_count, &max_data);
|
||||
*size = max_data;
|
||||
|
||||
if(NULL == registration) {
|
||||
rc = pcie_btl->rdma_mpool->mpool_register(pcie_btl->rdma_mpool,
|
||||
iov.iov_base, max_data, 0, ®istration);
|
||||
if(OMPI_SUCCESS != rc || NULL == registration){
|
||||
MCA_BTL_PCIE_FRAG_RETURN(pcie_btl, frag, rc);
|
||||
return NULL;
|
||||
}
|
||||
frag->registration = (mca_btl_pcie_reg_t*) registration;
|
||||
}
|
||||
|
||||
pcie_reg = (mca_btl_pcie_reg_t*) registration;
|
||||
frag->base.des_flags = 0;
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = NULL;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
frag->base.des_flags = 0;
|
||||
|
||||
frag->segment.seg_len = max_data;
|
||||
frag->segment.seg_addr.pval = iov.iov_base;
|
||||
frag->segment.seg_key.key64 = (uint64_t)pcie_reg->handle;
|
||||
|
||||
BTL_VERBOSE(("prepare_src: frag->segment.seg_len = %lu .seg_addr.pval= %lu "
|
||||
"frag->segment.seg_key.key64 = %lu",
|
||||
(unsigned long)frag->segment.seg_len, (unsigned long)frag->segment.seg_addr.pval,
|
||||
(unsigned long)frag->segment.seg_key.key64));
|
||||
|
||||
return &frag->base;
|
||||
|
||||
} else {
|
||||
/*
|
||||
* if we aren't pinning the data and the requested size is less
|
||||
* than the eager limit pack into a fragment from the eager pool
|
||||
*/
|
||||
if (max_data+reserve <= btl->btl_eager_limit) {
|
||||
|
||||
MCA_BTL_PCIE_FRAG_ALLOC_EAGER(btl, frag, rc);
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve;
|
||||
|
||||
rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
*size = max_data;
|
||||
if( rc < 0 ) {
|
||||
MCA_BTL_PCIE_FRAG_RETURN(btl, frag, rc);
|
||||
return NULL;
|
||||
}
|
||||
frag->segment.seg_len = max_data + reserve;
|
||||
}
|
||||
|
||||
/*
|
||||
* otherwise pack as much data as we can into a fragment
|
||||
* that is the max send size.
|
||||
*/
|
||||
else {
|
||||
|
||||
MCA_BTL_PCIE_FRAG_ALLOC_MAX(btl, frag, rc);
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
if(max_data + reserve > frag->size){
|
||||
max_data = frag->size - reserve;
|
||||
}
|
||||
iov.iov_len = max_data;
|
||||
iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve;
|
||||
|
||||
rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data );
|
||||
*size = max_data;
|
||||
|
||||
if( rc < 0 ) {
|
||||
MCA_BTL_PCIE_FRAG_RETURN(btl, frag, rc);
|
||||
return NULL;
|
||||
}
|
||||
frag->segment.seg_len = max_data + reserve;
|
||||
|
||||
}
|
||||
frag->hdr->length = *size + reserve;
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = NULL;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
frag->base.des_flags = 0;
|
||||
return &frag->base;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare a descriptor for send/rdma using the supplied
|
||||
* convertor. If the convertor references data that is contigous,
|
||||
* the descriptor may simply point to the user buffer. Otherwise,
|
||||
* this routine is responsible for allocating buffer space and
|
||||
* packing if required.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL peer addressing
|
||||
* @param convertor (IN) Data type convertor
|
||||
* @param reserve (IN) Additional bytes requested by upper layer to precede user data
|
||||
* @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT)
|
||||
*/
|
||||
|
||||
mca_btl_base_descriptor_t* mca_btl_pcie_prepare_dst(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct mca_mpool_base_registration_t* registration,
|
||||
struct opal_convertor_t* convertor,
|
||||
uint8_t order,
|
||||
size_t reserve,
|
||||
size_t* size,
|
||||
uint32_t flags)
|
||||
{
|
||||
mca_btl_pcie_frag_t* frag;
|
||||
mca_btl_pcie_reg_t* pcie_reg;
|
||||
mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl;
|
||||
int rc;
|
||||
ptrdiff_t lb;
|
||||
|
||||
MCA_BTL_PCIE_FRAG_ALLOC_DMA(pcie_btl, frag, rc);
|
||||
if(NULL == frag) {
|
||||
return NULL;
|
||||
}
|
||||
ompi_datatype_type_lb((ompi_datatype_t*)convertor->pDesc, &lb);
|
||||
frag->segment.seg_addr.pval = convertor->pBaseBuf + lb +
|
||||
convertor->bConverted;
|
||||
if(NULL == registration) {
|
||||
rc = pcie_btl->rdma_mpool->mpool_register(pcie_btl->rdma_mpool,
|
||||
frag->segment.seg_addr.pval, *size, 0,
|
||||
®istration);
|
||||
if(OMPI_SUCCESS != rc || NULL == registration) {
|
||||
MCA_BTL_PCIE_FRAG_RETURN(pcie_btl, frag, rc);
|
||||
return NULL;
|
||||
}
|
||||
frag->registration = (mca_btl_pcie_reg_t*) registration;
|
||||
}
|
||||
pcie_reg = (mca_btl_pcie_reg_t*)registration;
|
||||
|
||||
frag->segment.seg_len = *size;
|
||||
frag->segment.seg_key.key64 = (uint64_t) pcie_reg->handle;
|
||||
|
||||
frag->base.des_dst = &frag->segment;
|
||||
frag->base.des_dst_cnt = 1;
|
||||
frag->base.des_src = NULL;
|
||||
frag->base.des_src_cnt = 0;
|
||||
frag->base.des_flags = 0;
|
||||
|
||||
BTL_VERBOSE(("prepare_dst: frag->segment.seg_len = %lu .seg_addr.pval= %lu "
|
||||
"frag->segment.seg_key.key64 = %lu",
|
||||
(unsigned long)frag->segment.seg_len, (unsigned long)frag->segment.seg_addr.pval,
|
||||
(unsigned long)frag->segment.seg_key.key64));
|
||||
|
||||
return &frag->base;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Initiate an asynchronous send.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param descriptor (IN) Description of the data to be transfered
|
||||
* @param tag (IN) The tag value used to notify the peer.
|
||||
*/
|
||||
|
||||
int mca_btl_pcie_send(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct mca_btl_base_descriptor_t* descriptor,
|
||||
mca_btl_base_tag_t tag)
|
||||
|
||||
{
|
||||
/* mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl; */
|
||||
mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl;
|
||||
mca_btl_pcie_frag_t* frag = (mca_btl_pcie_frag_t*)descriptor;
|
||||
mca_btl_pcie_sma_buf_t *buf = NULL;
|
||||
int rc;
|
||||
btl_pcie_fifo_entry_t idx;
|
||||
|
||||
/* setup these fields so they get pulled over in the memcpy */
|
||||
frag->hdr->tag = tag;
|
||||
frag->hdr->length = frag->segment.seg_len;
|
||||
|
||||
if (frag->type == MCA_BTL_PCIE_TYPE_EAGER) {
|
||||
MCA_BTL_PCIE_SMA_BUF_ALLOC_EAGER(pcie_btl, buf, rc);
|
||||
} else {
|
||||
MCA_BTL_PCIE_SMA_BUF_ALLOC_MAX(pcie_btl, buf, rc);
|
||||
}
|
||||
if (NULL == frag) {
|
||||
BTL_ERROR(("can't alloc buf for frag of type %d", frag->type));
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
frag->endpoint = endpoint;
|
||||
frag->sma_buf = buf;
|
||||
/* Put fragment into network byte order before copy to save work
|
||||
done in sma region */
|
||||
OMPI_BTL_PCIE_HEADER_HTON(*frag->hdr);
|
||||
/* BWB - FIX ME - both pointers are 16 byte aligned and the
|
||||
buffers behind them are a multiple of 16 in length (but
|
||||
frag->segment.seg_len might not be). There might be a more
|
||||
optimized memcpy option given that behavior. */
|
||||
memcpy(buf->pcie_data.pval, frag->hdr,
|
||||
sizeof(mca_btl_pcie_header_t) +
|
||||
frag->segment.seg_len);
|
||||
|
||||
/* send the fragment pointer to the receiver,
|
||||
who will later ACK it back so that we can return it */
|
||||
idx = ((char*) buf->pcie_data.pval) - ((char*) endpoint->rem_frag_base);
|
||||
idx |= BTL_PCIE_FIFO_TYPE_SEND;
|
||||
|
||||
/* make sure the top bit is zero */
|
||||
assert((idx & BTL_PCIE_FIFO_TYPE_MASK) == BTL_PCIE_FIFO_TYPE_SEND);
|
||||
|
||||
/* need to barrier prior to writing remote completion */
|
||||
opal_atomic_wmb();
|
||||
|
||||
BTL_VERBOSE(("sent frag 0x%lx (offset %lx), tag %d, length %d, rc = %d",
|
||||
(long)frag, idx, frag->hdr->tag, frag->segment.seg_len, rc));
|
||||
|
||||
idx = opal_swap_bytes8(idx);
|
||||
rc = ompi_btl_pcie_fifo_set_msg(&endpoint->send_fifo, idx);
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
if(OMPI_ERR_RESOURCE_BUSY == rc) {
|
||||
/* BWB - FIX ME - queue for later */
|
||||
abort();
|
||||
} else {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
dd_dma_request(DD_adapter_handle *a_handle,
|
||||
struct AXON_dma_request *dma_req,
|
||||
int dma_requests_available,
|
||||
int *dma_requests_started)
|
||||
{
|
||||
int rc;
|
||||
#if 0
|
||||
struct AXON_dma_command_list dma_op;
|
||||
|
||||
memset (&dma_op, 0x00, sizeof(dma_op));
|
||||
dma_op.dma_req = dma_req;
|
||||
dma_op.dma_requests_available = dma_requests_available;
|
||||
rc = ioctl (a_handle->fd, AXONIO_ISSUE_DMA ,&dma_op);
|
||||
|
||||
if (0 == rc) {
|
||||
*dma_requests_started = dma_op.dma_requests_started;
|
||||
}
|
||||
#else
|
||||
struct AXON_dma_command_list_fast *command = a_handle->cmd_block;
|
||||
|
||||
command->dma_req_offset = sizeof (struct AXON_dma_command_list_fast);
|
||||
command->dma_requests_available = dma_requests_available;
|
||||
command->dma_requests_started = 0;
|
||||
dma_req->flags = AXON_DMAFLAG_WRITE_REMOTE_STATUS;
|
||||
memcpy ((char *) command + command->dma_req_offset,
|
||||
dma_req,
|
||||
sizeof (struct AXON_dma_request));
|
||||
|
||||
rc = ioctl (a_handle->fd, AXONIO_ISSUE_DMA_FAST, 0);
|
||||
if (0 == rc)
|
||||
*dma_requests_started = command->dma_requests_started;
|
||||
|
||||
#endif
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initiate an asynchronous put.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param descriptor (IN) Description of the data to be transferred
|
||||
*/
|
||||
|
||||
int mca_btl_pcie_put(
|
||||
mca_btl_base_module_t* btl,
|
||||
mca_btl_base_endpoint_t* endpoint,
|
||||
mca_btl_base_descriptor_t* descriptor)
|
||||
{
|
||||
|
||||
mca_btl_pcie_frag_t* frag = (mca_btl_pcie_frag_t*) descriptor;
|
||||
struct AXON_dma_request dma_req;
|
||||
int dma_reqs_started;
|
||||
int rc;
|
||||
volatile uint64_t *dma_status_addr;
|
||||
uint64_t dma_status;
|
||||
|
||||
frag->endpoint = endpoint;
|
||||
|
||||
memset(&dma_req,0x00,sizeof(dma_req));
|
||||
dma_req.dma_type = AXON_DMATYPE_PUT;
|
||||
|
||||
dma_req.local_descriptor[0].src_address = frag->base.des_src->seg_addr.lval;
|
||||
dma_req.local_descriptor[0].src_memory_region_handle = frag->base.des_src->seg_key.key64;
|
||||
|
||||
dma_req.remote_descriptor[0].src_address =
|
||||
opal_swap_bytes8(frag->base.des_dst->seg_addr.lval);
|
||||
dma_req.remote_descriptor[0].src_memory_region_handle =
|
||||
opal_swap_bytes8(frag->base.des_dst->seg_key.key64);
|
||||
|
||||
dma_req.transfer_size =
|
||||
dma_req.remote_descriptor[0].transfer_size =
|
||||
dma_req.local_descriptor[0].transfer_size = frag->base.des_src->seg_len;
|
||||
|
||||
dma_req.localDmaStatusOffset = endpoint->lcl_dma_status - (char*) endpoint->lcl_sma_ptr;
|
||||
dma_req.remoteDmaStatusOffset = 0;
|
||||
|
||||
dma_req.local_descriptor_count = 1;
|
||||
dma_req.remote_descriptor_count = 1;
|
||||
|
||||
dma_status_addr = (uint64_t*) endpoint->lcl_dma_status;
|
||||
*dma_status_addr = 0;
|
||||
|
||||
rc = dd_dma_request(&endpoint->pcie_adapter,
|
||||
&dma_req,
|
||||
1,
|
||||
&dma_reqs_started);
|
||||
|
||||
if (0 != rc) abort();
|
||||
|
||||
/* wait for completion, for now anyway */
|
||||
while (0 == (dma_status = *dma_status_addr)) {
|
||||
/* sched_yield(); */
|
||||
}
|
||||
|
||||
frag->base.des_cbfunc(btl, endpoint, &(frag->base), OMPI_SUCCESS);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Initiate an asynchronous get.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param descriptor (IN) Description of the data to be transferred
|
||||
*
|
||||
*/
|
||||
|
||||
int mca_btl_pcie_get(
|
||||
mca_btl_base_module_t* btl,
|
||||
mca_btl_base_endpoint_t* endpoint,
|
||||
mca_btl_base_descriptor_t* descriptor)
|
||||
{
|
||||
return OMPI_ERR_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Cleanup/release module resources.
|
||||
*/
|
||||
|
||||
int mca_btl_pcie_finalize(struct mca_btl_base_module_t* btl)
|
||||
{
|
||||
mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl;
|
||||
OBJ_DESTRUCT(&pcie_btl->pcie_lock);
|
||||
OBJ_DESTRUCT(&pcie_btl->pcie_sma_buf_eager);
|
||||
OBJ_DESTRUCT(&pcie_btl->pcie_sma_buf_max);
|
||||
OBJ_DESTRUCT(&pcie_btl->pcie_frag_eager);
|
||||
OBJ_DESTRUCT(&pcie_btl->pcie_frag_max);
|
||||
OBJ_DESTRUCT(&pcie_btl->pcie_frag_dma);
|
||||
OBJ_DESTRUCT(&pcie_btl->pcie_recv_frag);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
@ -1,368 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2009 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_BTL_PCIE_H
|
||||
#define MCA_BTL_PCIE_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "opal/align.h"
|
||||
#include "opal/event/event.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/class/opal_bitmap.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "ompi/mca/btl/base/base.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
|
||||
#include "btl_pcie_ddriver.h"
|
||||
#include "btl_pcie_frag.h"
|
||||
#include "btl_pcie_fifo.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
#define MCA_BTL_HAS_MPOOL 1
|
||||
|
||||
/**
|
||||
* PCIE BTL component.
|
||||
*/
|
||||
|
||||
struct mca_btl_pcie_component_t {
|
||||
/** BTL base component */
|
||||
mca_btl_base_component_1_0_1_t super;
|
||||
|
||||
/* ***** Configuration information ***** */
|
||||
|
||||
/** initial size of free lists */
|
||||
int pcie_free_list_num;
|
||||
|
||||
/** maximum size of free lists */
|
||||
int pcie_free_list_max;
|
||||
|
||||
/** number of elements to alloc when growing free lists */
|
||||
int pcie_free_list_inc;
|
||||
|
||||
/** name of send/recv memory pool */
|
||||
char* pcie_send_mpool_name;
|
||||
|
||||
/** name of put/get memory pool */
|
||||
char *pcie_dma_mpool_name;
|
||||
|
||||
/** Number of entries in the send/recv queue structure */
|
||||
int pcie_recv_queue_len;
|
||||
|
||||
/* **** Component data ***** */
|
||||
|
||||
/** array of available modules */
|
||||
struct mca_btl_pcie_module_t *pcie_btls;
|
||||
|
||||
/** Number of initialized pcie_btl modules */
|
||||
uint32_t pcie_num_btls;
|
||||
|
||||
/** list of pcie proc structures, created during add_procs */
|
||||
opal_list_t pcie_procs;
|
||||
|
||||
/** lock for accessing component state */
|
||||
opal_mutex_t pcie_lock;
|
||||
};
|
||||
typedef struct mca_btl_pcie_component_t mca_btl_pcie_component_t;
|
||||
|
||||
OMPI_MODULE_DECLSPEC extern mca_btl_pcie_component_t mca_btl_pcie_component;
|
||||
|
||||
/**
|
||||
* BTL Module Interface
|
||||
*/
|
||||
struct mca_btl_pcie_module_t {
|
||||
mca_btl_base_module_t super; /**< base BTL interface */
|
||||
|
||||
bool active;
|
||||
|
||||
mca_btl_base_recv_reg_t pcie_reg[MCA_BTL_TAG_MAX];
|
||||
|
||||
/** name of the pcie device */
|
||||
char *lcl_dev_name;
|
||||
|
||||
/** Free list of communication buffers in the SMA region */
|
||||
ompi_free_list_t pcie_sma_buf_eager;
|
||||
ompi_free_list_t pcie_sma_buf_max;
|
||||
|
||||
/** Free list of bounce fragments, normal user memory */
|
||||
ompi_free_list_t pcie_frag_eager;
|
||||
ompi_free_list_t pcie_frag_max;
|
||||
|
||||
/* free list of DMA fragments */
|
||||
ompi_free_list_t pcie_frag_dma;
|
||||
|
||||
/* single receive fragment to handle upcalls on message reception.
|
||||
This will need to be a free list if multiple receive callbacks
|
||||
could be triggered at the same time, which will happen if the
|
||||
code goes MT hot. */
|
||||
mca_btl_pcie_frag_recv_t pcie_recv_frag;
|
||||
|
||||
/* lock for accessing module state */
|
||||
opal_mutex_t pcie_lock;
|
||||
|
||||
/* mpool for allocating the members of pcie_sma_buf* */
|
||||
struct mca_mpool_base_module_t* pcie_mpool;
|
||||
/* mpool for RDMA pinning */
|
||||
struct mca_mpool_base_module_t* rdma_mpool;
|
||||
|
||||
/* Endpoint associated with this module (there's a one-to-one
|
||||
mapping of modules and endpoints, since a device can only
|
||||
handle one endpoint at a time */
|
||||
struct mca_btl_base_endpoint_t* endpoint;
|
||||
};
|
||||
typedef struct mca_btl_pcie_module_t mca_btl_pcie_module_t;
|
||||
extern mca_btl_pcie_module_t mca_btl_pcie_module;
|
||||
|
||||
struct mca_btl_pcie_reg_t {
|
||||
mca_mpool_base_registration_t base;
|
||||
AXON_memory_region_handle handle;
|
||||
};
|
||||
typedef struct mca_btl_pcie_reg_t mca_btl_pcie_reg_t;
|
||||
|
||||
struct mca_btl_pcie_modex_info_t {
|
||||
char hostname[ORTE_MAX_HOSTNAME_SIZE];
|
||||
char devicename[PATH_MAX];
|
||||
};
|
||||
typedef struct mca_btl_pcie_modex_info_t mca_btl_pcie_modex_info_t;
|
||||
#define MCA_BTL_PCIE_MODEX_INFO_HTON(h)
|
||||
#define MCA_BTL_PCIE_MODEX_INFO_NTOH(h)
|
||||
|
||||
|
||||
/**
|
||||
* Register PCIE component parameters with the MCA framework
|
||||
*/
|
||||
extern int mca_btl_pcie_component_open(void);
|
||||
|
||||
/**
|
||||
* Any final cleanup before being unloaded.
|
||||
*/
|
||||
extern int mca_btl_pcie_component_close(void);
|
||||
|
||||
/**
|
||||
* PCIE component initialization.
|
||||
*
|
||||
* @param num_btl_modules (OUT) Number of BTLs returned in BTL array.
|
||||
* @param allow_multi_user_threads (OUT) Flag indicating whether BTL supports user threads (TRUE)
|
||||
* @param have_hidden_threads (OUT) Flag indicating whether BTL uses threads (TRUE)
|
||||
*/
|
||||
extern mca_btl_base_module_t** mca_btl_pcie_component_init(
|
||||
int *num_btl_modules,
|
||||
bool allow_multi_user_threads,
|
||||
bool have_hidden_threads
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
* PCIe BTL component progress.
|
||||
*/
|
||||
extern int mca_btl_pcie_component_progress(void);
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Cleanup any resources held by the BTL.
|
||||
*
|
||||
* @param btl BTL instance.
|
||||
* @return OMPI_SUCCESS or error status on failure.
|
||||
*/
|
||||
|
||||
extern int mca_btl_pcie_finalize(
|
||||
struct mca_btl_base_module_t* btl
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
* PML->BTL notification of change in the process list.
|
||||
*
|
||||
* @param btl (IN)
|
||||
* @param nprocs (IN) Number of processes
|
||||
* @param procs (IN) Set of processes
|
||||
* @param peers (OUT) Set of (optional) peer addressing info.
|
||||
* @param reachable (IN/OUT) Set of processes that are reachable via this BTL.
|
||||
* @return OMPI_SUCCESS or error status on failure.
|
||||
*
|
||||
*/
|
||||
|
||||
extern int mca_btl_pcie_add_procs(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
size_t nprocs,
|
||||
struct ompi_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t** peers,
|
||||
opal_bitmap_t* reachable
|
||||
);
|
||||
|
||||
/**
|
||||
* PML->BTL notification of change in the process list.
|
||||
*
|
||||
* @param btl (IN) BTL instance
|
||||
* @param nproc (IN) Number of processes.
|
||||
* @param procs (IN) Set of processes.
|
||||
* @param peers (IN) Set of peer data structures.
|
||||
* @return Status indicating if cleanup was successful
|
||||
*
|
||||
*/
|
||||
|
||||
extern int mca_btl_pcie_del_procs(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
size_t nprocs,
|
||||
struct ompi_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t** peers
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
* Initiate an asynchronous send.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param descriptor (IN) Description of the data to be transferred
|
||||
* @param tag (IN) The tag value used to notify the peer.
|
||||
*/
|
||||
|
||||
extern int mca_btl_pcie_send(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* btl_peer,
|
||||
struct mca_btl_base_descriptor_t* descriptor,
|
||||
mca_btl_base_tag_t tag
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
* Initiate an asynchronous put.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param descriptor (IN) Description of the data to be transferred
|
||||
*/
|
||||
|
||||
extern int mca_btl_pcie_put(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* btl_peer,
|
||||
struct mca_btl_base_descriptor_t* decriptor
|
||||
);
|
||||
|
||||
|
||||
/**
|
||||
* Initiate an asynchronous get.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param descriptor (IN) Description of the data to be transferred
|
||||
*/
|
||||
|
||||
extern int mca_btl_pcie_get(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* btl_peer,
|
||||
struct mca_btl_base_descriptor_t* decriptor
|
||||
);
|
||||
|
||||
/**
|
||||
* Register a callback function that is called on receipt
|
||||
* of a fragment.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @return Status indicating if registration was successful
|
||||
*
|
||||
*/
|
||||
|
||||
extern int mca_btl_pcie_register(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_tag_t tag,
|
||||
mca_btl_base_module_recv_cb_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
/**
|
||||
* Allocate a descriptor with a segment of the requested size.
|
||||
* Note that the BTL layer may choose to return a smaller size
|
||||
* if it cannot support the request.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param size (IN) Request segment size.
|
||||
*/
|
||||
|
||||
extern mca_btl_base_descriptor_t* mca_btl_pcie_alloc(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
uint8_t order,
|
||||
size_t size,
|
||||
uint32_t flags);
|
||||
|
||||
|
||||
/**
|
||||
* Return a segment allocated by this BTL.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param descriptor (IN) Allocated descriptor.
|
||||
*/
|
||||
|
||||
extern int mca_btl_pcie_free(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
mca_btl_base_descriptor_t* des);
|
||||
|
||||
|
||||
/**
|
||||
* Prepare a descriptor for send/rdma using the supplied
|
||||
* convertor. If the convertor references data that is contiguous,
|
||||
* the descriptor may simply point to the user buffer. Otherwise,
|
||||
* this routine is responsible for allocating buffer space and
|
||||
* packing if required.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL peer addressing
|
||||
* @param convertor (IN) Data type convertor
|
||||
* @param reserve (IN) Additional bytes requested by upper layer to precede user data
|
||||
* @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT)
|
||||
*/
|
||||
|
||||
mca_btl_base_descriptor_t* mca_btl_pcie_prepare_src(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct mca_mpool_base_registration_t* registration,
|
||||
struct opal_convertor_t* convertor,
|
||||
uint8_t order,
|
||||
size_t reserve,
|
||||
size_t* size,
|
||||
uint32_t flags
|
||||
);
|
||||
|
||||
extern mca_btl_base_descriptor_t* mca_btl_pcie_prepare_dst(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct mca_mpool_base_registration_t* registration,
|
||||
struct opal_convertor_t* convertor,
|
||||
uint8_t order,
|
||||
size_t reserve,
|
||||
size_t* size,
|
||||
uint32_t flags);
|
||||
|
||||
/**
|
||||
* Fault Tolerance Event Notification Function
|
||||
* @param state Checkpoint State
|
||||
* @return OMPI_SUCCESS or failure status
|
||||
*/
|
||||
int mca_btl_pcie_ft_event(int state);
|
||||
|
||||
char* ompi_btl_pcie_cfg_get_local_device(char* hostname, int core);
|
||||
char* ompi_btl_pcie_cfg_get_matching_device(char* remote_hostname,
|
||||
char* remote_device);
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* #ifndef MCA_BTL_PCIE_H */
|
@ -1,196 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC.
|
||||
* All righs reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/os_path.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "opal/mca/installdirs/installdirs.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/show_help.h"
|
||||
|
||||
#include "btl_pcie.h"
|
||||
#include "btl_pcie_lex.h"
|
||||
|
||||
static char *cfg_filename;
|
||||
static char *key_buffer = NULL;
|
||||
static size_t key_buffer_len = 0;
|
||||
|
||||
/*
|
||||
* Local functions
|
||||
*/
|
||||
static char* parse_file(char *filename, bool local, char *key);
|
||||
|
||||
|
||||
/**************************************************************************/
|
||||
|
||||
char *
|
||||
ompi_btl_pcie_cfg_get_local_device(char* hostname, int core)
|
||||
{
|
||||
char *key, *ret, *file;
|
||||
|
||||
file = opal_os_path(false,
|
||||
opal_install_dirs.sysconfdir,
|
||||
"mca-btl-pcie-local-resources.cfg",
|
||||
NULL);
|
||||
|
||||
asprintf(&key, "%s:%d", hostname, core);
|
||||
ret = parse_file(file, true, key);
|
||||
free(key);
|
||||
free(file);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
char *
|
||||
ompi_btl_pcie_cfg_get_matching_device(char* remote_hostname,
|
||||
char* remote_device)
|
||||
{
|
||||
char *key, *ret, *pos, *file;
|
||||
|
||||
file = opal_os_path(false,
|
||||
opal_install_dirs.sysconfdir,
|
||||
"mca-btl-pcie-remote-resources.cfg",
|
||||
NULL);
|
||||
|
||||
asprintf(&key, "%s:%s", remote_hostname, remote_device);
|
||||
ret = parse_file(file, false, key);
|
||||
free(file);
|
||||
free(key);
|
||||
|
||||
if (ret == NULL) return NULL;
|
||||
|
||||
pos = strchr(ret, ':');
|
||||
if (pos == NULL) {
|
||||
free(ret);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* make sure this is my hostname */
|
||||
*pos = '\0';
|
||||
if (0 != strcmp(orte_process_info.nodename, ret)) {
|
||||
free(ret);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pos++;
|
||||
pos = strdup(pos);
|
||||
free(ret);
|
||||
|
||||
return pos;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Parse a single file
|
||||
*/
|
||||
static char* parse_file(char *filename, bool local, char* key)
|
||||
{
|
||||
int val;
|
||||
bool me;
|
||||
char *tmp = NULL;
|
||||
|
||||
/* Open the file */
|
||||
cfg_filename = filename;
|
||||
btl_pcie_cfg_yyin = fopen(filename, "r");
|
||||
if (NULL == btl_pcie_cfg_yyin) {
|
||||
orte_show_help("help-mpi-btl-pcie.txt", "ini file:file not found",
|
||||
true, filename);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* Do the parsing */
|
||||
btl_pcie_cfg_parse_done = false;
|
||||
btl_pcie_cfg_yynewlines = 1;
|
||||
btl_pcie_cfg_init_buffer(btl_pcie_cfg_yyin);
|
||||
while (!btl_pcie_cfg_parse_done) {
|
||||
val = btl_pcie_cfg_yylex();
|
||||
switch (val) {
|
||||
case BTL_PCIE_CFG_PARSE_DONE:
|
||||
/* This will also set btl_pcie_cfg_parse_done to true, so just
|
||||
break here */
|
||||
break;
|
||||
|
||||
case BTL_PCIE_CFG_PARSE_NEWLINE:
|
||||
/* blank line! ignore it */
|
||||
break;
|
||||
|
||||
case BTL_PCIE_CFG_PARSE_HOSTNAME_CORE:
|
||||
if (!local) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (0 == strcmp(key, btl_pcie_cfg_yytext)) {
|
||||
me = true;
|
||||
} else {
|
||||
me = false;
|
||||
}
|
||||
|
||||
val = btl_pcie_cfg_yylex();
|
||||
if (BTL_PCIE_CFG_PARSE_DEVICE != val) {
|
||||
abort();
|
||||
}
|
||||
|
||||
if (me) return strdup(btl_pcie_cfg_yytext);
|
||||
|
||||
break;
|
||||
|
||||
case BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE:
|
||||
if (local) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (0 == strcmp(key, btl_pcie_cfg_yytext)) {
|
||||
me = true;
|
||||
} else {
|
||||
tmp = strdup(btl_pcie_cfg_yytext);
|
||||
me = false;
|
||||
}
|
||||
|
||||
val = btl_pcie_cfg_yylex();
|
||||
if (BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE != val) {
|
||||
abort();
|
||||
}
|
||||
|
||||
if (me) {
|
||||
return strdup(btl_pcie_cfg_yytext);
|
||||
} else {
|
||||
if (0 == strcmp(key, btl_pcie_cfg_yytext)) {
|
||||
return tmp;
|
||||
} else {
|
||||
free(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
return NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(btl_pcie_cfg_yyin);
|
||||
|
||||
cleanup:
|
||||
if (NULL != key_buffer) {
|
||||
free(key_buffer);
|
||||
key_buffer = NULL;
|
||||
key_buffer_len = 0;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1,518 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC.
|
||||
* All righs reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/constants.h"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <sched.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "opal/event/event.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/if.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "opal/mca/paffinity/paffinity.h"
|
||||
#include "opal/mca/paffinity/base/base.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "ompi/mca/btl/base/base.h"
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/runtime/ompi_module_exchange.h"
|
||||
|
||||
#include "btl_pcie.h"
|
||||
#include "btl_pcie_frag.h"
|
||||
#include "btl_pcie_endpoint.h"
|
||||
#include "btl_pcie_ddriver.h"
|
||||
|
||||
static int
|
||||
dd_register_memory_region (DD_adapter_handle *a_handle,
|
||||
AXON_memory_region_handle *mr_handle,
|
||||
void *starting_addr,
|
||||
int size,
|
||||
int flags)
|
||||
{
|
||||
int rc;
|
||||
struct AXON_MR_registration regInfo;
|
||||
|
||||
memset (®Info, 0, sizeof(regInfo));
|
||||
|
||||
regInfo.local_dma_memory_size = size;
|
||||
regInfo.local_dma_memory = (__u64) starting_addr;
|
||||
/* codeme Q-should we check for valid permission flags here or just
|
||||
* let the ioctl code handle it?
|
||||
*/
|
||||
regInfo.permissions = flags|1; /* always turn on local access */
|
||||
|
||||
rc = ioctl (a_handle->fd, AXONIO_DMA_REGISTER, ®Info);
|
||||
if(-1 == rc) {
|
||||
return -1;
|
||||
}
|
||||
*mr_handle = (AXON_memory_region_handle)regInfo.memory_region_handle;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
dd_deregister_memory_region (DD_adapter_handle *a_handle,
|
||||
AXON_memory_region_handle *mr_handle)
|
||||
{
|
||||
return (ioctl (a_handle->fd, AXONIO_DMA_DEREGISTER, mr_handle));
|
||||
}
|
||||
|
||||
static int pcie_reg_mr(void *reg_data, void *base, size_t size,
|
||||
mca_mpool_base_registration_t *reg);
|
||||
static int pcie_dereg_mr(void* reg_data, mca_mpool_base_registration_t *reg);
|
||||
|
||||
|
||||
mca_btl_pcie_component_t mca_btl_pcie_component = {
|
||||
{
|
||||
/* First, the mca_base_component_t struct containing meta information
|
||||
about the component itself */
|
||||
|
||||
{
|
||||
/* Indicate that we are a btl v2.0.0 component (which also implies a
|
||||
specific MCA version) */
|
||||
|
||||
MCA_BTL_BASE_VERSION_2_0_0,
|
||||
|
||||
"pcie", /* MCA component name */
|
||||
OMPI_MAJOR_VERSION, /* MCA component major version */
|
||||
OMPI_MINOR_VERSION, /* MCA component minor version */
|
||||
OMPI_RELEASE_VERSION, /* MCA component release version */
|
||||
mca_btl_pcie_component_open, /* component open */
|
||||
mca_btl_pcie_component_close /* component close */
|
||||
},
|
||||
|
||||
/* Next the MCA v2.0.0 component meta data */
|
||||
|
||||
{
|
||||
false
|
||||
},
|
||||
|
||||
mca_btl_pcie_component_init,
|
||||
mca_btl_pcie_component_progress,
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* utility routines for parameter registration
|
||||
*/
|
||||
static char*
|
||||
mca_btl_pcie_param_register_string(const char* param_name,
|
||||
const char* param_desc,
|
||||
const char* default_value)
|
||||
{
|
||||
char *value;
|
||||
|
||||
mca_base_param_reg_string(&mca_btl_pcie_component.super.btl_version,
|
||||
param_name, param_desc, false, false,
|
||||
default_value, &value);
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
mca_btl_pcie_param_register_int(const char* param_name,
|
||||
const char* param_desc,
|
||||
int default_value)
|
||||
{
|
||||
int value;
|
||||
|
||||
mca_base_param_reg_int(&mca_btl_pcie_component.super.btl_version,
|
||||
param_name, param_desc, false, false,
|
||||
default_value, &value);
|
||||
return value;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Register PCIE device found in local config file. The MCA framework
|
||||
* will make this available to all peers.
|
||||
*/
|
||||
static int
|
||||
btl_pcie_modex_send(void)
|
||||
{
|
||||
size_t size;
|
||||
unsigned int i;
|
||||
mca_btl_pcie_modex_info_t *info;
|
||||
|
||||
size = mca_btl_pcie_component.pcie_num_btls *
|
||||
sizeof(mca_btl_pcie_modex_info_t);
|
||||
info = malloc(size);
|
||||
if (NULL == info) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
|
||||
|
||||
for (i = 0 ; i < mca_btl_pcie_component.pcie_num_btls ; ++i) {
|
||||
strncpy(info[i].hostname,
|
||||
orte_process_info.nodename,
|
||||
ORTE_MAX_HOSTNAME_SIZE - 1);
|
||||
info[i].hostname[ORTE_MAX_HOSTNAME_SIZE - 1] = '\0';
|
||||
strncpy(info[i].devicename,
|
||||
mca_btl_pcie_component.pcie_btls[i].lcl_dev_name, PATH_MAX - 1);
|
||||
info[i].devicename[PATH_MAX - 1] = '\0';
|
||||
MCA_BTL_PCIE_MODEX_INFO_HTON(info[i]);
|
||||
}
|
||||
|
||||
#if (OMPI_MAJOR_VERSION <= 1) && (OMPI_MINOR_VERSION <= 2)
|
||||
return mca_pml_base_modex_send(&mca_btl_pcie_component.super.btl_version, info, size);
|
||||
#else
|
||||
return ompi_modex_send(&mca_btl_pcie_component.super.btl_version, info, size);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Called by MCA framework to open the component, registers
|
||||
* component parameters.
|
||||
*/
|
||||
int
|
||||
mca_btl_pcie_component_open(void)
|
||||
{
|
||||
/* initialize state */
|
||||
mca_btl_pcie_component.pcie_num_btls = 0;
|
||||
mca_btl_pcie_component.pcie_btls = NULL;
|
||||
|
||||
/* initialize objects */
|
||||
OBJ_CONSTRUCT(&mca_btl_pcie_component.pcie_procs, opal_list_t);
|
||||
|
||||
/* component parameters */
|
||||
mca_btl_pcie_component.pcie_free_list_num =
|
||||
mca_btl_pcie_param_register_int ("free_list_num",
|
||||
"Initial size of free lists (must be >= 1)",
|
||||
16);
|
||||
/* BWB - FIX ME - The need to limit the free list max size is an
|
||||
artifact of the lack of flow control in the BTL. Since we're
|
||||
already using bounce fragments, it should be possible to make
|
||||
this unlimited, and then properly handle the case where an SMA
|
||||
region isn't available when send is called on a given frag.
|
||||
Something similar to what Open IB does when we don't have send
|
||||
credits would work really well here. See comment in
|
||||
btl_pcie_send() for more information. */
|
||||
mca_btl_pcie_component.pcie_free_list_max =
|
||||
mca_btl_pcie_param_register_int ("free_list_max",
|
||||
"Max size of free lists. "
|
||||
"free_list_max * (first_frag_size + max_send_size) "
|
||||
"must be less than (SMA memory size - (recv_queue_len * 4) - 8)",
|
||||
32);
|
||||
mca_btl_pcie_component.pcie_free_list_inc =
|
||||
mca_btl_pcie_param_register_int ("free_list_inc",
|
||||
"Increment size of free lists (must be >= 1)",
|
||||
8);
|
||||
|
||||
mca_btl_pcie_component.pcie_send_mpool_name =
|
||||
mca_btl_pcie_param_register_string("send_mpool",
|
||||
"Name of the memory pool to be used for send messages. "
|
||||
"(it is unlikely that you will ever want to change this)",
|
||||
"pcie");
|
||||
|
||||
mca_btl_pcie_component.pcie_dma_mpool_name =
|
||||
mca_btl_pcie_param_register_string("dma_mpool",
|
||||
"Name of the memory pool to be used for rdma messages. "
|
||||
"(it is unlikely that you will ever want to change this)",
|
||||
"rdma");
|
||||
|
||||
mca_btl_pcie_component.pcie_recv_queue_len =
|
||||
mca_btl_pcie_param_register_int("recv_queue_len",
|
||||
"Length of receive fifo. Must be 4 * free_list_max",
|
||||
256);
|
||||
|
||||
mca_btl_pcie_module.super.btl_exclusivity =
|
||||
mca_btl_pcie_param_register_int ("exclusivity",
|
||||
"Priority of PCIe BTL. (must be > 0)",
|
||||
MCA_BTL_EXCLUSIVITY_DEFAULT + 1);
|
||||
|
||||
mca_btl_pcie_module.super.btl_eager_limit =
|
||||
mca_btl_pcie_param_register_int ("first_frag_size",
|
||||
"Size (in bytes) of the first fragment sent of any "
|
||||
"message. It is the maximum size of \"short\" messages "
|
||||
"and the maximum size of the \"phase 1\" fragment sent "
|
||||
"for all large messages (must be >= 1).",
|
||||
1*1024) - sizeof(mca_btl_pcie_header_t);
|
||||
mca_btl_pcie_module.super.btl_rndv_eager_limit =
|
||||
mca_btl_pcie_param_register_int ("btl_rndv_eager_limit",
|
||||
"Minimum message size (in bytes) that will be striped "
|
||||
"across multiple network devices when using "
|
||||
"send/receive semantics. Messages shorter than this "
|
||||
"size will be sent across a single network (must be >= "
|
||||
"1)",
|
||||
2*1024) - sizeof(mca_btl_pcie_header_t);
|
||||
mca_btl_pcie_module.super.btl_max_send_size =
|
||||
mca_btl_pcie_param_register_int ("max_send_size",
|
||||
"Maximum size (in bytes) of a single \"phase 2\" fragment "
|
||||
"of a long message when using the pipeline protocol "
|
||||
"(must be >= 1)",
|
||||
4*1024) - sizeof(mca_btl_pcie_header_t);
|
||||
mca_btl_pcie_module.super.btl_rdma_pipeline_send_length =
|
||||
mca_btl_pcie_param_register_int("rdma_pipeline_send_length",
|
||||
"Length of the \"phase 2\" portion of a large message (in "
|
||||
"bytes) when using the pipeline protocol. This part of "
|
||||
"the message will be split into fragments of size "
|
||||
"max_send_size and sent using send/receive semantics "
|
||||
"(must be >= 0; only relevant when the PUT flag is "
|
||||
"set)",
|
||||
12*1024);
|
||||
mca_btl_pcie_module.super.btl_rdma_pipeline_frag_size =
|
||||
mca_btl_pcie_param_register_int("rdma_pipeline_frag_size",
|
||||
"Maximum size (in bytes) of a single \"phase 3\" fragment "
|
||||
"from a long message when using the pipeline protocol. "
|
||||
"These fragments will be sent using RDMA semantics "
|
||||
"(must be >= 1; only relevant when the PUT flag is "
|
||||
"set)",
|
||||
2*1024*1024);
|
||||
mca_btl_pcie_module.super.btl_min_rdma_pipeline_size =
|
||||
mca_btl_pcie_param_register_int("min_rdma_pipeline_size",
|
||||
"Messages smaller than this size (in bytes) will not "
|
||||
"use the RDMA pipeline protocol. Instead, they will be "
|
||||
"split into fragments of max_send_size and sent using "
|
||||
"send/receive semantics (must be >=0, and is "
|
||||
"automatically adjusted up to at least "
|
||||
"(eager_limit+btl_rdma_pipeline_send_length); only "
|
||||
"relevant when the PUT flag is set)",
|
||||
16 * 1024);
|
||||
|
||||
mca_btl_pcie_module.super.btl_flags =
|
||||
mca_btl_pcie_param_register_int("flags",
|
||||
"BTL control flags. Defaults to (SEND|PUT|HETEROGENEOUS_RDMA)",
|
||||
#ifdef MCA_BTL_FLAGS_HETEROGENEOUS_RDMA
|
||||
MCA_BTL_FLAGS_HETEROGENEOUS_RDMA |
|
||||
#endif
|
||||
MCA_BTL_FLAGS_SEND |
|
||||
MCA_BTL_FLAGS_PUT);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
mca_btl_pcie_component_close(void)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
mca_btl_base_module_t**
|
||||
mca_btl_pcie_component_init(int *num_btl_modules,
|
||||
bool enable_progress_threads,
|
||||
bool enable_mpi_threads)
|
||||
{
|
||||
cpu_set_t cpu_set;
|
||||
unsigned int i;
|
||||
int num_cpus, *cpus;
|
||||
struct stat stat_buf;
|
||||
struct mca_mpool_base_resources_t mpool_resources;
|
||||
mca_btl_base_module_t **btl_array;
|
||||
|
||||
*num_btl_modules = 0;
|
||||
|
||||
/* find all cpus we're bound to */
|
||||
cpus = malloc(CPU_SETSIZE * sizeof(int));
|
||||
memset(cpus, 0, CPU_SETSIZE * sizeof(int));
|
||||
num_cpus = 0;
|
||||
CPU_ZERO(&cpu_set);
|
||||
|
||||
sched_getaffinity(0, sizeof(cpu_set), &cpu_set);
|
||||
for (i = 0 ; i < CPU_SETSIZE ; ++i) {
|
||||
if (CPU_ISSET(i, &cpu_set)) cpus[num_cpus++] = i;
|
||||
}
|
||||
#if defined(__PPC__)
|
||||
if (num_cpus > 1) {
|
||||
orte_show_help("help-mpi-btl-pcie.txt", "initialization:more-than-one-cpu",
|
||||
true, num_cpus);
|
||||
return NULL;
|
||||
}
|
||||
#endif /* #ifdef __PPC__ */
|
||||
if (0 == num_cpus) {
|
||||
orte_show_help("help-mpi-btl-pcie.txt", "initialization:no-cpus",
|
||||
true);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Create the module storage space */
|
||||
mca_btl_pcie_component.pcie_num_btls = num_cpus;
|
||||
mca_btl_pcie_component.pcie_btls = malloc(mca_btl_pcie_component.pcie_num_btls *
|
||||
sizeof(struct mca_btl_pcie_module_t));
|
||||
btl_array = malloc(mca_btl_pcie_component.pcie_num_btls *
|
||||
sizeof(mca_btl_base_module_t*));
|
||||
|
||||
/* initialize the modules */
|
||||
for (i = 0 ; i < mca_btl_pcie_component.pcie_num_btls ; ++i) {
|
||||
mca_btl_pcie_module_t *btl = &(mca_btl_pcie_component.pcie_btls[i]);
|
||||
|
||||
btl_array[i] = (mca_btl_base_module_t*) btl;
|
||||
|
||||
memcpy(btl, &mca_btl_pcie_module, sizeof(mca_btl_pcie_module_t));
|
||||
|
||||
/* check if we have a device listed in our local config file */
|
||||
btl->lcl_dev_name =
|
||||
ompi_btl_pcie_cfg_get_local_device(orte_process_info.nodename, cpus[i]);
|
||||
BTL_VERBOSE(("Local device for %s:%d = %s", orte_process_info.nodename, cpus[i],
|
||||
btl->lcl_dev_name));
|
||||
|
||||
/* make sure said device is sane */
|
||||
if(stat(btl->lcl_dev_name, &stat_buf)) {
|
||||
BTL_ERROR(("Error %s opening device %s\n", strerror(errno),
|
||||
btl->lcl_dev_name));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
OBJ_CONSTRUCT(&btl->pcie_sma_buf_eager, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&btl->pcie_sma_buf_max, ompi_free_list_t);
|
||||
|
||||
OBJ_CONSTRUCT(&btl->pcie_frag_eager, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&btl->pcie_frag_max, ompi_free_list_t);
|
||||
|
||||
OBJ_CONSTRUCT(&btl->pcie_frag_dma, ompi_free_list_t);
|
||||
|
||||
OBJ_CONSTRUCT(&btl->pcie_lock, opal_mutex_t);
|
||||
|
||||
/* time to setup DMA mpool */
|
||||
mpool_resources.reg_data = (void*) btl;
|
||||
mpool_resources.sizeof_reg = sizeof(mca_btl_pcie_reg_t);
|
||||
mpool_resources.register_mem = pcie_reg_mr;
|
||||
mpool_resources.deregister_mem = pcie_dereg_mr;
|
||||
btl->rdma_mpool =
|
||||
mca_mpool_base_module_create("rdma",
|
||||
&btl->super,
|
||||
&mpool_resources);
|
||||
btl->super.btl_mpool = btl->rdma_mpool;
|
||||
|
||||
btl->active = false;
|
||||
}
|
||||
|
||||
/* push our address info to everyone */
|
||||
btl_pcie_modex_send();
|
||||
|
||||
*num_btl_modules = mca_btl_pcie_component.pcie_num_btls;
|
||||
return btl_array;;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
mca_btl_pcie_component_progress()
|
||||
{
|
||||
unsigned int i;
|
||||
btl_pcie_fifo_entry_t msg_idx;
|
||||
int count = 0;
|
||||
|
||||
for (i = 0 ; i < mca_btl_pcie_component.pcie_num_btls ; ++i) {
|
||||
mca_btl_pcie_module_t *pcie_btl =
|
||||
&(mca_btl_pcie_component.pcie_btls[i]);
|
||||
mca_btl_base_endpoint_t *endpoint = pcie_btl->endpoint;
|
||||
|
||||
if (!pcie_btl->active) continue;
|
||||
|
||||
msg_idx = ompi_btl_pcie_fifo_get_msg(&endpoint->recv_fifo);
|
||||
|
||||
/* Potential optimization is to drain every time we enter progress */
|
||||
if (msg_idx) {
|
||||
int rc;
|
||||
int ack = ((msg_idx & BTL_PCIE_FIFO_TYPE_MASK) == BTL_PCIE_FIFO_TYPE_ACK) ? 1 : 0;
|
||||
msg_idx &= BTL_PCIE_FIFO_DATA_MASK;
|
||||
|
||||
if (ack) {
|
||||
/* we have a send frag ack */
|
||||
mca_btl_pcie_frag_t *frag = (mca_btl_pcie_frag_t*) msg_idx;
|
||||
mca_btl_pcie_sma_buf_t *buf = frag->sma_buf;
|
||||
|
||||
BTL_VERBOSE(("received ack for frag %lx (0x%lx)", (long)msg_idx, (long)frag));
|
||||
|
||||
/* Done with buffer, can return now */
|
||||
MCA_BTL_PCIE_SMA_BUF_RETURN(pcie_btl, buf, rc);
|
||||
|
||||
frag->base.des_cbfunc(&pcie_btl->super, endpoint,
|
||||
&(frag->base),
|
||||
OMPI_SUCCESS);
|
||||
|
||||
/* return the send credit */
|
||||
ompi_btl_pcie_fifo_complete_msg(&endpoint->send_fifo, 1);
|
||||
count++;
|
||||
} else {
|
||||
/* we have a send frag (incoming data) */
|
||||
mca_btl_pcie_frag_t *recv_frag = &pcie_btl->pcie_recv_frag;
|
||||
mca_btl_pcie_header_t *hdr = (mca_btl_pcie_header_t*) (endpoint->lcl_frag_base + msg_idx);
|
||||
recv_frag->hdr = hdr;
|
||||
OMPI_BTL_PCIE_HEADER_NTOH((*recv_frag->hdr));
|
||||
recv_frag->segment.seg_addr.pval = ((unsigned char*) recv_frag->hdr) + sizeof(mca_btl_pcie_header_t);
|
||||
recv_frag->segment.seg_len = recv_frag->hdr->length;
|
||||
BTL_VERBOSE(("received tag %d, base 0x%lx", recv_frag->hdr->tag, (long)&recv_frag->base));
|
||||
pcie_btl->pcie_reg[recv_frag->hdr->tag].cbfunc(&pcie_btl->super,
|
||||
recv_frag->hdr->tag, &recv_frag->base,
|
||||
pcie_btl->pcie_reg[recv_frag->hdr->tag].cbdata);
|
||||
|
||||
rc = ompi_btl_pcie_fifo_set_msg(&endpoint->send_fifo, hdr->send_frag.lval);
|
||||
/* BWB - FIX ME - this is only safe if the number of
|
||||
queue entries is twice the free list size */
|
||||
ompi_btl_pcie_fifo_complete_msg(&endpoint->send_fifo, 1);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
pcie_reg_mr(void *reg_data, void *base, size_t size,
|
||||
mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
mca_btl_pcie_module_t * pcie_btl = (mca_btl_pcie_module_t*) reg_data;
|
||||
mca_btl_pcie_endpoint_t * endpoint = pcie_btl->endpoint;
|
||||
mca_btl_pcie_reg_t * pcie_reg = (mca_btl_pcie_reg_t*) reg;
|
||||
|
||||
if(dd_register_memory_region(&endpoint->pcie_adapter,
|
||||
&pcie_reg->handle,
|
||||
base,
|
||||
size,
|
||||
AXON_MR_LOCAL_READ |
|
||||
AXON_MR_LOCAL_WRITE |
|
||||
AXON_MR_REMOTE_ACCESS |
|
||||
AXON_MR_REMOTE_READ |
|
||||
AXON_MR_REMOTE_WRITE )) {
|
||||
BTL_ERROR(("error deregistering memory!\n"));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
pcie_dereg_mr(void* reg_data, mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
mca_btl_pcie_module_t * pcie_btl = (mca_btl_pcie_module_t*) reg_data;
|
||||
mca_btl_pcie_endpoint_t * endpoint = pcie_btl->endpoint;
|
||||
mca_btl_pcie_reg_t * pcie_reg = (mca_btl_pcie_reg_t*) reg;
|
||||
|
||||
if(pcie_reg->handle >= 0) {
|
||||
if(dd_deregister_memory_region(&endpoint->pcie_adapter,
|
||||
&pcie_reg->handle)) {
|
||||
BTL_ERROR(("error deregistering memory!\n"));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
} else {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
@ -1,21 +0,0 @@
|
||||
#ifndef _BTL_PCIE_DDRIVER_H
|
||||
#define _BTL_PCIE_DDRIVER_H
|
||||
|
||||
typedef struct DD_adapter_handle
|
||||
{
|
||||
int local_sma_size;
|
||||
int remote_sma_size;
|
||||
void* local_sma_address;
|
||||
void* remote_sma_address;
|
||||
int fd;
|
||||
void *cmd_block;
|
||||
} DD_adapter_handle;
|
||||
|
||||
#include <asm/types.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include <linux/axon_ioctl.h>
|
||||
|
||||
#endif
|
@ -1,318 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC.
|
||||
* All righs reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "opal/align.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/oob/base/base.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
|
||||
#include "ompi/types.h"
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/mpool/pcie/mpool_pcie.h"
|
||||
|
||||
#include "btl_pcie.h"
|
||||
#include "btl_pcie_endpoint.h"
|
||||
#include "btl_pcie_proc.h"
|
||||
#include "btl_pcie_frag.h"
|
||||
#include "btl_pcie_ddriver.h"
|
||||
|
||||
/*
|
||||
* Initialize state of the endpoint instance.
|
||||
*
|
||||
*/
|
||||
|
||||
static void mca_btl_pcie_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
|
||||
{
|
||||
endpoint->endpoint_btl = 0;
|
||||
endpoint->endpoint_proc = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Destroy a endpoint
|
||||
*
|
||||
*/
|
||||
|
||||
static void mca_btl_pcie_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
mca_btl_pcie_endpoint_t,
|
||||
opal_list_item_t,
|
||||
mca_btl_pcie_endpoint_construct,
|
||||
mca_btl_pcie_endpoint_destruct);
|
||||
|
||||
|
||||
static int
|
||||
dd_open (const char* device_name, DD_adapter_handle* a_handle)
|
||||
{
|
||||
int fd;
|
||||
int lSize, rSize;
|
||||
void *localPtr,*remotePtr; /* shared memory pointers */
|
||||
|
||||
fd = open(device_name, O_RDWR);
|
||||
if(-1 == fd) return -1;
|
||||
|
||||
lSize = 1024*1024;
|
||||
rSize = 1024*1024;
|
||||
|
||||
localPtr = mmap(0, lSize, PROT_READ|PROT_WRITE,
|
||||
MAP_SHARED, fd, LOCAL_SMA_OFFSET);
|
||||
if (MAP_FAILED == localPtr) {
|
||||
close(fd);
|
||||
return -3;
|
||||
}
|
||||
|
||||
remotePtr = mmap(0, rSize, PROT_READ|PROT_WRITE,
|
||||
MAP_SHARED, fd, REMOTE_SMA_OFFSET);
|
||||
if (MAP_FAILED == remotePtr) {
|
||||
munmap(localPtr,lSize);
|
||||
close(fd);
|
||||
return -4;
|
||||
}
|
||||
|
||||
a_handle->fd = fd;
|
||||
a_handle->local_sma_size = lSize;
|
||||
a_handle->remote_sma_size = rSize;
|
||||
a_handle->local_sma_address = localPtr;
|
||||
a_handle->remote_sma_address = remotePtr;
|
||||
|
||||
a_handle->cmd_block = mmap (0, 64 * 124, PROT_READ|PROT_WRITE, MAP_PRIVATE,
|
||||
fd, DMA_COMMAND_BUFFER_OFFSET);
|
||||
if (MAP_FAILED == a_handle->cmd_block) {
|
||||
close (fd);
|
||||
return -5;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize an endpoint
|
||||
*/
|
||||
int mca_btl_pcie_endpoint_init(mca_btl_base_endpoint_t* endpoint)
|
||||
{
|
||||
int rc;
|
||||
mca_btl_pcie_module_t* pcie_btl =
|
||||
endpoint->endpoint_btl;
|
||||
mca_mpool_base_resources_t mpool_resources;
|
||||
size_t fifo_buffer_len, current_offset = 0;
|
||||
|
||||
/* Open our device */
|
||||
rc = dd_open(endpoint->lcl_dev_name,
|
||||
&endpoint->pcie_adapter);
|
||||
if( 0 != rc) {
|
||||
BTL_ERROR(("Failed to open pcie device dd_open says : %d\n", rc));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* fill in endpoint data for begining of resources */
|
||||
endpoint->lcl_sma_ptr = endpoint->pcie_adapter.local_sma_address;
|
||||
if(NULL == endpoint->lcl_sma_ptr) {
|
||||
BTL_ERROR(("Error: local sma address is null\n"));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
endpoint->rem_sma_ptr = endpoint->pcie_adapter.remote_sma_address;
|
||||
if(NULL == endpoint->rem_sma_ptr) {
|
||||
BTL_ERROR(("Error: remote sma address is null\n"));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("SMA for device %s: local=0x%lx,%d remote=0x%lx,%d",
|
||||
endpoint->lcl_dev_name,
|
||||
(long)endpoint->lcl_sma_ptr,
|
||||
endpoint->pcie_adapter.local_sma_size,
|
||||
(long)endpoint->rem_sma_ptr,
|
||||
endpoint->pcie_adapter.remote_sma_size));
|
||||
|
||||
/* 16 bytes of the buffer reserved for the 8 byte local DMA completion */
|
||||
endpoint->lcl_dma_status = ((char*) endpoint->lcl_sma_ptr) + current_offset;
|
||||
current_offset += 16;
|
||||
|
||||
/* fifo_buffer_len bytes reserved for fifos */
|
||||
fifo_buffer_len = sizeof(btl_pcie_fifo_entry_t) * mca_btl_pcie_component.pcie_recv_queue_len;
|
||||
|
||||
rc = ompi_btl_pcie_fifo_init_send(&(endpoint->send_fifo),
|
||||
mca_btl_pcie_component.pcie_recv_queue_len,
|
||||
((char*) endpoint->rem_sma_ptr) + current_offset);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
BTL_ERROR(("Error: Failed to init send fifo: %d", rc));
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = ompi_btl_pcie_fifo_init_recv(&(endpoint->recv_fifo),
|
||||
mca_btl_pcie_component.pcie_recv_queue_len,
|
||||
((char*) endpoint->lcl_sma_ptr) + current_offset,
|
||||
fifo_buffer_len);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
BTL_ERROR(("Error: Failed to init recv fifo: %d", rc));
|
||||
return rc;
|
||||
}
|
||||
|
||||
current_offset += fifo_buffer_len;
|
||||
|
||||
/* reserve rest of the space for the mpool */
|
||||
endpoint->rem_frag_base =
|
||||
((char*) endpoint->rem_sma_ptr) + current_offset;
|
||||
|
||||
endpoint->lcl_frag_base =
|
||||
((char*) endpoint->lcl_sma_ptr) + current_offset;
|
||||
|
||||
/* don't need to align this one as the free list */
|
||||
/* will take care of it. */
|
||||
mpool_resources.base = endpoint->rem_frag_base;
|
||||
mpool_resources.len = endpoint->pcie_adapter.remote_sma_size -
|
||||
current_offset;
|
||||
|
||||
/* setup my pcie mpool */
|
||||
pcie_btl->pcie_mpool =
|
||||
mca_mpool_base_module_create(mca_btl_pcie_component.pcie_send_mpool_name,
|
||||
pcie_btl,
|
||||
&mpool_resources);
|
||||
|
||||
/* setup the modules free lists and such as we now */
|
||||
/* have enough info to setup the mpool */
|
||||
|
||||
/* eager SMA communication buffers */
|
||||
#if (OMPI_MAJOR_VERSION <= 1) && (OMPI_MINOR_VERSION <= 2)
|
||||
ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_eager),
|
||||
sizeof(mca_btl_pcie_sma_buf_eager_t) +
|
||||
mca_btl_pcie_module.super.btl_eager_limit,
|
||||
sizeof(mca_btl_pcie_sma_buf_eager_t),
|
||||
MCA_BTL_PCIE_FRAG_ALIGN,
|
||||
OBJ_CLASS(mca_btl_pcie_sma_buf_eager_t),
|
||||
mca_btl_pcie_component.pcie_free_list_num,
|
||||
mca_btl_pcie_component.pcie_free_list_max,
|
||||
mca_btl_pcie_component.pcie_free_list_inc,
|
||||
pcie_btl->pcie_mpool);
|
||||
|
||||
/* max size SMA communication buffers */
|
||||
ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_max),
|
||||
sizeof(mca_btl_pcie_sma_buf_max_t) +
|
||||
mca_btl_pcie_module.super.btl_max_send_size,
|
||||
sizeof(mca_btl_pcie_sma_buf_max_t),
|
||||
MCA_BTL_PCIE_FRAG_ALIGN,
|
||||
OBJ_CLASS(mca_btl_pcie_sma_buf_max_t),
|
||||
mca_btl_pcie_component.pcie_free_list_num,
|
||||
mca_btl_pcie_component.pcie_free_list_max,
|
||||
mca_btl_pcie_component.pcie_free_list_inc,
|
||||
pcie_btl->pcie_mpool);
|
||||
|
||||
/* User eager fragment buffer */
|
||||
ompi_free_list_init_ex(&(pcie_btl->pcie_frag_eager),
|
||||
sizeof(mca_btl_pcie_frag_eager_t) +
|
||||
mca_btl_pcie_module.super.btl_eager_limit,
|
||||
sizeof(mca_btl_pcie_frag_eager_t),
|
||||
MCA_BTL_PCIE_FRAG_ALIGN,
|
||||
OBJ_CLASS(mca_btl_pcie_frag_eager_t),
|
||||
mca_btl_pcie_component.pcie_free_list_num,
|
||||
mca_btl_pcie_component.pcie_free_list_max,
|
||||
mca_btl_pcie_component.pcie_free_list_inc,
|
||||
NULL);
|
||||
|
||||
/* User max size fragment buffer */
|
||||
ompi_free_list_init_ex(&(pcie_btl->pcie_frag_max),
|
||||
sizeof(mca_btl_pcie_frag_max_t) +
|
||||
mca_btl_pcie_module.super.btl_max_send_size,
|
||||
sizeof(mca_btl_pcie_frag_max_t),
|
||||
MCA_BTL_PCIE_FRAG_ALIGN,
|
||||
OBJ_CLASS(mca_btl_pcie_frag_max_t),
|
||||
mca_btl_pcie_component.pcie_free_list_num,
|
||||
mca_btl_pcie_component.pcie_free_list_max,
|
||||
mca_btl_pcie_component.pcie_free_list_inc,
|
||||
NULL);
|
||||
#else
|
||||
ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_eager),
|
||||
mca_btl_pcie_module.super.btl_eager_limit,
|
||||
MCA_BTL_PCIE_FRAG_ALIGN,
|
||||
OBJ_CLASS(mca_btl_pcie_sma_buf_eager_t),
|
||||
mca_btl_pcie_component.pcie_free_list_num,
|
||||
mca_btl_pcie_component.pcie_free_list_max,
|
||||
mca_btl_pcie_component.pcie_free_list_inc,
|
||||
pcie_btl->pcie_mpool,
|
||||
NULL,
|
||||
NULL);
|
||||
|
||||
/* max size SMA communication buffers */
|
||||
ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_max),
|
||||
mca_btl_pcie_module.super.btl_max_send_size,
|
||||
MCA_BTL_PCIE_FRAG_ALIGN,
|
||||
OBJ_CLASS(mca_btl_pcie_sma_buf_max_t),
|
||||
mca_btl_pcie_component.pcie_free_list_num,
|
||||
mca_btl_pcie_component.pcie_free_list_max,
|
||||
mca_btl_pcie_component.pcie_free_list_inc,
|
||||
pcie_btl->pcie_mpool,
|
||||
NULL,
|
||||
NULL);
|
||||
|
||||
/* User eager fragment buffer */
|
||||
ompi_free_list_init_ex(&(pcie_btl->pcie_frag_eager),
|
||||
mca_btl_pcie_module.super.btl_eager_limit,
|
||||
MCA_BTL_PCIE_FRAG_ALIGN,
|
||||
OBJ_CLASS(mca_btl_pcie_frag_eager_t),
|
||||
mca_btl_pcie_component.pcie_free_list_num,
|
||||
mca_btl_pcie_component.pcie_free_list_max,
|
||||
mca_btl_pcie_component.pcie_free_list_inc,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL);
|
||||
|
||||
/* User max size fragment buffer */
|
||||
ompi_free_list_init_ex(&(pcie_btl->pcie_frag_max),
|
||||
mca_btl_pcie_module.super.btl_max_send_size,
|
||||
MCA_BTL_PCIE_FRAG_ALIGN,
|
||||
OBJ_CLASS(mca_btl_pcie_frag_max_t),
|
||||
mca_btl_pcie_component.pcie_free_list_num,
|
||||
mca_btl_pcie_component.pcie_free_list_max,
|
||||
mca_btl_pcie_component.pcie_free_list_inc,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL);
|
||||
#endif
|
||||
|
||||
/* dma frags. note that we can only have 16 outstanding memory
|
||||
handles so we cannot currently support leave_pinned and we must
|
||||
limit the number of outstanding DMAs via the free list of DMA
|
||||
frags */
|
||||
ompi_free_list_init(&(pcie_btl->pcie_frag_dma),
|
||||
sizeof(mca_btl_pcie_frag_dma_t),
|
||||
OBJ_CLASS(mca_btl_pcie_frag_dma_t),
|
||||
16,
|
||||
16,
|
||||
0,
|
||||
NULL);
|
||||
|
||||
/* recv frag */
|
||||
OBJ_CONSTRUCT(&(pcie_btl->pcie_recv_frag),
|
||||
mca_btl_pcie_frag_recv_t);
|
||||
|
||||
pcie_btl->endpoint = endpoint;
|
||||
pcie_btl->active = true;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Finalize an endpoint
|
||||
*/
|
||||
int mca_btl_pcie_endpoint_fini(mca_btl_base_endpoint_t* endpoint)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
}
|
@ -1,92 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_BTL_PCIE_ENDPOINT_H
|
||||
#define MCA_BTL_PCIE_ENDPOINT_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/event/event.h"
|
||||
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
|
||||
#include "btl_pcie_ddriver.h"
|
||||
#include "btl_pcie_frag.h"
|
||||
#include "btl_pcie.h"
|
||||
#include "btl_pcie_fifo.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
|
||||
* An abstraction that represents a connection to a endpoint process.
|
||||
* An instance of mca_btl_base_endpoint_t is associated w/ each process
|
||||
* and BTL pair at startup. However, connections to the endpoint
|
||||
* are established dynamically on an as-needed basis:
|
||||
*/
|
||||
|
||||
struct mca_btl_base_endpoint_t {
|
||||
opal_list_item_t super;
|
||||
|
||||
struct mca_btl_pcie_module_t* endpoint_btl;
|
||||
/**< BTL instance that created this connection */
|
||||
|
||||
struct mca_btl_pcie_proc_t* endpoint_proc;
|
||||
/**< proc structure corresponding to endpoint */
|
||||
|
||||
/** the name of the remote PCIE device */
|
||||
char* rem_dev_name;
|
||||
/** the name of the local PCIE device */
|
||||
char* lcl_dev_name;
|
||||
|
||||
/** the pcie adapter - returned by dd_open */
|
||||
DD_adapter_handle pcie_adapter;
|
||||
|
||||
/** local pcie SMA memory for this endpoint */
|
||||
char *lcl_sma_ptr;
|
||||
|
||||
/** remote pcie SMA memory for this endpoint */
|
||||
char *rem_sma_ptr;
|
||||
|
||||
/** remote fragment starting point (in which to
|
||||
* deliver data via "rdma" write
|
||||
*/
|
||||
char *rem_frag_base;
|
||||
char *lcl_frag_base;
|
||||
|
||||
char *lcl_dma_status;
|
||||
|
||||
btl_pcie_fifo_t recv_fifo;
|
||||
|
||||
btl_pcie_fifo_t send_fifo;
|
||||
|
||||
|
||||
};
|
||||
|
||||
typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
|
||||
|
||||
typedef mca_btl_base_endpoint_t mca_btl_pcie_endpoint_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_pcie_endpoint_t);
|
||||
|
||||
|
||||
/*
|
||||
* Initialize an endpoint
|
||||
*/
|
||||
int mca_btl_pcie_endpoint_init(mca_btl_base_endpoint_t* endpoint);
|
||||
|
||||
/*
|
||||
* Finalize an endpoint
|
||||
*/
|
||||
int mca_btl_pcie_endpoint_fini(mca_btl_base_endpoint_t* endpoint);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* #ifndef MCA_BTL_PCIE_ENDPOINT_H */
|
@ -1,97 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "opal/threads/mutex.h"
|
||||
#include "opal/types.h"
|
||||
#include "ompi/constants.h"
|
||||
|
||||
#include "btl_pcie_fifo.h"
|
||||
|
||||
/*
 * Compute the index mask for a fifo of the given length.
 *
 * The fifo wraps its index with "index & mask", which only works when the
 * length is a power of two.
 *
 * @param len  number of entries in the fifo
 *
 * @return len - 1 when len is a power of two, otherwise 0.  A length of 1
 *         also yields 0, which the callers treat as an error.
 */
static uint32_t
get_mask(unsigned int len)
{
    /* A power of two has exactly one bit set, so clearing the lowest set
       bit (len & (len - 1)) must leave nothing behind. */
    if (0 == len || 0 != (len & (len - 1))) {
        return 0;
    }

    return (uint32_t) (len - 1);
}
|
||||
|
||||
|
||||
int
|
||||
ompi_btl_pcie_fifo_init_send(btl_pcie_fifo_t *fifo,
|
||||
unsigned int fifo_len,
|
||||
void *queue_space)
|
||||
{
|
||||
fifo->fifo_len = fifo_len;
|
||||
fifo->current_index = 0;
|
||||
fifo->num_outstanding = 0;
|
||||
fifo->mask = get_mask(fifo_len);
|
||||
fifo->queue = queue_space;
|
||||
|
||||
if (fifo->mask == 0) return OMPI_ERROR;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
ompi_btl_pcie_fifo_init_recv(btl_pcie_fifo_t *fifo,
|
||||
unsigned int fifo_len,
|
||||
void *queue_space,
|
||||
size_t queue_space_len)
|
||||
{
|
||||
fifo->fifo_len = fifo_len;
|
||||
fifo->current_index = 1;
|
||||
fifo->num_outstanding = 0;
|
||||
fifo->mask = get_mask(fifo_len);
|
||||
fifo->queue = queue_space;
|
||||
|
||||
if (fifo->mask == 0) return OMPI_ERROR;
|
||||
|
||||
if (fifo_len * sizeof(btl_pcie_fifo_entry_t) > queue_space_len) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* initialize the queue to empty */
|
||||
memset(fifo->queue, 0, fifo_len * sizeof(btl_pcie_fifo_entry_t));
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
ompi_btl_pcie_fifo_finalize(btl_pcie_fifo_t *fifo)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
}
|
@ -1,171 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef BTL_PCIE_FIFO_H
|
||||
#define BTL_PCIE_FIFO_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/constants.h"
|
||||
|
||||
#include "opal/threads/mutex.h"
|
||||
#include "opal/types.h"
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/* One fifo slot.  The top bit selects the entry type (TYPE_MASK), the
   remaining 63 bits carry the data (DATA_MASK).

   Every constant is cast to the entry type so that each one -- including
   the all-zero ACK tag, which would otherwise be a plain int -- is a
   64-bit unsigned value on every platform. */
typedef uint64_t btl_pcie_fifo_entry_t;
#define BTL_PCIE_FIFO_TYPE_MASK ((btl_pcie_fifo_entry_t) 0x8000000000000000ULL)
#define BTL_PCIE_FIFO_DATA_MASK ((btl_pcie_fifo_entry_t) 0x7FFFFFFFFFFFFFFFULL)
#define BTL_PCIE_FIFO_TYPE_ACK  ((btl_pcie_fifo_entry_t) 0x0000000000000000ULL)
#define BTL_PCIE_FIFO_TYPE_SEND ((btl_pcie_fifo_entry_t) 0x8000000000000000ULL)
|
||||
|
||||
struct btl_pcie_fifo_t {
|
||||
/* number of entries in queue */
|
||||
uint32_t fifo_len;
|
||||
/* for sender: next place to write
|
||||
* for receiver: next place to read */
|
||||
uint32_t current_index;
|
||||
/* for sender: number of entries "in flight". Must always be less
|
||||
than or equal to fifo_len */
|
||||
uint32_t num_outstanding;
|
||||
uint32_t mask;
|
||||
/* the actual buffer */
|
||||
btl_pcie_fifo_entry_t* queue;
|
||||
};
|
||||
typedef struct btl_pcie_fifo_t btl_pcie_fifo_t;
|
||||
|
||||
|
||||
/**
|
||||
* Initialize fifo structure
|
||||
*
|
||||
* Initialize send/recv fifo structure. The fifo structure does
|
||||
* double duty of maintaining both the sender and receiver. This
|
||||
* function initializes the send view of the fifo structure, for
|
||||
* use to send messages. fifo_get_msg() should not be called on
|
||||
* this fifo.
|
||||
*
|
||||
* @note fifo_len must match the value given to the matching
|
||||
* fifo_init_recv(), although there are no checks to verify this.
|
||||
*
|
||||
* @param[in] fifo A pointer to a fifo structure to be
|
||||
* initialized
|
||||
* @param[in] fifo_len Requested length of the fifo queue
|
||||
* @param[in] queue_space Space for the receive queue (remote pointer)
|
||||
*
|
||||
* @retval OMPI_SUCCESS Everything worked
|
||||
* @retval OMPI_ERROR Good luck!
|
||||
*/
|
||||
int ompi_btl_pcie_fifo_init_send(btl_pcie_fifo_t *fifo,
|
||||
unsigned int fifo_len,
|
||||
void *queue_space);
|
||||
|
||||
|
||||
/**
|
||||
* Initialize fifo structure
|
||||
*
|
||||
* Initialize send/recv fifo structure. The fifo structure does
|
||||
* double duty of maintaining both the sender and receiver. This
|
||||
* function initializes the receive view of the fifo structure, for
|
||||
* use to receive messages. fifo_set_msg() should not be called on
|
||||
* this fifo.
|
||||
*
|
||||
* @note fifo_len must match the value given to the matching
|
||||
* fifo_init_send(), although there are no checks to verify this.
|
||||
*
|
||||
* @param[in] fifo A pointer to a fifo structure to be
|
||||
* initialized
|
||||
* @param[in] fifo_len Requested length of the fifo queue
|
||||
* @param[in] queue_space Space for the receive queue (local pointer)
|
||||
* @param[in] queue_space_len Length of queue_space
|
||||
*
|
||||
* @retval OMPI_SUCCESS Everything worked
|
||||
* @retval OMPI_ERROR Good luck!
|
||||
*/
|
||||
int ompi_btl_pcie_fifo_init_recv(btl_pcie_fifo_t *fifo,
|
||||
unsigned int fifo_len,
|
||||
void *queue_space,
|
||||
size_t queue_space_len);
|
||||
|
||||
int ompi_btl_pcie_fifo_finalize(btl_pcie_fifo_t *fifo);
|
||||
|
||||
|
||||
/**
|
||||
* Read a message from the queue
|
||||
*
|
||||
* Read a message from the queue
|
||||
*
|
||||
* @param[in] fifo The receive view of the fifo
|
||||
*
|
||||
* @return A non-zero message or 0 if no new messages are
|
||||
* available.
|
||||
*/
|
||||
static inline btl_pcie_fifo_entry_t
|
||||
ompi_btl_pcie_fifo_get_msg(btl_pcie_fifo_t *fifo)
|
||||
{
|
||||
/* BWB - TODO - if we ever want to be multi-threaded, we'll
|
||||
need to fix this */
|
||||
btl_pcie_fifo_entry_t ret = 0;
|
||||
if (0 != (ret = fifo->queue[fifo->current_index])) {
|
||||
fifo->queue[fifo->current_index] = 0;
|
||||
fifo->current_index++;
|
||||
fifo->current_index &= fifo->mask;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Write a message pointer into the queue
|
||||
*
|
||||
* Write a message pointer into the send queue view of the fifo.
|
||||
*
|
||||
* @param[in] fifo The send view of the fifo
|
||||
* @param[in] msg The index to the payload to deliver
|
||||
*
|
||||
* @retval OMPI_SUCCESS Fifo successfully updated
|
||||
* @retval OMPI_ERR_RESOURCE_BUSY There was no space in the fifo
|
||||
*/
|
||||
static inline int
|
||||
ompi_btl_pcie_fifo_set_msg(btl_pcie_fifo_t *fifo, btl_pcie_fifo_entry_t msg)
|
||||
{
|
||||
uint32_t outstanding;
|
||||
|
||||
/* see if we have a slot */
|
||||
outstanding = OPAL_THREAD_ADD32(&fifo->num_outstanding, 1);
|
||||
if (outstanding > fifo->fifo_len) {
|
||||
OPAL_THREAD_ADD32(&fifo->num_outstanding, -1);
|
||||
return OMPI_ERR_RESOURCE_BUSY;
|
||||
}
|
||||
|
||||
/* now that we have a slot, figure out where it is. Allow the
|
||||
outstanding to wrap around forever - just mask out the bits we
|
||||
don't care about. */
|
||||
outstanding = OPAL_THREAD_ADD32(&fifo->current_index, 1);
|
||||
outstanding &= fifo->mask;
|
||||
|
||||
fifo->queue[outstanding] = msg;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static inline int
|
||||
ompi_btl_pcie_fifo_complete_msg(btl_pcie_fifo_t *fifo,
|
||||
unsigned int num_msgs)
|
||||
{
|
||||
OPAL_THREAD_ADD32(&fifo->num_outstanding, -num_msgs);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* BTL_PCIE_FIFO_H */
|
@ -1,139 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "btl_pcie_frag.h"
|
||||
#include "btl_pcie.h"
|
||||
|
||||
|
||||
static void
|
||||
mca_btl_pcie_sma_buf_eager_constructor(mca_btl_pcie_sma_buf_t* buf)
|
||||
{
|
||||
buf->pcie_data.pval = buf + 1;
|
||||
buf->type = MCA_BTL_PCIE_TYPE_EAGER;
|
||||
}
|
||||
|
||||
static void
|
||||
mca_btl_pcie_sma_buf_max_constructor(mca_btl_pcie_sma_buf_t* buf)
|
||||
{
|
||||
buf->pcie_data.pval = buf + 1;
|
||||
buf->type = MCA_BTL_PCIE_TYPE_MAX;
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_btl_pcie_sma_buf_eager_t,
|
||||
ompi_free_list_item_t,
|
||||
mca_btl_pcie_sma_buf_eager_constructor,
|
||||
NULL);
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_btl_pcie_sma_buf_max_t,
|
||||
ompi_free_list_item_t,
|
||||
mca_btl_pcie_sma_buf_max_constructor,
|
||||
NULL);
|
||||
|
||||
|
||||
static void
|
||||
mca_btl_pcie_frag_dma_constructor(mca_btl_pcie_frag_t* frag)
|
||||
{
|
||||
frag->base.des_src = NULL;
|
||||
frag->base.des_src_cnt = 0;
|
||||
frag->base.des_dst = NULL;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
|
||||
frag->segment.seg_addr.pval = NULL;
|
||||
frag->segment.seg_len = 0;
|
||||
|
||||
frag->endpoint = NULL;
|
||||
frag->hdr = NULL;
|
||||
frag->size = 0;
|
||||
frag->registration = NULL;
|
||||
frag->type = MCA_BTL_PCIE_TYPE_RDMA;
|
||||
frag->sma_buf = NULL;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
mca_btl_pcie_frag_common_constructor(mca_btl_pcie_frag_t* frag)
|
||||
{
|
||||
frag->base.des_src = &frag->segment;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = NULL;
|
||||
frag->base.des_dst_cnt = 0;
|
||||
|
||||
frag->hdr = (mca_btl_pcie_header_t*) (frag + 1);
|
||||
frag->hdr->send_frag.pval = frag;
|
||||
|
||||
frag->segment.seg_addr.pval = ((unsigned char*) frag->hdr) + sizeof(mca_btl_pcie_header_t);
|
||||
frag->segment.seg_len = frag->size;
|
||||
|
||||
frag->endpoint = NULL;
|
||||
frag->registration = NULL;
|
||||
frag->sma_buf = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
mca_btl_pcie_frag_eager_constructor(mca_btl_pcie_frag_t* frag)
|
||||
{
|
||||
frag->size = mca_btl_pcie_module.super.btl_eager_limit;
|
||||
mca_btl_pcie_frag_common_constructor(frag);
|
||||
frag->type = MCA_BTL_PCIE_TYPE_EAGER;
|
||||
}
|
||||
|
||||
/* Constructor for max-size send fragments: payload size is the module's
   max send size, type is MAX, everything else comes from the common
   constructor (which needs size to be set first). */
static void mca_btl_pcie_frag_max_constructor(mca_btl_pcie_frag_t* frag)
{
    frag->type = MCA_BTL_PCIE_TYPE_MAX;
    frag->size = mca_btl_pcie_module.super.btl_max_send_size;
    mca_btl_pcie_frag_common_constructor(frag);
}
|
||||
|
||||
|
||||
/* Constructor for the receive fragment: the embedded (initially empty)
   segment is made the single destination segment, no source segment is
   attached, every pointer member is NULL and the type is RECV. */
static void mca_btl_pcie_frag_recv_constructor(mca_btl_pcie_frag_t *frag)
{
    frag->type = MCA_BTL_PCIE_TYPE_RECV;
    frag->size = 0;

    /* descriptor: destination only */
    frag->base.des_dst = &frag->segment;
    frag->base.des_dst_cnt = 1;
    frag->base.des_src = NULL;
    frag->base.des_src_cnt = 0;

    /* embedded segment: empty until a message is delivered into it */
    frag->segment.seg_addr.pval = NULL;
    frag->segment.seg_len = 0;

    frag->hdr = NULL;
    frag->endpoint = NULL;
    frag->registration = NULL;
    frag->sma_buf = NULL;
}
|
||||
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
mca_btl_pcie_frag_eager_t,
|
||||
mca_btl_base_descriptor_t,
|
||||
mca_btl_pcie_frag_eager_constructor,
|
||||
NULL);
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
mca_btl_pcie_frag_max_t,
|
||||
mca_btl_base_descriptor_t,
|
||||
mca_btl_pcie_frag_max_constructor,
|
||||
NULL);
|
||||
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
mca_btl_pcie_frag_recv_t,
|
||||
mca_btl_base_descriptor_t,
|
||||
mca_btl_pcie_frag_recv_constructor,
|
||||
NULL);
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
mca_btl_pcie_frag_dma_t,
|
||||
mca_btl_base_descriptor_t,
|
||||
mca_btl_pcie_frag_dma_constructor,
|
||||
NULL);
|
@ -1,179 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_BTL_PCIE_FRAG_H
|
||||
#define MCA_BTL_PCIE_FRAG_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
#define MCA_BTL_PCIE_FRAG_ALIGN (16)
|
||||
|
||||
/* Header that sits at top of any send message */
|
||||
struct mca_btl_pcie_header_t {
|
||||
mca_btl_base_tag_t tag;
|
||||
uint8_t pad[3];
|
||||
uint32_t length;
|
||||
ompi_ptr_t send_frag;
|
||||
};
|
||||
typedef struct mca_btl_pcie_header_t mca_btl_pcie_header_t;
|
||||
|
||||
/* Convert a header (passed by name, not by pointer) from host to network
   byte order in place.  Only the length member is converted. */
#define OMPI_BTL_PCIE_HEADER_HTON(header)             \
    do {                                              \
        (header).length = htonl((header).length);     \
    } while (0)
|
||||
|
||||
|
||||
/* Convert a header (passed by name, not by pointer) from network to host
   byte order in place.  Only the length member is converted. */
#define OMPI_BTL_PCIE_HEADER_NTOH(header)             \
    do {                                              \
        (header).length = ntohl((header).length);     \
    } while (0)
|
||||
|
||||
struct mca_btl_pcie_frag_t;
|
||||
|
||||
/** Type description for fragments / buffers.  The constructors store one
    of these in the object and the *_RETURN macros switch on it to pick
    the free list an object goes back to. */
enum mca_btl_pcie_frag_type_t {
    MCA_BTL_PCIE_TYPE_UNKNOWN = 0,
    MCA_BTL_PCIE_TYPE_EAGER,    /* eager-limit sized fragment or SMA buffer */
    MCA_BTL_PCIE_TYPE_MAX,      /* max-send-size fragment or SMA buffer */
    MCA_BTL_PCIE_TYPE_RDMA,     /* DMA fragment */
    MCA_BTL_PCIE_TYPE_RECV      /* receive fragment */
};
typedef enum mca_btl_pcie_frag_type_t mca_btl_pcie_frag_type_t;
|
||||
|
||||
/** SMA transfer fragment */
|
||||
struct mca_btl_pcie_sma_buf_t {
|
||||
ompi_free_list_item_t super;
|
||||
/** Pointer to the SMA space available for this copy. An
|
||||
ompi_ptr_t because in v1.2, this sits in the sma region,
|
||||
and we need to not have different sizes on each endpoint. */
|
||||
ompi_ptr_t pcie_data;
|
||||
/** type of buffer */
|
||||
mca_btl_pcie_frag_type_t type;
|
||||
};
|
||||
typedef struct mca_btl_pcie_sma_buf_t mca_btl_pcie_sma_buf_t;
|
||||
|
||||
typedef mca_btl_pcie_sma_buf_t mca_btl_pcie_sma_buf_eager_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_pcie_sma_buf_eager_t);
|
||||
|
||||
typedef mca_btl_pcie_sma_buf_t mca_btl_pcie_sma_buf_max_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_pcie_sma_buf_max_t);
|
||||
|
||||
/* Take one buffer from the module's pcie_sma_buf_eager free list.
 *
 *   btl  BTL module pointer (cast to mca_btl_pcie_module_t*)
 *   buf  lvalue that receives the buffer as mca_btl_pcie_sma_buf_t*
 *   rc   lvalue handed to OMPI_FREE_LIST_GET for its status
 *
 * Arguments are parenthesized and the temporary has a name callers are
 * unlikely to use, so passing expressions or a variable called "item"
 * is safe.
 */
#define MCA_BTL_PCIE_SMA_BUF_ALLOC_EAGER(btl, buf, rc)                        \
{                                                                             \
    ompi_free_list_item_t *pcie_fl_item_;                                     \
    OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)(btl))->pcie_sma_buf_eager,  \
                       pcie_fl_item_, rc);                                    \
    (buf) = (mca_btl_pcie_sma_buf_t*) pcie_fl_item_;                          \
}
|
||||
|
||||
/* Take one buffer from the module's pcie_sma_buf_max free list.
 *
 *   btl  BTL module pointer (cast to mca_btl_pcie_module_t*)
 *   buf  lvalue that receives the buffer as mca_btl_pcie_sma_buf_t*
 *   rc   lvalue handed to OMPI_FREE_LIST_GET for its status
 *
 * Arguments are parenthesized and the temporary has a name callers are
 * unlikely to use, so passing expressions or a variable called "item"
 * is safe.
 */
#define MCA_BTL_PCIE_SMA_BUF_ALLOC_MAX(btl, buf, rc)                          \
{                                                                             \
    ompi_free_list_item_t *pcie_fl_item_;                                     \
    OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)(btl))->pcie_sma_buf_max,    \
                       pcie_fl_item_, rc);                                    \
    (buf) = (mca_btl_pcie_sma_buf_t*) pcie_fl_item_;                          \
}
|
||||
|
||||
/* Give an SMA buffer back to the free list matching its type.
 *
 *   btl  BTL module pointer (cast to mca_btl_pcie_module_t*)
 *   buf  buffer to return; evaluated more than once
 *   ret  lvalue set to OMPI_SUCCESS, or to OMPI_ERR_BAD_PARAM when the
 *        buffer's type is neither EAGER nor MAX (an error is logged and
 *        the buffer is not returned anywhere)
 *
 * Every use of an argument is parenthesized, so expressions may be passed.
 */
#define MCA_BTL_PCIE_SMA_BUF_RETURN(btl, buf, ret)                            \
{                                                                             \
    (ret) = OMPI_SUCCESS;                                                     \
    switch ((buf)->type) {                                                    \
    case MCA_BTL_PCIE_TYPE_EAGER:                                             \
        OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)(btl))->pcie_sma_buf_eager, \
                              (ompi_free_list_item_t*)(buf));                 \
        break;                                                                \
    case MCA_BTL_PCIE_TYPE_MAX:                                               \
        OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)(btl))->pcie_sma_buf_max, \
                              (ompi_free_list_item_t*)(buf));                 \
        break;                                                                \
    default:                                                                  \
        BTL_ERROR(("Invalid return type (%d) for frag 0x%lx in SMA_BUF_RETURN", \
                   (buf)->type, (long)(buf)));                                \
        (ret) = OMPI_ERR_BAD_PARAM;                                           \
    }                                                                         \
}
|
||||
|
||||
|
||||
/** Fragment description -- used for send/rdma fragments */
|
||||
struct mca_btl_pcie_frag_t {
|
||||
mca_btl_base_descriptor_t base;
|
||||
mca_btl_base_segment_t segment;
|
||||
struct mca_btl_base_endpoint_t *endpoint;
|
||||
mca_btl_pcie_header_t *hdr;
|
||||
size_t size;
|
||||
struct mca_btl_pcie_reg_t *registration;
|
||||
mca_btl_pcie_frag_type_t type;
|
||||
mca_btl_pcie_sma_buf_t *sma_buf;
|
||||
};
|
||||
typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_t;
|
||||
|
||||
typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_eager_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_eager_t);
|
||||
|
||||
typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_max_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_max_t);
|
||||
|
||||
typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_recv_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_recv_t);
|
||||
|
||||
typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_dma_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_dma_t);
|
||||
|
||||
|
||||
/* Take one fragment from the module's pcie_frag_eager free list.
 *
 *   btl   BTL module pointer (cast to mca_btl_pcie_module_t*)
 *   frag  lvalue that receives the fragment as mca_btl_pcie_frag_t*
 *   rc    lvalue handed to OMPI_FREE_LIST_GET for its status
 *
 * Arguments are parenthesized and the temporary has a name callers are
 * unlikely to use, so passing expressions or a variable called "item"
 * is safe.
 */
#define MCA_BTL_PCIE_FRAG_ALLOC_EAGER(btl, frag, rc)                          \
{                                                                             \
    ompi_free_list_item_t *pcie_fl_item_;                                     \
    OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)(btl))->pcie_frag_eager,     \
                       pcie_fl_item_, rc);                                    \
    (frag) = (mca_btl_pcie_frag_t*) pcie_fl_item_;                            \
}
|
||||
|
||||
/* Take one fragment from the module's pcie_frag_max free list.
 *
 *   btl   BTL module pointer (cast to mca_btl_pcie_module_t*)
 *   frag  lvalue that receives the fragment as mca_btl_pcie_frag_t*
 *   rc    lvalue handed to OMPI_FREE_LIST_GET for its status
 *
 * Arguments are parenthesized and the temporary has a name callers are
 * unlikely to use, so passing expressions or a variable called "item"
 * is safe.
 */
#define MCA_BTL_PCIE_FRAG_ALLOC_MAX(btl, frag, rc)                            \
{                                                                             \
    ompi_free_list_item_t *pcie_fl_item_;                                     \
    OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)(btl))->pcie_frag_max,       \
                       pcie_fl_item_, rc);                                    \
    (frag) = (mca_btl_pcie_frag_t*) pcie_fl_item_;                            \
}
|
||||
|
||||
/* Take one fragment from the module's pcie_frag_dma free list.
 *
 *   btl   BTL module pointer (cast to mca_btl_pcie_module_t*)
 *   frag  lvalue that receives the fragment as mca_btl_pcie_frag_t*
 *   rc    lvalue handed to OMPI_FREE_LIST_GET for its status
 *
 * Arguments are parenthesized and the temporary has a name callers are
 * unlikely to use, so passing expressions or a variable called "item"
 * is safe.
 */
#define MCA_BTL_PCIE_FRAG_ALLOC_DMA(btl, frag, rc)                            \
{                                                                             \
    ompi_free_list_item_t *pcie_fl_item_;                                     \
    OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)(btl))->pcie_frag_dma,       \
                       pcie_fl_item_, rc);                                    \
    (frag) = (mca_btl_pcie_frag_t*) pcie_fl_item_;                            \
}
|
||||
|
||||
/* Give a fragment back to the free list matching its type.
 *
 *   btl   BTL module pointer (cast to mca_btl_pcie_module_t*)
 *   frag  fragment to return; evaluated more than once
 *   ret   lvalue set to OMPI_SUCCESS, or to OMPI_ERR_BAD_PARAM when the
 *         fragment's type is not EAGER, MAX or RDMA (an error is logged
 *         and the fragment is not returned anywhere)
 *
 * Every use of an argument is parenthesized, so expressions may be passed.
 */
#define MCA_BTL_PCIE_FRAG_RETURN(btl, frag, ret)                              \
{                                                                             \
    (ret) = OMPI_SUCCESS;                                                     \
    switch ((frag)->type) {                                                   \
    case MCA_BTL_PCIE_TYPE_EAGER:                                             \
        OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)(btl))->pcie_frag_eager, \
                              (ompi_free_list_item_t*)(frag));                \
        break;                                                                \
    case MCA_BTL_PCIE_TYPE_MAX:                                               \
        OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)(btl))->pcie_frag_max, \
                              (ompi_free_list_item_t*)(frag));                \
        break;                                                                \
    case MCA_BTL_PCIE_TYPE_RDMA:                                              \
        OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)(btl))->pcie_frag_dma, \
                              (ompi_free_list_item_t*)(frag));                \
        break;                                                                \
    default:                                                                  \
        BTL_ERROR(("Invalid return type (%d) for frag 0x%lx in FRAG_RETURN",  \
                   (frag)->type, (long)(frag)));                              \
        (ret) = OMPI_ERR_BAD_PARAM;                                           \
    }                                                                         \
}
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* #ifndef MCA_BTL_PCIE_FRAG_H */
|
@ -1,58 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef BTL_PCIE_CFG_LEX_H_
|
||||
#define BTL_PCIE_CFG_LEX_H_
|
||||
|
||||
#include "opal_config.h"
|
||||
|
||||
#ifdef malloc
|
||||
#undef malloc
|
||||
#endif
|
||||
#ifdef realloc
|
||||
#undef realloc
|
||||
#endif
|
||||
#ifdef free
|
||||
#undef free
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
int btl_pcie_cfg_yylex(void);
|
||||
int btl_pcie_cfg_init_buffer(FILE *file);
|
||||
|
||||
extern FILE *btl_pcie_cfg_yyin;
|
||||
extern bool btl_pcie_cfg_parse_done;
|
||||
extern char *btl_pcie_cfg_yytext;
|
||||
extern int btl_pcie_cfg_yynewlines;
|
||||
|
||||
/*
|
||||
* Make lex-generated files not issue compiler warnings
|
||||
*/
|
||||
#define YY_STACK_USED 0
|
||||
#define YY_ALWAYS_INTERACTIVE 0
|
||||
#define YY_NEVER_INTERACTIVE 0
|
||||
#define YY_MAIN 0
|
||||
#define YY_NO_UNPUT 1
|
||||
#define YY_SKIP_YYWRAP 1
|
||||
|
||||
/* Token codes returned by btl_pcie_cfg_yylex().  Numbering starts at 1,
   so 0 is never one of these codes. */
enum {
    BTL_PCIE_CFG_PARSE_DONE = 1,
    BTL_PCIE_CFG_PARSE_ERROR,

    /* line structure */
    BTL_PCIE_CFG_PARSE_NEWLINE,

    /* "name:digit", "name:device-path" and bare device-path tokens */
    BTL_PCIE_CFG_PARSE_HOSTNAME_CORE,
    BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE,
    BTL_PCIE_CFG_PARSE_DEVICE,

    /* one past the last token code */
    BTL_PCIE_CFG_PARSE_MAX
};
|
||||
|
||||
#endif /* #ifndef BTL_PCIE_CFG_LEX_H_ */
|
@ -1,129 +0,0 @@
|
||||
%{ /* -*- C -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "opal_config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "btl_pcie_lex.h"
|
||||
|
||||
/*
|
||||
* local functions
|
||||
*/
|
||||
BEGIN_C_DECLS
|
||||
|
||||
static int finish_parsing(void) ;
|
||||
static int btl_pcie_cfg_yywrap(void);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
/*
|
||||
* global variables
|
||||
*/
|
||||
int btl_pcie_cfg_yynewlines = 1;
|
||||
bool btl_pcie_cfg_parse_done = false;
|
||||
char *btl_pcie_cfg_string = NULL;
|
||||
|
||||
#define yyterminate() \
|
||||
return finish_parsing()
|
||||
|
||||
%}
|
||||
|
||||
WHITE [\f\t\v ]
|
||||
CHAR [A-Za-z0-9_\-\.]
|
||||
NAME_CHAR [A-Za-z0-9_\-\.\\\/]
|
||||
|
||||
%x comment
|
||||
%x section_name
|
||||
%x section_end
|
||||
%x value
|
||||
|
||||
%%
|
||||
|
||||
{WHITE}*\n { /* optional whitespace followed by a newline */ ++btl_pcie_cfg_yynewlines;
|
||||
return BTL_PCIE_CFG_PARSE_NEWLINE; }
|
||||
#.*\n { /* '#' comment running to end of line */ ++btl_pcie_cfg_yynewlines;
|
||||
return BTL_PCIE_CFG_PARSE_NEWLINE; }
|
||||
"//".*\n { /* C++-style comment running to end of line */ ++btl_pcie_cfg_yynewlines;
|
||||
return BTL_PCIE_CFG_PARSE_NEWLINE; }
|
||||
|
||||
"/*" { /* start of a block comment: switch to the exclusive comment state */ BEGIN(comment);
|
||||
return BTL_PCIE_CFG_PARSE_NEWLINE; }
|
||||
<comment>[^*\n]* ; /* Eat up non '*'s */
|
||||
<comment>"*"+[^*/\n]* ; /* Eat '*'s not followed by a '/' */
|
||||
<comment>\n { /* keep the line count accurate inside block comments */ ++btl_pcie_cfg_yynewlines;
|
||||
return BTL_PCIE_CFG_PARSE_NEWLINE; }
|
||||
<comment>"*"+"/" { BEGIN(INITIAL); /* Done with block comment */
|
||||
return BTL_PCIE_CFG_PARSE_NEWLINE; }
|
||||
|
||||
{CHAR}+":"[0-9] { /* name, ':' and exactly one digit.  NOTE(review): with two or more digits after the ':' the next rule matches a longer string and wins, so the token becomes HOSTNAME_DEVICE -- confirm that is intended */ return BTL_PCIE_CFG_PARSE_HOSTNAME_CORE; }
|
||||
{CHAR}+":"{NAME_CHAR}+ { /* name, ':' and a device name/path */ return BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE; }
|
||||
|
||||
{NAME_CHAR}+ { /* bare device name/path with no ':' */ return BTL_PCIE_CFG_PARSE_DEVICE; }
|
||||
|
||||
{WHITE}+ ; /* whitespace */
|
||||
|
||||
%%
|
||||
|
||||
|
||||
/*
|
||||
* This cleans up at the end of the parse (since, in this case, we
|
||||
* always parse the entire file) and prevents a memory leak.
|
||||
*/
|
||||
static int finish_parsing(void)
|
||||
{
|
||||
if (NULL != YY_CURRENT_BUFFER) {
|
||||
yy_delete_buffer(YY_CURRENT_BUFFER);
|
||||
#if defined(YY_CURRENT_BUFFER_LVALUE)
|
||||
YY_CURRENT_BUFFER_LVALUE = NULL;
|
||||
#else
|
||||
YY_CURRENT_BUFFER = NULL;
|
||||
#endif /* YY_CURRENT_BUFFER_LVALUE */
|
||||
}
|
||||
return YY_NULL;
|
||||
}
|
||||
|
||||
|
||||
static int btl_pcie_cfg_yywrap(void)
|
||||
{
|
||||
btl_pcie_cfg_parse_done = true;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Ensure that we have a valid yybuffer to use. Specifically, if this
|
||||
* scanner is invoked a second time, finish_parsing() (above) will
|
||||
* have been executed, and the current buffer will have been freed.
|
||||
* Flex doesn't recognize this fact because as far as it's concerned,
|
||||
* its internal state was already initialized, so it thinks it should
|
||||
* have a valid buffer. Hence, here we ensure to give it a valid
|
||||
* buffer.
|
||||
*/
|
||||
int btl_pcie_cfg_init_buffer(FILE *file)
|
||||
{
|
||||
YY_BUFFER_STATE buf = yy_create_buffer(file, YY_BUF_SIZE);
|
||||
yy_switch_to_buffer(buf);
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,194 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/runtime/ompi_module_exchange.h"
|
||||
|
||||
#include "btl_pcie.h"
|
||||
#include "btl_pcie_proc.h"
|
||||
|
||||
static void mca_btl_pcie_proc_construct(mca_btl_pcie_proc_t* proc);
|
||||
static void mca_btl_pcie_proc_destruct(mca_btl_pcie_proc_t* proc);
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_btl_pcie_proc_t,
|
||||
opal_list_item_t, mca_btl_pcie_proc_construct,
|
||||
mca_btl_pcie_proc_destruct);
|
||||
|
||||
/*
 * Constructor for a PCIe proc instance: reset the bookkeeping fields,
 * set up the per-proc lock and register the new instance on the
 * component-wide proc list.
 */
void mca_btl_pcie_proc_construct(mca_btl_pcie_proc_t* proc)
{
    OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t);

    proc->proc_endpoint_count = 0;
    proc->proc_addr_count     = 0;
    proc->proc_ompi           = 0;

    /* The component list is shared, so it is only modified while the
     * component lock is held. */
    OPAL_THREAD_LOCK(&mca_btl_pcie_component.pcie_lock);
    opal_list_append(&mca_btl_pcie_component.pcie_procs, &proc->super);
    OPAL_THREAD_UNLOCK(&mca_btl_pcie_component.pcie_lock);
}
|
||||
|
||||
/*
|
||||
* Cleanup ib proc instance
|
||||
*/
|
||||
|
||||
void mca_btl_pcie_proc_destruct(mca_btl_pcie_proc_t* proc)
|
||||
{
|
||||
/* remove from list of all proc instances */
|
||||
OPAL_THREAD_LOCK(&mca_btl_pcie_component.pcie_lock);
|
||||
opal_list_remove_item(&mca_btl_pcie_component.pcie_procs, &proc->super);
|
||||
OPAL_THREAD_UNLOCK(&mca_btl_pcie_component.pcie_lock);
|
||||
|
||||
OBJ_DESTRUCT(&proc->proc_lock);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Look for an existing TEMPLATE process instances based on the associated
|
||||
* ompi_proc_t instance.
|
||||
*/
|
||||
static mca_btl_pcie_proc_t* mca_btl_pcie_proc_lookup_ompi(ompi_proc_t* ompi_proc)
|
||||
{
|
||||
mca_btl_pcie_proc_t* pcie_proc;
|
||||
|
||||
OPAL_THREAD_LOCK(&mca_btl_pcie_component.pcie_lock);
|
||||
|
||||
for(pcie_proc = (mca_btl_pcie_proc_t*)
|
||||
opal_list_get_first(&mca_btl_pcie_component.pcie_procs);
|
||||
pcie_proc != (mca_btl_pcie_proc_t*)
|
||||
opal_list_get_end(&mca_btl_pcie_component.pcie_procs);
|
||||
pcie_proc = (mca_btl_pcie_proc_t*)opal_list_get_next(pcie_proc)) {
|
||||
|
||||
if(pcie_proc->proc_ompi == ompi_proc) {
|
||||
OPAL_THREAD_UNLOCK(&mca_btl_pcie_component.pcie_lock);
|
||||
return pcie_proc;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
OPAL_THREAD_UNLOCK(&mca_btl_pcie_component.pcie_lock);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Create a TEMPLATE process structure. There is a one-to-one correspondence
|
||||
* between a ompi_proc_t and a mca_btl_pcie_proc_t instance. We cache
|
||||
* additional data (specifically the list of mca_btl_pcie_endpoint_t instances,
|
||||
* and published addresses) associated w/ a given destination on this
|
||||
* datastructure.
|
||||
*/
|
||||
|
||||
int mca_btl_pcie_proc_create(ompi_proc_t* ompi_proc,
|
||||
mca_btl_pcie_module_t* pcie_btl,
|
||||
mca_btl_pcie_proc_t** ret_proc)
|
||||
{
|
||||
mca_btl_pcie_proc_t* pcie_proc = NULL;
|
||||
char *rem_dev_name = NULL, *lcl_dev_name = NULL;
|
||||
char *rem_hostname = NULL;
|
||||
int rc, num_peers, i;
|
||||
size_t size;
|
||||
mca_btl_pcie_modex_info_t *modex_info;
|
||||
|
||||
/* Check if already have proc structure for this ompi process */
|
||||
pcie_proc = mca_btl_pcie_proc_lookup_ompi(ompi_proc);
|
||||
|
||||
if(pcie_proc != NULL) {
|
||||
/* Gotcha! */
|
||||
*ret_proc = pcie_proc;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* query for the peer's device name info */
|
||||
rc = ompi_modex_recv(&mca_btl_pcie_component.super.btl_version,
|
||||
ompi_proc,
|
||||
(void*)&modex_info,
|
||||
&size);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
opal_output(mca_btl_base_output, "[%s:%d] ompi_modex_recv failed for peer %s",
|
||||
__FILE__, __LINE__, ORTE_NAME_PRINT(&ompi_proc->proc_name));
|
||||
OBJ_RELEASE(pcie_proc);
|
||||
*ret_proc = NULL;
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
if (0 == size || 0 != size % sizeof(mca_btl_pcie_modex_info_t)) {
|
||||
*ret_proc = NULL;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
num_peers = size / sizeof(mca_btl_pcie_modex_info_t);
|
||||
|
||||
for (i = 0 ; i < num_peers ; ++i) {
|
||||
MCA_BTL_PCIE_MODEX_INFO_NTOH(modex_info[i]);
|
||||
rem_hostname = modex_info[i].hostname;
|
||||
rem_dev_name = modex_info[i].devicename;
|
||||
lcl_dev_name = ompi_btl_pcie_cfg_get_matching_device(rem_hostname,
|
||||
rem_dev_name);
|
||||
if (NULL != lcl_dev_name &&
|
||||
0 == strcmp(lcl_dev_name, pcie_btl->lcl_dev_name)) {
|
||||
/* we have a match. continue onward */
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* make sure the local device names match */
|
||||
if(NULL == lcl_dev_name ||
|
||||
0 != strcmp(lcl_dev_name, pcie_btl->lcl_dev_name)){
|
||||
*ret_proc = NULL;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("Have matching devices: %s:%s <-> %s:%s",
|
||||
orte_process_info.nodename,
|
||||
pcie_btl->lcl_dev_name,
|
||||
rem_hostname,
|
||||
rem_dev_name));
|
||||
|
||||
pcie_proc = OBJ_NEW(mca_btl_pcie_proc_t);
|
||||
if(NULL == pcie_proc){
|
||||
*ret_proc = NULL;
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
pcie_proc->proc_ompi = ompi_proc;
|
||||
|
||||
/* build a unique identifier (of arbitrary
|
||||
* size) to represent the proc */
|
||||
pcie_proc->proc_guid = ompi_proc->proc_name;
|
||||
|
||||
/* Initialize number of peer */
|
||||
pcie_proc->proc_endpoint_count = 1;
|
||||
|
||||
pcie_proc->endpoint_proc = OBJ_NEW(mca_btl_pcie_endpoint_t);
|
||||
if(NULL == pcie_proc->endpoint_proc) {
|
||||
free(rem_dev_name);
|
||||
*ret_proc = NULL;
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
pcie_proc->endpoint_proc->lcl_dev_name = lcl_dev_name;
|
||||
pcie_proc->endpoint_proc->rem_dev_name = rem_dev_name;
|
||||
pcie_proc->endpoint_proc->endpoint_proc = pcie_proc;
|
||||
pcie_proc->endpoint_proc->endpoint_btl = pcie_btl;
|
||||
|
||||
if(OMPI_SUCCESS != mca_btl_pcie_endpoint_init(pcie_proc->endpoint_proc)) {
|
||||
BTL_ERROR(("Error initializing the PCIE endpoint \n"));
|
||||
*ret_proc = NULL;
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
*ret_proc = pcie_proc;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -1,62 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2007 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_BTL_PCIE_PROC_H
|
||||
#define MCA_BTL_PCIE_PROC_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
|
||||
#include "btl_pcie.h"
|
||||
#include "btl_pcie_endpoint.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
|
||||
* Represents the state of a remote process and the set of addresses
|
||||
* that it exports. Also cache an instance of mca_btl_base_endpoint_t for
|
||||
* each
|
||||
* BTL instance that attempts to open a connection to the process.
|
||||
*/
|
||||
struct mca_btl_pcie_proc_t {
|
||||
opal_list_item_t super;
|
||||
/**< allow proc to be placed on a list */
|
||||
|
||||
ompi_proc_t *proc_ompi;
|
||||
/**< pointer to corresponding ompi_proc_t */
|
||||
|
||||
orte_process_name_t proc_guid;
|
||||
/**< globally unique identifier for the process */
|
||||
|
||||
size_t proc_addr_count;
|
||||
/**< number of addresses published by endpoint */
|
||||
|
||||
struct mca_btl_base_endpoint_t *endpoint_proc;
|
||||
/**< endpoint that has been created to access this proc */
|
||||
|
||||
size_t proc_endpoint_count;
|
||||
/**< number of endpoints */
|
||||
|
||||
opal_mutex_t proc_lock;
|
||||
/**< lock to protect against concurrent access to proc state */
|
||||
|
||||
};
|
||||
typedef struct mca_btl_pcie_proc_t mca_btl_pcie_proc_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_pcie_proc_t);
|
||||
|
||||
int mca_btl_pcie_proc_create(ompi_proc_t* ompi_proc,
|
||||
mca_btl_pcie_module_t* pcie_btl,
|
||||
mca_btl_pcie_proc_t** ret_proc);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* #ifndef MCA_BTL_PCIE_PROC_H */
|
@ -1,31 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
|
||||
# MCA_btl_pcie_CONFIG(action-if-can-compile,
|
||||
# [action-if-cant-compile])
|
||||
# ------------------------------------------------
# Runs OMPI_CHECK_PCIE for the btl_pcie prefix and records the outcome
# in btl_pcie_happy.  On success the btl_pcie LDFLAGS and LIBS are also
# copied into the WRAPPER_EXTRA variables and action-if-can-compile is
# run; otherwise action-if-cant-compile is run.  The btl_pcie CPPFLAGS,
# LDFLAGS and LIBS variables are substituted in either case.
# NOTE(review): presumably OMPI_CHECK_PCIE is what fills in those three
# variables -- confirm against its definition.
|
||||
AC_DEFUN([MCA_btl_pcie_CONFIG],[
|
||||
OMPI_CHECK_PCIE([btl_pcie],
|
||||
[btl_pcie_happy="yes"],
|
||||
[btl_pcie_happy="no"])
|
||||
|
||||
AS_IF([test "$btl_pcie_happy" = "yes"],
|
||||
[btl_pcie_WRAPPER_EXTRA_LDFLAGS="$btl_pcie_LDFLAGS"
|
||||
btl_pcie_WRAPPER_EXTRA_LIBS="$btl_pcie_LIBS"
|
||||
$1],
|
||||
[$2])
|
||||
|
||||
# substitute in the things needed to build pcie
|
||||
AC_SUBST([btl_pcie_CPPFLAGS])
|
||||
AC_SUBST([btl_pcie_LDFLAGS])
|
||||
AC_SUBST([btl_pcie_LIBS])
|
||||
])dnl
|
@ -1,24 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2007 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Specific to this module
|
||||
|
||||
PARAM_CONFIG_FILES="Makefile"
|
@ -1,20 +0,0 @@
|
||||
# -*- text -*-
|
||||
# Copyright (c) 2007 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
# This is the US/English help file for Open MPI's IBM PCIe support
|
||||
|
||||
[initialization:more-than-one-cpu]
|
||||
The PCIe BTL found that the CPU affinity mask for the current process
|
||||
includes more than one CPU (%d). When using Open MPI on the Cell
|
||||
machines with the PCIe driver, the affinity mask must include exactly
|
||||
one CPU.
|
||||
[initialization:no-cpus]
|
||||
The PCIe BTL was unable to find any CPUs in the affinity mask for the
|
||||
current process. This usually indicates a system issue that must be
|
||||
resolved by the system administrator.
|
@ -1,159 +0,0 @@
|
||||
# hostname:core device
|
||||
n01-001-0:0 /dev/axon0
|
||||
n01-001-0:1 /dev/axon1
|
||||
n01-001-0:2 /dev/axon2
|
||||
n01-001-0:3 /dev/axon3
|
||||
|
||||
n01-001-1:0 /dev/axon0
|
||||
n01-001-1:1 /dev/axon1
|
||||
|
||||
n01-001-2:0 /dev/axon0
|
||||
n01-001-2:1 /dev/axon1
|
||||
|
||||
n01-002-0:0 /dev/axon0
|
||||
n01-002-0:1 /dev/axon1
|
||||
n01-002-0:2 /dev/axon2
|
||||
n01-002-0:3 /dev/axon3
|
||||
|
||||
n01-002-1:0 /dev/axon0
|
||||
n01-002-1:1 /dev/axon1
|
||||
|
||||
n01-002-2:0 /dev/axon0
|
||||
n01-002-2:1 /dev/axon1
|
||||
|
||||
n01-003-0:0 /dev/axon0
|
||||
n01-003-0:1 /dev/axon1
|
||||
n01-003-0:2 /dev/axon2
|
||||
n01-003-0:3 /dev/axon3
|
||||
|
||||
n01-003-1:0 /dev/axon0
|
||||
n01-003-1:1 /dev/axon1
|
||||
|
||||
n01-004-0:0 /dev/axon0
|
||||
n01-004-0:1 /dev/axon1
|
||||
n01-004-0:2 /dev/axon2
|
||||
n01-004-0:3 /dev/axon3
|
||||
|
||||
n01-004-1:0 /dev/axon0
|
||||
n01-004-1:1 /dev/axon1
|
||||
|
||||
n01-005-0:0 /dev/axon0
|
||||
n01-005-0:1 /dev/axon1
|
||||
n01-005-0:2 /dev/axon2
|
||||
n01-005-0:3 /dev/axon3
|
||||
|
||||
n01-005-1:0 /dev/axon0
|
||||
n01-005-1:1 /dev/axon1
|
||||
|
||||
n01-005-2:0 /dev/axon0
|
||||
n01-005-2:1 /dev/axon1
|
||||
|
||||
n01-006-0:0 /dev/axon0
|
||||
n01-006-0:1 /dev/axon1
|
||||
n01-006-0:2 /dev/axon2
|
||||
n01-006-0:3 /dev/axon3
|
||||
|
||||
n01-006-1:0 /dev/axon0
|
||||
n01-006-1:1 /dev/axon1
|
||||
|
||||
n01-006-2:0 /dev/axon0
|
||||
n01-006-2:1 /dev/axon1
|
||||
|
||||
n01-007-0:0 /dev/axon0
|
||||
n01-007-0:1 /dev/axon1
|
||||
n01-007-0:2 /dev/axon2
|
||||
n01-007-0:3 /dev/axon3
|
||||
|
||||
n01-007-1:0 /dev/axon0
|
||||
n01-007-1:1 /dev/axon1
|
||||
|
||||
n01-007-2:0 /dev/axon0
|
||||
n01-007-2:1 /dev/axon1
|
||||
|
||||
n01-008-0:0 /dev/axon0
|
||||
n01-008-0:1 /dev/axon1
|
||||
n01-008-0:2 /dev/axon2
|
||||
n01-008-0:3 /dev/axon3
|
||||
|
||||
n01-008-1:0 /dev/axon0
|
||||
n01-008-1:1 /dev/axon1
|
||||
|
||||
n01-008-2:0 /dev/axon0
|
||||
n01-008-2:1 /dev/axon1
|
||||
|
||||
n01-009-0:0 /dev/axon0
|
||||
n01-009-0:1 /dev/axon1
|
||||
n01-009-0:2 /dev/axon2
|
||||
n01-009-0:3 /dev/axon3
|
||||
|
||||
n01-009-1:0 /dev/axon0
|
||||
n01-009-1:1 /dev/axon1
|
||||
|
||||
n01-009-2:0 /dev/axon0
|
||||
n01-009-2:1 /dev/axon1
|
||||
|
||||
n01-010-0:0 /dev/axon0
|
||||
n01-010-0:1 /dev/axon1
|
||||
n01-010-0:2 /dev/axon2
|
||||
n01-010-0:3 /dev/axon3
|
||||
|
||||
n01-010-1:0 /dev/axon0
|
||||
n01-010-1:1 /dev/axon1
|
||||
|
||||
n01-010-2:0 /dev/axon0
|
||||
n01-010-2:1 /dev/axon1
|
||||
|
||||
n01-011-0:0 /dev/axon0
|
||||
n01-011-0:1 /dev/axon1
|
||||
n01-011-0:2 /dev/axon2
|
||||
n01-011-0:3 /dev/axon3
|
||||
|
||||
n01-011-1:0 /dev/axon0
|
||||
n01-011-1:1 /dev/axon1
|
||||
|
||||
n01-011-2:0 /dev/axon0
|
||||
n01-011-2:1 /dev/axon1
|
||||
|
||||
n01-012-0:0 /dev/axon0
|
||||
n01-012-0:1 /dev/axon1
|
||||
n01-012-0:2 /dev/axon2
|
||||
n01-012-0:3 /dev/axon3
|
||||
|
||||
n01-012-1:0 /dev/axon0
|
||||
n01-012-1:1 /dev/axon1
|
||||
|
||||
n01-012-2:0 /dev/axon0
|
||||
n01-012-2:1 /dev/axon1
|
||||
|
||||
n01-013-0:0 /dev/axon0
|
||||
n01-013-0:1 /dev/axon1
|
||||
n01-013-0:2 /dev/axon2
|
||||
n01-013-0:3 /dev/axon3
|
||||
|
||||
n01-013-1:0 /dev/axon0
|
||||
n01-013-1:1 /dev/axon1
|
||||
|
||||
n01-013-2:0 /dev/axon0
|
||||
n01-013-2:1 /dev/axon1
|
||||
|
||||
n01-014-0:0 /dev/axon0
|
||||
n01-014-0:1 /dev/axon1
|
||||
n01-014-0:2 /dev/axon2
|
||||
n01-014-0:3 /dev/axon3
|
||||
|
||||
n01-014-1:0 /dev/axon0
|
||||
n01-014-1:1 /dev/axon1
|
||||
|
||||
n01-014-2:0 /dev/axon0
|
||||
n01-014-2:1 /dev/axon1
|
||||
|
||||
n01-015-0:0 /dev/axon0
|
||||
n01-015-0:1 /dev/axon1
|
||||
n01-015-0:2 /dev/axon2
|
||||
n01-015-0:3 /dev/axon3
|
||||
|
||||
n01-015-1:0 /dev/axon0
|
||||
n01-015-1:1 /dev/axon1
|
||||
|
||||
n01-015-2:0 /dev/axon0
|
||||
n01-015-2:1 /dev/axon1
|
@ -1,82 +0,0 @@
|
||||
# opteron_host:device cell_host:device
|
||||
|
||||
n01-001-0:/dev/axon0 n01-001-1:/dev/axon0
|
||||
n01-001-0:/dev/axon1 n01-001-1:/dev/axon1
|
||||
n01-001-0:/dev/axon2 n01-001-2:/dev/axon0
|
||||
n01-001-0:/dev/axon3 n01-001-2:/dev/axon1
|
||||
|
||||
n01-002-0:/dev/axon0 n01-002-1:/dev/axon0
|
||||
n01-002-0:/dev/axon1 n01-002-1:/dev/axon1
|
||||
n01-002-0:/dev/axon2 n01-002-2:/dev/axon0
|
||||
n01-002-0:/dev/axon3 n01-002-2:/dev/axon1
|
||||
|
||||
n01-003-0:/dev/axon0 n01-003-1:/dev/axon0
|
||||
n01-003-0:/dev/axon1 n01-003-1:/dev/axon1
|
||||
n01-003-0:/dev/axon2 n01-003-2:/dev/axon0
|
||||
n01-003-0:/dev/axon3 n01-003-2:/dev/axon1
|
||||
|
||||
n01-004-0:/dev/axon0 n01-004-1:/dev/axon0
|
||||
n01-004-0:/dev/axon1 n01-004-1:/dev/axon1
|
||||
n01-004-0:/dev/axon2 n01-004-2:/dev/axon0
|
||||
n01-004-0:/dev/axon3 n01-004-2:/dev/axon1
|
||||
|
||||
n01-005-0:/dev/axon0 n01-005-1:/dev/axon0
|
||||
n01-005-0:/dev/axon1 n01-005-1:/dev/axon1
|
||||
n01-005-0:/dev/axon2 n01-005-2:/dev/axon0
|
||||
n01-005-0:/dev/axon3 n01-005-2:/dev/axon1
|
||||
|
||||
n01-006-0:/dev/axon0 n01-006-1:/dev/axon0
|
||||
n01-006-0:/dev/axon1 n01-006-1:/dev/axon1
|
||||
n01-006-0:/dev/axon2 n01-006-2:/dev/axon0
|
||||
n01-006-0:/dev/axon3 n01-006-2:/dev/axon1
|
||||
|
||||
n01-007-0:/dev/axon0 n01-007-1:/dev/axon0
|
||||
n01-007-0:/dev/axon1 n01-007-1:/dev/axon1
|
||||
n01-007-0:/dev/axon2 n01-007-2:/dev/axon0
|
||||
n01-007-0:/dev/axon3 n01-007-2:/dev/axon1
|
||||
|
||||
n01-008-0:/dev/axon0 n01-008-1:/dev/axon0
|
||||
n01-008-0:/dev/axon1 n01-008-1:/dev/axon1
|
||||
n01-008-0:/dev/axon2 n01-008-2:/dev/axon0
|
||||
n01-008-0:/dev/axon3 n01-008-2:/dev/axon1
|
||||
|
||||
n01-009-0:/dev/axon0 n01-009-1:/dev/axon0
|
||||
n01-009-0:/dev/axon1 n01-009-1:/dev/axon1
|
||||
n01-009-0:/dev/axon2 n01-009-2:/dev/axon0
|
||||
n01-009-0:/dev/axon3 n01-009-2:/dev/axon1
|
||||
|
||||
n01-010-0:/dev/axon0 n01-010-1:/dev/axon0
|
||||
n01-010-0:/dev/axon1 n01-010-1:/dev/axon1
|
||||
n01-010-0:/dev/axon2 n01-010-2:/dev/axon0
|
||||
n01-010-0:/dev/axon3 n01-010-2:/dev/axon1
|
||||
|
||||
n01-011-0:/dev/axon0 n01-011-1:/dev/axon0
|
||||
n01-011-0:/dev/axon1 n01-011-1:/dev/axon1
|
||||
n01-011-0:/dev/axon2 n01-011-2:/dev/axon0
|
||||
n01-011-0:/dev/axon3 n01-011-2:/dev/axon1
|
||||
|
||||
n01-012-0:/dev/axon0 n01-012-1:/dev/axon0
|
||||
n01-012-0:/dev/axon1 n01-012-1:/dev/axon1
|
||||
n01-012-0:/dev/axon2 n01-012-2:/dev/axon0
|
||||
n01-012-0:/dev/axon3 n01-012-2:/dev/axon1
|
||||
|
||||
n01-013-0:/dev/axon0 n01-013-1:/dev/axon0
|
||||
n01-013-0:/dev/axon1 n01-013-1:/dev/axon1
|
||||
n01-013-0:/dev/axon2 n01-013-2:/dev/axon0
|
||||
n01-013-0:/dev/axon3 n01-013-2:/dev/axon1
|
||||
|
||||
n01-014-0:/dev/axon0 n01-014-1:/dev/axon0
|
||||
n01-014-0:/dev/axon1 n01-014-1:/dev/axon1
|
||||
n01-014-0:/dev/axon2 n01-014-2:/dev/axon0
|
||||
n01-014-0:/dev/axon3 n01-014-2:/dev/axon1
|
||||
|
||||
n01-015-0:/dev/axon0 n01-015-1:/dev/axon0
|
||||
n01-015-0:/dev/axon1 n01-015-1:/dev/axon1
|
||||
n01-015-0:/dev/axon2 n01-015-2:/dev/axon0
|
||||
n01-015-0:/dev/axon3 n01-015-2:/dev/axon1
|
||||
|
||||
n01-016-0:/dev/axon0 n01-016-1:/dev/axon0
|
||||
n01-016-0:/dev/axon1 n01-016-1:/dev/axon1
|
||||
n01-016-0:/dev/axon2 n01-016-2:/dev/axon0
|
||||
n01-016-0:/dev/axon3 n01-016-2:/dev/axon1
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user