From 20b81ff6348cad3e008cc7d5e691dd16736734ec Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 5 Mar 2009 02:40:25 +0000 Subject: [PATCH] Add the PCIE BTL. This won't actually work yet - still need to work through issues with system header files, generalize specification of resources, etc. - but it won't build unless specifically directed to do so. Meantime, any more changes that impact these areas of the code base can be reflected here rather than having to be dealt with later. This commit was SVN r20734. --- config/ompi_check_pcie.m4 | 56 + configure.ac | 4 +- ompi/mca/btl/pcie/Makefile.am | 75 + ompi/mca/btl/pcie/btl_pcie.c | 572 ++++++ ompi/mca/btl/pcie/btl_pcie.h | 368 ++++ ompi/mca/btl/pcie/btl_pcie_cfg.c | 196 ++ ompi/mca/btl/pcie/btl_pcie_component.c | 487 +++++ ompi/mca/btl/pcie/btl_pcie_endpoint.c | 274 +++ ompi/mca/btl/pcie/btl_pcie_endpoint.h | 92 + ompi/mca/btl/pcie/btl_pcie_fifo.c | 97 + ompi/mca/btl/pcie/btl_pcie_fifo.h | 171 ++ ompi/mca/btl/pcie/btl_pcie_frag.c | 139 ++ ompi/mca/btl/pcie/btl_pcie_frag.h | 179 ++ ompi/mca/btl/pcie/btl_pcie_lex.c | 1698 +++++++++++++++++ ompi/mca/btl/pcie/btl_pcie_lex.h | 58 + ompi/mca/btl/pcie/btl_pcie_lex.l | 125 ++ ompi/mca/btl/pcie/btl_pcie_proc.c | 194 ++ ompi/mca/btl/pcie/btl_pcie_proc.h | 62 + ompi/mca/btl/pcie/configure.m4 | 31 + ompi/mca/btl/pcie/configure.params | 24 + ompi/mca/btl/pcie/help-mpi-btl-pcie.txt | 20 + .../btl/pcie/mca-btl-pcie-local-resources.cfg | 159 ++ .../pcie/mca-btl-pcie-remote-resources.cfg | 82 + ompi/mca/mpool/pcie/Makefile.am | 57 + ompi/mca/mpool/pcie/configure.m4 | 31 + ompi/mca/mpool/pcie/configure.params | 26 + ompi/mca/mpool/pcie/mpool_pcie.h | 87 + ompi/mca/mpool/pcie/mpool_pcie_component.c | 112 ++ ompi/mca/mpool/pcie/mpool_pcie_module.c | 70 + 29 files changed, 5544 insertions(+), 2 deletions(-) create mode 100644 config/ompi_check_pcie.m4 create mode 100644 ompi/mca/btl/pcie/Makefile.am create mode 100644 ompi/mca/btl/pcie/btl_pcie.c create mode 100644 
ompi/mca/btl/pcie/btl_pcie.h create mode 100644 ompi/mca/btl/pcie/btl_pcie_cfg.c create mode 100644 ompi/mca/btl/pcie/btl_pcie_component.c create mode 100644 ompi/mca/btl/pcie/btl_pcie_endpoint.c create mode 100644 ompi/mca/btl/pcie/btl_pcie_endpoint.h create mode 100644 ompi/mca/btl/pcie/btl_pcie_fifo.c create mode 100644 ompi/mca/btl/pcie/btl_pcie_fifo.h create mode 100644 ompi/mca/btl/pcie/btl_pcie_frag.c create mode 100644 ompi/mca/btl/pcie/btl_pcie_frag.h create mode 100644 ompi/mca/btl/pcie/btl_pcie_lex.c create mode 100644 ompi/mca/btl/pcie/btl_pcie_lex.h create mode 100644 ompi/mca/btl/pcie/btl_pcie_lex.l create mode 100644 ompi/mca/btl/pcie/btl_pcie_proc.c create mode 100644 ompi/mca/btl/pcie/btl_pcie_proc.h create mode 100644 ompi/mca/btl/pcie/configure.m4 create mode 100644 ompi/mca/btl/pcie/configure.params create mode 100644 ompi/mca/btl/pcie/help-mpi-btl-pcie.txt create mode 100644 ompi/mca/btl/pcie/mca-btl-pcie-local-resources.cfg create mode 100644 ompi/mca/btl/pcie/mca-btl-pcie-remote-resources.cfg create mode 100644 ompi/mca/mpool/pcie/Makefile.am create mode 100644 ompi/mca/mpool/pcie/configure.m4 create mode 100644 ompi/mca/mpool/pcie/configure.params create mode 100644 ompi/mca/mpool/pcie/mpool_pcie.h create mode 100644 ompi/mca/mpool/pcie/mpool_pcie_component.c create mode 100644 ompi/mca/mpool/pcie/mpool_pcie_module.c diff --git a/config/ompi_check_pcie.m4 b/config/ompi_check_pcie.m4 new file mode 100644 index 0000000000..213924f68f --- /dev/null +++ b/config/ompi_check_pcie.m4 @@ -0,0 +1,56 @@ +# -*- shell-script -*- +# +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# OMPI_CHECK_PCIE(prefix, [action-if-found], [action-if-not-found]) +# -------------------------------------------------------- +AC_DEFUN([OMPI_CHECK_PCIE],[ + AC_ARG_WITH([pcie], + [AC_HELP_STRING([--with-pcie(=DIR)], + [Build PCIE (QLogic InfiniPath PCIE) support, searching for libraries in DIR])]) + AC_ARG_WITH([pcie-libdir], + [AC_HELP_STRING([--with-pcie-libdir=DIR], + [Search for PCIE (QLogic InfiniPath PCIE) libraries in DIR])]) + + ompi_check_pcie_$1_save_CPPFLAGS="$CPPFLAGS" + ompi_check_pcie_$1_save_LDFLAGS="$LDFLAGS" + ompi_check_pcie_$1_save_LIBS="$LIBS" + + ompi_check_pcie_happy="yes" + + AS_IF([test "$with_pcie" != "no"], + [AS_IF([test ! -z "$with_pcie" -a "$with_pcie" != "yes"], + [ompi_check_pcie_dir="$with_pcie"]) + AS_IF([test ! -z "$with_pcie_libdir" -a "$with_pcie_libdir" != "yes"], + [ompi_check_pcie_libdir="$with_pcie_libdir"]) + OMPI_CHECK_PACKAGE([$1], + [axon_ioctl.h], + [], + [$ompi_check_pcie_dir], + [$ompi_check_pcie_libdir], + [ompi_check_pcie_happy="yes"], + [ompi_check_pcie_happy="no"])], + [ompi_check_pcie_happy="no"]) + + + CPPFLAGS="$ompi_check_pcie_$1_save_CPPFLAGS" + LDFLAGS="$ompi_check_pcie_$1_save_LDFLAGS" + LIBS="$ompi_check_pcie_$1_save_LIBS" + + AS_IF([test "$ompi_check_pcie_happy" = "yes" -a "$enable_progress_threads" = "yes"], + [AC_MSG_WARN([PCIE driver does not currently support progress threads. Disabling BTL.]) + ompi_check_pcie_happy="no"]) + + AS_IF([test "$ompi_check_pcie_happy" = "yes"], + [$2], + [AS_IF([test ! -z "$with_pcie" -a "$with_pcie" != "no"], + [AC_MSG_ERROR([PCIe support requested but not found. 
Aborting])]) + $3]) +]) diff --git a/configure.ac b/configure.ac index c9cdb27dea..cdc0e228c9 100644 --- a/configure.ac +++ b/configure.ac @@ -638,9 +638,9 @@ ompi_show_title "Header file tests" AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h dirent.h \ dlfcn.h execinfo.h err.h fcntl.h grp.h inttypes.h libgen.h \ - libutil.h netdb.h netinet/in.h netinet/tcp.h \ + libutil.h memory.h netdb.h netinet/in.h netinet/tcp.h \ poll.h pthread.h pty.h pwd.h sched.h stdint.h \ - string.h strings.h stropts.h sys/fcntl.h sys/ipc.h \ + stdlib.h string.h strings.h stropts.h sys/fcntl.h sys/ipc.h \ sys/ioctl.h sys/mman.h sys/param.h sys/queue.h \ sys/resource.h sys/select.h sys/socket.h sys/sockio.h \ stdarg.h sys/stat.h sys/statvfs.h sys/time.h sys/tree.h \ diff --git a/ompi/mca/btl/pcie/Makefile.am b/ompi/mca/btl/pcie/Makefile.am new file mode 100644 index 0000000000..5092f031c3 --- /dev/null +++ b/ompi/mca/btl/pcie/Makefile.am @@ -0,0 +1,75 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(btl_pcie_CPPFLAGS) + +dist_pkgdata_DATA = \ + help-mpi-btl-pcie.txt + +sources = \ + btl_pcie.c \ + btl_pcie.h \ + btl_pcie_component.c \ + btl_pcie_endpoint.c \ + btl_pcie_endpoint.h \ + btl_pcie_fifo.c \ + btl_pcie_fifo.h \ + btl_pcie_frag.c \ + btl_pcie_frag.h \ + btl_pcie_proc.c \ + btl_pcie_proc.h \ + btl_pcie_lex.c \ + btl_pcie_lex.h \ + btl_pcie_cfg.c \ + btl_pcie_ddriver.h \ + btl_pcie_ddriver.c + +EXTRA_DIST = btl_pcie_lex.l + + +if OMPI_BUILD_btl_pcie_DSO +lib = +lib_sources = +component = mca_btl_pcie.la +component_sources = $(sources) +else +lib = libmca_btl_pcie.la +lib_sources = $(sources) +component = +component_sources = +endif + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component) +mca_btl_pcie_la_SOURCES = $(component_sources) +mca_btl_pcie_la_LDFLAGS = -module -avoid-version $(btl_pcie_LDFLAGS) +mca_btl_pcie_la_LIBADD = $(btl_pcie_LIBS) + +noinst_LTLIBRARIES = $(lib) +libmca_btl_pcie_la_SOURCES = $(lib_sources) +libmca_btl_pcie_la_LDFLAGS= -module -avoid-version $(btl_pcie_LDFLAGS) +libmca_btl_pcie_la_LIBADD = $(btl_pcie_LIBS) + + +ompi_sysconfdir = $(OMPI_SYSCONFDIR) +ompi_sysconf_DATA = \ + mca-btl-pcie-local-resources.cfg \ + mca-btl-pcie-remote-resources.cfg + diff --git a/ompi/mca/btl/pcie/btl_pcie.c b/ompi/mca/btl/pcie/btl_pcie.c new file mode 100644 index 0000000000..58ff804347 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie.c @@ -0,0 +1,572 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include +#include + +#include "opal/types.h" +#include "opal/util/output.h" +#include "opal/util/if.h" +#include "opal/sys/atomic.h" +#include "opal/mca/paffinity/paffinity.h" + +#include "ompi/datatype/convertor.h" +#include "ompi/mca/btl/btl.h" +#include "ompi/mca/btl/base/btl_base_error.h" +#include "ompi/mca/mpool/mpool.h" +#include "ompi/mca/mpool/base/base.h" +#include "ompi/mca/pml/pml.h" + +#include "btl_pcie.h" +#include "btl_pcie_frag.h" +#include "btl_pcie_proc.h" +#include "btl_pcie_endpoint.h" + +mca_btl_pcie_module_t mca_btl_pcie_module = { + { + &mca_btl_pcie_component.super, + 0, /* max size of first fragment */ + 0, /* Threshold below which BTL should not fragment */ + 0, /* max send fragment size */ + 0, /* pipeline protocol length */ + 0, /* max rdma fragment size */ + 0, /* min packet size for pipeline protocol */ + 0, /* exclusivity */ + 0, /* latency */ + 0, /* bandwidth */ + 0, /* flags */ + mca_btl_pcie_add_procs, + mca_btl_pcie_del_procs, + mca_btl_pcie_register, + mca_btl_pcie_finalize, + mca_btl_pcie_alloc, + mca_btl_pcie_free, + mca_btl_pcie_prepare_src, + mca_btl_pcie_prepare_dst, + mca_btl_pcie_send, + NULL, + mca_btl_pcie_put, /* put */ + NULL, /* get */ + NULL, /*dump */ + NULL, /* mpool */ + NULL, /* register error cb */ + NULL /* ft event */ + } +}; + + +/** + * + */ + +int mca_btl_pcie_add_procs( + struct mca_btl_base_module_t* btl, + size_t nprocs, + struct ompi_proc_t **ompi_procs, + struct mca_btl_base_endpoint_t** peers, + opal_bitmap_t* reachable) +{ + mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*)btl; + int i; + + for(i = 0; i < (int) nprocs; i++) { + struct ompi_proc_t* ompi_proc = ompi_procs[i]; + mca_btl_pcie_proc_t* pcie_proc; + int rc; + + /* Don't connect to anyone on our local node, including + ourselves. The PCIe doesn't work that way, and the mapper + sometimes gets confused by that fact. 
*/ + if (OPAL_PROC_ON_LOCAL_NODE(ompi_proc->proc_flags)) continue; + + rc = mca_btl_pcie_proc_create(ompi_proc, pcie_btl, &pcie_proc); + if(OMPI_SUCCESS != rc) { + return rc; + } else if (pcie_proc) { + opal_bitmap_set_bit(reachable, i); + peers[i] = pcie_proc->endpoint_proc; + } + } + + return OMPI_SUCCESS; +} + +int mca_btl_pcie_del_procs(struct mca_btl_base_module_t* btl, + size_t nprocs, + struct ompi_proc_t **procs, + struct mca_btl_base_endpoint_t ** peers) +{ + /* TODO */ + return OMPI_SUCCESS; +} + + +/** + * Register callback function to support send/recv semantics + */ + +int mca_btl_pcie_register( + struct mca_btl_base_module_t* btl, + mca_btl_base_tag_t tag, + mca_btl_base_module_recv_cb_fn_t cbfunc, + void* cbdata) +{ + mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl; + pcie_btl->pcie_reg[tag].cbfunc = cbfunc; + pcie_btl->pcie_reg[tag].cbdata = cbdata; + return OMPI_SUCCESS; +} + + +/** + * Allocate a segment. + * + * @param btl (IN) BTL module + * @param size (IN) Request segment size. 
+ */ + +mca_btl_base_descriptor_t* mca_btl_pcie_alloc( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* endpoint, + uint8_t order, + size_t size, + uint32_t flags) +{ + mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl; + mca_btl_pcie_frag_t* frag = NULL; + int rc; + + if (size <= btl->btl_eager_limit) { + MCA_BTL_PCIE_FRAG_ALLOC_EAGER(pcie_btl, frag, rc); + if (frag) { + frag->segment.seg_len = size; + frag->base.des_flags = 0; + frag->hdr->length = size; + } + } + if (NULL == frag && size <= btl->btl_max_send_size) { + MCA_BTL_PCIE_FRAG_ALLOC_MAX(pcie_btl, frag, rc); + if (frag) { + frag->segment.seg_len = size; + frag->base.des_flags = 0; + frag->hdr->length = size; + } + } + BTL_VERBOSE(("btl_pcie_alloc called for %d bytes, returning 0x%lx", size, frag)); + + return (mca_btl_base_descriptor_t*) frag; +} + + +/** + * Return a segment + */ + +int mca_btl_pcie_free( + struct mca_btl_base_module_t* btl, + mca_btl_base_descriptor_t* des) +{ + mca_btl_pcie_frag_t* frag = (mca_btl_pcie_frag_t*)des; + mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl; + int ret; + + BTL_VERBOSE(("btl_pcie_free returning 0x%lx", frag)); + + if (frag->registration != NULL) { + pcie_btl->rdma_mpool->mpool_deregister(pcie_btl->rdma_mpool, + (mca_mpool_base_registration_t*) + frag->registration); + frag->registration = NULL; + } + + MCA_BTL_PCIE_FRAG_RETURN(pcie_btl, frag, ret); + return ret; +} + + +/** + * Pack data and return a descriptor that can be + * used for send/put. 
+ * + * @param btl (IN) BTL module + * @param peer (IN) BTL peer addressing + */ +mca_btl_base_descriptor_t* mca_btl_pcie_prepare_src( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* endpoint, + struct mca_mpool_base_registration_t* registration, + struct ompi_convertor_t* convertor, + uint8_t order, + size_t reserve, + size_t* size, + uint32_t flags +) +{ + mca_btl_pcie_frag_t* frag = NULL; + mca_btl_pcie_reg_t* pcie_reg; + mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl; + struct iovec iov; + uint32_t iov_count = 1; + size_t max_data = *size; + int rc; + + BTL_VERBOSE(("btl_pcie_prepare_src called with reserve %d", reserve)); + + /* check and see if the data is contiguous */ + if(ompi_convertor_need_buffers(convertor) == false && 0 == reserve) { + MCA_BTL_PCIE_FRAG_ALLOC_DMA(btl, frag, rc); + if(NULL == frag) { + return NULL; + } + + iov.iov_len = max_data; + iov.iov_base = NULL; + + /* get the user buffer's address */ + ompi_convertor_pack(convertor, &iov, &iov_count, &max_data); + *size = max_data; + + if(NULL == registration) { + rc = pcie_btl->rdma_mpool->mpool_register(pcie_btl->rdma_mpool, + iov.iov_base, max_data, 0, ®istration); + if(OMPI_SUCCESS != rc || NULL == registration){ + MCA_BTL_PCIE_FRAG_RETURN(pcie_btl, frag, rc); + return NULL; + } + frag->registration = (mca_btl_pcie_reg_t*) registration; + } + + pcie_reg = (mca_btl_pcie_reg_t*) registration; + frag->base.des_flags = 0; + frag->base.des_src = &frag->segment; + frag->base.des_src_cnt = 1; + frag->base.des_dst = NULL; + frag->base.des_dst_cnt = 0; + frag->base.des_flags = 0; + + frag->segment.seg_len = max_data; + frag->segment.seg_addr.pval = iov.iov_base; + frag->segment.seg_key.key64 = (uint64_t)pcie_reg->handle; + + BTL_VERBOSE(("prepare_src: frag->segment.seg_len = %lu .seg_addr.pval= %llu " + "frag->segment.seg_key.key64 = %llu", + frag->segment.seg_len, frag->segment.seg_addr.pval, + frag->segment.seg_key.key64)); + + return &frag->base; + + } else { 
+ /* + * if we aren't pinning the data and the requested size is less + * than the eager limit pack into a fragment from the eager pool + */ + if (max_data+reserve <= btl->btl_eager_limit) { + + MCA_BTL_PCIE_FRAG_ALLOC_EAGER(btl, frag, rc); + if(NULL == frag) { + return NULL; + } + + iov.iov_len = max_data; + iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve; + + rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data ); + *size = max_data; + if( rc < 0 ) { + MCA_BTL_PCIE_FRAG_RETURN(btl, frag, rc); + return NULL; + } + frag->segment.seg_len = max_data + reserve; + } + + /* + * otherwise pack as much data as we can into a fragment + * that is the max send size. + */ + else { + + MCA_BTL_PCIE_FRAG_ALLOC_MAX(btl, frag, rc); + if(NULL == frag) { + return NULL; + } + if(max_data + reserve > frag->size){ + max_data = frag->size - reserve; + } + iov.iov_len = max_data; + iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve; + + rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data ); + *size = max_data; + + if( rc < 0 ) { + MCA_BTL_PCIE_FRAG_RETURN(btl, frag, rc); + return NULL; + } + frag->segment.seg_len = max_data + reserve; + + } + frag->hdr->length = *size + reserve; + frag->base.des_src = &frag->segment; + frag->base.des_src_cnt = 1; + frag->base.des_dst = NULL; + frag->base.des_dst_cnt = 0; + frag->base.des_flags = 0; + return &frag->base; + } + +} + +/** + * Prepare a descriptor for send/rdma using the supplied + * convertor. If the convertor references data that is contigous, + * the descriptor may simply point to the user buffer. Otherwise, + * this routine is responsible for allocating buffer space and + * packing if required. 
+ * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL peer addressing + * @param convertor (IN) Data type convertor + * @param reserve (IN) Additional bytes requested by upper layer to precede user data + * @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT) + */ + +mca_btl_base_descriptor_t* mca_btl_pcie_prepare_dst( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* endpoint, + struct mca_mpool_base_registration_t* registration, + struct ompi_convertor_t* convertor, + uint8_t order, + size_t reserve, + size_t* size, + uint32_t flags) +{ + mca_btl_pcie_frag_t* frag; + mca_btl_pcie_reg_t* pcie_reg; + mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl; + int rc; + ptrdiff_t lb; + + MCA_BTL_PCIE_FRAG_ALLOC_DMA(pcie_btl, frag, rc); + if(NULL == frag) { + return NULL; + } + ompi_ddt_type_lb(convertor->pDesc, &lb); + frag->segment.seg_addr.pval = convertor->pBaseBuf + lb + + convertor->bConverted; + if(NULL == registration) { + rc = pcie_btl->rdma_mpool->mpool_register(pcie_btl->rdma_mpool, + frag->segment.seg_addr.pval, *size, 0, + ®istration); + if(OMPI_SUCCESS != rc || NULL == registration) { + MCA_BTL_PCIE_FRAG_RETURN(pcie_btl, frag, rc); + return NULL; + } + frag->registration = (mca_btl_pcie_reg_t*) registration; + } + pcie_reg = (mca_btl_pcie_reg_t*)registration; + + frag->segment.seg_len = *size; + frag->segment.seg_key.key64 = (uint64_t) pcie_reg->handle; + + frag->base.des_dst = &frag->segment; + frag->base.des_dst_cnt = 1; + frag->base.des_src = NULL; + frag->base.des_src_cnt = 0; + frag->base.des_flags = 0; + + BTL_VERBOSE(("prepare_dst: frag->segment.seg_len = %lu .seg_addr.pval= %llu " + "frag->segment.seg_key.key64 = %llu", + frag->segment.seg_len, frag->segment.seg_addr.pval, + frag->segment.seg_key.key64)); + + return &frag->base; +} + + +/** + * Initiate an asynchronous send. 
+ * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL addressing information + * @param descriptor (IN) Description of the data to be transfered + * @param tag (IN) The tag value used to notify the peer. + */ + +int mca_btl_pcie_send( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* endpoint, + struct mca_btl_base_descriptor_t* descriptor, + mca_btl_base_tag_t tag) + +{ + /* mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl; */ + mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl; + mca_btl_pcie_frag_t* frag = (mca_btl_pcie_frag_t*)descriptor; + mca_btl_pcie_sma_buf_t *buf = NULL; + int rc; + btl_pcie_fifo_entry_t idx; + + /* setup these fields so they get pulled over in the memcpy */ + frag->hdr->tag = tag; + frag->hdr->length = frag->segment.seg_len; + + if (frag->type == MCA_BTL_PCIE_TYPE_EAGER) { + MCA_BTL_PCIE_SMA_BUF_ALLOC_EAGER(pcie_btl, buf, rc); + } else { + MCA_BTL_PCIE_SMA_BUF_ALLOC_MAX(pcie_btl, buf, rc); + } + if (NULL == frag) { + BTL_ERROR(("can't alloc buf for frag of type %d", frag->type)); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + frag->endpoint = endpoint; + frag->sma_buf = buf; + /* Put fragment into network byte order before copy to save work + done in sma region */ + OMPI_BTL_PCIE_HEADER_HTON(*frag->hdr); + /* BWB - FIX ME - both pointers are 16 byte aligned and the + buffers behind them are a multiple of 16 in length (but + frag->segment.seg_len might not be). There might be a more + optimized memcpy option given that behavior. 
*/ + memcpy(buf->pcie_data.pval, frag->hdr, + sizeof(mca_btl_pcie_header_t) + + frag->segment.seg_len); + + /* send the fragment pointer to the receiver, + who will later ACK it back so that we can return it */ + idx = ((char*) buf->pcie_data.pval) - ((char*) endpoint->rem_frag_base); + idx |= BTL_PCIE_FIFO_TYPE_SEND; + + /* make sure the top bit is zero */ + assert((idx & BTL_PCIE_FIFO_TYPE_MASK) == BTL_PCIE_FIFO_TYPE_SEND); + + /* need to barrier prior to writing remote completion */ + opal_atomic_wmb(); + + BTL_VERBOSE(("sent frag 0x%lx (offset %lx), tag %d, length %d, rc = %d", + frag, idx, frag->hdr->tag, frag->segment.seg_len, rc)); + + idx = opal_swap_bytes8(idx); + rc = ompi_btl_pcie_fifo_set_msg(&endpoint->send_fifo, idx); + if(OMPI_SUCCESS != rc) { + if(OMPI_ERR_RESOURCE_BUSY == rc) { + /* BWB - FIX ME - queue for later */ + abort(); + } else { + return rc; + } + } + + return OMPI_SUCCESS; +} + +/** + * Initiate an asynchronous put. + * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL addressing information + * @param descriptor (IN) Description of the data to be transferred + */ + +int mca_btl_pcie_put( + mca_btl_base_module_t* btl, + mca_btl_base_endpoint_t* endpoint, + mca_btl_base_descriptor_t* descriptor) +{ + + mca_btl_pcie_frag_t* frag = (mca_btl_pcie_frag_t*) descriptor; + struct AXON_dma_request dma_req; + int dma_reqs_started; + int rc; + volatile uint64_t *dma_status_addr; + uint64_t dma_status; + + frag->endpoint = endpoint; + + memset(&dma_req,0x00,sizeof(dma_req)); + dma_req.dma_type = AXON_DMATYPE_PUT; + + dma_req.local_descriptor[0].src_address = frag->base.des_src->seg_addr.lval; + dma_req.local_descriptor[0].src_memory_region_handle = frag->base.des_src->seg_key.key64; + + dma_req.remote_descriptor[0].src_address = + opal_swap_bytes8(frag->base.des_dst->seg_addr.lval); + dma_req.remote_descriptor[0].src_memory_region_handle = + opal_swap_bytes8(frag->base.des_dst->seg_key.key64); + + dma_req.transfer_size = + 
dma_req.remote_descriptor[0].transfer_size = + dma_req.local_descriptor[0].transfer_size = frag->base.des_src->seg_len; + + dma_req.localDmaStatusOffset = endpoint->lcl_dma_status - (char*) endpoint->lcl_sma_ptr; + dma_req.remoteDmaStatusOffset = 0; + + dma_req.local_descriptor_count = 1; + dma_req.remote_descriptor_count = 1; + + dma_status_addr = (uint64_t*) endpoint->lcl_dma_status; + *dma_status_addr = 0; + + rc = dd_dma_request(&endpoint->pcie_adapter, + &dma_req, + 1, + &dma_reqs_started); + + if (0 != rc) abort(); + + /* wait for completion, for now anyway */ + while (0 == (dma_status = *dma_status_addr)) { + /* sched_yield(); */ + } + + frag->base.des_cbfunc(btl, endpoint, &(frag->base), OMPI_SUCCESS); + + return OMPI_SUCCESS; +} + + +/** + * Initiate an asynchronous get. + * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL addressing information + * @param descriptor (IN) Description of the data to be transferred + * + */ + +int mca_btl_pcie_get( + mca_btl_base_module_t* btl, + mca_btl_base_endpoint_t* endpoint, + mca_btl_base_descriptor_t* descriptor) +{ + return OMPI_ERR_NOT_IMPLEMENTED; +} + + + +/* + * Cleanup/release module resources. + */ + +int mca_btl_pcie_finalize(struct mca_btl_base_module_t* btl) +{ + mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl; + OBJ_DESTRUCT(&pcie_btl->pcie_lock); + OBJ_DESTRUCT(&pcie_btl->pcie_sma_buf_eager); + OBJ_DESTRUCT(&pcie_btl->pcie_sma_buf_max); + OBJ_DESTRUCT(&pcie_btl->pcie_frag_eager); + OBJ_DESTRUCT(&pcie_btl->pcie_frag_max); + OBJ_DESTRUCT(&pcie_btl->pcie_frag_dma); + OBJ_DESTRUCT(&pcie_btl->pcie_recv_frag); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/btl/pcie/btl_pcie.h b/ompi/mca/btl/pcie/btl_pcie.h new file mode 100644 index 0000000000..5816eddc40 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie.h @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_BTL_PCIE_H +#define MCA_BTL_PCIE_H + +#include "ompi_config.h" + +#include +#include + +#include "opal/align.h" +#include "opal/event/event.h" +#include "opal/util/output.h" +#include "opal/class/opal_bitmap.h" + +#include "orte/util/proc_info.h" + +#include "ompi/class/ompi_free_list.h" +#include "ompi/mca/btl/btl.h" +#include "ompi/mca/btl/base/base.h" +#include "ompi/mca/mpool/mpool.h" +#include "ompi/mca/pml/pml.h" + +#include "btl_pcie_ddriver.h" +#include "btl_pcie_frag.h" +#include "btl_pcie_fifo.h" + +BEGIN_C_DECLS + +#define MCA_BTL_HAS_MPOOL 1 + +/** + * PCIE BTL component. + */ + +struct mca_btl_pcie_component_t { + /** BTL base component */ + mca_btl_base_component_1_0_1_t super; + + /* ***** Configuration information ***** */ + + /** initial size of free lists */ + int pcie_free_list_num; + + /** maximum size of free lists */ + int pcie_free_list_max; + + /** number of elements to alloc when growing free lists */ + int pcie_free_list_inc; + + /** name of send/recv memory pool */ + char* pcie_send_mpool_name; + + /** name of put/get memory pool */ + char *pcie_dma_mpool_name; + + /** Number of entries in the send/recv queue structure */ + int pcie_recv_queue_len; + + /* **** Component data ***** */ + + /** array of available modules */ + struct mca_btl_pcie_module_t *pcie_btls; + + /** Number of initialized pcie_btl modules */ + uint32_t pcie_num_btls; + + /** list of pcie proc structures, created during add_procs */ + opal_list_t pcie_procs; + + /** lock for accessing component state */ + opal_mutex_t pcie_lock; +}; +typedef struct mca_btl_pcie_component_t mca_btl_pcie_component_t; + +OMPI_MODULE_DECLSPEC extern mca_btl_pcie_component_t mca_btl_pcie_component; + +/** + * BTL Module Interface + */ +struct mca_btl_pcie_module_t { + mca_btl_base_module_t super; /**< base BTL interface */ + + bool active; + + mca_btl_base_recv_reg_t pcie_reg[MCA_BTL_TAG_MAX]; + + 
/** name of the pcie device */ + char *lcl_dev_name; + + /** Free list of communication buffers in the SMA region */ + ompi_free_list_t pcie_sma_buf_eager; + ompi_free_list_t pcie_sma_buf_max; + + /** Free list of bounce fragments, normal user memory */ + ompi_free_list_t pcie_frag_eager; + ompi_free_list_t pcie_frag_max; + + /* free list of DMA fragments */ + ompi_free_list_t pcie_frag_dma; + + /* single receive fragment to handle upcalls on message reception. + This will need to be a free list if multiple receive callbacks + could be triggered at the same time, which will happen if the + code goes MT hot. */ + mca_btl_pcie_frag_recv_t pcie_recv_frag; + + /* lock for accessing module state */ + opal_mutex_t pcie_lock; + + /* mpool for allocating the members of pcie_sma_buf* */ + struct mca_mpool_base_module_t* pcie_mpool; + /* mpool for RDMA pinning */ + struct mca_mpool_base_module_t* rdma_mpool; + + /* Endpoint associated with this module (there's a one-to-one + mapping of modules and endpoints, since a device can only + handle one endpoint at a time */ + struct mca_btl_base_endpoint_t* endpoint; +}; +typedef struct mca_btl_pcie_module_t mca_btl_pcie_module_t; +extern mca_btl_pcie_module_t mca_btl_pcie_module; + +struct mca_btl_pcie_reg_t { + mca_mpool_base_registration_t base; + AXON_memory_region_handle handle; +}; +typedef struct mca_btl_pcie_reg_t mca_btl_pcie_reg_t; + +struct mca_btl_pcie_modex_info_t { + char hostname[ORTE_MAX_HOSTNAME_SIZE]; + char devicename[OMPI_PATH_MAX]; +}; +typedef struct mca_btl_pcie_modex_info_t mca_btl_pcie_modex_info_t; +#define MCA_BTL_PCIE_MODEX_INFO_HTON(h) +#define MCA_BTL_PCIE_MODEX_INFO_NTOH(h) + + +/** + * Register TEMPLATE component parameters with the MCA framework + */ +extern int mca_btl_pcie_component_open(void); + +/** + * Any final cleanup before being unloaded. + */ +extern int mca_btl_pcie_component_close(void); + +/** + * TEMPLATE component initialization. 
+ * + * @param num_btl_modules (OUT) Number of BTLs returned in BTL array. + * @param allow_multi_user_threads (OUT) Flag indicating wether BTL supports user threads (TRUE) + * @param have_hidden_threads (OUT) Flag indicating wether BTL uses threads (TRUE) + */ +extern mca_btl_base_module_t** mca_btl_pcie_component_init( + int *num_btl_modules, + bool allow_multi_user_threads, + bool have_hidden_threads +); + + +/** + * TEMPLATE component progress. + */ +extern int mca_btl_pcie_component_progress(void); + + + +/** + * Cleanup any resources held by the BTL. + * + * @param btl BTL instance. + * @return OMPI_SUCCESS or error status on failure. + */ + +extern int mca_btl_pcie_finalize( + struct mca_btl_base_module_t* btl +); + + +/** + * PML->BTL notification of change in the process list. + * + * @param btl (IN) + * @param nprocs (IN) Number of processes + * @param procs (IN) Set of processes + * @param peers (OUT) Set of (optional) peer addressing info. + * @param peers (IN/OUT) Set of processes that are reachable via this BTL. + * @return OMPI_SUCCESS or error status on failure. + * + */ + +extern int mca_btl_pcie_add_procs( + struct mca_btl_base_module_t* btl, + size_t nprocs, + struct ompi_proc_t **procs, + struct mca_btl_base_endpoint_t** peers, + opal_bitmap_t* reachable +); + +/** + * PML->BTL notification of change in the process list. + * + * @param btl (IN) BTL instance + * @param nproc (IN) Number of processes. + * @param procs (IN) Set of processes. + * @param peers (IN) Set of peer data structures. + * @return Status indicating if cleanup was successful + * + */ + +extern int mca_btl_pcie_del_procs( + struct mca_btl_base_module_t* btl, + size_t nprocs, + struct ompi_proc_t **procs, + struct mca_btl_base_endpoint_t** peers +); + + +/** + * Initiate an asynchronous send. 
+ * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL addressing information + * @param descriptor (IN) Description of the data to be transfered + * @param tag (IN) The tag value used to notify the peer. + */ + +extern int mca_btl_pcie_send( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* btl_peer, + struct mca_btl_base_descriptor_t* descriptor, + mca_btl_base_tag_t tag +); + + +/** + * Initiate an asynchronous put. + * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL addressing information + * @param descriptor (IN) Description of the data to be transferred + */ + +extern int mca_btl_pcie_put( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* btl_peer, + struct mca_btl_base_descriptor_t* decriptor +); + + +/** + * Initiate an asynchronous get. + * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL addressing information + * @param descriptor (IN) Description of the data to be transferred + */ + +extern int mca_btl_pcie_get( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* btl_peer, + struct mca_btl_base_descriptor_t* decriptor +); + +/** + * Register a callback function that is called on receipt + * of a fragment. + * + * @param btl (IN) BTL module + * @return Status indicating if registration was successful + * + */ + +extern int mca_btl_pcie_register( + struct mca_btl_base_module_t* btl, + mca_btl_base_tag_t tag, + mca_btl_base_module_recv_cb_fn_t cbfunc, + void* cbdata); + +/** + * Allocate a descriptor with a segment of the requested size. + * Note that the BTL layer may choose to return a smaller size + * if it cannot support the request. + * + * @param btl (IN) BTL module + * @param size (IN) Request segment size. + */ + +extern mca_btl_base_descriptor_t* mca_btl_pcie_alloc( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* endpoint, + uint8_t order, + size_t size, + uint32_t flags); + + +/** + * Return a segment allocated by this BTL. 
+ * + * @param btl (IN) BTL module + * @param descriptor (IN) Allocated descriptor. + */ + +extern int mca_btl_pcie_free( + struct mca_btl_base_module_t* btl, + mca_btl_base_descriptor_t* des); + + +/** + * Prepare a descriptor for send/rdma using the supplied + * convertor. If the convertor references data that is contigous, + * the descriptor may simply point to the user buffer. Otherwise, + * this routine is responsible for allocating buffer space and + * packing if required. + * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL peer addressing + * @param convertor (IN) Data type convertor + * @param reserve (IN) Additional bytes requested by upper layer to precede user data + * @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT) +*/ + +mca_btl_base_descriptor_t* mca_btl_pcie_prepare_src( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* endpoint, + struct mca_mpool_base_registration_t* registration, + struct ompi_convertor_t* convertor, + uint8_t order, + size_t reserve, + size_t* size, + uint32_t flags +); + +extern mca_btl_base_descriptor_t* mca_btl_pcie_prepare_dst( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* endpoint, + struct mca_mpool_base_registration_t* registration, + struct ompi_convertor_t* convertor, + uint8_t order, + size_t reserve, + size_t* size, + uint32_t flags); + + /** + * Fault Tolerance Event Notification Function + * @param state Checkpoint Stae + * @return OMPI_SUCCESS or failure status + */ +int mca_btl_pcie_ft_event(int state); + +char* ompi_btl_pcie_cfg_get_local_device(char* hostname, int core); +char* ompi_btl_pcie_cfg_get_matching_device(char* remote_hostname, + char* remote_device); + + +END_C_DECLS + +#endif /* #ifndef MCA_BTL_PCIE_H */ diff --git a/ompi/mca/btl/pcie/btl_pcie_cfg.c b/ompi/mca/btl/pcie/btl_pcie_cfg.c new file mode 100644 index 0000000000..f329a94d24 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_cfg.c @@ -0,0 +1,196 
@@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include +#include +#include + +#include "opal/util/output.h" +#include "opal/util/os_path.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/mca/installdirs/installdirs.h" + +#include "orte/util/proc_info.h" +#include "orte/util/show_help.h" + +#include "btl_pcie.h" +#include "btl_pcie_lex.h" + +static char *cfg_filename; +static char *key_buffer = NULL; +static size_t key_buffer_len = 0; + +/* + * Local functions + */ +static char* parse_file(char *filename, bool local, char *key); + + +/**************************************************************************/ + +char * +ompi_btl_pcie_cfg_get_local_device(char* hostname, int core) +{ + char *key, *ret, *file; + + file = opal_os_path(false, + opal_install_dirs.sysconfdir, + "mca-btl-pcie-local-resources.cfg", + NULL); + + asprintf(&key, "%s:%d", hostname, core); + ret = parse_file(file, true, key); + free(key); + free(file); + + return ret; +} + +char * +ompi_btl_pcie_cfg_get_matching_device(char* remote_hostname, + char* remote_device) +{ + char *key, *ret, *pos, *file; + + file = opal_os_path(false, + opal_install_dirs.sysconfdir, + "mca-btl-pcie-remote-resources.cfg", + NULL); + + asprintf(&key, "%s:%s", remote_hostname, remote_device); + ret = parse_file(file, false, key); + free(file); + free(key); + + if (ret == NULL) return NULL; + + pos = strchr(ret, ':'); + if (pos == NULL) { + free(ret); + return NULL; + } + + /* make sure this is my hostname */ + *pos = '\0'; + if (0 != strcmp(orte_process_info.nodename, ret)) { + free(ret); + return NULL; + } + + pos++; + pos = strdup(pos); + free(ret); + + return pos; +} + + +/* + * Parse a single file + */ +static char* parse_file(char *filename, bool local, char* key) +{ + int val; + bool me; + char *tmp = NULL; + + /* Open the file */ + cfg_filename = 
filename;
+    btl_pcie_cfg_yyin = fopen(filename, "r");
+    if (NULL == btl_pcie_cfg_yyin) {
+	orte_show_help("help-mpi-btl-pcie.txt", "ini file:file not found",
+		       true, filename);
+	goto cleanup;
+    }
+
+    /* Do the parsing.  Note: every early return below must close
+       btl_pcie_cfg_yyin first -- this function is called once per lookup,
+       so a leaked FILE* accumulates. */
+    btl_pcie_cfg_parse_done = false;
+    btl_pcie_cfg_yynewlines = 1;
+    btl_pcie_cfg_init_buffer(btl_pcie_cfg_yyin);
+    while (!btl_pcie_cfg_parse_done) {
+	val = btl_pcie_cfg_yylex();
+	switch (val) {
+	case BTL_PCIE_CFG_PARSE_DONE:
+	    /* This will also set btl_pcie_cfg_parse_done to true, so just
+	       break here */
+	    break;
+
+	case BTL_PCIE_CFG_PARSE_NEWLINE:
+	    /* blank line! ignore it */
+	    break;
+
+	case BTL_PCIE_CFG_PARSE_HOSTNAME_CORE:
+	    if (!local) {
+		fclose(btl_pcie_cfg_yyin); return NULL;
+	    }
+
+	    if (0 == strcmp(key, btl_pcie_cfg_yytext)) {
+		me = true;
+	    } else {
+		me = false;
+	    }
+
+	    val = btl_pcie_cfg_yylex();
+	    if (BTL_PCIE_CFG_PARSE_DEVICE != val) {
+		abort();
+	    }
+
+	    if (me) { fclose(btl_pcie_cfg_yyin); return strdup(btl_pcie_cfg_yytext); }
+
+	    break;
+
+	case BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE:
+	    if (local) {
+		fclose(btl_pcie_cfg_yyin); return NULL;
+	    }
+
+	    if (0 == strcmp(key, btl_pcie_cfg_yytext)) {
+		me = true;
+	    } else {
+		tmp = strdup(btl_pcie_cfg_yytext);
+		me = false;
+	    }
+
+	    val = btl_pcie_cfg_yylex();
+	    if (BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE != val) {
+		abort();
+	    }
+
+	    if (me) {
+		fclose(btl_pcie_cfg_yyin); return strdup(btl_pcie_cfg_yytext);
+	    } else {
+		if (0 == strcmp(key, btl_pcie_cfg_yytext)) {
+		    fclose(btl_pcie_cfg_yyin); return tmp;
+		} else {
+		    free(tmp);
+		}
+	    }
+
+	    break;
+
+	default:
+	    fclose(btl_pcie_cfg_yyin); return NULL;
+	    break;
+	}
+    }
+    fclose(btl_pcie_cfg_yyin);
+
+cleanup:
+    if (NULL != key_buffer) {
+	free(key_buffer);
+	key_buffer = NULL;
+	key_buffer_len = 0;
+    }
+
+    return NULL;
+}
+
diff --git a/ompi/mca/btl/pcie/btl_pcie_component.c b/ompi/mca/btl/pcie/btl_pcie_component.c
new file mode 100644
index 0000000000..0c7f4c8d09
--- /dev/null
+++ b/ompi/mca/btl/pcie/btl_pcie_component.c
@@ -0,0 +1,487 @@
+/*
+ * Copyright (c) 2007 Los Alamos National Security, LLC.
+ * All righs reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "ompi/constants.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "opal/event/event.h" +#include "opal/util/argv.h" +#include "opal/util/if.h" +#include "opal/util/output.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/mca/paffinity/paffinity.h" +#include "opal/mca/paffinity/base/base.h" + +#include "orte/util/proc_info.h" +#include "orte/util/show_help.h" +#include "orte/mca/errmgr/errmgr.h" + +#include "ompi/constants.h" +#include "ompi/datatype/convertor.h" +#include "ompi/mca/btl/btl.h" +#include "ompi/mca/btl/base/base.h" +#include "ompi/mca/btl/base/btl_base_error.h" +#include "ompi/mca/mpool/base/base.h" +#include "ompi/mca/mpool/rdma/mpool_rdma.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/runtime/ompi_module_exchange.h" + +#include "btl_pcie.h" +#include "btl_pcie_frag.h" +#include "btl_pcie_endpoint.h" +#include "btl_pcie_ddriver.h" + + +static int pcie_reg_mr(void *reg_data, void *base, size_t size, + mca_mpool_base_registration_t *reg); +static int pcie_dereg_mr(void* reg_data, mca_mpool_base_registration_t *reg); + + +mca_btl_pcie_component_t mca_btl_pcie_component = { + { + /* First, the mca_base_component_t struct containing meta information + about the component itself */ + + { + /* Indicate that we are a pml v2.0.0 component (which also implies a + specific MCA version) */ + + MCA_BTL_BASE_VERSION_2_0_0, + + "pcie", /* MCA component name */ + OMPI_MAJOR_VERSION, /* MCA component major version */ + OMPI_MINOR_VERSION, /* MCA component minor version */ + OMPI_RELEASE_VERSION, /* MCA component release version */ + mca_btl_pcie_component_open, /* component open */ + mca_btl_pcie_component_close /* component close */ + }, + + /* Next the MCA v2.0.0 component meta data */ + + { + false + }, + + mca_btl_pcie_component_init, + mca_btl_pcie_component_progress, + } +}; + + +/* + * utility 
routines for parameter registration + */ +static char* +mca_btl_pcie_param_register_string(const char* param_name, + const char* param_desc, + const char* default_value) +{ + char *value; + + mca_base_param_reg_string(&mca_btl_pcie_component.super.btl_version, + param_name, param_desc, false, false, + default_value, &value); + return value; +} + + +static int +mca_btl_pcie_param_register_int(const char* param_name, + const char* param_desc, + int default_value) +{ + int value; + + mca_base_param_reg_int(&mca_btl_pcie_component.super.btl_version, + param_name, param_desc, false, false, + default_value, &value); + return value; +} + + +/* + * Register PCIE device found in local config file. The MCA framework + * will make this available to all peers. + */ +static int +btl_pcie_modex_send(void) +{ + size_t size; + unsigned int i; + mca_btl_pcie_modex_info_t *info; + + size = mca_btl_pcie_component.pcie_num_btls * + sizeof(mca_btl_pcie_modex_info_t); + info = malloc(size); + if (NULL == info) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + + for (i = 0 ; i < mca_btl_pcie_component.pcie_num_btls ; ++i) { + strncpy(info[i].hostname, + orte_process_info.nodename, + ORTE_MAX_HOSTNAME_SIZE - 1); + info[i].hostname[ORTE_MAX_HOSTNAME_SIZE - 1] = '\0'; + strncpy(info[i].devicename, + mca_btl_pcie_component.pcie_btls[i].lcl_dev_name, + OMPI_PATH_MAX - 1); + info[i].devicename[OMPI_PATH_MAX - 1] = '\0'; + MCA_BTL_PCIE_MODEX_INFO_HTON(info[i]); + } + +#if (OMPI_MAJOR_VERSION <= 1) && (OMPI_MINOR_VERSION <= 2) + return mca_pml_base_modex_send(&mca_btl_pcie_component.super.btl_version, info, size); +#else + return ompi_modex_send(&mca_btl_pcie_component.super.btl_version, info, size); +#endif +} + + +/* + * Called by MCA framework to open the component, registers + * component parameters. 
+ */ +int +mca_btl_pcie_component_open(void) +{ + /* initialize state */ + mca_btl_pcie_component.pcie_num_btls = 0; + mca_btl_pcie_component.pcie_btls = NULL; + + /* initialize objects */ + OBJ_CONSTRUCT(&mca_btl_pcie_component.pcie_procs, opal_list_t); + + /* component parameters */ + mca_btl_pcie_component.pcie_free_list_num = + mca_btl_pcie_param_register_int ("free_list_num", + "Initial size of free lists (must be >= 1)", + 16); + /* BWB - FIX ME - The need to limit the free list max size is an + artifact of the lack of flow control in the BTL. Since we're + already using bounce fragments, it should be possible to make + this unlimited, and then properly handle the case where an SMA + region isn't available when send is called on a given frag. + Something similar to what Open IB does when we don't have send + credits would work really well here. See comment in + btl_pcie_send() for more information. */ + mca_btl_pcie_component.pcie_free_list_max = + mca_btl_pcie_param_register_int ("free_list_max", + "Max size of free lists. " + "free_list_max * (first_frag_size + max_send_size) " + "must be less than (SMA memory size - (recv_queue_len * 4) - 8)", + 32); + mca_btl_pcie_component.pcie_free_list_inc = + mca_btl_pcie_param_register_int ("free_list_inc", + "Increment size of free lists (must be >= 1)", + 8); + + mca_btl_pcie_component.pcie_send_mpool_name = + mca_btl_pcie_param_register_string("send_mpool", + "Name of the memory pool to be used for send messages. " + "(it is unlikely that you will ever want to change this)", + "pcie"); + + mca_btl_pcie_component.pcie_dma_mpool_name = + mca_btl_pcie_param_register_string("dma_mpool", + "Name of the memory pool to be used for rdma messages. " + "(it is unlikely that you will ever want to change this)", + "rdma"); + + mca_btl_pcie_component.pcie_recv_queue_len = + mca_btl_pcie_param_register_int("recv_queue_len", + "Length of receive fifo. 
Must be 4 * free_list_max", + 256); + + mca_btl_pcie_module.super.btl_exclusivity = + mca_btl_pcie_param_register_int ("exclusivity", + "Priority of PCIe BTL. (must be > 0)", + MCA_BTL_EXCLUSIVITY_DEFAULT + 1); + + mca_btl_pcie_module.super.btl_eager_limit = + mca_btl_pcie_param_register_int ("first_frag_size", + "Size (in bytes) of the first fragment sent of any " + "message. It is the maximum size of \"short\" messages " + "and the maximum size of the \"phase 1\" fragment sent " + "for all large messages (must be >= 1).", + 1*1024) - sizeof(mca_btl_pcie_header_t); + mca_btl_pcie_module.super.btl_rndv_eager_limit = + mca_btl_pcie_param_register_int ("btl_rndv_eager_limit", + "Minimum message size (in bytes) that will be striped " + "across multiple network devices when using " + "send/receive semantics. Messages shorter than this " + "size will be sent across a single network (must be >= " + "1)", + 2*1024) - sizeof(mca_btl_pcie_header_t); + mca_btl_pcie_module.super.btl_max_send_size = + mca_btl_pcie_param_register_int ("max_send_size", + "Maximum size (in bytes) of a single \"phase 2\" fragment " + "of a long message when using the pipeline protocol " + "(must be >= 1)", + 4*1024) - sizeof(mca_btl_pcie_header_t); + mca_btl_pcie_module.super.btl_rdma_pipeline_send_length = + mca_btl_pcie_param_register_int("rdma_pipeline_send_length", + "Length of the \"phase 2\" portion of a large message (in " + "bytes) when using the pipeline protocol. This part of " + "the message will be split into fragments of size " + "max_send_size and sent using send/receive semantics " + "(must be >= 0; only relevant when the PUT flag is " + "set)", + 12*1024); + mca_btl_pcie_module.super.btl_rdma_pipeline_frag_size = + mca_btl_pcie_param_register_int("rdma_pipeline_frag_size", + "Maximum size (in bytes) of a single \"phase 3\" fragment " + "from a long message when using the pipeline protocol. 
" + "These fragments will be sent using RDMA semantics " + "(must be >= 1; only relevant when the PUT flag is " + "set)", + 2*1024*1024); + mca_btl_pcie_module.super.btl_min_rdma_pipeline_size = + mca_btl_pcie_param_register_int("min_rdma_pipeline_size", + "Messages smaller than this size (in bytes) will not " + "use the RDMA pipeline protocol. Instead, they will be " + "split into fragments of max_send_size and sent using " + "send/receive semantics (must be >=0, and is " + "automatically adjusted up to at least " + "(eager_limit+btl_rdma_pipeline_send_length); only " + "relevant when the PUT flag is set)", + 16 * 1024); + + mca_btl_pcie_module.super.btl_flags = + mca_btl_pcie_param_register_int("flags", + "BTL control flags. Defaults to (SEND|PUT|HETEROGENEOUS_RDMA)", +#ifdef MCA_BTL_FLAGS_HETEROGENEOUS_RDMA + MCA_BTL_FLAGS_HETEROGENEOUS_RDMA | +#endif + MCA_BTL_FLAGS_SEND | + MCA_BTL_FLAGS_PUT); + + return OMPI_SUCCESS; +} + + +int +mca_btl_pcie_component_close(void) +{ + return OMPI_SUCCESS; +} + + +mca_btl_base_module_t** +mca_btl_pcie_component_init(int *num_btl_modules, + bool enable_progress_threads, + bool enable_mpi_threads) +{ + cpu_set_t cpu_set; + unsigned int i; + int num_cpus, *cpus; + struct stat stat_buf; + struct mca_mpool_base_resources_t mpool_resources; + mca_btl_base_module_t **btl_array; + + *num_btl_modules = 0; + + /* find all cpus we're bound to */ + cpus = malloc(CPU_SETSIZE * sizeof(int)); + memset(cpus, 0, CPU_SETSIZE * sizeof(int)); + num_cpus = 0; + CPU_ZERO(&cpu_set); + + sched_getaffinity(0, sizeof(cpu_set), &cpu_set); + for (i = 0 ; i < CPU_SETSIZE ; ++i) { + if (CPU_ISSET(i, &cpu_set)) cpus[num_cpus++] = i; + } +#if defined(__PPC__) + if (num_cpus > 1) { + orte_show_help("help-mpi-btl-pcie.txt", "initialization:more-than-one-cpu", + true, num_cpus); + return NULL; + } +#endif /* #ifdef __PPC__ */ + if (0 == num_cpus) { + orte_show_help("help-mpi-btl-pcie.txt", "initialization:no-cpus", + true); + return NULL; + } + + /* Create the 
module storage space */
+    mca_btl_pcie_component.pcie_num_btls = num_cpus;
+    mca_btl_pcie_component.pcie_btls = malloc(mca_btl_pcie_component.pcie_num_btls *
+					      sizeof(struct mca_btl_pcie_module_t));
+    btl_array = malloc(mca_btl_pcie_component.pcie_num_btls *
+		       sizeof(mca_btl_base_module_t*));
+
+    /* initialize the modules */
+    for (i = 0 ; i < mca_btl_pcie_component.pcie_num_btls ; ++i) {
+	mca_btl_pcie_module_t *btl = &(mca_btl_pcie_component.pcie_btls[i]);
+
+	btl_array[i] = (mca_btl_base_module_t*) btl;
+
+	memcpy(btl, &mca_btl_pcie_module, sizeof(mca_btl_pcie_module_t));
+
+	/* check if we have a device listed in our local config file */
+	btl->lcl_dev_name =
+	    ompi_btl_pcie_cfg_get_local_device(orte_process_info.nodename, cpus[i]);
+	BTL_VERBOSE(("Local device for %s:%d = %s", orte_process_info.nodename, cpus[i],
+		     btl->lcl_dev_name));
+
+	/* make sure said device is sane */
+	if(stat(btl->lcl_dev_name, &stat_buf)) {
+	    BTL_ERROR(("Error %s opening device %s\n", strerror(errno),
+		       btl->lcl_dev_name));
+	    free(cpus); return NULL;  /* don't leak the cpu map on error */
+	}
+
+	OBJ_CONSTRUCT(&btl->pcie_sma_buf_eager, ompi_free_list_t);
+	OBJ_CONSTRUCT(&btl->pcie_sma_buf_max, ompi_free_list_t);
+
+	OBJ_CONSTRUCT(&btl->pcie_frag_eager, ompi_free_list_t);
+	OBJ_CONSTRUCT(&btl->pcie_frag_max, ompi_free_list_t);
+
+	OBJ_CONSTRUCT(&btl->pcie_frag_dma, ompi_free_list_t);
+
+	OBJ_CONSTRUCT(&btl->pcie_lock, opal_mutex_t);
+
+	/* time to setup DMA mpool */
+	mpool_resources.reg_data = (void*) btl;
+	mpool_resources.sizeof_reg = sizeof(mca_btl_pcie_reg_t);
+	mpool_resources.register_mem = pcie_reg_mr;
+	mpool_resources.deregister_mem = pcie_dereg_mr;
+	btl->rdma_mpool =
+	    mca_mpool_base_module_create("rdma",
+					 &btl->super,
+					 &mpool_resources);
+	btl->super.btl_mpool = btl->rdma_mpool;
+
+	btl->active = false;
+    }
+
+    /* push our address info to everyone */
+    free(cpus); btl_pcie_modex_send();  /* cpu map no longer needed */
+
+    *num_btl_modules = mca_btl_pcie_component.pcie_num_btls;
+    return btl_array;
+}
+
+
+int
+mca_btl_pcie_component_progress()
+{
+    unsigned int 
i; + btl_pcie_fifo_entry_t msg_idx; + int count = 0; + + for (i = 0 ; i < mca_btl_pcie_component.pcie_num_btls ; ++i) { + mca_btl_pcie_module_t *pcie_btl = + &(mca_btl_pcie_component.pcie_btls[i]); + mca_btl_base_endpoint_t *endpoint = pcie_btl->endpoint; + + if (!pcie_btl->active) continue; + + msg_idx = ompi_btl_pcie_fifo_get_msg(&endpoint->recv_fifo); + + /* Potential optimization is to drain every time we enter progress */ + if (msg_idx) { + int rc; + int ack = ((msg_idx & BTL_PCIE_FIFO_TYPE_MASK) == BTL_PCIE_FIFO_TYPE_ACK) ? 1 : 0; + msg_idx &= BTL_PCIE_FIFO_DATA_MASK; + + if (ack) { + /* we have a send frag ack */ + mca_btl_pcie_frag_t *frag = (mca_btl_pcie_frag_t*) msg_idx; + mca_btl_pcie_sma_buf_t *buf = frag->sma_buf; + + BTL_VERBOSE(("received ack for frag %lx (0x%lx)", msg_idx, frag)); + + /* Done with buffer, can return now */ + MCA_BTL_PCIE_SMA_BUF_RETURN(pcie_btl, buf, rc); + + frag->base.des_cbfunc(&pcie_btl->super, endpoint, + &(frag->base), + OMPI_SUCCESS); + + /* return the send credit */ + ompi_btl_pcie_fifo_complete_msg(&endpoint->send_fifo, 1); + count++; + } else { + /* we have a send frag (incoming data) */ + mca_btl_pcie_frag_t *recv_frag = &pcie_btl->pcie_recv_frag; + mca_btl_pcie_header_t *hdr = (mca_btl_pcie_header_t*) (endpoint->lcl_frag_base + msg_idx); + recv_frag->hdr = hdr; + OMPI_BTL_PCIE_HEADER_NTOH((*recv_frag->hdr)); + recv_frag->segment.seg_addr.pval = ((unsigned char*) recv_frag->hdr) + sizeof(mca_btl_pcie_header_t); + recv_frag->segment.seg_len = recv_frag->hdr->length; + BTL_VERBOSE(("received tag %d, base 0x%lx", recv_frag->hdr->tag, &recv_frag->base)); + pcie_btl->pcie_reg[recv_frag->hdr->tag].cbfunc(&pcie_btl->super, + recv_frag->hdr->tag, &recv_frag->base, + pcie_btl->pcie_reg[recv_frag->hdr->tag].cbdata); + + rc = ompi_btl_pcie_fifo_set_msg(&endpoint->send_fifo, hdr->send_frag.lval); + /* BWB - FIX ME - this is only safe if the number of + queue entries is twice the free list size */ + 
ompi_btl_pcie_fifo_complete_msg(&endpoint->send_fifo, 1);
+		count++;
+	    }
+	}
+    }
+
+    return count;
+}
+
+
+static int
+pcie_reg_mr(void *reg_data, void *base, size_t size,
+	    mca_mpool_base_registration_t *reg)
+{
+    mca_btl_pcie_module_t * pcie_btl = (mca_btl_pcie_module_t*) reg_data;
+    mca_btl_pcie_endpoint_t * endpoint = pcie_btl->endpoint;
+    mca_btl_pcie_reg_t * pcie_reg = (mca_btl_pcie_reg_t*) reg;
+
+    if(dd_register_memory_region(&endpoint->pcie_adapter,
+				 &pcie_reg->handle,
+				 base,
+				 size,
+				 DD_ALLOW_LOCAL_READ |
+				 DD_ALLOW_LOCAL_WRITE |
+				 DD_ALLOW_REMOTE_ACCESS |
+				 DD_ALLOW_REMOTE_READ |
+				 DD_ALLOW_REMOTE_WRITE )) {
+	BTL_ERROR(("error registering memory!\n"));
+	return OMPI_ERROR;
+    }
+
+    return OMPI_SUCCESS;
+}
+
+
+static int
+pcie_dereg_mr(void* reg_data, mca_mpool_base_registration_t *reg)
+{
+    mca_btl_pcie_module_t * pcie_btl = (mca_btl_pcie_module_t*) reg_data;
+    mca_btl_pcie_endpoint_t * endpoint = pcie_btl->endpoint;
+    mca_btl_pcie_reg_t * pcie_reg = (mca_btl_pcie_reg_t*) reg;
+
+    if(pcie_reg->handle >= 0) {
+	if(dd_deregister_memory_region(&endpoint->pcie_adapter,
+				       &pcie_reg->handle)) {
+	    BTL_ERROR(("error deregistering memory!\n"));
+	    return OMPI_ERROR;
+	}
+    } else {
+	return OMPI_ERROR;
+    }
+
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/btl/pcie/btl_pcie_endpoint.c b/ompi/mca/btl/pcie/btl_pcie_endpoint.c
new file mode 100644
index 0000000000..358a5758bc
--- /dev/null
+++ b/ompi/mca/btl/pcie/btl_pcie_endpoint.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright (c) 2007 Los Alamos National Security, LLC.
+ * All righs reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include +#include + +#include "opal/align.h" + +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/oob/base/base.h" +#include "orte/mca/rml/rml.h" + +#include "ompi/types.h" +#include "ompi/mca/btl/base/btl_base_error.h" +#include "ompi/mca/mpool/mpool.h" +#include "ompi/mca/mpool/base/base.h" +#include "ompi/mca/mpool/pcie/mpool_pcie.h" + +#include "btl_pcie.h" +#include "btl_pcie_endpoint.h" +#include "btl_pcie_proc.h" +#include "btl_pcie_frag.h" + +/* + * Initialize state of the endpoint instance. + * + */ + +static void mca_btl_pcie_endpoint_construct(mca_btl_base_endpoint_t* endpoint) +{ + endpoint->endpoint_btl = 0; + endpoint->endpoint_proc = 0; +} + +/* + * Destroy a endpoint + * + */ + +static void mca_btl_pcie_endpoint_destruct(mca_btl_base_endpoint_t* endpoint) +{ +} + + +OBJ_CLASS_INSTANCE( + mca_btl_pcie_endpoint_t, + opal_list_item_t, + mca_btl_pcie_endpoint_construct, + mca_btl_pcie_endpoint_destruct); + + + +/* + * Initialize an endpoint + */ +int mca_btl_pcie_endpoint_init(mca_btl_base_endpoint_t* endpoint) +{ + int rc; + mca_btl_pcie_module_t* pcie_btl = + endpoint->endpoint_btl; + mca_mpool_base_resources_t mpool_resources; + size_t fifo_buffer_len, current_offset = 0; + + /* Open our device */ + rc = dd_open(endpoint->lcl_dev_name, + &endpoint->pcie_adapter); + if( 0 != rc) { + BTL_ERROR(("Failed to open pcie device dd_open says : %d\n", rc)); + return OMPI_ERROR; + } + + /* fill in endpoint data for begining of resources */ + endpoint->lcl_sma_ptr = endpoint->pcie_adapter.local_sma_address; + if(NULL == endpoint->lcl_sma_ptr) { + BTL_ERROR(("Error: local sma address is null\n")); + return OMPI_ERROR; + } + + endpoint->rem_sma_ptr = endpoint->pcie_adapter.remote_sma_address; + if(NULL == endpoint->rem_sma_ptr) { + BTL_ERROR(("Error: remote sma address is null\n")); + return OMPI_ERROR; + } + + BTL_VERBOSE(("SMA for device %s: 
local=0x%lx,%d remote=0x%lx,%d", + endpoint->lcl_dev_name, + endpoint->lcl_sma_ptr, + endpoint->pcie_adapter.local_sma_size, + endpoint->rem_sma_ptr, + endpoint->pcie_adapter.remote_sma_size)); + + /* 16 bytes of the buffer reserved for the 8 byte local DMA completion */ + endpoint->lcl_dma_status = ((char*) endpoint->lcl_sma_ptr) + current_offset; + current_offset += 16; + + /* fifo_buffer_len bytes reserved for fifos */ + fifo_buffer_len = sizeof(btl_pcie_fifo_entry_t) * mca_btl_pcie_component.pcie_recv_queue_len; + + rc = ompi_btl_pcie_fifo_init_send(&(endpoint->send_fifo), + mca_btl_pcie_component.pcie_recv_queue_len, + ((char*) endpoint->rem_sma_ptr) + current_offset); + if (OMPI_SUCCESS != rc) { + BTL_ERROR(("Error: Failed to init send fifo: %d", rc)); + return rc; + } + + rc = ompi_btl_pcie_fifo_init_recv(&(endpoint->recv_fifo), + mca_btl_pcie_component.pcie_recv_queue_len, + ((char*) endpoint->lcl_sma_ptr) + current_offset, + fifo_buffer_len); + if (OMPI_SUCCESS != rc) { + BTL_ERROR(("Error: Failed to init recv fifo: %d", rc)); + return rc; + } + + current_offset += fifo_buffer_len; + + /* reserve rest of the space for the mpool */ + endpoint->rem_frag_base = + ((char*) endpoint->rem_sma_ptr) + current_offset; + + endpoint->lcl_frag_base = + ((char*) endpoint->lcl_sma_ptr) + current_offset; + + /* don't need to align this one as the free list */ + /* will take care of it. 
*/ + mpool_resources.base = endpoint->rem_frag_base; + mpool_resources.len = endpoint->pcie_adapter.remote_sma_size - + current_offset; + + /* setup my pcie mpool */ + pcie_btl->pcie_mpool = + mca_mpool_base_module_create(mca_btl_pcie_component.pcie_send_mpool_name, + pcie_btl, + &mpool_resources); + + /* setup the modules free lists and such as we now */ + /* have enough info to setup the mpool */ + + /* eager SMA communication buffers */ +#if (OMPI_MAJOR_VERSION <= 1) && (OMPI_MINOR_VERSION <= 2) + ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_eager), + sizeof(mca_btl_pcie_sma_buf_eager_t) + + mca_btl_pcie_module.super.btl_eager_limit, + sizeof(mca_btl_pcie_sma_buf_eager_t), + MCA_BTL_PCIE_FRAG_ALIGN, + OBJ_CLASS(mca_btl_pcie_sma_buf_eager_t), + mca_btl_pcie_component.pcie_free_list_num, + mca_btl_pcie_component.pcie_free_list_max, + mca_btl_pcie_component.pcie_free_list_inc, + pcie_btl->pcie_mpool); + + /* max size SMA communication buffers */ + ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_max), + sizeof(mca_btl_pcie_sma_buf_max_t) + + mca_btl_pcie_module.super.btl_max_send_size, + sizeof(mca_btl_pcie_sma_buf_max_t), + MCA_BTL_PCIE_FRAG_ALIGN, + OBJ_CLASS(mca_btl_pcie_sma_buf_max_t), + mca_btl_pcie_component.pcie_free_list_num, + mca_btl_pcie_component.pcie_free_list_max, + mca_btl_pcie_component.pcie_free_list_inc, + pcie_btl->pcie_mpool); + + /* User eager fragment buffer */ + ompi_free_list_init_ex(&(pcie_btl->pcie_frag_eager), + sizeof(mca_btl_pcie_frag_eager_t) + + mca_btl_pcie_module.super.btl_eager_limit, + sizeof(mca_btl_pcie_frag_eager_t), + MCA_BTL_PCIE_FRAG_ALIGN, + OBJ_CLASS(mca_btl_pcie_frag_eager_t), + mca_btl_pcie_component.pcie_free_list_num, + mca_btl_pcie_component.pcie_free_list_max, + mca_btl_pcie_component.pcie_free_list_inc, + NULL); + + /* User max size fragment buffer */ + ompi_free_list_init_ex(&(pcie_btl->pcie_frag_max), + sizeof(mca_btl_pcie_frag_max_t) + + mca_btl_pcie_module.super.btl_max_send_size, + 
sizeof(mca_btl_pcie_frag_max_t), + MCA_BTL_PCIE_FRAG_ALIGN, + OBJ_CLASS(mca_btl_pcie_frag_max_t), + mca_btl_pcie_component.pcie_free_list_num, + mca_btl_pcie_component.pcie_free_list_max, + mca_btl_pcie_component.pcie_free_list_inc, + NULL); +#else + ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_eager), + mca_btl_pcie_module.super.btl_eager_limit, + MCA_BTL_PCIE_FRAG_ALIGN, + OBJ_CLASS(mca_btl_pcie_sma_buf_eager_t), + mca_btl_pcie_component.pcie_free_list_num, + mca_btl_pcie_component.pcie_free_list_max, + mca_btl_pcie_component.pcie_free_list_inc, + pcie_btl->pcie_mpool, + NULL, + NULL); + + /* max size SMA communication buffers */ + ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_max), + mca_btl_pcie_module.super.btl_max_send_size, + MCA_BTL_PCIE_FRAG_ALIGN, + OBJ_CLASS(mca_btl_pcie_sma_buf_max_t), + mca_btl_pcie_component.pcie_free_list_num, + mca_btl_pcie_component.pcie_free_list_max, + mca_btl_pcie_component.pcie_free_list_inc, + pcie_btl->pcie_mpool, + NULL, + NULL); + + /* User eager fragment buffer */ + ompi_free_list_init_ex(&(pcie_btl->pcie_frag_eager), + mca_btl_pcie_module.super.btl_eager_limit, + MCA_BTL_PCIE_FRAG_ALIGN, + OBJ_CLASS(mca_btl_pcie_frag_eager_t), + mca_btl_pcie_component.pcie_free_list_num, + mca_btl_pcie_component.pcie_free_list_max, + mca_btl_pcie_component.pcie_free_list_inc, + NULL, + NULL, + NULL); + + /* User max size fragment buffer */ + ompi_free_list_init_ex(&(pcie_btl->pcie_frag_max), + mca_btl_pcie_module.super.btl_max_send_size, + MCA_BTL_PCIE_FRAG_ALIGN, + OBJ_CLASS(mca_btl_pcie_frag_max_t), + mca_btl_pcie_component.pcie_free_list_num, + mca_btl_pcie_component.pcie_free_list_max, + mca_btl_pcie_component.pcie_free_list_inc, + NULL, + NULL, + NULL); +#endif + + /* dma frags. 
note that we can only have 16 outstanding memory + handles so we cannot currently support leave_pinned and we must + limit the number of outstanding DMAs via the free list of DMA + frags */ + ompi_free_list_init(&(pcie_btl->pcie_frag_dma), + sizeof(mca_btl_pcie_frag_dma_t), + OBJ_CLASS(mca_btl_pcie_frag_dma_t), + 16, + 16, + 0, + NULL); + + /* recv frag */ + OBJ_CONSTRUCT(&(pcie_btl->pcie_recv_frag), + mca_btl_pcie_frag_recv_t); + + pcie_btl->endpoint = endpoint; + pcie_btl->active = true; + + return OMPI_SUCCESS; +} + +/* + * Finalize an endpoint + */ +int mca_btl_pcie_endpoint_fini(mca_btl_base_endpoint_t* endpoint) +{ + return OMPI_SUCCESS; +} diff --git a/ompi/mca/btl/pcie/btl_pcie_endpoint.h b/ompi/mca/btl/pcie/btl_pcie_endpoint.h new file mode 100644 index 0000000000..e840bef514 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_endpoint.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_BTL_PCIE_ENDPOINT_H +#define MCA_BTL_PCIE_ENDPOINT_H + +#include "ompi_config.h" + +#include "opal/class/opal_list.h" +#include "opal/event/event.h" + +#include "ompi/mca/btl/btl.h" +#include "ompi/mca/pml/pml.h" + +#include "btl_pcie_ddriver.h" +#include "btl_pcie_frag.h" +#include "btl_pcie.h" +#include "btl_pcie_fifo.h" + +BEGIN_C_DECLS + +/** + * An abstraction that represents a connection to a endpoint process. + * An instance of mca_btl_base_endpoint_t is associated w/ each process + * and BTL pair at startup. 
However, connections to the endpoint + * are established dynamically on an as-needed basis: + */ + +struct mca_btl_base_endpoint_t { + opal_list_item_t super; + + struct mca_btl_pcie_module_t* endpoint_btl; + /**< BTL instance that created this connection */ + + struct mca_btl_pcie_proc_t* endpoint_proc; + /**< proc structure corresponding to endpoint */ + + /** the name of the remote PCIE device */ + char* rem_dev_name; + /** the name of the local PCIE device */ + char* lcl_dev_name; + + /** the pcie adapter - returned by dd_open */ + DD_adapter_handle pcie_adapter; + + /** local pcie SMA memory for this endpoint */ + char *lcl_sma_ptr; + + /** remote pcie SMA memory for this endpoint */ + char *rem_sma_ptr; + + /** remote fragment starting point (in which to + * deliver data via "rdma" write + */ + char *rem_frag_base; + char *lcl_frag_base; + + char *lcl_dma_status; + + btl_pcie_fifo_t recv_fifo; + + btl_pcie_fifo_t send_fifo; + + +}; + +typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t; + +typedef mca_btl_base_endpoint_t mca_btl_pcie_endpoint_t; +OBJ_CLASS_DECLARATION(mca_btl_pcie_endpoint_t); + + +/* + * Initialize an endpoint + */ +int mca_btl_pcie_endpoint_init(mca_btl_base_endpoint_t* endpoint); + +/* + * Finalize an endpoint + */ +int mca_btl_pcie_endpoint_fini(mca_btl_base_endpoint_t* endpoint); + +END_C_DECLS + +#endif /* #ifndef MCA_BTL_PCIE_ENDPOINT_H */ diff --git a/ompi/mca/btl/pcie/btl_pcie_fifo.c b/ompi/mca/btl/pcie/btl_pcie_fifo.c new file mode 100644 index 0000000000..bdf56df4e2 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_fifo.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include <string.h>
+
+#include "opal/threads/mutex.h"
+#include "opal/types.h"
+#include "ompi/constants.h"
+
+#include "btl_pcie_fifo.h"
+
+static uint32_t
+get_mask(unsigned int len)
+{
+    int pop_count, highest_used_bit, tmp_input_integer;
+    unsigned int pow;
+
+    /* init counters */
+    if (0 == len) return 0; pop_count = 0;  /* guard: len==0 would shift by -1 below (UB) */
+    highest_used_bit = -1;  /* start at -1 so the loop leaves log2(len) for a power of two */
+
+    /* get population count and highest non-zero bit */
+    tmp_input_integer = len;
+    while (tmp_input_integer > 0) {
+	pop_count += (tmp_input_integer & 1);
+	highest_used_bit++;
+	tmp_input_integer >>= 1;  /* was ">> 1": no-effect statement -> infinite loop */
+    }
+    if (1 < pop_count) {
+	/* round up */
+	highest_used_bit++;
+    }
+
+    /* generate power value: equals len only when len is a power of two */
+    pow = 1 << highest_used_bit;
+
+    if (pow != len) return 0;
+    return pow - 1;
+}
+
+
+int
+ompi_btl_pcie_fifo_init_send(btl_pcie_fifo_t *fifo,
+			     unsigned int fifo_len,
+			     void *queue_space)
+{
+    fifo->fifo_len = fifo_len;
+    fifo->current_index = 0;
+    fifo->num_outstanding = 0;
+    fifo->mask = get_mask(fifo_len);
+    fifo->queue = queue_space;
+
+    if (fifo->mask == 0) return OMPI_ERROR;
+
+    return OMPI_SUCCESS;
+}
+
+
+int
+ompi_btl_pcie_fifo_init_recv(btl_pcie_fifo_t *fifo,
+			     unsigned int fifo_len,
+			     void *queue_space,
+			     size_t queue_space_len)
+{
+    fifo->fifo_len = fifo_len;
+    fifo->current_index = 1;
+    fifo->num_outstanding = 0;
+    fifo->mask = get_mask(fifo_len);
+    fifo->queue = queue_space;
+
+    if (fifo->mask == 0) return OMPI_ERROR;
+
+    if (fifo_len * sizeof(btl_pcie_fifo_entry_t) > queue_space_len) {
+	return OMPI_ERROR;
+    }
+
+    /* initialize the queue to empty */
+    memset(fifo->queue, 0, fifo_len * sizeof(btl_pcie_fifo_entry_t));
+
+    return OMPI_SUCCESS;
+}
+
+
+int
+ompi_btl_pcie_fifo_finalize(btl_pcie_fifo_t *fifo)
+{
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/btl/pcie/btl_pcie_fifo.h b/ompi/mca/btl/pcie/btl_pcie_fifo.h
new file mode 100644
index 0000000000..c9d0a7b0d7
--- /dev/null
+++ b/ompi/mca/btl/pcie/btl_pcie_fifo.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2007 Los Alamos National Security, LLC.
+ *                    All righs reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef BTL_PCIE_FIFO_H
+#define BTL_PCIE_FIFO_H
+
+#include "ompi_config.h"
+#include "ompi/constants.h"
+
+#include "opal/threads/mutex.h"
+#include "opal/types.h"
+#include "ompi/mca/btl/base/btl_base_error.h"
+
+BEGIN_C_DECLS
+
+/* A fifo entry is a 64-bit word: the high bit encodes the entry type
+   (ack vs. send) and the low 63 bits carry the payload data.  The
+   all-zero value is reserved to mean "slot empty" (see get_msg). */
+typedef uint64_t btl_pcie_fifo_entry_t;
+#define BTL_PCIE_FIFO_TYPE_MASK 0x8000000000000000
+#define BTL_PCIE_FIFO_DATA_MASK 0x7FFFFFFFFFFFFFFF
+#define BTL_PCIE_FIFO_TYPE_ACK  0x0000000000000000
+#define BTL_PCIE_FIFO_TYPE_SEND 0x8000000000000000
+
+struct btl_pcie_fifo_t {
+    /* number of entries in queue */
+    uint32_t fifo_len;
+    /* for sender: next place to write
+     * for receiver: next place to read */
+    uint32_t current_index;
+    /* for sender: number of entries "in flight".  Must always be less
+       than or equal to fifo_len */
+    uint32_t num_outstanding;
+    /* index mask applied to current_index; fifo_len - 1 for a
+       power-of-two fifo_len (computed by get_mask in btl_pcie_fifo.c) */
+    uint32_t mask;
+    /* the actual buffer */
+    btl_pcie_fifo_entry_t* queue;
+};
+typedef struct btl_pcie_fifo_t btl_pcie_fifo_t;
+
+
+/**
+ * Initialize fifo structure
+ *
+ * Initialize send/recv fifo structure.  The fifo structure does
+ * double duty of maintaining both the sender and receiver.  This
+ * function initializes the send view of the fifo structure, for
+ * use to receive messages.  fifo_get_msg() should not be called on
+ * this fifo.
+ *
+ * @note fifo_len must match the value given to the matching
+ * fifo_init_recv(), although there are no checks to verify this.
+ *
+ * @param[in] fifo        A pointer to a fifo structure to be
+ *                        initialized
+ * @param[in] fifo_len    Requested length of the fifo queue
+ * @param[in] queue_space Space for the receive queue (remote pointer)
+ *
+ * @retval OMPI_SUCCESS Everything worked
+ * @retval OMPI_ERROR   Good luck!
+ */
+int ompi_btl_pcie_fifo_init_send(btl_pcie_fifo_t *fifo,
+                                 unsigned int fifo_len,
+                                 void *queue_space);
+
+
+/**
+ * Initialize fifo structure
+ *
+ * Initialize send/recv fifo structure.  The fifo structure does
+ * double duty of maintaining both the sender and receiver.  This
+ * function initializes the receive view of the fifo structure, for
+ * use to receive messages.  fifo_set_msg() should not be called on
+ * this fifo.
+ *
+ * @note fifo_len must match the value given to the matching
+ * fifo_init_send(), although there are no checks to verify this.
+ *
+ * @param[in] fifo            A pointer to a fifo structure to be
+ *                            initialized
+ * @param[in] fifo_len        Requested length of the fifo queue
+ * @param[in] queue_space     Space for the receive queue (local pointer)
+ * @param[in] queue_space_len Length of queue_space
+ *
+ * @retval OMPI_SUCCESS Everything worked
+ * @retval OMPI_ERROR   Good luck!
+ */
+int ompi_btl_pcie_fifo_init_recv(btl_pcie_fifo_t *fifo,
+                                 unsigned int fifo_len,
+                                 void *queue_space,
+                                 size_t queue_space_len);
+
+/** Release fifo resources (currently nothing to release). */
+int ompi_btl_pcie_fifo_finalize(btl_pcie_fifo_t *fifo);
+
+
+/**
+ * Read a message from the queue
+ *
+ * Read a message from the queue
+ *
+ * @param[in] fifo The receive view of the fifo
+ *
+ * @return A non-zero message or 0 if no new messages are
+ * available.
+ */
+static inline btl_pcie_fifo_entry_t
+ompi_btl_pcie_fifo_get_msg(btl_pcie_fifo_t *fifo)
+{
+    /* BWB - TODO - if we ever want to be multi-threaded, we'll
+       need to fix this */
+    btl_pcie_fifo_entry_t ret = 0;
+    /* a zero entry means "empty"; the consumed slot is zeroed so the
+       sender can reuse it after the index wraps around */
+    if (0 != (ret = fifo->queue[fifo->current_index])) {
+        fifo->queue[fifo->current_index] = 0;
+        fifo->current_index++;
+        fifo->current_index &= fifo->mask;
+    }
+
+    return ret;
+}
+
+
+/**
+ * Write a message pointer into the queue
+ *
+ * Write a message pointer into the send queue view of the fifo.
+ *
+ * @param[in] fifo The send view of the fifo
+ * @param[in] msg  The index to the payload to deliver
+ *
+ * @retval OMPI_SUCCESS           Fifo successfully updated
+ * @retval OMPI_ERR_RESOURCE_BUSY There was no space in the fifo
+ */
+static inline int
+ompi_btl_pcie_fifo_set_msg(btl_pcie_fifo_t *fifo, btl_pcie_fifo_entry_t msg)
+{
+    uint32_t outstanding;
+
+    /* see if we have a slot */
+    outstanding = OPAL_THREAD_ADD32(&fifo->num_outstanding, 1);
+    if (outstanding > fifo->fifo_len) {
+        OPAL_THREAD_ADD32(&fifo->num_outstanding, -1);
+        return OMPI_ERR_RESOURCE_BUSY;
+    }
+
+    /* now that we have a slot, figure out where it is.  Allow the
+       outstanding to wrap around forever - just mask out the bits we
+       don't care about. */
+    outstanding = OPAL_THREAD_ADD32(&fifo->current_index, 1);
+    outstanding &= fifo->mask;
+
+    fifo->queue[outstanding] = msg;
+
+    return OMPI_SUCCESS;
+}
+
+
+/* Give back num_msgs send credits (decrements the in-flight count
+   that gates fifo_set_msg).  NOTE(review): num_msgs is unsigned but
+   is negated into a 32-bit atomic add - callers pass small counts,
+   so the conversion is benign; confirm if that ever changes. */
+static inline int
+ompi_btl_pcie_fifo_complete_msg(btl_pcie_fifo_t *fifo,
+                                unsigned int num_msgs)
+{
+    OPAL_THREAD_ADD32(&fifo->num_outstanding, -num_msgs);
+    return OMPI_SUCCESS;
+}
+
+
+END_C_DECLS
+
+#endif /* BTL_PCIE_FIFO_H */
diff --git a/ompi/mca/btl/pcie/btl_pcie_frag.c b/ompi/mca/btl/pcie/btl_pcie_frag.c
new file mode 100644
index 0000000000..4053c8e361
--- /dev/null
+++ b/ompi/mca/btl/pcie/btl_pcie_frag.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2007 Los Alamos National Security, LLC.
+ *                    All righs reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include "btl_pcie_frag.h"
+#include "btl_pcie.h"
+
+
+/* SMA copy buffers: the payload area starts immediately after the
+   descriptor itself ("buf + 1"); the eager/max constructors differ
+   only in the type tag they stamp on the buffer. */
+static void
+mca_btl_pcie_sma_buf_eager_constructor(mca_btl_pcie_sma_buf_t* buf)
+{
+    buf->pcie_data.pval = buf + 1;
+    buf->type = MCA_BTL_PCIE_TYPE_EAGER;
+}
+
+static void
+mca_btl_pcie_sma_buf_max_constructor(mca_btl_pcie_sma_buf_t* buf)
+{
+    buf->pcie_data.pval = buf + 1;
+    buf->type = MCA_BTL_PCIE_TYPE_MAX;
+}
+
+OBJ_CLASS_INSTANCE(mca_btl_pcie_sma_buf_eager_t,
+                   ompi_free_list_item_t,
+                   mca_btl_pcie_sma_buf_eager_constructor,
+                   NULL);
+
+OBJ_CLASS_INSTANCE(mca_btl_pcie_sma_buf_max_t,
+                   ompi_free_list_item_t,
+                   mca_btl_pcie_sma_buf_max_constructor,
+                   NULL);
+
+
+/* RDMA fragments carry no inline payload: every descriptor field
+   starts out cleared here. */
+static void
+mca_btl_pcie_frag_dma_constructor(mca_btl_pcie_frag_t* frag)
+{
+    frag->base.des_src = NULL;
+    frag->base.des_src_cnt = 0;
+    frag->base.des_dst = NULL;
+    frag->base.des_dst_cnt = 0;
+
+    frag->segment.seg_addr.pval = NULL;
+    frag->segment.seg_len = 0;
+
+    frag->endpoint = NULL;
+    frag->hdr = NULL;
+    frag->size = 0;
+    frag->registration = NULL;
+    frag->type = MCA_BTL_PCIE_TYPE_RDMA;
+    frag->sma_buf = NULL;
+}
+
+
+/* Shared setup for eager/max send fragments: the pcie header lives
+   immediately after the fragment struct ("frag + 1") and the user
+   segment starts right after the header.  Callers must set
+   frag->size BEFORE calling, since seg_len is copied from it. */
+static void
+mca_btl_pcie_frag_common_constructor(mca_btl_pcie_frag_t* frag)
+{
+    frag->base.des_src = &frag->segment;
+    frag->base.des_src_cnt = 1;
+    frag->base.des_dst = NULL;
+    frag->base.des_dst_cnt = 0;
+
+    frag->hdr = (mca_btl_pcie_header_t*) (frag + 1);
+    /* back-pointer so the peer's ack can name this fragment */
+    frag->hdr->send_frag.pval = frag;
+
+    frag->segment.seg_addr.pval = ((unsigned char*) frag->hdr) + sizeof(mca_btl_pcie_header_t);
+    frag->segment.seg_len = frag->size;
+
+    frag->endpoint = NULL;
+    frag->registration = NULL;
+    frag->sma_buf = NULL;
+}
+
+static void
+mca_btl_pcie_frag_eager_constructor(mca_btl_pcie_frag_t* frag)
+{
+    frag->size = mca_btl_pcie_module.super.btl_eager_limit;
+    mca_btl_pcie_frag_common_constructor(frag);
+    frag->type = MCA_BTL_PCIE_TYPE_EAGER;
+}
+
+static void mca_btl_pcie_frag_max_constructor(mca_btl_pcie_frag_t* frag)
+{
+    frag->size = mca_btl_pcie_module.super.btl_max_send_size;
+    mca_btl_pcie_frag_common_constructor(frag);
+    frag->type = MCA_BTL_PCIE_TYPE_MAX;
+}
+
+
+/* Receive fragments: des_dst points at the single local segment,
+   whose address/length start out empty here. */
+static void mca_btl_pcie_frag_recv_constructor(mca_btl_pcie_frag_t *frag)
+{
+    frag->base.des_src = NULL;
+    frag->base.des_src_cnt = 0;
+    frag->base.des_dst = &frag->segment;
+    frag->base.des_dst_cnt = 1;
+
+    frag->segment.seg_addr.pval = NULL;
+    frag->segment.seg_len = 0;
+
+    frag->endpoint = NULL;
+    frag->hdr = NULL;
+    frag->size = 0;
+    frag->registration = NULL;
+    frag->type = MCA_BTL_PCIE_TYPE_RECV;
+    frag->sma_buf = NULL;
+}
+
+
+OBJ_CLASS_INSTANCE(
+    mca_btl_pcie_frag_eager_t,
+    mca_btl_base_descriptor_t,
+    mca_btl_pcie_frag_eager_constructor,
+    NULL);
+
+OBJ_CLASS_INSTANCE(
+    mca_btl_pcie_frag_max_t,
+    mca_btl_base_descriptor_t,
+    mca_btl_pcie_frag_max_constructor,
+    NULL);
+
+
+OBJ_CLASS_INSTANCE(
+    mca_btl_pcie_frag_recv_t,
+    mca_btl_base_descriptor_t,
+    mca_btl_pcie_frag_recv_constructor,
+    NULL);
+
+OBJ_CLASS_INSTANCE(
+    mca_btl_pcie_frag_dma_t,
+    mca_btl_base_descriptor_t,
+    mca_btl_pcie_frag_dma_constructor,
+    NULL);
diff --git a/ompi/mca/btl/pcie/btl_pcie_frag.h b/ompi/mca/btl/pcie/btl_pcie_frag.h
new file mode 100644
index 0000000000..b31b9e1582
--- /dev/null
+++ b/ompi/mca/btl/pcie/btl_pcie_frag.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2007 Los Alamos National Security, LLC.
+ *                    All righs reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef MCA_BTL_PCIE_FRAG_H
+#define MCA_BTL_PCIE_FRAG_H
+
+#include "ompi_config.h"
+
+#include "ompi/mca/btl/btl.h"
+
+BEGIN_C_DECLS
+
+#define MCA_BTL_PCIE_FRAG_ALIGN (16)
+
+/* Header that sits at top of any send message */
+struct mca_btl_pcie_header_t {
+    mca_btl_base_tag_t tag;
+    uint8_t pad[3];
+    uint32_t length;
+    ompi_ptr_t send_frag;
+};
+typedef struct mca_btl_pcie_header_t mca_btl_pcie_header_t;
+
+/* Only the multi-byte length field is byte-swapped; tag/pad are
+   single bytes.  NOTE(review): send_frag is not swapped - it appears
+   to be opaque to the receiver and only echoed back; confirm. */
+#define OMPI_BTL_PCIE_HEADER_HTON(header)               \
+    do {                                                \
+        (header).length = htonl((header).length);       \
+    } while (0)
+
+
+#define OMPI_BTL_PCIE_HEADER_NTOH(header)               \
+    do {                                                \
+        (header).length = ntohl((header).length);       \
+    } while (0)
+
+struct mca_btl_pcie_frag_t;
+
+/** Type description for fragments / buffers */
+enum mca_btl_pcie_frag_type_t {
+    MCA_BTL_PCIE_TYPE_UNKNOWN,
+    MCA_BTL_PCIE_TYPE_EAGER,
+    MCA_BTL_PCIE_TYPE_MAX,
+    MCA_BTL_PCIE_TYPE_RDMA,
+    MCA_BTL_PCIE_TYPE_RECV
+};
+typedef enum mca_btl_pcie_frag_type_t mca_btl_pcie_frag_type_t;
+
+/** SMA transfer fragment */
+struct mca_btl_pcie_sma_buf_t {
+    ompi_free_list_item_t super;
+    /** Pointer to the SMA space available for this copy.  An
+        ompi_ptr_t because in v1.2, this sits in the sma region,
+        and we need to not have different sizes on each endpoint. */
+    ompi_ptr_t pcie_data;
+    /** type of buffer */
+    mca_btl_pcie_frag_type_t type;
+};
+typedef struct mca_btl_pcie_sma_buf_t mca_btl_pcie_sma_buf_t;
+
+typedef mca_btl_pcie_sma_buf_t mca_btl_pcie_sma_buf_eager_t;
+OBJ_CLASS_DECLARATION(mca_btl_pcie_sma_buf_eager_t);
+
+typedef mca_btl_pcie_sma_buf_t mca_btl_pcie_sma_buf_max_t;
+OBJ_CLASS_DECLARATION(mca_btl_pcie_sma_buf_max_t);
+
+/* NOTE(review): the allocation/return macros below were bare { ... }
+   blocks, which misbehave when used as the body of an if/else; they
+   are now wrapped in do { ... } while (0).  The error messages print
+   pointers with %p instead of the non-portable "0x%lx + pointer
+   argument" combination (format/argument mismatch). */
+
+/** Grab an eager-sized SMA buffer from btl's free list; rc reports
+    the free-list status, buf receives the buffer (or NULL). */
+#define MCA_BTL_PCIE_SMA_BUF_ALLOC_EAGER(btl, buf, rc)                  \
+    do {                                                                \
+        ompi_free_list_item_t *item;                                    \
+        OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)btl)->pcie_sma_buf_eager, item, rc); \
+        buf = (mca_btl_pcie_sma_buf_t*) item;                           \
+    } while (0)
+
+#define MCA_BTL_PCIE_SMA_BUF_ALLOC_MAX(btl, buf, rc)                    \
+    do {                                                                \
+        ompi_free_list_item_t *item;                                    \
+        OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)btl)->pcie_sma_buf_max, item, rc); \
+        buf = (mca_btl_pcie_sma_buf_t*) item;                           \
+    } while (0)
+
+/** Return buf to the free list matching its type tag; ret is set to
+    OMPI_SUCCESS or OMPI_ERR_BAD_PARAM for an unknown tag. */
+#define MCA_BTL_PCIE_SMA_BUF_RETURN(btl, buf, ret)                      \
+    do {                                                                \
+        ret = OMPI_SUCCESS;                                             \
+        switch ((buf)->type) {                                          \
+        case MCA_BTL_PCIE_TYPE_EAGER:                                   \
+            OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)btl)->pcie_sma_buf_eager, \
+                                  (ompi_free_list_item_t*)(buf));       \
+            break;                                                      \
+        case MCA_BTL_PCIE_TYPE_MAX:                                     \
+            OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)btl)->pcie_sma_buf_max, \
+                                  (ompi_free_list_item_t*)(buf));       \
+            break;                                                      \
+        default:                                                        \
+            BTL_ERROR(("Invalid return type (%d) for frag %p in SMA_BUF_RETURN", \
+                       (int) (buf)->type, (void*) (buf)));              \
+            ret = OMPI_ERR_BAD_PARAM;                                   \
+        }                                                               \
+    } while (0)
+
+
+/** Fragment description -- used for send/rdma fragments */
+struct mca_btl_pcie_frag_t {
+    mca_btl_base_descriptor_t base;
+    mca_btl_base_segment_t segment;
+    struct mca_btl_base_endpoint_t *endpoint;
+    mca_btl_pcie_header_t *hdr;
+    size_t size;
+    struct mca_btl_pcie_reg_t *registration;
+    mca_btl_pcie_frag_type_t type;
+    mca_btl_pcie_sma_buf_t *sma_buf;
+};
+typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_t;
+
+typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_eager_t;
+OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_eager_t);
+
+typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_max_t;
+OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_max_t);
+
+typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_recv_t;
+OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_recv_t);
+
+typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_dma_t;
+OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_dma_t);
+
+
+#define MCA_BTL_PCIE_FRAG_ALLOC_EAGER(btl, frag, rc)                    \
+    do {                                                                \
+        ompi_free_list_item_t *item;                                    \
+        OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)btl)->pcie_frag_eager, item, rc); \
+        frag = (mca_btl_pcie_frag_t*) item;                             \
+    } while (0)
+
+#define MCA_BTL_PCIE_FRAG_ALLOC_MAX(btl, frag, rc)                      \
+    do {                                                                \
+        ompi_free_list_item_t *item;                                    \
+        OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)btl)->pcie_frag_max, item, rc); \
+        frag = (mca_btl_pcie_frag_t*) item;                             \
+    } while (0)
+
+#define MCA_BTL_PCIE_FRAG_ALLOC_DMA(btl, frag, rc)                      \
+    do {                                                                \
+        ompi_free_list_item_t *item;                                    \
+        OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)btl)->pcie_frag_dma, item, rc); \
+        frag = (mca_btl_pcie_frag_t*) item;                             \
+    } while (0)
+
+/** Return frag to the free list matching its type tag; ret is set to
+    OMPI_SUCCESS or OMPI_ERR_BAD_PARAM for an unknown tag. */
+#define MCA_BTL_PCIE_FRAG_RETURN(btl, frag, ret)                        \
+    do {                                                                \
+        ret = OMPI_SUCCESS;                                             \
+        switch ((frag)->type) {                                         \
+        case MCA_BTL_PCIE_TYPE_EAGER:                                   \
+            OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)btl)->pcie_frag_eager, \
+                                  (ompi_free_list_item_t*)(frag));      \
+            break;                                                      \
+        case MCA_BTL_PCIE_TYPE_MAX:                                     \
+            OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)btl)->pcie_frag_max, \
+                                  (ompi_free_list_item_t*)(frag));      \
+            break;                                                      \
+        case MCA_BTL_PCIE_TYPE_RDMA:                                    \
+            OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)btl)->pcie_frag_dma, \
+                                  (ompi_free_list_item_t*)(frag));      \
+            break;                                                      \
+        default:                                                        \
+            BTL_ERROR(("Invalid return type (%d) for frag %p in FRAG_RETURN", \
+                       (int) (frag)->type, (void*) (frag)));            \
+            ret = OMPI_ERR_BAD_PARAM;                                   \
+        }                                                               \
+    } while (0)
+
+END_C_DECLS
+
+#endif /* #ifndef MCA_BTL_PCIE_FRAG_H */
diff --git a/ompi/mca/btl/pcie/btl_pcie_lex.c b/ompi/mca/btl/pcie/btl_pcie_lex.c
new file mode 100644
index 0000000000..eddb256a70
--- /dev/null
+++ b/ompi/mca/btl/pcie/btl_pcie_lex.c
@@ -0,0 +1,1698 @@
+#define yy_create_buffer
btl_pcie_cfg_yy_create_buffer +#define yy_delete_buffer btl_pcie_cfg_yy_delete_buffer +#define yy_scan_buffer btl_pcie_cfg_yy_scan_buffer +#define yy_scan_string btl_pcie_cfg_yy_scan_string +#define yy_scan_bytes btl_pcie_cfg_yy_scan_bytes +#define yy_flex_debug btl_pcie_cfg_yy_flex_debug +#define yy_init_buffer btl_pcie_cfg_yy_init_buffer +#define yy_flush_buffer btl_pcie_cfg_yy_flush_buffer +#define yy_load_buffer_state btl_pcie_cfg_yy_load_buffer_state +#define yy_switch_to_buffer btl_pcie_cfg_yy_switch_to_buffer +#define yyin btl_pcie_cfg_yyin +#define yyleng btl_pcie_cfg_yyleng +#define yylex btl_pcie_cfg_yylex +#define yyout btl_pcie_cfg_yyout +#define yyrestart btl_pcie_cfg_yyrestart +#define yytext btl_pcie_cfg_yytext +#define yywrap btl_pcie_cfg_yywrap + +/* A lexical scanner generated by flex */ + +/* Scanner skeleton version: + * $Header: /home/usr/ddd/openmpi/cvs/pcie/ompi/mca/btl/pcie/btl_pcie_lex.c,v 1.1 2007/08/01 15:10:22 bbarrett Exp $ + */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 5 + +#include +#include + + +/* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */ +#ifdef c_plusplus +#ifndef __cplusplus +#define __cplusplus +#endif +#endif + + +#ifdef __cplusplus + +#include + +/* Use prototypes in function declarations. */ +#define YY_USE_PROTOS + +/* The "const" storage-class-modifier is valid. */ +#define YY_USE_CONST + +#else /* ! __cplusplus */ + +#if __STDC__ + +#define YY_USE_PROTOS +#define YY_USE_CONST + +#endif /* __STDC__ */ +#endif /* ! __cplusplus */ + +#ifdef __TURBOC__ + #pragma warn -rch + #pragma warn -use +#include +#include +#define YY_USE_CONST +#define YY_USE_PROTOS +#endif + +#ifdef YY_USE_CONST +#define yyconst const +#else +#define yyconst +#endif + + +#ifdef YY_USE_PROTOS +#define YY_PROTO(proto) proto +#else +#define YY_PROTO(proto) () +#endif + +/* Returned upon end-of-file. 
*/ +#define YY_NULL 0 + +/* Promotes a possibly negative, possibly signed char to an unsigned + * integer for use as an array index. If the signed char is negative, + * we want to instead treat it as an 8-bit unsigned char, hence the + * double cast. + */ +#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) + +/* Enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. + */ +#define BEGIN yy_start = 1 + 2 * + +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. + */ +#define YY_START ((yy_start - 1) / 2) +#define YYSTATE YY_START + +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) + +/* Special action meaning "start processing a new file". */ +#define YY_NEW_FILE yyrestart( yyin ) + +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. */ +#define YY_BUF_SIZE 16384 + +typedef struct yy_buffer_state *YY_BUFFER_STATE; + +extern int yyleng; +extern FILE *yyin, *yyout; + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + +/* The funky do-while in the following #define is used to turn the definition + * int a single C statement (which needs a semi-colon terminator). This + * avoids problems with code like: + * + * if ( condition_holds ) + * yyless( 5 ); + * else + * do_something_else(); + * + * Prior to using the do-while the compiler would get upset at the + * "else" because it interpreted the "if" statement as being all + * done when it reached the ';' after the yyless() call. + */ + +/* Return all but the first 'n' matched characters back to the input stream. */ + +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. 
*/ \ + *yy_cp = yy_hold_char; \ + YY_RESTORE_YY_MORE_OFFSET \ + yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) + +#define unput(c) yyunput( c, yytext_ptr ) + +/* The following is because we cannot portably get our hands on size_t + * (without autoconf's help, which isn't available because we want + * flex-generated scanners to compile on their own). + */ +typedef unsigned int yy_size_t; + + +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + yy_size_t yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. 
+ * + * When we actually see the EOF, we change the status to "new" + * (via yyrestart()), so that the user can continue scanning by + * just pointing yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + }; + +static YY_BUFFER_STATE yy_current_buffer = 0; + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + */ +#define YY_CURRENT_BUFFER yy_current_buffer + + +/* yy_hold_char holds the character lost when yytext is formed. */ +static char yy_hold_char; + +static int yy_n_chars; /* number of characters read into yy_ch_buf */ + + +int yyleng; + +/* Points to current character in buffer. */ +static char *yy_c_buf_p = (char *) 0; +static int yy_init = 1; /* whether we need to initialize */ +static int yy_start = 0; /* start state number */ + +/* Flag which is used to allow yywrap()'s to do buffer switches + * instead of setting up a fresh yyin. A bit of a hack ... + */ +static int yy_did_buffer_switch_on_eof; + +void yyrestart YY_PROTO(( FILE *input_file )); + +void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer )); +void yy_load_buffer_state YY_PROTO(( void )); +YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size )); +void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b )); +void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); +void yy_flush_buffer YY_PROTO(( YY_BUFFER_STATE b )); +#define YY_FLUSH_BUFFER yy_flush_buffer( yy_current_buffer ) + +YY_BUFFER_STATE yy_scan_buffer YY_PROTO(( char *base, yy_size_t size )); +YY_BUFFER_STATE yy_scan_string YY_PROTO(( yyconst char *yy_str )); +YY_BUFFER_STATE yy_scan_bytes YY_PROTO(( yyconst char *bytes, int len )); + +static void *yy_flex_alloc YY_PROTO(( yy_size_t )); +static void *yy_flex_realloc YY_PROTO(( void *, yy_size_t )); +static void yy_flex_free YY_PROTO(( void * )); + +#define yy_new_buffer yy_create_buffer + +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! 
yy_current_buffer ) \ + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ + yy_current_buffer->yy_is_interactive = is_interactive; \ + } + +#define yy_set_bol(at_bol) \ + { \ + if ( ! yy_current_buffer ) \ + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ + yy_current_buffer->yy_at_bol = at_bol; \ + } + +#define YY_AT_BOL() (yy_current_buffer->yy_at_bol) + +typedef unsigned char YY_CHAR; +FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; +typedef int yy_state_type; +extern char *yytext; +#define yytext_ptr yytext + +static yy_state_type yy_get_previous_state YY_PROTO(( void )); +static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); +static int yy_get_next_buffer YY_PROTO(( void )); +static void yy_fatal_error YY_PROTO(( yyconst char msg[] )); + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. + */ +#define YY_DO_BEFORE_ACTION \ + yytext_ptr = yy_bp; \ + yyleng = (int) (yy_cp - yy_bp); \ + yy_hold_char = *yy_cp; \ + *yy_cp = '\0'; \ + yy_c_buf_p = yy_cp; + +#define YY_NUM_RULES 13 +#define YY_END_OF_BUFFER 14 +static yyconst short int yy_accept[41] = + { 0, + 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, + 14, 13, 12, 1, 13, 11, 11, 11, 5, 7, + 6, 12, 1, 0, 2, 11, 11, 0, 4, 11, + 5, 6, 6, 8, 10, 9, 0, 3, 11, 0 + } ; + +static yyconst int yy_ec[256] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 1, 1, 4, 1, 1, 1, 1, 1, + 1, 5, 1, 1, 6, 6, 7, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 9, 1, 1, + 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 1, 10, 1, 1, 6, 1, 6, 6, 6, 6, + + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + } ; + +static yyconst int yy_meta[11] = + { 0, + 1, 1, 2, 1, 3, 4, 4, 4, 1, 4 + } ; + +static yyconst short int yy_base[48] = + { 0, + 0, 0, 8, 11, 0, 0, 0, 0, 0, 0, + 44, 73, 15, 73, 38, 13, 19, 23, 0, 73, + 22, 0, 73, 22, 73, 0, 0, 7, 73, 29, + 0, 35, 38, 73, 0, 0, 40, 73, 0, 73, + 50, 54, 58, 60, 64, 68, 8 + } ; + +static yyconst short int yy_def[48] = + { 0, + 40, 1, 41, 41, 42, 42, 42, 42, 42, 42, + 40, 40, 40, 40, 43, 40, 44, 44, 45, 40, + 46, 13, 40, 43, 40, 16, 18, 47, 40, 40, + 45, 46, 46, 40, 47, 47, 30, 40, 30, 0, + 40, 40, 40, 40, 40, 40, 40 + } ; + +static yyconst short int yy_nxt[84] = + { 0, + 12, 13, 14, 15, 12, 16, 17, 16, 12, 18, + 20, 35, 21, 20, 36, 21, 22, 23, 26, 27, + 26, 28, 27, 29, 25, 30, 33, 40, 34, 37, + 37, 38, 37, 37, 39, 39, 39, 37, 39, 40, + 25, 40, 33, 40, 34, 37, 37, 37, 40, 37, + 19, 19, 19, 19, 12, 12, 12, 12, 24, 24, + 24, 24, 27, 27, 31, 40, 40, 31, 32, 40, + 32, 32, 11, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40 + } ; + +static yyconst short int yy_chk[84] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 3, 47, 3, 4, 28, 4, 13, 13, 16, 16, + 16, 16, 16, 17, 24, 17, 21, 18, 21, 30, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 32, + 15, 32, 33, 11, 33, 37, 37, 37, 0, 37, + 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, + 43, 43, 44, 44, 45, 0, 0, 45, 46, 0, + 46, 46, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40 + } ; + +static yy_state_type yy_last_accepting_state; +static char *yy_last_accepting_cpos; + +/* The intent behind this definition is that it'll catch + * any uses of REJECT which flex missed. 
+ */ +#define REJECT reject_used_but_not_detected +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 +#define YY_RESTORE_YY_MORE_OFFSET +char *yytext; +#line 1 "btl_pcie_lex.l" +#define INITIAL 0 +#line 2 "btl_pcie_lex.l" +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include +#if HAVE_UNISTD_H +#include +#endif + +#include "btl_pcie_lex.h" + +/* + * local functions + */ +static int finish_parsing(void) ; +static int btl_pcie_cfg_yywrap(void); + +/* + * global variables + */ +int btl_pcie_cfg_yynewlines = 1; +bool btl_pcie_cfg_parse_done = false; +char *btl_pcie_cfg_string = NULL; + +#define yyterminate() \ + return finish_parsing() + +#define comment 1 + +#define section_name 2 + +#define section_end 3 + +#define value 4 + +#line 461 "lex.btl_pcie_cfg_yy.c" + +/* Macros after this point can all be overridden by user definitions in + * section 1. 
+ */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap YY_PROTO(( void )); +#else +extern int yywrap YY_PROTO(( void )); +#endif +#endif + +#ifndef YY_NO_UNPUT +static void yyunput YY_PROTO(( int c, char *buf_ptr )); +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy YY_PROTO(( char *, yyconst char *, int )); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen YY_PROTO(( yyconst char * )); +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus +static int yyinput YY_PROTO(( void )); +#else +static int input YY_PROTO(( void )); +#endif +#endif + +#if YY_STACK_USED +static int yy_start_stack_ptr = 0; +static int yy_start_stack_depth = 0; +static int *yy_start_stack = 0; +#ifndef YY_NO_PUSH_STATE +static void yy_push_state YY_PROTO(( int new_state )); +#endif +#ifndef YY_NO_POP_STATE +static void yy_pop_state YY_PROTO(( void )); +#endif +#ifndef YY_NO_TOP_STATE +static int yy_top_state YY_PROTO(( void )); +#endif + +#else +#define YY_NO_PUSH_STATE 1 +#define YY_NO_POP_STATE 1 +#define YY_NO_TOP_STATE 1 +#endif + +#ifdef YY_MALLOC_DECL +YY_MALLOC_DECL +#else +#if __STDC__ +#ifndef __cplusplus +#include +#endif +#else +/* Just try to get by without declaring the routines. This will fail + * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int) + * or sizeof(void*) != sizeof(int). + */ +#endif +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#define YY_READ_BUF_SIZE 8192 +#endif + +/* Copy whatever the last rule matched to the standard output. */ + +#ifndef ECHO +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO (void) fwrite( yytext, yyleng, 1, yyout ) +#endif + +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". 
+ */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + if ( yy_current_buffer->yy_is_interactive ) \ + { \ + int c = '*', n; \ + for ( n = 0; n < max_size && \ + (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } \ + else if ( ((result = fread( buf, 1, max_size, yyin )) == 0) \ + && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) +#endif + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +#define YY_DECL int yylex YY_PROTO(( void )) +#endif + +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. + */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. */ +#ifndef YY_BREAK +#define YY_BREAK break; +#endif + +#define YY_RULE_SETUP \ + YY_USER_ACTION + +YY_DECL + { + register yy_state_type yy_current_state; + register char *yy_cp = NULL, *yy_bp = NULL; + register int yy_act; + +#line 57 "btl_pcie_lex.l" + + +#line 615 "lex.btl_pcie_cfg_yy.c" + + if ( yy_init ) + { + yy_init = 0; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + if ( ! yy_start ) + yy_start = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! yyout ) + yyout = stdout; + + if ( ! 
yy_current_buffer ) + yy_current_buffer = + yy_create_buffer( yyin, YY_BUF_SIZE ); + + yy_load_buffer_state(); + } + + while ( 1 ) /* loops until end-of-file is reached */ + { + yy_cp = yy_c_buf_p; + + /* Support of yytext. */ + *yy_cp = yy_hold_char; + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. + */ + yy_bp = yy_cp; + + yy_current_state = yy_start; +yy_match: + do + { + register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; + if ( yy_accept[yy_current_state] ) + { + yy_last_accepting_state = yy_current_state; + yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 41 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + ++yy_cp; + } + while ( yy_base[yy_current_state] != 73 ); + +yy_find_action: + yy_act = yy_accept[yy_current_state]; + if ( yy_act == 0 ) + { /* have to back up */ + yy_cp = yy_last_accepting_cpos; + yy_current_state = yy_last_accepting_state; + yy_act = yy_accept[yy_current_state]; + } + + YY_DO_BEFORE_ACTION; + + +do_action: /* This label is used only to access EOF actions. 
*/ + + + switch ( yy_act ) + { /* beginning of action switch */ + case 0: /* must back up */ + /* undo the effects of YY_DO_BEFORE_ACTION */ + *yy_cp = yy_hold_char; + yy_cp = yy_last_accepting_cpos; + yy_current_state = yy_last_accepting_state; + goto yy_find_action; + +case 1: +YY_RULE_SETUP +#line 59 "btl_pcie_lex.l" +{ ++btl_pcie_cfg_yynewlines; + return BTL_PCIE_CFG_PARSE_NEWLINE; } + YY_BREAK +case 2: +YY_RULE_SETUP +#line 61 "btl_pcie_lex.l" +{ ++btl_pcie_cfg_yynewlines; + return BTL_PCIE_CFG_PARSE_NEWLINE; } + YY_BREAK +case 3: +YY_RULE_SETUP +#line 63 "btl_pcie_lex.l" +{ ++btl_pcie_cfg_yynewlines; + return BTL_PCIE_CFG_PARSE_NEWLINE; } + YY_BREAK +case 4: +YY_RULE_SETUP +#line 66 "btl_pcie_lex.l" +{ BEGIN(comment); + return BTL_PCIE_CFG_PARSE_NEWLINE; } + YY_BREAK +case 5: +YY_RULE_SETUP +#line 68 "btl_pcie_lex.l" +; /* Eat up non '*'s */ + YY_BREAK +case 6: +YY_RULE_SETUP +#line 69 "btl_pcie_lex.l" +; /* Eat '*'s not followed by a '/' */ + YY_BREAK +case 7: +YY_RULE_SETUP +#line 70 "btl_pcie_lex.l" +{ ++btl_pcie_cfg_yynewlines; + return BTL_PCIE_CFG_PARSE_NEWLINE; } + YY_BREAK +case 8: +YY_RULE_SETUP +#line 72 "btl_pcie_lex.l" +{ BEGIN(INITIAL); /* Done with block comment */ + return BTL_PCIE_CFG_PARSE_NEWLINE; } + YY_BREAK +case 9: +YY_RULE_SETUP +#line 75 "btl_pcie_lex.l" +{ return BTL_PCIE_CFG_PARSE_HOSTNAME_CORE; } + YY_BREAK +case 10: +YY_RULE_SETUP +#line 76 "btl_pcie_lex.l" +{ return BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE; } + YY_BREAK +case 11: +YY_RULE_SETUP +#line 78 "btl_pcie_lex.l" +{ return BTL_PCIE_CFG_PARSE_DEVICE; } + YY_BREAK +case 12: +YY_RULE_SETUP +#line 80 "btl_pcie_lex.l" +; /* whitespace */ + YY_BREAK +case 13: +YY_RULE_SETUP +#line 82 "btl_pcie_lex.l" +ECHO; + YY_BREAK +#line 769 "lex.btl_pcie_cfg_yy.c" +case YY_STATE_EOF(INITIAL): +case YY_STATE_EOF(comment): +case YY_STATE_EOF(section_name): +case YY_STATE_EOF(section_end): +case YY_STATE_EOF(value): + yyterminate(); + + case YY_END_OF_BUFFER: + { + /* Amount of text matched not 
including the EOB char. */ + int yy_amount_of_matched_text = (int) (yy_cp - yytext_ptr) - 1; + + /* Undo the effects of YY_DO_BEFORE_ACTION. */ + *yy_cp = yy_hold_char; + YY_RESTORE_YY_MORE_OFFSET + + if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW ) + { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed yyin at a new source and called + * yylex(). If so, then we have to assure + * consistency between yy_current_buffer and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. + */ + yy_n_chars = yy_current_buffer->yy_n_chars; + yy_current_buffer->yy_input_file = yyin; + yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL; + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in input(). + */ + if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + { /* This was really a NUL. */ + yy_state_type yy_next_state; + + yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + /* Okay, we're now positioned to make the NUL + * transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state ); + + yy_bp = yytext_ptr + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* Consume the NUL. 
*/ + yy_cp = ++yy_c_buf_p; + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { + yy_cp = yy_c_buf_p; + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer() ) + { + case EOB_ACT_END_OF_FILE: + { + yy_did_buffer_switch_on_eof = 0; + + if ( yywrap() ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. + */ + yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF(YY_START); + goto do_action; + } + + else + { + if ( ! yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = + yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext_ptr + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + yy_c_buf_p = + &yy_current_buffer->yy_ch_buf[yy_n_chars]; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext_ptr + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } /* end of action switch */ + } /* end of scanning one token */ + } /* end of yylex */ + + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ + +static int yy_get_next_buffer() + { + register char *dest = yy_current_buffer->yy_ch_buf; + register char *source = yytext_ptr; + register int number_to_move, i; + int ret_val; + + if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + if ( 
yy_current_buffer->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 ) + { + /* We matched a single character, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. + */ + return EOB_ACT_LAST_MATCH; + } + } + + /* Try to read more data. */ + + /* First move last chars to start of buffer. */ + number_to_move = (int) (yy_c_buf_p - yytext_ptr) - 1; + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + yy_current_buffer->yy_n_chars = yy_n_chars = 0; + + else + { + int num_to_read = + yy_current_buffer->yy_buf_size - number_to_move - 1; + + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. */ +#ifdef YY_USES_REJECT + YY_FATAL_ERROR( +"input buffer overflow, can't enlarge buffer because scanner uses REJECT" ); +#else + + /* just a shorter name for the current buffer */ + YY_BUFFER_STATE b = yy_current_buffer; + + int yy_c_buf_p_offset = + (int) (yy_c_buf_p - b->yy_ch_buf); + + if ( b->yy_is_our_buffer ) + { + int new_size = b->yy_buf_size * 2; + + if ( new_size <= 0 ) + b->yy_buf_size += b->yy_buf_size / 8; + else + b->yy_buf_size *= 2; + + b->yy_ch_buf = (char *) + /* Include room in for 2 EOB chars. */ + yy_flex_realloc( (void *) b->yy_ch_buf, + b->yy_buf_size + 2 ); + } + else + /* Can't grow it, we don't own it. */ + b->yy_ch_buf = 0; + + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( + "fatal error - scanner input buffer overflow" ); + + yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; + + num_to_read = yy_current_buffer->yy_buf_size - + number_to_move - 1; +#endif + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + /* Read in more data. 
*/ + YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]), + yy_n_chars, num_to_read ); + + yy_current_buffer->yy_n_chars = yy_n_chars; + } + + if ( yy_n_chars == 0 ) + { + if ( number_to_move == YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + yyrestart( yyin ); + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + yy_current_buffer->yy_buffer_status = + YY_BUFFER_EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + yy_n_chars += number_to_move; + yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; + yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + + yytext_ptr = &yy_current_buffer->yy_ch_buf[0]; + + return ret_val; + } + + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + +static yy_state_type yy_get_previous_state() + { + register yy_state_type yy_current_state; + register char *yy_cp; + + yy_current_state = yy_start; + + for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp ) + { + register YY_CHAR yy_c = (*yy_cp ? 
yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); + if ( yy_accept[yy_current_state] ) + { + yy_last_accepting_state = yy_current_state; + yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 41 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + } + + return yy_current_state; + } + + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + +#ifdef YY_USE_PROTOS +static yy_state_type yy_try_NUL_trans( yy_state_type yy_current_state ) +#else +static yy_state_type yy_try_NUL_trans( yy_current_state ) +yy_state_type yy_current_state; +#endif + { + register int yy_is_jam; + register char *yy_cp = yy_c_buf_p; + + register YY_CHAR yy_c = 1; + if ( yy_accept[yy_current_state] ) + { + yy_last_accepting_state = yy_current_state; + yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 41 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + yy_is_jam = (yy_current_state == 40); + + return yy_is_jam ? 0 : yy_current_state; + } + + +#ifndef YY_NO_UNPUT +#ifdef YY_USE_PROTOS +static void yyunput( int c, register char *yy_bp ) +#else +static void yyunput( c, yy_bp ) +int c; +register char *yy_bp; +#endif + { + register char *yy_cp = yy_c_buf_p; + + /* undo effects of setting up yytext */ + *yy_cp = yy_hold_char; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + { /* need to shift things up to make room */ + /* +2 for EOB chars. 
*/ + register int number_to_move = yy_n_chars + 2; + register char *dest = &yy_current_buffer->yy_ch_buf[ + yy_current_buffer->yy_buf_size + 2]; + register char *source = + &yy_current_buffer->yy_ch_buf[number_to_move]; + + while ( source > yy_current_buffer->yy_ch_buf ) + *--dest = *--source; + + yy_cp += (int) (dest - source); + yy_bp += (int) (dest - source); + yy_current_buffer->yy_n_chars = + yy_n_chars = yy_current_buffer->yy_buf_size; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + YY_FATAL_ERROR( "flex scanner push-back overflow" ); + } + + *--yy_cp = (char) c; + + + yytext_ptr = yy_bp; + yy_hold_char = *yy_cp; + yy_c_buf_p = yy_cp; + } +#endif /* ifndef YY_NO_UNPUT */ + + +#ifndef YY_NO_INPUT +#ifdef __cplusplus +static int yyinput() +#else +static int input() +#endif + { + int c; + + *yy_c_buf_p = yy_hold_char; + + if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. + */ + if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + /* This was really a NUL. */ + *yy_c_buf_p = '\0'; + + else + { /* need more input */ + int offset = yy_c_buf_p - yytext_ptr; + ++yy_c_buf_p; + + switch ( yy_get_next_buffer() ) + { + case EOB_ACT_LAST_MATCH: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + yyrestart( yyin ); + + /* fall through */ + + case EOB_ACT_END_OF_FILE: + { + if ( yywrap() ) + return EOF; + + if ( ! 
yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; +#ifdef __cplusplus + return yyinput(); +#else + return input(); +#endif + } + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = yytext_ptr + offset; + break; + } + } + } + + c = *(unsigned char *) yy_c_buf_p; /* cast for 8-bit char's */ + *yy_c_buf_p = '\0'; /* preserve yytext */ + yy_hold_char = *++yy_c_buf_p; + + + return c; + } +#endif /* YY_NO_INPUT */ + +#ifdef YY_USE_PROTOS +void yyrestart( FILE *input_file ) +#else +void yyrestart( input_file ) +FILE *input_file; +#endif + { + if ( ! yy_current_buffer ) + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); + + yy_init_buffer( yy_current_buffer, input_file ); + yy_load_buffer_state(); + } + + +#ifdef YY_USE_PROTOS +void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) +#else +void yy_switch_to_buffer( new_buffer ) +YY_BUFFER_STATE new_buffer; +#endif + { + if ( yy_current_buffer == new_buffer ) + return; + + if ( yy_current_buffer ) + { + /* Flush out information for old buffer. */ + *yy_c_buf_p = yy_hold_char; + yy_current_buffer->yy_buf_pos = yy_c_buf_p; + yy_current_buffer->yy_n_chars = yy_n_chars; + } + + yy_current_buffer = new_buffer; + yy_load_buffer_state(); + + /* We don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. 
+ */ + yy_did_buffer_switch_on_eof = 1; + } + + +#ifdef YY_USE_PROTOS +void yy_load_buffer_state( void ) +#else +void yy_load_buffer_state() +#endif + { + yy_n_chars = yy_current_buffer->yy_n_chars; + yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos; + yyin = yy_current_buffer->yy_input_file; + yy_hold_char = *yy_c_buf_p; + } + + +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_create_buffer( FILE *file, int size ) +#else +YY_BUFFER_STATE yy_create_buffer( file, size ) +FILE *file; +int size; +#endif + { + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_buf_size = size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 ); + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_is_our_buffer = 1; + + yy_init_buffer( b, file ); + + return b; + } + + +#ifdef YY_USE_PROTOS +void yy_delete_buffer( YY_BUFFER_STATE b ) +#else +void yy_delete_buffer( b ) +YY_BUFFER_STATE b; +#endif + { + if ( ! b ) + return; + + if ( b == yy_current_buffer ) + yy_current_buffer = (YY_BUFFER_STATE) 0; + + if ( b->yy_is_our_buffer ) + yy_flex_free( (void *) b->yy_ch_buf ); + + yy_flex_free( (void *) b ); + } + + + +#ifdef YY_USE_PROTOS +void yy_init_buffer( YY_BUFFER_STATE b, FILE *file ) +#else +void yy_init_buffer( b, file ) +YY_BUFFER_STATE b; +FILE *file; +#endif + + + { + yy_flush_buffer( b ); + + b->yy_input_file = file; + b->yy_fill_buffer = 1; + +#if YY_ALWAYS_INTERACTIVE + b->yy_is_interactive = 1; +#else +#if YY_NEVER_INTERACTIVE + b->yy_is_interactive = 0; +#else + b->yy_is_interactive = file ? 
(isatty( fileno(file) ) > 0) : 0; +#endif +#endif + } + + +#ifdef YY_USE_PROTOS +void yy_flush_buffer( YY_BUFFER_STATE b ) +#else +void yy_flush_buffer( b ) +YY_BUFFER_STATE b; +#endif + + { + if ( ! b ) + return; + + b->yy_n_chars = 0; + + /* We always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. + */ + b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[0]; + + b->yy_at_bol = 1; + b->yy_buffer_status = YY_BUFFER_NEW; + + if ( b == yy_current_buffer ) + yy_load_buffer_state(); + } + + +#ifndef YY_NO_SCAN_BUFFER +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_buffer( char *base, yy_size_t size ) +#else +YY_BUFFER_STATE yy_scan_buffer( base, size ) +char *base; +yy_size_t size; +#endif + { + YY_BUFFER_STATE b; + + if ( size < 2 || + base[size-2] != YY_END_OF_BUFFER_CHAR || + base[size-1] != YY_END_OF_BUFFER_CHAR ) + /* They forgot to leave room for the EOB's. */ + return 0; + + b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); + if ( ! 
b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); + + b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ + b->yy_buf_pos = b->yy_ch_buf = base; + b->yy_is_our_buffer = 0; + b->yy_input_file = 0; + b->yy_n_chars = b->yy_buf_size; + b->yy_is_interactive = 0; + b->yy_at_bol = 1; + b->yy_fill_buffer = 0; + b->yy_buffer_status = YY_BUFFER_NEW; + + yy_switch_to_buffer( b ); + + return b; + } +#endif + + +#ifndef YY_NO_SCAN_STRING +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_string( yyconst char *yy_str ) +#else +YY_BUFFER_STATE yy_scan_string( yy_str ) +yyconst char *yy_str; +#endif + { + int len; + for ( len = 0; yy_str[len]; ++len ) + ; + + return yy_scan_bytes( yy_str, len ); + } +#endif + + +#ifndef YY_NO_SCAN_BYTES +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_bytes( yyconst char *bytes, int len ) +#else +YY_BUFFER_STATE yy_scan_bytes( bytes, len ) +yyconst char *bytes; +int len; +#endif + { + YY_BUFFER_STATE b; + char *buf; + yy_size_t n; + int i; + + /* Get memory for full buffer, including space for trailing EOB's. */ + n = len + 2; + buf = (char *) yy_flex_alloc( n ); + if ( ! buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); + + for ( i = 0; i < len; ++i ) + buf[i] = bytes[i]; + + buf[len] = buf[len+1] = YY_END_OF_BUFFER_CHAR; + + b = yy_scan_buffer( buf, n ); + if ( ! b ) + YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); + + /* It's okay to grow etc. this buffer, and we should throw it + * away when we're done. + */ + b->yy_is_our_buffer = 1; + + return b; + } +#endif + + +#ifndef YY_NO_PUSH_STATE +#ifdef YY_USE_PROTOS +static void yy_push_state( int new_state ) +#else +static void yy_push_state( new_state ) +int new_state; +#endif + { + if ( yy_start_stack_ptr >= yy_start_stack_depth ) + { + yy_size_t new_size; + + yy_start_stack_depth += YY_START_STACK_INCR; + new_size = yy_start_stack_depth * sizeof( int ); + + if ( ! 
yy_start_stack ) + yy_start_stack = (int *) yy_flex_alloc( new_size ); + + else + yy_start_stack = (int *) yy_flex_realloc( + (void *) yy_start_stack, new_size ); + + if ( ! yy_start_stack ) + YY_FATAL_ERROR( + "out of memory expanding start-condition stack" ); + } + + yy_start_stack[yy_start_stack_ptr++] = YY_START; + + BEGIN(new_state); + } +#endif + + +#ifndef YY_NO_POP_STATE +static void yy_pop_state() + { + if ( --yy_start_stack_ptr < 0 ) + YY_FATAL_ERROR( "start-condition stack underflow" ); + + BEGIN(yy_start_stack[yy_start_stack_ptr]); + } +#endif + + +#ifndef YY_NO_TOP_STATE +static int yy_top_state() + { + return yy_start_stack[yy_start_stack_ptr - 1]; + } +#endif + +#ifndef YY_EXIT_FAILURE +#define YY_EXIT_FAILURE 2 +#endif + +#ifdef YY_USE_PROTOS +static void yy_fatal_error( yyconst char msg[] ) +#else +static void yy_fatal_error( msg ) +char msg[]; +#endif + { + (void) fprintf( stderr, "%s\n", msg ); + exit( YY_EXIT_FAILURE ); + } + + + +/* Redefine yyless() so it works in section 3 code. */ + +#undef yyless +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + yytext[yyleng] = yy_hold_char; \ + yy_c_buf_p = yytext + n; \ + yy_hold_char = *yy_c_buf_p; \ + *yy_c_buf_p = '\0'; \ + yyleng = n; \ + } \ + while ( 0 ) + + +/* Internal utility routines. 
*/ + +#ifndef yytext_ptr +#ifdef YY_USE_PROTOS +static void yy_flex_strncpy( char *s1, yyconst char *s2, int n ) +#else +static void yy_flex_strncpy( s1, s2, n ) +char *s1; +yyconst char *s2; +int n; +#endif + { + register int i; + for ( i = 0; i < n; ++i ) + s1[i] = s2[i]; + } +#endif + +#ifdef YY_NEED_STRLEN +#ifdef YY_USE_PROTOS +static int yy_flex_strlen( yyconst char *s ) +#else +static int yy_flex_strlen( s ) +yyconst char *s; +#endif + { + register int n; + for ( n = 0; s[n]; ++n ) + ; + + return n; + } +#endif + + +#ifdef YY_USE_PROTOS +static void *yy_flex_alloc( yy_size_t size ) +#else +static void *yy_flex_alloc( size ) +yy_size_t size; +#endif + { + return (void *) malloc( size ); + } + +#ifdef YY_USE_PROTOS +static void *yy_flex_realloc( void *ptr, yy_size_t size ) +#else +static void *yy_flex_realloc( ptr, size ) +void *ptr; +yy_size_t size; +#endif + { + /* The cast to (char *) in the following accommodates both + * implementations that use char* generic pointers, and those + * that use void* generic pointers. It works with the latter + * because both ANSI C and C++ allow castless assignment from + * any pointer type to void*, and deal with argument conversions + * as though doing an assignment. + */ + return (void *) realloc( (char *) ptr, size ); + } + +#ifdef YY_USE_PROTOS +static void yy_flex_free( void *ptr ) +#else +static void yy_flex_free( ptr ) +void *ptr; +#endif + { + free( ptr ); + } + +#if YY_MAIN +int main() + { + yylex(); + return 0; + } +#endif +#line 82 "btl_pcie_lex.l" + + + +/* + * This cleans up at the end of the parse (since, in this case, we + * always parse the entire file) and prevents a memory leak. 
+ */ +static int finish_parsing(void) +{ + if (NULL != YY_CURRENT_BUFFER) { + yy_delete_buffer(YY_CURRENT_BUFFER); +#if defined(YY_CURRENT_BUFFER_LVALUE) + YY_CURRENT_BUFFER_LVALUE = NULL; +#else + YY_CURRENT_BUFFER = NULL; +#endif /* YY_CURRENT_BUFFER_LVALUE */ + } + return YY_NULL; +} + + +static int btl_pcie_cfg_yywrap(void) +{ + btl_pcie_cfg_parse_done = true; + return 1; +} + + +/* + * Ensure that we have a valid yybuffer to use. Specifically, if this + * scanner is invoked a second time, finish_parsing() (above) will + * have been executed, and the current buffer will have been freed. + * Flex doesn't recognize this fact because as far as it's concerned, + * its internal state was already initialized, so it thinks it should + * have a valid buffer. Hence, here we ensure to give it a valid + * buffer. + */ +int btl_pcie_cfg_init_buffer(FILE *file) +{ + YY_BUFFER_STATE buf = yy_create_buffer(file, YY_BUF_SIZE); + yy_switch_to_buffer(buf); + + return 0; +} diff --git a/ompi/mca/btl/pcie/btl_pcie_lex.h b/ompi/mca/btl/pcie/btl_pcie_lex.h new file mode 100644 index 0000000000..6de15ece26 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_lex.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef BTL_PCIE_CFG_LEX_H_ +#define BTL_PCIE_CFG_LEX_H_ + +#include "opal_config.h" + +#ifdef malloc +#undef malloc +#endif +#ifdef realloc +#undef realloc +#endif +#ifdef free +#undef free +#endif + +#include + +int btl_pcie_cfg_yylex(void); +int btl_pcie_cfg_init_buffer(FILE *file); + +extern FILE *btl_pcie_cfg_yyin; +extern bool btl_pcie_cfg_parse_done; +extern char *btl_pcie_cfg_yytext; +extern int btl_pcie_cfg_yynewlines; + +/* + * Make lex-generated files not issue compiler warnings + */ +#define YY_STACK_USED 0 +#define YY_ALWAYS_INTERACTIVE 0 +#define YY_NEVER_INTERACTIVE 0 +#define YY_MAIN 0 +#define YY_NO_UNPUT 1 +#define YY_SKIP_YYWRAP 1 + +enum { + BTL_PCIE_CFG_PARSE_DONE = 1, + BTL_PCIE_CFG_PARSE_ERROR, + + BTL_PCIE_CFG_PARSE_NEWLINE, + BTL_PCIE_CFG_PARSE_HOSTNAME_CORE, + BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE, + BTL_PCIE_CFG_PARSE_DEVICE, + + BTL_PCIE_CFG_PARSE_MAX +}; + +#endif /* #ifndef BTL_PCIE_CFG_LEX_H_ */ diff --git a/ompi/mca/btl/pcie/btl_pcie_lex.l b/ompi/mca/btl/pcie/btl_pcie_lex.l new file mode 100644 index 0000000000..8b2db79d64 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_lex.l @@ -0,0 +1,125 @@ +%{ /* -*- C -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include +#if HAVE_UNISTD_H +#include +#endif + +#include "btl_pcie_lex.h" + +/* + * local functions + */ +static int finish_parsing(void) ; +static int btl_pcie_cfg_yywrap(void); + +/* + * global variables + */ +int btl_pcie_cfg_yynewlines = 1; +bool btl_pcie_cfg_parse_done = false; +char *btl_pcie_cfg_string = NULL; + +#define yyterminate() \ + return finish_parsing() + +%} + +WHITE [\f\t\v ] +CHAR [A-Za-z0-9_\-\.] +NAME_CHAR [A-Za-z0-9_\-\.\\\/] + +%x comment +%x section_name +%x section_end +%x value + +%% + +{WHITE}*\n { ++btl_pcie_cfg_yynewlines; + return BTL_PCIE_CFG_PARSE_NEWLINE; } +#.*\n { ++btl_pcie_cfg_yynewlines; + return BTL_PCIE_CFG_PARSE_NEWLINE; } +"//".*\n { ++btl_pcie_cfg_yynewlines; + return BTL_PCIE_CFG_PARSE_NEWLINE; } + +"/*" { BEGIN(comment); + return BTL_PCIE_CFG_PARSE_NEWLINE; } +[^*\n]* ; /* Eat up non '*'s */ +"*"+[^*/\n]* ; /* Eat '*'s not followed by a '/' */ +\n { ++btl_pcie_cfg_yynewlines; + return BTL_PCIE_CFG_PARSE_NEWLINE; } +"*"+"/" { BEGIN(INITIAL); /* Done with block comment */ + return BTL_PCIE_CFG_PARSE_NEWLINE; } + +{CHAR}+":"[0-9] { return BTL_PCIE_CFG_PARSE_HOSTNAME_CORE; } +{CHAR}+":"{NAME_CHAR}+ { return BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE; } + +{NAME_CHAR}+ { return BTL_PCIE_CFG_PARSE_DEVICE; } + +{WHITE}+ ; /* whitespace */ + +%% + + +/* + * This cleans up at the end of the parse (since, in this case, we + * always parse the entire file) and prevents a memory leak. + */ +static int finish_parsing(void) +{ + if (NULL != YY_CURRENT_BUFFER) { + yy_delete_buffer(YY_CURRENT_BUFFER); +#if defined(YY_CURRENT_BUFFER_LVALUE) + YY_CURRENT_BUFFER_LVALUE = NULL; +#else + YY_CURRENT_BUFFER = NULL; +#endif /* YY_CURRENT_BUFFER_LVALUE */ + } + return YY_NULL; +} + + +static int btl_pcie_cfg_yywrap(void) +{ + btl_pcie_cfg_parse_done = true; + return 1; +} + + +/* + * Ensure that we have a valid yybuffer to use. 
Specifically, if this + * scanner is invoked a second time, finish_parsing() (above) will + * have been executed, and the current buffer will have been freed. + * Flex doesn't recognize this fact because as far as it's concerned, + * its internal state was already initialized, so it thinks it should + * have a valid buffer. Hence, here we ensure to give it a valid + * buffer. + */ +int btl_pcie_cfg_init_buffer(FILE *file) +{ + YY_BUFFER_STATE buf = yy_create_buffer(file, YY_BUF_SIZE); + yy_switch_to_buffer(buf); + + return 0; +} diff --git a/ompi/mca/btl/pcie/btl_pcie_proc.c b/ompi/mca/btl/pcie/btl_pcie_proc.c new file mode 100644 index 0000000000..60f2233782 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_proc.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/class/opal_hash_table.h" +#include "ompi/mca/btl/base/btl_base_error.h" +#include "ompi/mca/mpool/base/base.h" +#include "ompi/runtime/ompi_module_exchange.h" + +#include "btl_pcie.h" +#include "btl_pcie_proc.h" + +static void mca_btl_pcie_proc_construct(mca_btl_pcie_proc_t* proc); +static void mca_btl_pcie_proc_destruct(mca_btl_pcie_proc_t* proc); + +OBJ_CLASS_INSTANCE(mca_btl_pcie_proc_t, + opal_list_item_t, mca_btl_pcie_proc_construct, + mca_btl_pcie_proc_destruct); + +void mca_btl_pcie_proc_construct(mca_btl_pcie_proc_t* proc) +{ + proc->proc_ompi = 0; + proc->proc_addr_count = 0; + proc->proc_endpoint_count = 0; + OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t); + /* add to list of all proc instance */ + OPAL_THREAD_LOCK(&mca_btl_pcie_component.pcie_lock); + opal_list_append(&mca_btl_pcie_component.pcie_procs, &proc->super); + OPAL_THREAD_UNLOCK(&mca_btl_pcie_component.pcie_lock); +} + +/* + * Cleanup ib proc instance + */ + +void mca_btl_pcie_proc_destruct(mca_btl_pcie_proc_t* proc) +{ + /* remove from list of all proc instances 
*/ + OPAL_THREAD_LOCK(&mca_btl_pcie_component.pcie_lock); + opal_list_remove_item(&mca_btl_pcie_component.pcie_procs, &proc->super); + OPAL_THREAD_UNLOCK(&mca_btl_pcie_component.pcie_lock); + + OBJ_DESTRUCT(&proc->proc_lock); +} + + +/* + * Look for an existing TEMPLATE process instances based on the associated + * ompi_proc_t instance. + */ +static mca_btl_pcie_proc_t* mca_btl_pcie_proc_lookup_ompi(ompi_proc_t* ompi_proc) +{ + mca_btl_pcie_proc_t* pcie_proc; + + OPAL_THREAD_LOCK(&mca_btl_pcie_component.pcie_lock); + + for(pcie_proc = (mca_btl_pcie_proc_t*) + opal_list_get_first(&mca_btl_pcie_component.pcie_procs); + pcie_proc != (mca_btl_pcie_proc_t*) + opal_list_get_end(&mca_btl_pcie_component.pcie_procs); + pcie_proc = (mca_btl_pcie_proc_t*)opal_list_get_next(pcie_proc)) { + + if(pcie_proc->proc_ompi == ompi_proc) { + OPAL_THREAD_UNLOCK(&mca_btl_pcie_component.pcie_lock); + return pcie_proc; + } + + } + + OPAL_THREAD_UNLOCK(&mca_btl_pcie_component.pcie_lock); + + return NULL; +} + + +/* + * Create a TEMPLATE process structure. There is a one-to-one correspondence + * between a ompi_proc_t and a mca_btl_pcie_proc_t instance. We cache + * additional data (specifically the list of mca_btl_pcie_endpoint_t instances, + * and published addresses) associated w/ a given destination on this + * datastructure. + */ + +int mca_btl_pcie_proc_create(ompi_proc_t* ompi_proc, + mca_btl_pcie_module_t* pcie_btl, + mca_btl_pcie_proc_t** ret_proc) +{ + mca_btl_pcie_proc_t* pcie_proc = NULL; + char *rem_dev_name = NULL, *lcl_dev_name = NULL; + char *rem_hostname = NULL; + int rc, num_peers, i; + size_t size; + mca_btl_pcie_modex_info_t *modex_info; + + /* Check if already have proc structure for this ompi process */ + pcie_proc = mca_btl_pcie_proc_lookup_ompi(ompi_proc); + + if(pcie_proc != NULL) { + /* Gotcha! 
*/ + *ret_proc = pcie_proc; + return OMPI_SUCCESS; + } + + /* query for the peer's device name info */ + rc = ompi_modex_recv(&mca_btl_pcie_component.super.btl_version, + ompi_proc, + (void*)&modex_info, + &size); + if (OMPI_SUCCESS != rc) { + opal_output(mca_btl_base_output, "[%s:%d] ompi_modex_recv failed for peer %s", + __FILE__, __LINE__, ORTE_NAME_PRINT(&ompi_proc->proc_name)); + OBJ_RELEASE(pcie_proc); + *ret_proc = NULL; + return OMPI_ERROR; + } + + if (0 == size || 0 != size % sizeof(mca_btl_pcie_modex_info_t)) { + *ret_proc = NULL; + return OMPI_SUCCESS; + } + + num_peers = size / sizeof(mca_btl_pcie_modex_info_t); + + for (i = 0 ; i < num_peers ; ++i) { + MCA_BTL_PCIE_MODEX_INFO_NTOH(modex_info[i]); + rem_hostname = modex_info[i].hostname; + rem_dev_name = modex_info[i].devicename; + lcl_dev_name = ompi_btl_pcie_cfg_get_matching_device(rem_hostname, + rem_dev_name); + if (NULL != lcl_dev_name && + 0 == strcmp(lcl_dev_name, pcie_btl->lcl_dev_name)) { + /* we have a match. continue onward */ + break; + } + } + /* make sure the local device names match */ + if(NULL == lcl_dev_name || + 0 != strcmp(lcl_dev_name, pcie_btl->lcl_dev_name)){ + *ret_proc = NULL; + return OMPI_SUCCESS; + } + + BTL_VERBOSE(("Have matching devices: %s:%s <-> %s:%s", + orte_process_info.nodename, + pcie_btl->lcl_dev_name, + rem_hostname, + rem_dev_name)); + + pcie_proc = OBJ_NEW(mca_btl_pcie_proc_t); + if(NULL == pcie_proc){ + *ret_proc = NULL; + return OMPI_ERR_OUT_OF_RESOURCE; + } + + pcie_proc->proc_ompi = ompi_proc; + + /* build a unique identifier (of arbitrary + * size) to represent the proc */ + pcie_proc->proc_guid = ompi_proc->proc_name; + + /* Initialize number of peer */ + pcie_proc->proc_endpoint_count = 1; + + pcie_proc->endpoint_proc = OBJ_NEW(mca_btl_pcie_endpoint_t); + if(NULL == pcie_proc->endpoint_proc) { + free(rem_dev_name); + *ret_proc = NULL; + return OMPI_ERR_OUT_OF_RESOURCE; + } + + pcie_proc->endpoint_proc->lcl_dev_name = lcl_dev_name; + 
pcie_proc->endpoint_proc->rem_dev_name = rem_dev_name; + pcie_proc->endpoint_proc->endpoint_proc = pcie_proc; + pcie_proc->endpoint_proc->endpoint_btl = pcie_btl; + + if(OMPI_SUCCESS != mca_btl_pcie_endpoint_init(pcie_proc->endpoint_proc)) { + BTL_ERROR(("Error initializing the PCIE endpoint \n")); + *ret_proc = NULL; + return OMPI_ERROR; + } + + *ret_proc = pcie_proc; + return OMPI_SUCCESS; +} + + diff --git a/ompi/mca/btl/pcie/btl_pcie_proc.h b/ompi/mca/btl/pcie/btl_pcie_proc.h new file mode 100644 index 0000000000..60278ecb45 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_proc.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_BTL_PCIE_PROC_H +#define MCA_BTL_PCIE_PROC_H + +#include "ompi_config.h" + +#include "opal/class/opal_list.h" +#include "ompi/proc/proc.h" + +#include "btl_pcie.h" +#include "btl_pcie_endpoint.h" + +BEGIN_C_DECLS + +/** + * Represents the state of a remote process and the set of addresses + * that it exports. Also cache an instance of mca_btl_base_endpoint_t for + * each + * BTL instance that attempts to open a connection to the process. 
+ */ +struct mca_btl_pcie_proc_t { + opal_list_item_t super; + /**< allow proc to be placed on a list */ + + ompi_proc_t *proc_ompi; + /**< pointer to corresponding ompi_proc_t */ + + orte_process_name_t proc_guid; + /**< globally unique identifier for the process */ + + size_t proc_addr_count; + /**< number of addresses published by endpoint */ + + struct mca_btl_base_endpoint_t *endpoint_proc; + /**< endpoint that has been created to access this proc */ + + size_t proc_endpoint_count; + /**< number of endpoints */ + + opal_mutex_t proc_lock; + /**< lock to protect against concurrent access to proc state */ + +}; +typedef struct mca_btl_pcie_proc_t mca_btl_pcie_proc_t; +OBJ_CLASS_DECLARATION(mca_btl_pcie_proc_t); + +int mca_btl_pcie_proc_create(ompi_proc_t* ompi_proc, + mca_btl_pcie_module_t* pcie_btl, + mca_btl_pcie_proc_t** ret_proc); + +END_C_DECLS + +#endif /* #ifndef MCA_BTL_PCIE_PROC_H */ diff --git a/ompi/mca/btl/pcie/configure.m4 b/ompi/mca/btl/pcie/configure.m4 new file mode 100644 index 0000000000..e33317f842 --- /dev/null +++ b/ompi/mca/btl/pcie/configure.m4 @@ -0,0 +1,31 @@ +# -*- shell-script -*- +# +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + + +# MCA_btl_pcie_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_btl_pcie_CONFIG],[ + OMPI_CHECK_PCIE([btl_pcie], + [btl_pcie_happy="yes"], + [btl_pcie_happy="no"]) + + AS_IF([test "$btl_pcie_happy" = "yes"], + [btl_pcie_WRAPPER_EXTRA_LDFLAGS="$btl_pcie_LDFLAGS" + btl_pcie_WRAPPER_EXTRA_LIBS="$btl_pcie_LIBS" + $1], + [$2]) + + # substitute in the things needed to build pcie + AC_SUBST([btl_pcie_CPPFLAGS]) + AC_SUBST([btl_pcie_LDFLAGS]) + AC_SUBST([btl_pcie_LIBS]) +])dnl diff --git a/ompi/mca/btl/pcie/configure.params b/ompi/mca/btl/pcie/configure.params new file mode 100644 index 0000000000..3513f8d956 --- /dev/null +++ b/ompi/mca/btl/pcie/configure.params @@ -0,0 +1,24 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Specific to this module + +PARAM_CONFIG_FILES="Makefile" diff --git a/ompi/mca/btl/pcie/help-mpi-btl-pcie.txt b/ompi/mca/btl/pcie/help-mpi-btl-pcie.txt new file mode 100644 index 0000000000..7964b0af7f --- /dev/null +++ b/ompi/mca/btl/pcie/help-mpi-btl-pcie.txt @@ -0,0 +1,20 @@ +# -*- text -*- +# Copyright (c) 2007 Los Alamos National Security, LLC. +# All righs reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English help file for Open MPI's IBM PCIe support + +[initialization:more-than-one-cpu] +The PCIe BTL found that the CPU affinity mask for the current process +includes more than one CPU (%d). When using Open MPI on the Cell +machines with the PCIe driver, the affinity mask must include exactly +one CPU. +[initialization:no-cpus] +The PCIe BTL was unable to find any CPUs in the affinity mask for the +current process. This usually indicates a system issue that must be +resolved by the system administrator. diff --git a/ompi/mca/btl/pcie/mca-btl-pcie-local-resources.cfg b/ompi/mca/btl/pcie/mca-btl-pcie-local-resources.cfg new file mode 100644 index 0000000000..9f4fb43c48 --- /dev/null +++ b/ompi/mca/btl/pcie/mca-btl-pcie-local-resources.cfg @@ -0,0 +1,159 @@ +# hostname:core device +n01-001-0:0 /dev/axon0 +n01-001-0:1 /dev/axon1 +n01-001-0:2 /dev/axon2 +n01-001-0:3 /dev/axon3 + +n01-001-1:0 /dev/axon0 +n01-001-1:1 /dev/axon1 + +n01-001-2:0 /dev/axon0 +n01-001-2:1 /dev/axon1 + +n01-002-0:0 /dev/axon0 +n01-002-0:1 /dev/axon1 +n01-002-0:2 /dev/axon2 +n01-002-0:3 /dev/axon3 + +n01-002-1:0 /dev/axon0 +n01-002-1:1 /dev/axon1 + +n01-002-2:0 /dev/axon0 +n01-002-2:1 /dev/axon1 + +n01-003-0:0 /dev/axon0 +n01-003-0:1 /dev/axon1 +n01-003-0:2 /dev/axon2 +n01-003-0:3 /dev/axon3 + +n01-003-1:0 /dev/axon0 +n01-003-1:1 /dev/axon1 + +n01-004-0:0 /dev/axon0 +n01-004-0:1 /dev/axon1 +n01-004-0:2 /dev/axon2 +n01-004-0:3 /dev/axon3 + +n01-004-1:0 /dev/axon0 +n01-004-1:1 /dev/axon1 + +n01-005-0:0 /dev/axon0 +n01-005-0:1 /dev/axon1 +n01-005-0:2 /dev/axon2 +n01-005-0:3 /dev/axon3 + +n01-005-1:0 /dev/axon0 +n01-005-1:1 /dev/axon1 + +n01-005-2:0 /dev/axon0 +n01-005-2:1 /dev/axon1 + +n01-006-0:0 /dev/axon0 +n01-006-0:1 /dev/axon1 +n01-006-0:2 /dev/axon2 +n01-006-0:3 /dev/axon3 + +n01-006-1:0 /dev/axon0 +n01-006-1:1 /dev/axon1 + +n01-006-2:0 /dev/axon0 +n01-006-2:1 /dev/axon1 + +n01-007-0:0 /dev/axon0 
+n01-007-0:1 /dev/axon1 +n01-007-0:2 /dev/axon2 +n01-007-0:3 /dev/axon3 + +n01-007-1:0 /dev/axon0 +n01-007-1:1 /dev/axon1 + +n01-007-2:0 /dev/axon0 +n01-007-2:1 /dev/axon1 + +n01-008-0:0 /dev/axon0 +n01-008-0:1 /dev/axon1 +n01-008-0:2 /dev/axon2 +n01-008-0:3 /dev/axon3 + +n01-008-1:0 /dev/axon0 +n01-008-1:1 /dev/axon1 + +n01-008-2:0 /dev/axon0 +n01-008-2:1 /dev/axon1 + +n01-009-0:0 /dev/axon0 +n01-009-0:1 /dev/axon1 +n01-009-0:2 /dev/axon2 +n01-009-0:3 /dev/axon3 + +n01-009-1:0 /dev/axon0 +n01-009-1:1 /dev/axon1 + +n01-009-2:0 /dev/axon0 +n01-009-2:1 /dev/axon1 + +n01-010-0:0 /dev/axon0 +n01-010-0:1 /dev/axon1 +n01-010-0:2 /dev/axon2 +n01-010-0:3 /dev/axon3 + +n01-010-1:0 /dev/axon0 +n01-010-1:1 /dev/axon1 + +n01-010-2:0 /dev/axon0 +n01-010-2:1 /dev/axon1 + +n01-011-0:0 /dev/axon0 +n01-011-0:1 /dev/axon1 +n01-011-0:2 /dev/axon2 +n01-011-0:3 /dev/axon3 + +n01-011-1:0 /dev/axon0 +n01-011-1:1 /dev/axon1 + +n01-011-2:0 /dev/axon0 +n01-011-2:1 /dev/axon1 + +n01-012-0:0 /dev/axon0 +n01-012-0:1 /dev/axon1 +n01-012-0:2 /dev/axon2 +n01-012-0:3 /dev/axon3 + +n01-012-1:0 /dev/axon0 +n01-012-1:1 /dev/axon1 + +n01-012-2:0 /dev/axon0 +n01-012-2:1 /dev/axon1 + +n01-013-0:0 /dev/axon0 +n01-013-0:1 /dev/axon1 +n01-013-0:2 /dev/axon2 +n01-013-0:3 /dev/axon3 + +n01-013-1:0 /dev/axon0 +n01-013-1:1 /dev/axon1 + +n01-013-2:0 /dev/axon0 +n01-013-2:1 /dev/axon1 + +n01-014-0:0 /dev/axon0 +n01-014-0:1 /dev/axon1 +n01-014-0:2 /dev/axon2 +n01-014-0:3 /dev/axon3 + +n01-014-1:0 /dev/axon0 +n01-014-1:1 /dev/axon1 + +n01-014-2:0 /dev/axon0 +n01-014-2:1 /dev/axon1 + +n01-015-0:0 /dev/axon0 +n01-015-0:1 /dev/axon1 +n01-015-0:2 /dev/axon2 +n01-015-0:3 /dev/axon3 + +n01-015-1:0 /dev/axon0 +n01-015-1:1 /dev/axon1 + +n01-015-2:0 /dev/axon0 +n01-015-2:1 /dev/axon1 diff --git a/ompi/mca/btl/pcie/mca-btl-pcie-remote-resources.cfg b/ompi/mca/btl/pcie/mca-btl-pcie-remote-resources.cfg new file mode 100644 index 0000000000..3f0531361a --- /dev/null +++ b/ompi/mca/btl/pcie/mca-btl-pcie-remote-resources.cfg @@ 
-0,0 +1,82 @@ +# opteron_host:device cell_host:device + +n01-001-0:/dev/axon0 n01-001-1:/dev/axon0 +n01-001-0:/dev/axon1 n01-001-1:/dev/axon1 +n01-001-0:/dev/axon2 n01-001-2:/dev/axon0 +n01-001-0:/dev/axon3 n01-001-2:/dev/axon1 + +n01-002-0:/dev/axon0 n01-002-1:/dev/axon0 +n01-002-0:/dev/axon1 n01-002-1:/dev/axon1 +n01-002-0:/dev/axon2 n01-002-2:/dev/axon0 +n01-002-0:/dev/axon3 n01-002-2:/dev/axon1 + +n01-003-0:/dev/axon0 n01-003-1:/dev/axon0 +n01-003-0:/dev/axon1 n01-003-1:/dev/axon1 +n01-003-0:/dev/axon2 n01-003-2:/dev/axon0 +n01-003-0:/dev/axon3 n01-003-2:/dev/axon1 + +n01-004-0:/dev/axon0 n01-004-1:/dev/axon0 +n01-004-0:/dev/axon1 n01-004-1:/dev/axon1 +n01-004-0:/dev/axon2 n01-004-2:/dev/axon0 +n01-004-0:/dev/axon3 n01-004-2:/dev/axon1 + +n01-005-0:/dev/axon0 n01-005-1:/dev/axon0 +n01-005-0:/dev/axon1 n01-005-1:/dev/axon1 +n01-005-0:/dev/axon2 n01-005-2:/dev/axon0 +n01-005-0:/dev/axon3 n01-005-2:/dev/axon1 + +n01-006-0:/dev/axon0 n01-006-1:/dev/axon0 +n01-006-0:/dev/axon1 n01-006-1:/dev/axon1 +n01-006-0:/dev/axon2 n01-006-2:/dev/axon0 +n01-006-0:/dev/axon3 n01-006-2:/dev/axon1 + +n01-007-0:/dev/axon0 n01-007-1:/dev/axon0 +n01-007-0:/dev/axon1 n01-007-1:/dev/axon1 +n01-007-0:/dev/axon2 n01-007-2:/dev/axon0 +n01-007-0:/dev/axon3 n01-007-2:/dev/axon1 + +n01-008-0:/dev/axon0 n01-008-1:/dev/axon0 +n01-008-0:/dev/axon1 n01-008-1:/dev/axon1 +n01-008-0:/dev/axon2 n01-008-2:/dev/axon0 +n01-008-0:/dev/axon3 n01-008-2:/dev/axon1 + +n01-009-0:/dev/axon0 n01-009-1:/dev/axon0 +n01-009-0:/dev/axon1 n01-009-1:/dev/axon1 +n01-009-0:/dev/axon2 n01-009-2:/dev/axon0 +n01-009-0:/dev/axon3 n01-009-2:/dev/axon1 + +n01-010-0:/dev/axon0 n01-010-1:/dev/axon0 +n01-010-0:/dev/axon1 n01-010-1:/dev/axon1 +n01-010-0:/dev/axon2 n01-010-2:/dev/axon0 +n01-010-0:/dev/axon3 n01-010-2:/dev/axon1 + +n01-011-0:/dev/axon0 n01-011-1:/dev/axon0 +n01-011-0:/dev/axon1 n01-011-1:/dev/axon1 +n01-011-0:/dev/axon2 n01-011-2:/dev/axon0 +n01-011-0:/dev/axon3 n01-011-2:/dev/axon1 + +n01-012-0:/dev/axon0 
n01-012-1:/dev/axon0 +n01-012-0:/dev/axon1 n01-012-1:/dev/axon1 +n01-012-0:/dev/axon2 n01-012-2:/dev/axon0 +n01-012-0:/dev/axon3 n01-012-2:/dev/axon1 + +n01-013-0:/dev/axon0 n01-013-1:/dev/axon0 +n01-013-0:/dev/axon1 n01-013-1:/dev/axon1 +n01-013-0:/dev/axon2 n01-013-2:/dev/axon0 +n01-013-0:/dev/axon3 n01-013-2:/dev/axon1 + +n01-014-0:/dev/axon0 n01-014-1:/dev/axon0 +n01-014-0:/dev/axon1 n01-014-1:/dev/axon1 +n01-014-0:/dev/axon2 n01-014-2:/dev/axon0 +n01-014-0:/dev/axon3 n01-014-2:/dev/axon1 + +n01-015-0:/dev/axon0 n01-015-1:/dev/axon0 +n01-015-0:/dev/axon1 n01-015-1:/dev/axon1 +n01-015-0:/dev/axon2 n01-015-2:/dev/axon0 +n01-015-0:/dev/axon3 n01-015-2:/dev/axon1 + +n01-016-0:/dev/axon0 n01-016-1:/dev/axon0 +n01-016-0:/dev/axon1 n01-016-1:/dev/axon1 +n01-016-0:/dev/axon2 n01-016-2:/dev/axon0 +n01-016-0:/dev/axon3 n01-016-2:/dev/axon1 + diff --git a/ompi/mca/mpool/pcie/Makefile.am b/ompi/mca/mpool/pcie/Makefile.am new file mode 100644 index 0000000000..a77b98d692 --- /dev/null +++ b/ompi/mca/mpool/pcie/Makefile.am @@ -0,0 +1,57 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(mpool_pcie_CPPFLAGS) + +sources = \ + mpool_pcie.h \ + mpool_pcie_component.c \ + mpool_pcie_module.c + +if WANT_INSTALL_HEADERS +ompidir = $(includedir)/openmpi/ompi/mca/mpool/pcie +ompi_HEADERS = mpool_pcie.h +else +ompidir = $(includedir) +ompi_HEADERS = +endif + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). + +if OMPI_BUILD_mpool_pcie_DSO +component_noinst = +component_install = mca_mpool_pcie.la +else +component_noinst = libmca_mpool_pcie.la +component_install = +endif + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_mpool_pcie_la_SOURCES = $(sources) +mca_mpool_pcie_la_LDFLAGS = -module -avoid-version +mca_mpool_pcie_la_LIBADD = $(mpool_pcie_LIBS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_mpool_pcie_la_SOURCES = $(sources) +libmca_mpool_pcie_la_LDFLAGS = -module -avoid-version +libmca_mpool_pcie_la_LIBADD = $(mpool_pcie_LIBS) diff --git a/ompi/mca/mpool/pcie/configure.m4 b/ompi/mca/mpool/pcie/configure.m4 new file mode 100644 index 0000000000..f093097c40 --- /dev/null +++ b/ompi/mca/mpool/pcie/configure.m4 @@ -0,0 +1,31 @@ +# -*- shell-script -*- +# +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + + +# MCA_mpool_pcie_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_mpool_pcie_CONFIG],[ + OMPI_CHECK_PCIE([mpool_pcie], + [mpool_pcie_happy="yes"], + [mpool_pcie_happy="no"]) + + AS_IF([test "$mpool_pcie_happy" = "yes"], + [mpool_pcie_WRAPPER_EXTRA_LDFLAGS="$mpool_pcie_LDFLAGS" + mpool_pcie_WRAPPER_EXTRA_LIBS="$mpool_pcie_LIBS" + $1], + [$2]) + + # substitute in the things needed to build pcie + AC_SUBST([mpool_pcie_CPPFLAGS]) + AC_SUBST([mpool_pcie_LDFLAGS]) + AC_SUBST([mpool_pcie_LIBS]) +])dnl diff --git a/ompi/mca/mpool/pcie/configure.params b/ompi/mca/mpool/pcie/configure.params new file mode 100644 index 0000000000..71d3c8009d --- /dev/null +++ b/ompi/mca/mpool/pcie/configure.params @@ -0,0 +1,26 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006 Voltaire. All rights reserved. +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. +# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Specific to this module + +PARAM_CONFIG_FILES="Makefile" diff --git a/ompi/mca/mpool/pcie/mpool_pcie.h b/ompi/mca/mpool/pcie/mpool_pcie.h new file mode 100644 index 0000000000..3d65fe3826 --- /dev/null +++ b/ompi/mca/mpool/pcie/mpool_pcie.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_MPOOL_PCIE_H +#define MCA_MPOOL_PCIE_H + +#include "opal/class/opal_list.h" +#include "opal/event/event.h" +#include "ompi/class/ompi_free_list.h" +#include "ompi/mca/allocator/allocator.h" +#include "ompi/mca/mpool/mpool.h" + +BEGIN_C_DECLS + +struct mca_mpool_pcie_component_t { + mca_mpool_base_component_t super; + int verbose; +}; +typedef struct mca_mpool_pcie_component_t mca_mpool_pcie_component_t; + +OMPI_MODULE_DECLSPEC extern mca_mpool_pcie_component_t mca_mpool_pcie_component; + +struct mca_mpool_pcie_module_t { + mca_mpool_base_module_t super; + void* base; + size_t offset; + size_t len; + +}; typedef struct mca_mpool_pcie_module_t mca_mpool_pcie_module_t; + + +struct mca_mpool_base_resources_t { + void *base; + size_t len; +}; +typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t; + + +/* + * Initializes the mpool module. + */ +void mca_mpool_pcie_module_init(mca_mpool_pcie_module_t* mpool); + + +/* + * Returns base address of shared memory mapping. + */ +void* mca_mpool_pcie_base(mca_mpool_base_module_t*); + + +/** + * Allocate block of shared memory. 
+ */
+void* mca_mpool_pcie_alloc(mca_mpool_base_module_t* mpool,
+                           size_t size,
+                           size_t align,
+                           uint32_t flags,
+                           mca_mpool_base_registration_t** registration);
+
+
+/**
+ * realloc function typedef
+ */
+void* mca_mpool_pcie_realloc(mca_mpool_base_module_t* mpool,
+                             void* addr,
+                             size_t size,
+                             mca_mpool_base_registration_t** registration);
+
+
+/**
+ * free function typedef
+ */
+void mca_mpool_pcie_free(mca_mpool_base_module_t* mpool,
+                         void * addr,
+                         mca_mpool_base_registration_t* registration);
+
+
+END_C_DECLS
+
+#endif
diff --git a/ompi/mca/mpool/pcie/mpool_pcie_component.c b/ompi/mca/mpool/pcie/mpool_pcie_component.c
new file mode 100644
index 0000000000..7a1bbea7fc
--- /dev/null
+++ b/ompi/mca/mpool/pcie/mpool_pcie_component.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
+ * reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif /* HAVE_UNISTD_H*/
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif /* HAVE_STDLIB_H */
+#include <errno.h> /* NOTE(review): header name lost to markup stripping; verify against upstream */
+
+#include "opal/util/output.h"
+#include "opal/mca/base/base.h"
+#include "opal/mca/base/mca_base_param.h"
+
+#include "orte/util/proc_info.h"
+
+#include "ompi/proc/proc.h"
+#include "ompi/mca/allocator/base/base.h"
+
+#include "mpool_pcie.h"
+
+/*
+ * Local functions
+ */
+static int mca_mpool_pcie_open(void);
+static int mca_mpool_pcie_close( void );
+static mca_mpool_base_module_t* mca_mpool_pcie_init(
+    struct mca_mpool_base_resources_t* resources);
+
+mca_mpool_pcie_component_t mca_mpool_pcie_component = {
+    {
+        /* First, the mca_base_component_t struct containing meta
+           information about the component itself */
+
+        {
+            /* Indicate that we are a mpool v2.0.0 component (which also
+               implies a specific MCA version) */
+
+            MCA_MPOOL_BASE_VERSION_2_0_0,
+
+            "pcie", /* MCA component name */
+            OMPI_MAJOR_VERSION,  /* MCA component major version */
+            OMPI_MINOR_VERSION,  /* MCA component minor 
version */
+            OMPI_RELEASE_VERSION,  /* MCA component release version */
+            mca_mpool_pcie_open,  /* component open */
+            mca_mpool_pcie_close
+        },
+
+        /* Next the MCA v2.0.0 component meta data */
+
+        {
+            /* The component is not checkpoint ready */
+            false
+        },
+
+        mca_mpool_pcie_init
+    }
+};
+
+
+static int
+mca_mpool_pcie_open(void)
+{
+    return OMPI_SUCCESS;
+}
+
+
+static int
+mca_mpool_pcie_close(void)
+{
+    return OMPI_SUCCESS;
+}
+
+
+static mca_mpool_base_module_t*
+mca_mpool_pcie_init(struct mca_mpool_base_resources_t* resources)
+{
+    mca_mpool_pcie_module_t* mpool_module;
+
+    mpool_module = (mca_mpool_pcie_module_t*)malloc(sizeof(mca_mpool_pcie_module_t));
+    if(NULL == mpool_module) return NULL;
+
+    mpool_module->super.mpool_component = &mca_mpool_pcie_component.super;
+    mpool_module->super.mpool_base = NULL; /* no base .. */
+    mpool_module->super.mpool_alloc = mca_mpool_pcie_alloc;
+    mpool_module->super.mpool_realloc = mca_mpool_pcie_realloc;
+    mpool_module->super.mpool_free = mca_mpool_pcie_free;
+    mpool_module->super.mpool_register = NULL;
+    mpool_module->super.mpool_find = NULL;
+    mpool_module->super.mpool_deregister = NULL;
+    mpool_module->super.mpool_release_memory = NULL;
+    mpool_module->super.mpool_finalize = NULL;
+    mpool_module->super.rcache = NULL;
+    mpool_module->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM;
+
+    mpool_module->base = resources->base;
+    mpool_module->len = resources->len;
+    mpool_module->offset = 0;
+
+    return (mca_mpool_base_module_t*) mpool_module;
+}
+
diff --git a/ompi/mca/mpool/pcie/mpool_pcie_module.c b/ompi/mca/mpool/pcie/mpool_pcie_module.c
new file mode 100644
index 0000000000..965c2fa53a
--- /dev/null
+++ b/ompi/mca/mpool/pcie/mpool_pcie_module.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
+ * reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include <string.h> /* NOTE(review): header name lost to markup stripping; verify against upstream */
+
+#include "opal/util/output.h"
+
+#include "mpool_pcie.h"
+
+void
+mca_mpool_pcie_module_init(mca_mpool_pcie_module_t* mpool)
+{
+
+}
+
+
+void* mca_mpool_pcie_base(mca_mpool_base_module_t* mpool)
+{
+    return ((mca_mpool_pcie_module_t*) mpool)->base;
+}
+
+
+void*
+mca_mpool_pcie_alloc(mca_mpool_base_module_t* mpool,
+                     size_t size,
+                     size_t align,
+                     uint32_t flags,
+                     mca_mpool_base_registration_t** registration)
+{
+    mca_mpool_pcie_module_t* mpool_pcie =
+        (mca_mpool_pcie_module_t*) mpool;
+    void *addr;
+
+    if(mpool_pcie->offset + size > mpool_pcie->len) {
+        addr = NULL;
+    } else {
+        addr = (char*)mpool_pcie->base + mpool_pcie->offset;
+        mpool_pcie->offset += size;
+    }
+
+    return addr;
+}
+
+
+void*
+mca_mpool_pcie_realloc(mca_mpool_base_module_t* mpool,
+                       void* addr,
+                       size_t size,
+                       mca_mpool_base_registration_t** registration)
+{
+    /* we don't need no realloc */
+    return NULL;
+}
+
+
+void
+mca_mpool_pcie_free(mca_mpool_base_module_t* mpool, void * addr,
+                    mca_mpool_base_registration_t* registration)
+{
+    /* we don't need no free */
+}