diff --git a/config/ompi_check_pcie.m4 b/config/ompi_check_pcie.m4 new file mode 100644 index 0000000000..213924f68f --- /dev/null +++ b/config/ompi_check_pcie.m4 @@ -0,0 +1,56 @@ +# -*- shell-script -*- +# +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# OMPI_CHECK_PCIE(prefix, [action-if-found], [action-if-not-found]) +# -------------------------------------------------------- +AC_DEFUN([OMPI_CHECK_PCIE],[ + AC_ARG_WITH([pcie], + [AC_HELP_STRING([--with-pcie(=DIR)], + [Build PCIE (QLogic InfiniPath PCIE) support, searching for libraries in DIR])]) + AC_ARG_WITH([pcie-libdir], + [AC_HELP_STRING([--with-pcie-libdir=DIR], + [Search for PCIE (QLogic InfiniPath PCIE) libraries in DIR])]) + + ompi_check_pcie_$1_save_CPPFLAGS="$CPPFLAGS" + ompi_check_pcie_$1_save_LDFLAGS="$LDFLAGS" + ompi_check_pcie_$1_save_LIBS="$LIBS" + + ompi_check_pcie_happy="yes" + + AS_IF([test "$with_pcie" != "no"], + [AS_IF([test ! -z "$with_pcie" -a "$with_pcie" != "yes"], + [ompi_check_pcie_dir="$with_pcie"]) + AS_IF([test ! -z "$with_pcie_libdir" -a "$with_pcie_libdir" != "yes"], + [ompi_check_pcie_libdir="$with_pcie_libdir"]) + OMPI_CHECK_PACKAGE([$1], + [axon_ioctl.h], + [], + [$ompi_check_pcie_dir], + [$ompi_check_pcie_libdir], + [ompi_check_pcie_happy="yes"], + [ompi_check_pcie_happy="no"])], + [ompi_check_pcie_happy="no"]) + + + CPPFLAGS="$ompi_check_pcie_$1_save_CPPFLAGS" + LDFLAGS="$ompi_check_pcie_$1_save_LDFLAGS" + LIBS="$ompi_check_pcie_$1_save_LIBS" + + AS_IF([test "$ompi_check_pcie_happy" = "yes" -a "$enable_progress_threads" = "yes"], + [AC_MSG_WARN([PCIE driver does not currently support progress threads. Disabling BTL.]) + ompi_check_pcie_happy="no"]) + + AS_IF([test "$ompi_check_pcie_happy" = "yes"], + [$2], + [AS_IF([test ! -z "$with_pcie" -a "$with_pcie" != "no"], + [AC_MSG_ERROR([PCIe support requested but not found. Aborting])]) + $3]) +]) diff --git a/configure.ac b/configure.ac index c9cdb27dea..cdc0e228c9 100644 --- a/configure.ac +++ b/configure.ac @@ -638,9 +638,9 @@ ompi_show_title "Header file tests" AC_CHECK_HEADERS([alloca.h aio.h arpa/inet.h dirent.h \ dlfcn.h execinfo.h err.h fcntl.h grp.h inttypes.h libgen.h \ - libutil.h netdb.h netinet/in.h netinet/tcp.h \ + libutil.h memory.h netdb.h netinet/in.h netinet/tcp.h \ poll.h pthread.h pty.h pwd.h sched.h stdint.h \ - string.h strings.h stropts.h sys/fcntl.h sys/ipc.h \ + stdlib.h string.h strings.h stropts.h sys/fcntl.h sys/ipc.h \ sys/ioctl.h sys/mman.h sys/param.h sys/queue.h \ sys/resource.h sys/select.h sys/socket.h sys/sockio.h \ stdarg.h sys/stat.h sys/statvfs.h sys/time.h sys/tree.h \ diff --git a/ompi/mca/btl/pcie/Makefile.am b/ompi/mca/btl/pcie/Makefile.am new file mode 100644 index 0000000000..5092f031c3 --- /dev/null +++ b/ompi/mca/btl/pcie/Makefile.am @@ -0,0 +1,75 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(btl_pcie_CPPFLAGS) + +dist_pkgdata_DATA = \ + help-mpi-btl-pcie.txt + +sources = \ + btl_pcie.c \ + btl_pcie.h \ + btl_pcie_component.c \ + btl_pcie_endpoint.c \ + btl_pcie_endpoint.h \ + btl_pcie_fifo.c \ + btl_pcie_fifo.h \ + btl_pcie_frag.c \ + btl_pcie_frag.h \ + btl_pcie_proc.c \ + btl_pcie_proc.h \ + btl_pcie_lex.c \ + btl_pcie_lex.h \ + btl_pcie_cfg.c \ + btl_pcie_ddriver.h \ + btl_pcie_ddriver.c + +EXTRA_DIST = btl_pcie_lex.l + + +if OMPI_BUILD_btl_pcie_DSO +lib = +lib_sources = +component = mca_btl_pcie.la +component_sources = $(sources) +else +lib = libmca_btl_pcie.la +lib_sources = $(sources) +component = +component_sources = +endif + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component) +mca_btl_pcie_la_SOURCES = $(component_sources) +mca_btl_pcie_la_LDFLAGS = -module -avoid-version $(btl_pcie_LDFLAGS) +mca_btl_pcie_la_LIBADD = $(btl_pcie_LIBS) + +noinst_LTLIBRARIES = $(lib) +libmca_btl_pcie_la_SOURCES = $(lib_sources) +libmca_btl_pcie_la_LDFLAGS= -module -avoid-version $(btl_pcie_LDFLAGS) +libmca_btl_pcie_la_LIBADD = $(btl_pcie_LIBS) + + +ompi_sysconfdir = $(OMPI_SYSCONFDIR) +ompi_sysconf_DATA = \ + mca-btl-pcie-local-resources.cfg \ + mca-btl-pcie-remote-resources.cfg + diff --git a/ompi/mca/btl/pcie/btl_pcie.c b/ompi/mca/btl/pcie/btl_pcie.c new file mode 100644 index 0000000000..58ff804347 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie.c @@ -0,0 +1,572 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include +#include + +#include "opal/types.h" +#include "opal/util/output.h" +#include "opal/util/if.h" +#include "opal/sys/atomic.h" +#include "opal/mca/paffinity/paffinity.h" + +#include "ompi/datatype/convertor.h" +#include "ompi/mca/btl/btl.h" +#include "ompi/mca/btl/base/btl_base_error.h" +#include "ompi/mca/mpool/mpool.h" +#include "ompi/mca/mpool/base/base.h" +#include "ompi/mca/pml/pml.h" + +#include "btl_pcie.h" +#include "btl_pcie_frag.h" +#include "btl_pcie_proc.h" +#include "btl_pcie_endpoint.h" + +mca_btl_pcie_module_t mca_btl_pcie_module = { + { + &mca_btl_pcie_component.super, + 0, /* max size of first fragment */ + 0, /* Threshold below which BTL should not fragment */ + 0, /* max send fragment size */ + 0, /* pipeline protocol length */ + 0, /* max rdma fragment size */ + 0, /* min packet size for pipeline protocol */ + 0, /* exclusivity */ + 0, /* latency */ + 0, /* bandwidth */ + 0, /* flags */ + mca_btl_pcie_add_procs, + mca_btl_pcie_del_procs, + mca_btl_pcie_register, + mca_btl_pcie_finalize, + mca_btl_pcie_alloc, + mca_btl_pcie_free, + mca_btl_pcie_prepare_src, + mca_btl_pcie_prepare_dst, + mca_btl_pcie_send, + NULL, + mca_btl_pcie_put, /* put */ + NULL, /* get */ + NULL, /*dump */ + NULL, /* mpool */ + NULL, /* register error cb */ + NULL /* ft event */ + } +}; + + +/** + * + */ + +int mca_btl_pcie_add_procs( + struct mca_btl_base_module_t* btl, + size_t nprocs, + struct ompi_proc_t **ompi_procs, + struct mca_btl_base_endpoint_t** peers, + opal_bitmap_t* reachable) +{ + mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*)btl; + int i; + + for(i = 0; i < (int) nprocs; i++) { + struct ompi_proc_t* ompi_proc = ompi_procs[i]; + mca_btl_pcie_proc_t* pcie_proc; + int rc; + + /* Don't connect to anyone on our local node, including + ourselves. 
The PCIe doesn't work that way, and the mapper + sometimes gets confused by that fact. */ + if (OPAL_PROC_ON_LOCAL_NODE(ompi_proc->proc_flags)) continue; + + rc = mca_btl_pcie_proc_create(ompi_proc, pcie_btl, &pcie_proc); + if(OMPI_SUCCESS != rc) { + return rc; + } else if (pcie_proc) { + opal_bitmap_set_bit(reachable, i); + peers[i] = pcie_proc->endpoint_proc; + } + } + + return OMPI_SUCCESS; +} + +int mca_btl_pcie_del_procs(struct mca_btl_base_module_t* btl, + size_t nprocs, + struct ompi_proc_t **procs, + struct mca_btl_base_endpoint_t ** peers) +{ + /* TODO */ + return OMPI_SUCCESS; +} + + +/** + * Register callback function to support send/recv semantics + */ + +int mca_btl_pcie_register( + struct mca_btl_base_module_t* btl, + mca_btl_base_tag_t tag, + mca_btl_base_module_recv_cb_fn_t cbfunc, + void* cbdata) +{ + mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl; + pcie_btl->pcie_reg[tag].cbfunc = cbfunc; + pcie_btl->pcie_reg[tag].cbdata = cbdata; + return OMPI_SUCCESS; +} + + +/** + * Allocate a segment. + * + * @param btl (IN) BTL module + * @param size (IN) Request segment size. + */ + +mca_btl_base_descriptor_t* mca_btl_pcie_alloc( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* endpoint, + uint8_t order, + size_t size, + uint32_t flags) +{ + mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl; + mca_btl_pcie_frag_t* frag = NULL; + int rc; + + if (size <= btl->btl_eager_limit) { + MCA_BTL_PCIE_FRAG_ALLOC_EAGER(pcie_btl, frag, rc); + if (frag) { + frag->segment.seg_len = size; + frag->base.des_flags = 0; + frag->hdr->length = size; + } + } + if (NULL == frag && size <= btl->btl_max_send_size) { + MCA_BTL_PCIE_FRAG_ALLOC_MAX(pcie_btl, frag, rc); + if (frag) { + frag->segment.seg_len = size; + frag->base.des_flags = 0; + frag->hdr->length = size; + } + } + BTL_VERBOSE(("btl_pcie_alloc called for %d bytes, returning 0x%lx", size, frag)); + + return (mca_btl_base_descriptor_t*) frag; +} + + +/** + * Return a segment + */ + +int mca_btl_pcie_free( + struct mca_btl_base_module_t* btl, + mca_btl_base_descriptor_t* des) +{ + mca_btl_pcie_frag_t* frag = (mca_btl_pcie_frag_t*)des; + mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl; + int ret; + + BTL_VERBOSE(("btl_pcie_free returning 0x%lx", frag)); + + if (frag->registration != NULL) { + pcie_btl->rdma_mpool->mpool_deregister(pcie_btl->rdma_mpool, + (mca_mpool_base_registration_t*) + frag->registration); + frag->registration = NULL; + } + + MCA_BTL_PCIE_FRAG_RETURN(pcie_btl, frag, ret); + return ret; +} + + +/** + * Pack data and return a descriptor that can be + * used for send/put. 
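For context on the allocation strategy just shown: mca_btl_pcie_alloc draws a fragment from one of two free lists depending on the requested size, eager for short messages and max for anything up to the max send size. A minimal standalone sketch of that size-class selection follows; the struct and pool helper are invented stand-ins, not the OMPI free-list macros.

#include <stddef.h>
#include <stdlib.h>

/* Hypothetical stand-in for a fragment drawn from a fixed-size pool. */
struct frag {
    size_t capacity;    /* payload bytes this size class can hold */
    char   payload[];
};

static struct frag *pool_get(size_t payload_limit)
{
    /* The real BTL would use MCA_BTL_PCIE_FRAG_ALLOC_EAGER/_MAX here;
       this sketch just allocates a frag sized for the class limit. */
    struct frag *f = malloc(sizeof(*f) + payload_limit);
    if (f) f->capacity = payload_limit;
    return f;
}

/* Pick the smallest size class that can hold 'size' bytes. */
static struct frag *alloc_frag(size_t size, size_t eager_limit, size_t max_send_size)
{
    if (size <= eager_limit)   return pool_get(eager_limit);
    if (size <= max_send_size) return pool_get(max_send_size);
    return NULL;               /* larger messages must be split by the caller */
}

Keeping two fixed size classes keeps the send path free of per-message sizing decisions while bounding the space wasted on small sends.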
+ * + * @param btl (IN) BTL module + * @param peer (IN) BTL peer addressing + */ +mca_btl_base_descriptor_t* mca_btl_pcie_prepare_src( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* endpoint, + struct mca_mpool_base_registration_t* registration, + struct ompi_convertor_t* convertor, + uint8_t order, + size_t reserve, + size_t* size, + uint32_t flags +) +{ + mca_btl_pcie_frag_t* frag = NULL; + mca_btl_pcie_reg_t* pcie_reg; + mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl; + struct iovec iov; + uint32_t iov_count = 1; + size_t max_data = *size; + int rc; + + BTL_VERBOSE(("btl_pcie_prepare_src called with reserve %d", reserve)); + + /* check and see if the data is contiguous */ + if(ompi_convertor_need_buffers(convertor) == false && 0 == reserve) { + MCA_BTL_PCIE_FRAG_ALLOC_DMA(btl, frag, rc); + if(NULL == frag) { + return NULL; + } + + iov.iov_len = max_data; + iov.iov_base = NULL; + + /* get the user buffer's address */ + ompi_convertor_pack(convertor, &iov, &iov_count, &max_data); + *size = max_data; + + if(NULL == registration) { + rc = pcie_btl->rdma_mpool->mpool_register(pcie_btl->rdma_mpool, + iov.iov_base, max_data, 0, ®istration); + if(OMPI_SUCCESS != rc || NULL == registration){ + MCA_BTL_PCIE_FRAG_RETURN(pcie_btl, frag, rc); + return NULL; + } + frag->registration = (mca_btl_pcie_reg_t*) registration; + } + + pcie_reg = (mca_btl_pcie_reg_t*) registration; + frag->base.des_flags = 0; + frag->base.des_src = &frag->segment; + frag->base.des_src_cnt = 1; + frag->base.des_dst = NULL; + frag->base.des_dst_cnt = 0; + frag->base.des_flags = 0; + + frag->segment.seg_len = max_data; + frag->segment.seg_addr.pval = iov.iov_base; + frag->segment.seg_key.key64 = (uint64_t)pcie_reg->handle; + + BTL_VERBOSE(("prepare_src: frag->segment.seg_len = %lu .seg_addr.pval= %llu " + "frag->segment.seg_key.key64 = %llu", + frag->segment.seg_len, frag->segment.seg_addr.pval, + frag->segment.seg_key.key64)); + + return &frag->base; + + } else { + /* + * if we aren't pinning the data and the requested size is less + * than the eager limit pack into a fragment from the eager pool + */ + if (max_data+reserve <= btl->btl_eager_limit) { + + MCA_BTL_PCIE_FRAG_ALLOC_EAGER(btl, frag, rc); + if(NULL == frag) { + return NULL; + } + + iov.iov_len = max_data; + iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve; + + rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data ); + *size = max_data; + if( rc < 0 ) { + MCA_BTL_PCIE_FRAG_RETURN(btl, frag, rc); + return NULL; + } + frag->segment.seg_len = max_data + reserve; + } + + /* + * otherwise pack as much data as we can into a fragment + * that is the max send size. + */ + else { + + MCA_BTL_PCIE_FRAG_ALLOC_MAX(btl, frag, rc); + if(NULL == frag) { + return NULL; + } + if(max_data + reserve > frag->size){ + max_data = frag->size - reserve; + } + iov.iov_len = max_data; + iov.iov_base = (unsigned char*) frag->segment.seg_addr.pval + reserve; + + rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data ); + *size = max_data; + + if( rc < 0 ) { + MCA_BTL_PCIE_FRAG_RETURN(btl, frag, rc); + return NULL; + } + frag->segment.seg_len = max_data + reserve; + + } + frag->hdr->length = *size + reserve; + frag->base.des_src = &frag->segment; + frag->base.des_src_cnt = 1; + frag->base.des_dst = NULL; + frag->base.des_dst_cnt = 0; + frag->base.des_flags = 0; + return &frag->base; + } + +} + +/** + * Prepare a descriptor for send/rdma using the supplied + * convertor. 
If the convertor references data that is contigous, + * the descriptor may simply point to the user buffer. Otherwise, + * this routine is responsible for allocating buffer space and + * packing if required. + * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL peer addressing + * @param convertor (IN) Data type convertor + * @param reserve (IN) Additional bytes requested by upper layer to precede user data + * @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT) + */ + +mca_btl_base_descriptor_t* mca_btl_pcie_prepare_dst( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* endpoint, + struct mca_mpool_base_registration_t* registration, + struct ompi_convertor_t* convertor, + uint8_t order, + size_t reserve, + size_t* size, + uint32_t flags) +{ + mca_btl_pcie_frag_t* frag; + mca_btl_pcie_reg_t* pcie_reg; + mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl; + int rc; + ptrdiff_t lb; + + MCA_BTL_PCIE_FRAG_ALLOC_DMA(pcie_btl, frag, rc); + if(NULL == frag) { + return NULL; + } + ompi_ddt_type_lb(convertor->pDesc, &lb); + frag->segment.seg_addr.pval = convertor->pBaseBuf + lb + + convertor->bConverted; + if(NULL == registration) { + rc = pcie_btl->rdma_mpool->mpool_register(pcie_btl->rdma_mpool, + frag->segment.seg_addr.pval, *size, 0, + ®istration); + if(OMPI_SUCCESS != rc || NULL == registration) { + MCA_BTL_PCIE_FRAG_RETURN(pcie_btl, frag, rc); + return NULL; + } + frag->registration = (mca_btl_pcie_reg_t*) registration; + } + pcie_reg = (mca_btl_pcie_reg_t*)registration; + + frag->segment.seg_len = *size; + frag->segment.seg_key.key64 = (uint64_t) pcie_reg->handle; + + frag->base.des_dst = &frag->segment; + frag->base.des_dst_cnt = 1; + frag->base.des_src = NULL; + frag->base.des_src_cnt = 0; + frag->base.des_flags = 0; + + BTL_VERBOSE(("prepare_dst: frag->segment.seg_len = %lu .seg_addr.pval= %llu " + "frag->segment.seg_key.key64 = %llu", + frag->segment.seg_len, frag->segment.seg_addr.pval, + frag->segment.seg_key.key64)); + + return &frag->base; +} + + +/** + * Initiate an asynchronous send. + * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL addressing information + * @param descriptor (IN) Description of the data to be transfered + * @param tag (IN) The tag value used to notify the peer. 
+ */ + +int mca_btl_pcie_send( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* endpoint, + struct mca_btl_base_descriptor_t* descriptor, + mca_btl_base_tag_t tag) + +{ + /* mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl; */ + mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl; + mca_btl_pcie_frag_t* frag = (mca_btl_pcie_frag_t*)descriptor; + mca_btl_pcie_sma_buf_t *buf = NULL; + int rc; + btl_pcie_fifo_entry_t idx; + + /* setup these fields so they get pulled over in the memcpy */ + frag->hdr->tag = tag; + frag->hdr->length = frag->segment.seg_len; + + if (frag->type == MCA_BTL_PCIE_TYPE_EAGER) { + MCA_BTL_PCIE_SMA_BUF_ALLOC_EAGER(pcie_btl, buf, rc); + } else { + MCA_BTL_PCIE_SMA_BUF_ALLOC_MAX(pcie_btl, buf, rc); + } + if (NULL == frag) { + BTL_ERROR(("can't alloc buf for frag of type %d", frag->type)); + return OMPI_ERR_OUT_OF_RESOURCE; + } + + frag->endpoint = endpoint; + frag->sma_buf = buf; + /* Put fragment into network byte order before copy to save work + done in sma region */ + OMPI_BTL_PCIE_HEADER_HTON(*frag->hdr); + /* BWB - FIX ME - both pointers are 16 byte aligned and the + buffers behind them are a multiple of 16 in length (but + frag->segment.seg_len might not be). There might be a more + optimized memcpy option given that behavior. */ + memcpy(buf->pcie_data.pval, frag->hdr, + sizeof(mca_btl_pcie_header_t) + + frag->segment.seg_len); + + /* send the fragment pointer to the receiver, + who will later ACK it back so that we can return it */ + idx = ((char*) buf->pcie_data.pval) - ((char*) endpoint->rem_frag_base); + idx |= BTL_PCIE_FIFO_TYPE_SEND; + + /* make sure the top bit is zero */ + assert((idx & BTL_PCIE_FIFO_TYPE_MASK) == BTL_PCIE_FIFO_TYPE_SEND); + + /* need to barrier prior to writing remote completion */ + opal_atomic_wmb(); + + BTL_VERBOSE(("sent frag 0x%lx (offset %lx), tag %d, length %d, rc = %d", + frag, idx, frag->hdr->tag, frag->segment.seg_len, rc)); + + idx = opal_swap_bytes8(idx); + rc = ompi_btl_pcie_fifo_set_msg(&endpoint->send_fifo, idx); + if(OMPI_SUCCESS != rc) { + if(OMPI_ERR_RESOURCE_BUSY == rc) { + /* BWB - FIX ME - queue for later */ + abort(); + } else { + return rc; + } + } + + return OMPI_SUCCESS; +} + +/** + * Initiate an asynchronous put. 
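For context on the descriptor written into the peer's FIFO by mca_btl_pcie_send above: the header and payload are first copied into an SMA bounce buffer, and the 64-bit value posted to the FIFO is that buffer's byte offset from the remote fragment base with the high bit set to mark it as a send (the matching ACK later arrives with the high bit clear). A rough standalone illustration of building that word; the names are hypothetical, and the byte swap to network order performed by the real code is omitted here.

#include <assert.h>
#include <stdint.h>

#define FIFO_TYPE_MASK  UINT64_C(0x8000000000000000)
#define FIFO_TYPE_SEND  UINT64_C(0x8000000000000000)

/* Build the descriptor posted to the peer: the bounce buffer's offset
   within the remote SMA fragment region, tagged as a SEND. */
static uint64_t make_send_descriptor(const char *bounce_buf,
                                     const char *remote_frag_base)
{
    uint64_t idx = (uint64_t)(bounce_buf - remote_frag_base);

    /* The offset must fit in 63 bits so the type bit stays clean. */
    assert((idx & FIFO_TYPE_MASK) == 0);

    return idx | FIFO_TYPE_SEND;
}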
+ * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL addressing information + * @param descriptor (IN) Description of the data to be transferred + */ + +int mca_btl_pcie_put( + mca_btl_base_module_t* btl, + mca_btl_base_endpoint_t* endpoint, + mca_btl_base_descriptor_t* descriptor) +{ + + mca_btl_pcie_frag_t* frag = (mca_btl_pcie_frag_t*) descriptor; + struct AXON_dma_request dma_req; + int dma_reqs_started; + int rc; + volatile uint64_t *dma_status_addr; + uint64_t dma_status; + + frag->endpoint = endpoint; + + memset(&dma_req,0x00,sizeof(dma_req)); + dma_req.dma_type = AXON_DMATYPE_PUT; + + dma_req.local_descriptor[0].src_address = frag->base.des_src->seg_addr.lval; + dma_req.local_descriptor[0].src_memory_region_handle = frag->base.des_src->seg_key.key64; + + dma_req.remote_descriptor[0].src_address = + opal_swap_bytes8(frag->base.des_dst->seg_addr.lval); + dma_req.remote_descriptor[0].src_memory_region_handle = + opal_swap_bytes8(frag->base.des_dst->seg_key.key64); + + dma_req.transfer_size = + dma_req.remote_descriptor[0].transfer_size = + dma_req.local_descriptor[0].transfer_size = frag->base.des_src->seg_len; + + dma_req.localDmaStatusOffset = endpoint->lcl_dma_status - (char*) endpoint->lcl_sma_ptr; + dma_req.remoteDmaStatusOffset = 0; + + dma_req.local_descriptor_count = 1; + dma_req.remote_descriptor_count = 1; + + dma_status_addr = (uint64_t*) endpoint->lcl_dma_status; + *dma_status_addr = 0; + + rc = dd_dma_request(&endpoint->pcie_adapter, + &dma_req, + 1, + &dma_reqs_started); + + if (0 != rc) abort(); + + /* wait for completion, for now anyway */ + while (0 == (dma_status = *dma_status_addr)) { + /* sched_yield(); */ + } + + frag->base.des_cbfunc(btl, endpoint, &(frag->base), OMPI_SUCCESS); + + return OMPI_SUCCESS; +} + + +/** + * Initiate an asynchronous get. + * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL addressing information + * @param descriptor (IN) Description of the data to be transferred + * + */ + +int mca_btl_pcie_get( + mca_btl_base_module_t* btl, + mca_btl_base_endpoint_t* endpoint, + mca_btl_base_descriptor_t* descriptor) +{ + return OMPI_ERR_NOT_IMPLEMENTED; +} + + + +/* + * Cleanup/release module resources. + */ + +int mca_btl_pcie_finalize(struct mca_btl_base_module_t* btl) +{ + mca_btl_pcie_module_t* pcie_btl = (mca_btl_pcie_module_t*) btl; + OBJ_DESTRUCT(&pcie_btl->pcie_lock); + OBJ_DESTRUCT(&pcie_btl->pcie_sma_buf_eager); + OBJ_DESTRUCT(&pcie_btl->pcie_sma_buf_max); + OBJ_DESTRUCT(&pcie_btl->pcie_frag_eager); + OBJ_DESTRUCT(&pcie_btl->pcie_frag_max); + OBJ_DESTRUCT(&pcie_btl->pcie_frag_dma); + OBJ_DESTRUCT(&pcie_btl->pcie_recv_frag); + return OMPI_SUCCESS; +} diff --git a/ompi/mca/btl/pcie/btl_pcie.h b/ompi/mca/btl/pcie/btl_pcie.h new file mode 100644 index 0000000000..5816eddc40 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie.h @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. 
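For context on the put path above: the BTL fills in a single one-descriptor DMA request, clears a completion word in its local SMA region, posts the request, and then spins on that word until the engine reports completion. The sketch below shows only that post-and-poll shape; the structure and functions are invented stand-ins rather than the Axon dd_* API, and the real code additionally byte-swaps the remote address and memory handle.

#include <stdint.h>

/* Invented stand-ins for the DMA engine interface. */
struct dma_desc {
    uint64_t local_addr,  local_handle;
    uint64_t remote_addr, remote_handle;
    uint64_t length;
};

static int dma_post(const struct dma_desc *d, volatile uint64_t *status)
{
    /* A real engine would transfer the data and then write a nonzero
       value to *status; completing immediately keeps the sketch runnable. */
    (void)d;
    *status = 1;
    return 0;
}

/* Post a PUT and block until the engine flags completion. */
static int put_and_wait(struct dma_desc *d, volatile uint64_t *status)
{
    *status = 0;                  /* arm the completion word */
    if (dma_post(d, status) != 0) {
        return -1;
    }
    while (0 == *status) {        /* poll; real code could yield here */
        ;
    }
    return 0;
}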
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_BTL_PCIE_H +#define MCA_BTL_PCIE_H + +#include "ompi_config.h" + +#include +#include + +#include "opal/align.h" +#include "opal/event/event.h" +#include "opal/util/output.h" +#include "opal/class/opal_bitmap.h" + +#include "orte/util/proc_info.h" + +#include "ompi/class/ompi_free_list.h" +#include "ompi/mca/btl/btl.h" +#include "ompi/mca/btl/base/base.h" +#include "ompi/mca/mpool/mpool.h" +#include "ompi/mca/pml/pml.h" + +#include "btl_pcie_ddriver.h" +#include "btl_pcie_frag.h" +#include "btl_pcie_fifo.h" + +BEGIN_C_DECLS + +#define MCA_BTL_HAS_MPOOL 1 + +/** + * PCIE BTL component. + */ + +struct mca_btl_pcie_component_t { + /** BTL base component */ + mca_btl_base_component_1_0_1_t super; + + /* ***** Configuration information ***** */ + + /** initial size of free lists */ + int pcie_free_list_num; + + /** maximum size of free lists */ + int pcie_free_list_max; + + /** number of elements to alloc when growing free lists */ + int pcie_free_list_inc; + + /** name of send/recv memory pool */ + char* pcie_send_mpool_name; + + /** name of put/get memory pool */ + char *pcie_dma_mpool_name; + + /** Number of entries in the send/recv queue structure */ + int pcie_recv_queue_len; + + /* **** Component data ***** */ + + /** array of available modules */ + struct mca_btl_pcie_module_t *pcie_btls; + + /** Number of initialized pcie_btl modules */ + uint32_t pcie_num_btls; + + /** list of pcie proc structures, created during add_procs */ + opal_list_t pcie_procs; + + /** lock for accessing component state */ + opal_mutex_t pcie_lock; +}; +typedef struct mca_btl_pcie_component_t mca_btl_pcie_component_t; + +OMPI_MODULE_DECLSPEC extern mca_btl_pcie_component_t mca_btl_pcie_component; + +/** + * BTL Module Interface + */ +struct mca_btl_pcie_module_t { + mca_btl_base_module_t super; /**< base BTL interface */ + + bool active; + + mca_btl_base_recv_reg_t pcie_reg[MCA_BTL_TAG_MAX]; + + /** name of the pcie device */ + char *lcl_dev_name; + + /** Free list of communication buffers in the SMA region */ + ompi_free_list_t pcie_sma_buf_eager; + ompi_free_list_t pcie_sma_buf_max; + + /** Free list of bounce fragments, normal user memory */ + ompi_free_list_t pcie_frag_eager; + ompi_free_list_t pcie_frag_max; + + /* free list of DMA fragments */ + ompi_free_list_t pcie_frag_dma; + + /* single receive fragment to handle upcalls on message reception. + This will need to be a free list if multiple receive callbacks + could be triggered at the same time, which will happen if the + code goes MT hot. 
*/ + mca_btl_pcie_frag_recv_t pcie_recv_frag; + + /* lock for accessing module state */ + opal_mutex_t pcie_lock; + + /* mpool for allocating the members of pcie_sma_buf* */ + struct mca_mpool_base_module_t* pcie_mpool; + /* mpool for RDMA pinning */ + struct mca_mpool_base_module_t* rdma_mpool; + + /* Endpoint associated with this module (there's a one-to-one + mapping of modules and endpoints, since a device can only + handle one endpoint at a time */ + struct mca_btl_base_endpoint_t* endpoint; +}; +typedef struct mca_btl_pcie_module_t mca_btl_pcie_module_t; +extern mca_btl_pcie_module_t mca_btl_pcie_module; + +struct mca_btl_pcie_reg_t { + mca_mpool_base_registration_t base; + AXON_memory_region_handle handle; +}; +typedef struct mca_btl_pcie_reg_t mca_btl_pcie_reg_t; + +struct mca_btl_pcie_modex_info_t { + char hostname[ORTE_MAX_HOSTNAME_SIZE]; + char devicename[OMPI_PATH_MAX]; +}; +typedef struct mca_btl_pcie_modex_info_t mca_btl_pcie_modex_info_t; +#define MCA_BTL_PCIE_MODEX_INFO_HTON(h) +#define MCA_BTL_PCIE_MODEX_INFO_NTOH(h) + + +/** + * Register TEMPLATE component parameters with the MCA framework + */ +extern int mca_btl_pcie_component_open(void); + +/** + * Any final cleanup before being unloaded. + */ +extern int mca_btl_pcie_component_close(void); + +/** + * TEMPLATE component initialization. + * + * @param num_btl_modules (OUT) Number of BTLs returned in BTL array. + * @param allow_multi_user_threads (OUT) Flag indicating wether BTL supports user threads (TRUE) + * @param have_hidden_threads (OUT) Flag indicating wether BTL uses threads (TRUE) + */ +extern mca_btl_base_module_t** mca_btl_pcie_component_init( + int *num_btl_modules, + bool allow_multi_user_threads, + bool have_hidden_threads +); + + +/** + * TEMPLATE component progress. + */ +extern int mca_btl_pcie_component_progress(void); + + + +/** + * Cleanup any resources held by the BTL. + * + * @param btl BTL instance. + * @return OMPI_SUCCESS or error status on failure. + */ + +extern int mca_btl_pcie_finalize( + struct mca_btl_base_module_t* btl +); + + +/** + * PML->BTL notification of change in the process list. + * + * @param btl (IN) + * @param nprocs (IN) Number of processes + * @param procs (IN) Set of processes + * @param peers (OUT) Set of (optional) peer addressing info. + * @param peers (IN/OUT) Set of processes that are reachable via this BTL. + * @return OMPI_SUCCESS or error status on failure. + * + */ + +extern int mca_btl_pcie_add_procs( + struct mca_btl_base_module_t* btl, + size_t nprocs, + struct ompi_proc_t **procs, + struct mca_btl_base_endpoint_t** peers, + opal_bitmap_t* reachable +); + +/** + * PML->BTL notification of change in the process list. + * + * @param btl (IN) BTL instance + * @param nproc (IN) Number of processes. + * @param procs (IN) Set of processes. + * @param peers (IN) Set of peer data structures. + * @return Status indicating if cleanup was successful + * + */ + +extern int mca_btl_pcie_del_procs( + struct mca_btl_base_module_t* btl, + size_t nprocs, + struct ompi_proc_t **procs, + struct mca_btl_base_endpoint_t** peers +); + + +/** + * Initiate an asynchronous send. + * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL addressing information + * @param descriptor (IN) Description of the data to be transfered + * @param tag (IN) The tag value used to notify the peer. 
+ */ + +extern int mca_btl_pcie_send( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* btl_peer, + struct mca_btl_base_descriptor_t* descriptor, + mca_btl_base_tag_t tag +); + + +/** + * Initiate an asynchronous put. + * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL addressing information + * @param descriptor (IN) Description of the data to be transferred + */ + +extern int mca_btl_pcie_put( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* btl_peer, + struct mca_btl_base_descriptor_t* decriptor +); + + +/** + * Initiate an asynchronous get. + * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL addressing information + * @param descriptor (IN) Description of the data to be transferred + */ + +extern int mca_btl_pcie_get( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* btl_peer, + struct mca_btl_base_descriptor_t* decriptor +); + +/** + * Register a callback function that is called on receipt + * of a fragment. + * + * @param btl (IN) BTL module + * @return Status indicating if registration was successful + * + */ + +extern int mca_btl_pcie_register( + struct mca_btl_base_module_t* btl, + mca_btl_base_tag_t tag, + mca_btl_base_module_recv_cb_fn_t cbfunc, + void* cbdata); + +/** + * Allocate a descriptor with a segment of the requested size. + * Note that the BTL layer may choose to return a smaller size + * if it cannot support the request. + * + * @param btl (IN) BTL module + * @param size (IN) Request segment size. + */ + +extern mca_btl_base_descriptor_t* mca_btl_pcie_alloc( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* endpoint, + uint8_t order, + size_t size, + uint32_t flags); + + +/** + * Return a segment allocated by this BTL. + * + * @param btl (IN) BTL module + * @param descriptor (IN) Allocated descriptor. + */ + +extern int mca_btl_pcie_free( + struct mca_btl_base_module_t* btl, + mca_btl_base_descriptor_t* des); + + +/** + * Prepare a descriptor for send/rdma using the supplied + * convertor. If the convertor references data that is contigous, + * the descriptor may simply point to the user buffer. Otherwise, + * this routine is responsible for allocating buffer space and + * packing if required. 
+ * + * @param btl (IN) BTL module + * @param endpoint (IN) BTL peer addressing + * @param convertor (IN) Data type convertor + * @param reserve (IN) Additional bytes requested by upper layer to precede user data + * @param size (IN/OUT) Number of bytes to prepare (IN), number of bytes actually prepared (OUT) +*/ + +mca_btl_base_descriptor_t* mca_btl_pcie_prepare_src( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* endpoint, + struct mca_mpool_base_registration_t* registration, + struct ompi_convertor_t* convertor, + uint8_t order, + size_t reserve, + size_t* size, + uint32_t flags +); + +extern mca_btl_base_descriptor_t* mca_btl_pcie_prepare_dst( + struct mca_btl_base_module_t* btl, + struct mca_btl_base_endpoint_t* endpoint, + struct mca_mpool_base_registration_t* registration, + struct ompi_convertor_t* convertor, + uint8_t order, + size_t reserve, + size_t* size, + uint32_t flags); + + /** + * Fault Tolerance Event Notification Function + * @param state Checkpoint Stae + * @return OMPI_SUCCESS or failure status + */ +int mca_btl_pcie_ft_event(int state); + +char* ompi_btl_pcie_cfg_get_local_device(char* hostname, int core); +char* ompi_btl_pcie_cfg_get_matching_device(char* remote_hostname, + char* remote_device); + + +END_C_DECLS + +#endif /* #ifndef MCA_BTL_PCIE_H */ diff --git a/ompi/mca/btl/pcie/btl_pcie_cfg.c b/ompi/mca/btl/pcie/btl_pcie_cfg.c new file mode 100644 index 0000000000..f329a94d24 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_cfg.c @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include +#include +#include + +#include "opal/util/output.h" +#include "opal/util/os_path.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/mca/installdirs/installdirs.h" + +#include "orte/util/proc_info.h" +#include "orte/util/show_help.h" + +#include "btl_pcie.h" +#include "btl_pcie_lex.h" + +static char *cfg_filename; +static char *key_buffer = NULL; +static size_t key_buffer_len = 0; + +/* + * Local functions + */ +static char* parse_file(char *filename, bool local, char *key); + + +/**************************************************************************/ + +char * +ompi_btl_pcie_cfg_get_local_device(char* hostname, int core) +{ + char *key, *ret, *file; + + file = opal_os_path(false, + opal_install_dirs.sysconfdir, + "mca-btl-pcie-local-resources.cfg", + NULL); + + asprintf(&key, "%s:%d", hostname, core); + ret = parse_file(file, true, key); + free(key); + free(file); + + return ret; +} + +char * +ompi_btl_pcie_cfg_get_matching_device(char* remote_hostname, + char* remote_device) +{ + char *key, *ret, *pos, *file; + + file = opal_os_path(false, + opal_install_dirs.sysconfdir, + "mca-btl-pcie-remote-resources.cfg", + NULL); + + asprintf(&key, "%s:%s", remote_hostname, remote_device); + ret = parse_file(file, false, key); + free(file); + free(key); + + if (ret == NULL) return NULL; + + pos = strchr(ret, ':'); + if (pos == NULL) { + free(ret); + return NULL; + } + + /* make sure this is my hostname */ + *pos = '\0'; + if (0 != strcmp(orte_process_info.nodename, ret)) { + free(ret); + return NULL; + } + + pos++; + pos = strdup(pos); + free(ret); + + return pos; +} + + +/* + * Parse a single file + */ +static char* parse_file(char *filename, bool local, char* key) +{ + int val; + bool me; + char *tmp = NULL; + + /* Open the file */ + cfg_filename = filename; + btl_pcie_cfg_yyin = 
fopen(filename, "r"); + if (NULL == btl_pcie_cfg_yyin) { + orte_show_help("help-mpi-btl-pcie.txt", "ini file:file not found", + true, filename); + goto cleanup; + } + + /* Do the parsing */ + btl_pcie_cfg_parse_done = false; + btl_pcie_cfg_yynewlines = 1; + btl_pcie_cfg_init_buffer(btl_pcie_cfg_yyin); + while (!btl_pcie_cfg_parse_done) { + val = btl_pcie_cfg_yylex(); + switch (val) { + case BTL_PCIE_CFG_PARSE_DONE: + /* This will also set btl_pcie_cfg_parse_done to true, so just + break here */ + break; + + case BTL_PCIE_CFG_PARSE_NEWLINE: + /* blank line! ignore it */ + break; + + case BTL_PCIE_CFG_PARSE_HOSTNAME_CORE: + if (!local) { + return NULL; + } + + if (0 == strcmp(key, btl_pcie_cfg_yytext)) { + me = true; + } else { + me = false; + } + + val = btl_pcie_cfg_yylex(); + if (BTL_PCIE_CFG_PARSE_DEVICE != val) { + abort(); + } + + if (me) return strdup(btl_pcie_cfg_yytext); + + break; + + case BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE: + if (local) { + return NULL; + } + + if (0 == strcmp(key, btl_pcie_cfg_yytext)) { + me = true; + } else { + tmp = strdup(btl_pcie_cfg_yytext); + me = false; + } + + val = btl_pcie_cfg_yylex(); + if (BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE != val) { + abort(); + } + + if (me) { + return strdup(btl_pcie_cfg_yytext); + } else { + if (0 == strcmp(key, btl_pcie_cfg_yytext)) { + return tmp; + } else { + free(tmp); + } + } + + break; + + default: + return NULL; + break; + } + } + fclose(btl_pcie_cfg_yyin); + +cleanup: + if (NULL != key_buffer) { + free(key_buffer); + key_buffer = NULL; + key_buffer_len = 0; + } + + return NULL; +} + diff --git a/ompi/mca/btl/pcie/btl_pcie_component.c b/ompi/mca/btl/pcie/btl_pcie_component.c new file mode 100644 index 0000000000..0c7f4c8d09 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_component.c @@ -0,0 +1,487 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. 
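For context on parse_file above: it walks the flex-generated token stream looking for a hostname:core (or hostname:device) key and returns the value token that follows it. Purely as an illustration, the same local-device lookup could be done without the lexer roughly as below, assuming a whitespace-separated "hostname:core device" layout; that layout is an assumption for the sketch, not a statement about the shipped .cfg format.

#include <stdio.h>
#include <string.h>

/* Look up the device mapped to key ("hostname:core") in a file of
   whitespace-separated "hostname:core device" pairs (assumed format).
   Returns 0 and fills dev on success, -1 if the key is not found. */
static int lookup_local_device(const char *path, const char *key, char dev[256])
{
    char k[256];
    FILE *fp = fopen(path, "r");
    if (NULL == fp) return -1;

    while (2 == fscanf(fp, "%255s %255s", k, dev)) {
        if (0 == strcmp(k, key)) {
            fclose(fp);
            return 0;
        }
    }
    fclose(fp);
    return -1;
}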
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "ompi/constants.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "opal/event/event.h" +#include "opal/util/argv.h" +#include "opal/util/if.h" +#include "opal/util/output.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/mca/paffinity/paffinity.h" +#include "opal/mca/paffinity/base/base.h" + +#include "orte/util/proc_info.h" +#include "orte/util/show_help.h" +#include "orte/mca/errmgr/errmgr.h" + +#include "ompi/constants.h" +#include "ompi/datatype/convertor.h" +#include "ompi/mca/btl/btl.h" +#include "ompi/mca/btl/base/base.h" +#include "ompi/mca/btl/base/btl_base_error.h" +#include "ompi/mca/mpool/base/base.h" +#include "ompi/mca/mpool/rdma/mpool_rdma.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/runtime/ompi_module_exchange.h" + +#include "btl_pcie.h" +#include "btl_pcie_frag.h" +#include "btl_pcie_endpoint.h" +#include "btl_pcie_ddriver.h" + + +static int pcie_reg_mr(void *reg_data, void *base, size_t size, + mca_mpool_base_registration_t *reg); +static int pcie_dereg_mr(void* reg_data, mca_mpool_base_registration_t *reg); + + +mca_btl_pcie_component_t mca_btl_pcie_component = { + { + /* First, the mca_base_component_t struct containing meta information + about the component itself */ + + { + /* Indicate that we are a pml v2.0.0 component (which also implies a + specific MCA version) */ + + MCA_BTL_BASE_VERSION_2_0_0, + + "pcie", /* MCA component name */ + OMPI_MAJOR_VERSION, /* MCA component major version */ + OMPI_MINOR_VERSION, /* MCA component minor version */ + OMPI_RELEASE_VERSION, /* MCA component release version */ + mca_btl_pcie_component_open, /* component open */ + mca_btl_pcie_component_close /* component close */ + }, + + /* Next the MCA v2.0.0 component meta data */ + + { + false + }, + + mca_btl_pcie_component_init, + mca_btl_pcie_component_progress, + } +}; + + +/* + * utility routines for parameter registration + */ +static char* +mca_btl_pcie_param_register_string(const char* param_name, + const char* param_desc, + const char* default_value) +{ + char *value; + + mca_base_param_reg_string(&mca_btl_pcie_component.super.btl_version, + param_name, param_desc, false, false, + default_value, &value); + return value; +} + + +static int +mca_btl_pcie_param_register_int(const char* param_name, + const char* param_desc, + int default_value) +{ + int value; + + mca_base_param_reg_int(&mca_btl_pcie_component.super.btl_version, + param_name, param_desc, false, false, + default_value, &value); + return value; +} + + +/* + * Register PCIE device found in local config file. The MCA framework + * will make this available to all peers. 
+ */ +static int +btl_pcie_modex_send(void) +{ + size_t size; + unsigned int i; + mca_btl_pcie_modex_info_t *info; + + size = mca_btl_pcie_component.pcie_num_btls * + sizeof(mca_btl_pcie_modex_info_t); + info = malloc(size); + if (NULL == info) return OMPI_ERR_TEMP_OUT_OF_RESOURCE; + + for (i = 0 ; i < mca_btl_pcie_component.pcie_num_btls ; ++i) { + strncpy(info[i].hostname, + orte_process_info.nodename, + ORTE_MAX_HOSTNAME_SIZE - 1); + info[i].hostname[ORTE_MAX_HOSTNAME_SIZE - 1] = '\0'; + strncpy(info[i].devicename, + mca_btl_pcie_component.pcie_btls[i].lcl_dev_name, + OMPI_PATH_MAX - 1); + info[i].devicename[OMPI_PATH_MAX - 1] = '\0'; + MCA_BTL_PCIE_MODEX_INFO_HTON(info[i]); + } + +#if (OMPI_MAJOR_VERSION <= 1) && (OMPI_MINOR_VERSION <= 2) + return mca_pml_base_modex_send(&mca_btl_pcie_component.super.btl_version, info, size); +#else + return ompi_modex_send(&mca_btl_pcie_component.super.btl_version, info, size); +#endif +} + + +/* + * Called by MCA framework to open the component, registers + * component parameters. + */ +int +mca_btl_pcie_component_open(void) +{ + /* initialize state */ + mca_btl_pcie_component.pcie_num_btls = 0; + mca_btl_pcie_component.pcie_btls = NULL; + + /* initialize objects */ + OBJ_CONSTRUCT(&mca_btl_pcie_component.pcie_procs, opal_list_t); + + /* component parameters */ + mca_btl_pcie_component.pcie_free_list_num = + mca_btl_pcie_param_register_int ("free_list_num", + "Initial size of free lists (must be >= 1)", + 16); + /* BWB - FIX ME - The need to limit the free list max size is an + artifact of the lack of flow control in the BTL. Since we're + already using bounce fragments, it should be possible to make + this unlimited, and then properly handle the case where an SMA + region isn't available when send is called on a given frag. + Something similar to what Open IB does when we don't have send + credits would work really well here. See comment in + btl_pcie_send() for more information. */ + mca_btl_pcie_component.pcie_free_list_max = + mca_btl_pcie_param_register_int ("free_list_max", + "Max size of free lists. " + "free_list_max * (first_frag_size + max_send_size) " + "must be less than (SMA memory size - (recv_queue_len * 4) - 8)", + 32); + mca_btl_pcie_component.pcie_free_list_inc = + mca_btl_pcie_param_register_int ("free_list_inc", + "Increment size of free lists (must be >= 1)", + 8); + + mca_btl_pcie_component.pcie_send_mpool_name = + mca_btl_pcie_param_register_string("send_mpool", + "Name of the memory pool to be used for send messages. " + "(it is unlikely that you will ever want to change this)", + "pcie"); + + mca_btl_pcie_component.pcie_dma_mpool_name = + mca_btl_pcie_param_register_string("dma_mpool", + "Name of the memory pool to be used for rdma messages. " + "(it is unlikely that you will ever want to change this)", + "rdma"); + + mca_btl_pcie_component.pcie_recv_queue_len = + mca_btl_pcie_param_register_int("recv_queue_len", + "Length of receive fifo. Must be 4 * free_list_max", + 256); + + mca_btl_pcie_module.super.btl_exclusivity = + mca_btl_pcie_param_register_int ("exclusivity", + "Priority of PCIe BTL. (must be > 0)", + MCA_BTL_EXCLUSIVITY_DEFAULT + 1); + + mca_btl_pcie_module.super.btl_eager_limit = + mca_btl_pcie_param_register_int ("first_frag_size", + "Size (in bytes) of the first fragment sent of any " + "message. 
It is the maximum size of \"short\" messages " + "and the maximum size of the \"phase 1\" fragment sent " + "for all large messages (must be >= 1).", + 1*1024) - sizeof(mca_btl_pcie_header_t); + mca_btl_pcie_module.super.btl_rndv_eager_limit = + mca_btl_pcie_param_register_int ("btl_rndv_eager_limit", + "Minimum message size (in bytes) that will be striped " + "across multiple network devices when using " + "send/receive semantics. Messages shorter than this " + "size will be sent across a single network (must be >= " + "1)", + 2*1024) - sizeof(mca_btl_pcie_header_t); + mca_btl_pcie_module.super.btl_max_send_size = + mca_btl_pcie_param_register_int ("max_send_size", + "Maximum size (in bytes) of a single \"phase 2\" fragment " + "of a long message when using the pipeline protocol " + "(must be >= 1)", + 4*1024) - sizeof(mca_btl_pcie_header_t); + mca_btl_pcie_module.super.btl_rdma_pipeline_send_length = + mca_btl_pcie_param_register_int("rdma_pipeline_send_length", + "Length of the \"phase 2\" portion of a large message (in " + "bytes) when using the pipeline protocol. This part of " + "the message will be split into fragments of size " + "max_send_size and sent using send/receive semantics " + "(must be >= 0; only relevant when the PUT flag is " + "set)", + 12*1024); + mca_btl_pcie_module.super.btl_rdma_pipeline_frag_size = + mca_btl_pcie_param_register_int("rdma_pipeline_frag_size", + "Maximum size (in bytes) of a single \"phase 3\" fragment " + "from a long message when using the pipeline protocol. " + "These fragments will be sent using RDMA semantics " + "(must be >= 1; only relevant when the PUT flag is " + "set)", + 2*1024*1024); + mca_btl_pcie_module.super.btl_min_rdma_pipeline_size = + mca_btl_pcie_param_register_int("min_rdma_pipeline_size", + "Messages smaller than this size (in bytes) will not " + "use the RDMA pipeline protocol. Instead, they will be " + "split into fragments of max_send_size and sent using " + "send/receive semantics (must be >=0, and is " + "automatically adjusted up to at least " + "(eager_limit+btl_rdma_pipeline_send_length); only " + "relevant when the PUT flag is set)", + 16 * 1024); + + mca_btl_pcie_module.super.btl_flags = + mca_btl_pcie_param_register_int("flags", + "BTL control flags. 
Defaults to (SEND|PUT|HETEROGENEOUS_RDMA)", +#ifdef MCA_BTL_FLAGS_HETEROGENEOUS_RDMA + MCA_BTL_FLAGS_HETEROGENEOUS_RDMA | +#endif + MCA_BTL_FLAGS_SEND | + MCA_BTL_FLAGS_PUT); + + return OMPI_SUCCESS; +} + + +int +mca_btl_pcie_component_close(void) +{ + return OMPI_SUCCESS; +} + + +mca_btl_base_module_t** +mca_btl_pcie_component_init(int *num_btl_modules, + bool enable_progress_threads, + bool enable_mpi_threads) +{ + cpu_set_t cpu_set; + unsigned int i; + int num_cpus, *cpus; + struct stat stat_buf; + struct mca_mpool_base_resources_t mpool_resources; + mca_btl_base_module_t **btl_array; + + *num_btl_modules = 0; + + /* find all cpus we're bound to */ + cpus = malloc(CPU_SETSIZE * sizeof(int)); + memset(cpus, 0, CPU_SETSIZE * sizeof(int)); + num_cpus = 0; + CPU_ZERO(&cpu_set); + + sched_getaffinity(0, sizeof(cpu_set), &cpu_set); + for (i = 0 ; i < CPU_SETSIZE ; ++i) { + if (CPU_ISSET(i, &cpu_set)) cpus[num_cpus++] = i; + } +#if defined(__PPC__) + if (num_cpus > 1) { + orte_show_help("help-mpi-btl-pcie.txt", "initialization:more-than-one-cpu", + true, num_cpus); + return NULL; + } +#endif /* #ifdef __PPC__ */ + if (0 == num_cpus) { + orte_show_help("help-mpi-btl-pcie.txt", "initialization:no-cpus", + true); + return NULL; + } + + /* Create the module storage space */ + mca_btl_pcie_component.pcie_num_btls = num_cpus; + mca_btl_pcie_component.pcie_btls = malloc(mca_btl_pcie_component.pcie_num_btls * + sizeof(struct mca_btl_pcie_module_t)); + btl_array = malloc(mca_btl_pcie_component.pcie_num_btls * + sizeof(mca_btl_base_module_t*)); + + /* initialize the modules */ + for (i = 0 ; i < mca_btl_pcie_component.pcie_num_btls ; ++i) { + mca_btl_pcie_module_t *btl = &(mca_btl_pcie_component.pcie_btls[i]); + + btl_array[i] = (mca_btl_base_module_t*) btl; + + memcpy(btl, &mca_btl_pcie_module, sizeof(mca_btl_pcie_module_t)); + + /* check if we have a device listed in our local config file */ + btl->lcl_dev_name = + ompi_btl_pcie_cfg_get_local_device(orte_process_info.nodename, cpus[i]); + BTL_VERBOSE(("Local device for %s:%d = %s", orte_process_info.nodename, cpus[i], + btl->lcl_dev_name)); + + /* make sure said device is sane */ + if(stat(btl->lcl_dev_name, &stat_buf)) { + BTL_ERROR(("Error %s opening device %s\n", strerror(errno), + btl->lcl_dev_name)); + return NULL; + } + + OBJ_CONSTRUCT(&btl->pcie_sma_buf_eager, ompi_free_list_t); + OBJ_CONSTRUCT(&btl->pcie_sma_buf_max, ompi_free_list_t); + + OBJ_CONSTRUCT(&btl->pcie_frag_eager, ompi_free_list_t); + OBJ_CONSTRUCT(&btl->pcie_frag_max, ompi_free_list_t); + + OBJ_CONSTRUCT(&btl->pcie_frag_dma, ompi_free_list_t); + + OBJ_CONSTRUCT(&btl->pcie_lock, opal_mutex_t); + + /* time to setup DMA mpool */ + mpool_resources.reg_data = (void*) btl; + mpool_resources.sizeof_reg = sizeof(mca_btl_pcie_reg_t); + mpool_resources.register_mem = pcie_reg_mr; + mpool_resources.deregister_mem = pcie_dereg_mr; + btl->rdma_mpool = + mca_mpool_base_module_create("rdma", + &btl->super, + &mpool_resources); + btl->super.btl_mpool = btl->rdma_mpool; + + btl->active = false; + } + + /* push our address info to everyone */ + btl_pcie_modex_send(); + + *num_btl_modules = mca_btl_pcie_component.pcie_num_btls; + return btl_array;; +} + + +int +mca_btl_pcie_component_progress() +{ + unsigned int i; + btl_pcie_fifo_entry_t msg_idx; + int count = 0; + + for (i = 0 ; i < mca_btl_pcie_component.pcie_num_btls ; ++i) { + mca_btl_pcie_module_t *pcie_btl = + &(mca_btl_pcie_component.pcie_btls[i]); + mca_btl_base_endpoint_t *endpoint = pcie_btl->endpoint; + + if (!pcie_btl->active) continue; 
+ + msg_idx = ompi_btl_pcie_fifo_get_msg(&endpoint->recv_fifo); + + /* Potential optimization is to drain every time we enter progress */ + if (msg_idx) { + int rc; + int ack = ((msg_idx & BTL_PCIE_FIFO_TYPE_MASK) == BTL_PCIE_FIFO_TYPE_ACK) ? 1 : 0; + msg_idx &= BTL_PCIE_FIFO_DATA_MASK; + + if (ack) { + /* we have a send frag ack */ + mca_btl_pcie_frag_t *frag = (mca_btl_pcie_frag_t*) msg_idx; + mca_btl_pcie_sma_buf_t *buf = frag->sma_buf; + + BTL_VERBOSE(("received ack for frag %lx (0x%lx)", msg_idx, frag)); + + /* Done with buffer, can return now */ + MCA_BTL_PCIE_SMA_BUF_RETURN(pcie_btl, buf, rc); + + frag->base.des_cbfunc(&pcie_btl->super, endpoint, + &(frag->base), + OMPI_SUCCESS); + + /* return the send credit */ + ompi_btl_pcie_fifo_complete_msg(&endpoint->send_fifo, 1); + count++; + } else { + /* we have a send frag (incoming data) */ + mca_btl_pcie_frag_t *recv_frag = &pcie_btl->pcie_recv_frag; + mca_btl_pcie_header_t *hdr = (mca_btl_pcie_header_t*) (endpoint->lcl_frag_base + msg_idx); + recv_frag->hdr = hdr; + OMPI_BTL_PCIE_HEADER_NTOH((*recv_frag->hdr)); + recv_frag->segment.seg_addr.pval = ((unsigned char*) recv_frag->hdr) + sizeof(mca_btl_pcie_header_t); + recv_frag->segment.seg_len = recv_frag->hdr->length; + BTL_VERBOSE(("received tag %d, base 0x%lx", recv_frag->hdr->tag, &recv_frag->base)); + pcie_btl->pcie_reg[recv_frag->hdr->tag].cbfunc(&pcie_btl->super, + recv_frag->hdr->tag, &recv_frag->base, + pcie_btl->pcie_reg[recv_frag->hdr->tag].cbdata); + + rc = ompi_btl_pcie_fifo_set_msg(&endpoint->send_fifo, hdr->send_frag.lval); + /* BWB - FIX ME - this is only safe if the number of + queue entries is twice the free list size */ + ompi_btl_pcie_fifo_complete_msg(&endpoint->send_fifo, 1); + count++; + } + } + } + + return count; +} + + +static int +pcie_reg_mr(void *reg_data, void *base, size_t size, + mca_mpool_base_registration_t *reg) +{ + mca_btl_pcie_module_t * pcie_btl = (mca_btl_pcie_module_t*) reg_data; + mca_btl_pcie_endpoint_t * endpoint = pcie_btl->endpoint; + mca_btl_pcie_reg_t * pcie_reg = (mca_btl_pcie_reg_t*) reg; + + if(dd_register_memory_region(&endpoint->pcie_adapter, + &pcie_reg->handle, + base, + size, + DD_ALLOW_LOCAL_READ | + DD_ALLOW_LOCAL_WRITE | + DD_ALLOW_REMOTE_ACCESS | + DD_ALLOW_REMOTE_READ | + DD_ALLOW_REMOTE_WRITE )) { + BTL_ERROR(("error deregistering memory!\n")); + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} + + +static int +pcie_dereg_mr(void* reg_data, mca_mpool_base_registration_t *reg) +{ + mca_btl_pcie_module_t * pcie_btl = (mca_btl_pcie_module_t*) reg_data; + mca_btl_pcie_endpoint_t * endpoint = pcie_btl->endpoint; + mca_btl_pcie_reg_t * pcie_reg = (mca_btl_pcie_reg_t*) reg; + + if(pcie_reg->handle >= 0) { + if(dd_deregister_memory_region(&endpoint->pcie_adapter, + &pcie_reg->handle)) { + BTL_ERROR(("error deregistering memory!\n")); + return OMPI_ERROR; + } + } else { + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} diff --git a/ompi/mca/btl/pcie/btl_pcie_endpoint.c b/ompi/mca/btl/pcie/btl_pcie_endpoint.c new file mode 100644 index 0000000000..358a5758bc --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_endpoint.c @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. 
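For context on pcie_reg_mr and pcie_dereg_mr above: the rdma mpool created in component_init invokes these two hooks whenever a user buffer must be pinned for DMA or a cached registration is evicted, and the handle they produce is what prepare_src/prepare_dst later place in seg_key.key64. A minimal sketch of such a callback pair follows, with an invented driver API standing in for the Axon dd_register/dd_deregister calls.

#include <stddef.h>
#include <stdint.h>

/* Hypothetical driver handle and calls standing in for the Axon dd_* API. */
typedef int64_t mr_handle_t;
static int fake_register(void *base, size_t len, mr_handle_t *out)
{
    (void)base; (void)len;
    *out = 1;                   /* pretend the adapter returned a handle */
    return 0;
}
static int fake_deregister(mr_handle_t h) { (void)h; return 0; }

/* Per-registration state the pool hands back to the BTL on every cache hit. */
struct reg_entry {
    void       *base;
    size_t      len;
    mr_handle_t handle;         /* what the BTL would expose as seg_key.key64 */
};

/* register_mem-style callback: pin the region and remember the handle. */
static int reg_cb(struct reg_entry *reg, void *base, size_t len)
{
    reg->base = base;
    reg->len  = len;
    return fake_register(base, len, &reg->handle) ? -1 : 0;
}

/* deregister_mem-style callback: release the handle when evicted. */
static int dereg_cb(struct reg_entry *reg)
{
    return fake_deregister(reg->handle) ? -1 : 0;
}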
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include +#include + +#include "opal/align.h" + +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/oob/base/base.h" +#include "orte/mca/rml/rml.h" + +#include "ompi/types.h" +#include "ompi/mca/btl/base/btl_base_error.h" +#include "ompi/mca/mpool/mpool.h" +#include "ompi/mca/mpool/base/base.h" +#include "ompi/mca/mpool/pcie/mpool_pcie.h" + +#include "btl_pcie.h" +#include "btl_pcie_endpoint.h" +#include "btl_pcie_proc.h" +#include "btl_pcie_frag.h" + +/* + * Initialize state of the endpoint instance. + * + */ + +static void mca_btl_pcie_endpoint_construct(mca_btl_base_endpoint_t* endpoint) +{ + endpoint->endpoint_btl = 0; + endpoint->endpoint_proc = 0; +} + +/* + * Destroy a endpoint + * + */ + +static void mca_btl_pcie_endpoint_destruct(mca_btl_base_endpoint_t* endpoint) +{ +} + + +OBJ_CLASS_INSTANCE( + mca_btl_pcie_endpoint_t, + opal_list_item_t, + mca_btl_pcie_endpoint_construct, + mca_btl_pcie_endpoint_destruct); + + + +/* + * Initialize an endpoint + */ +int mca_btl_pcie_endpoint_init(mca_btl_base_endpoint_t* endpoint) +{ + int rc; + mca_btl_pcie_module_t* pcie_btl = + endpoint->endpoint_btl; + mca_mpool_base_resources_t mpool_resources; + size_t fifo_buffer_len, current_offset = 0; + + /* Open our device */ + rc = dd_open(endpoint->lcl_dev_name, + &endpoint->pcie_adapter); + if( 0 != rc) { + BTL_ERROR(("Failed to open pcie device dd_open says : %d\n", rc)); + return OMPI_ERROR; + } + + /* fill in endpoint data for begining of resources */ + endpoint->lcl_sma_ptr = endpoint->pcie_adapter.local_sma_address; + if(NULL == endpoint->lcl_sma_ptr) { + BTL_ERROR(("Error: local sma address is null\n")); + return OMPI_ERROR; + } + + endpoint->rem_sma_ptr = endpoint->pcie_adapter.remote_sma_address; + if(NULL == endpoint->rem_sma_ptr) { + BTL_ERROR(("Error: remote sma address is null\n")); + return OMPI_ERROR; + } + + BTL_VERBOSE(("SMA for device %s: local=0x%lx,%d remote=0x%lx,%d", + endpoint->lcl_dev_name, + endpoint->lcl_sma_ptr, + endpoint->pcie_adapter.local_sma_size, + endpoint->rem_sma_ptr, + endpoint->pcie_adapter.remote_sma_size)); + + /* 16 bytes of the buffer reserved for the 8 byte local DMA completion */ + endpoint->lcl_dma_status = ((char*) endpoint->lcl_sma_ptr) + current_offset; + current_offset += 16; + + /* fifo_buffer_len bytes reserved for fifos */ + fifo_buffer_len = sizeof(btl_pcie_fifo_entry_t) * mca_btl_pcie_component.pcie_recv_queue_len; + + rc = ompi_btl_pcie_fifo_init_send(&(endpoint->send_fifo), + mca_btl_pcie_component.pcie_recv_queue_len, + ((char*) endpoint->rem_sma_ptr) + current_offset); + if (OMPI_SUCCESS != rc) { + BTL_ERROR(("Error: Failed to init send fifo: %d", rc)); + return rc; + } + + rc = ompi_btl_pcie_fifo_init_recv(&(endpoint->recv_fifo), + mca_btl_pcie_component.pcie_recv_queue_len, + ((char*) endpoint->lcl_sma_ptr) + current_offset, + fifo_buffer_len); + if (OMPI_SUCCESS != rc) { + BTL_ERROR(("Error: Failed to init recv fifo: %d", rc)); + return rc; + } + + current_offset += fifo_buffer_len; + + /* reserve rest of the space for the mpool */ + endpoint->rem_frag_base = + ((char*) endpoint->rem_sma_ptr) + current_offset; + + endpoint->lcl_frag_base = + ((char*) endpoint->lcl_sma_ptr) + current_offset; + + /* don't need to align this one as the free list */ + /* will take care of it. 
*/ + mpool_resources.base = endpoint->rem_frag_base; + mpool_resources.len = endpoint->pcie_adapter.remote_sma_size - + current_offset; + + /* setup my pcie mpool */ + pcie_btl->pcie_mpool = + mca_mpool_base_module_create(mca_btl_pcie_component.pcie_send_mpool_name, + pcie_btl, + &mpool_resources); + + /* setup the modules free lists and such as we now */ + /* have enough info to setup the mpool */ + + /* eager SMA communication buffers */ +#if (OMPI_MAJOR_VERSION <= 1) && (OMPI_MINOR_VERSION <= 2) + ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_eager), + sizeof(mca_btl_pcie_sma_buf_eager_t) + + mca_btl_pcie_module.super.btl_eager_limit, + sizeof(mca_btl_pcie_sma_buf_eager_t), + MCA_BTL_PCIE_FRAG_ALIGN, + OBJ_CLASS(mca_btl_pcie_sma_buf_eager_t), + mca_btl_pcie_component.pcie_free_list_num, + mca_btl_pcie_component.pcie_free_list_max, + mca_btl_pcie_component.pcie_free_list_inc, + pcie_btl->pcie_mpool); + + /* max size SMA communication buffers */ + ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_max), + sizeof(mca_btl_pcie_sma_buf_max_t) + + mca_btl_pcie_module.super.btl_max_send_size, + sizeof(mca_btl_pcie_sma_buf_max_t), + MCA_BTL_PCIE_FRAG_ALIGN, + OBJ_CLASS(mca_btl_pcie_sma_buf_max_t), + mca_btl_pcie_component.pcie_free_list_num, + mca_btl_pcie_component.pcie_free_list_max, + mca_btl_pcie_component.pcie_free_list_inc, + pcie_btl->pcie_mpool); + + /* User eager fragment buffer */ + ompi_free_list_init_ex(&(pcie_btl->pcie_frag_eager), + sizeof(mca_btl_pcie_frag_eager_t) + + mca_btl_pcie_module.super.btl_eager_limit, + sizeof(mca_btl_pcie_frag_eager_t), + MCA_BTL_PCIE_FRAG_ALIGN, + OBJ_CLASS(mca_btl_pcie_frag_eager_t), + mca_btl_pcie_component.pcie_free_list_num, + mca_btl_pcie_component.pcie_free_list_max, + mca_btl_pcie_component.pcie_free_list_inc, + NULL); + + /* User max size fragment buffer */ + ompi_free_list_init_ex(&(pcie_btl->pcie_frag_max), + sizeof(mca_btl_pcie_frag_max_t) + + mca_btl_pcie_module.super.btl_max_send_size, + sizeof(mca_btl_pcie_frag_max_t), + MCA_BTL_PCIE_FRAG_ALIGN, + OBJ_CLASS(mca_btl_pcie_frag_max_t), + mca_btl_pcie_component.pcie_free_list_num, + mca_btl_pcie_component.pcie_free_list_max, + mca_btl_pcie_component.pcie_free_list_inc, + NULL); +#else + ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_eager), + mca_btl_pcie_module.super.btl_eager_limit, + MCA_BTL_PCIE_FRAG_ALIGN, + OBJ_CLASS(mca_btl_pcie_sma_buf_eager_t), + mca_btl_pcie_component.pcie_free_list_num, + mca_btl_pcie_component.pcie_free_list_max, + mca_btl_pcie_component.pcie_free_list_inc, + pcie_btl->pcie_mpool, + NULL, + NULL); + + /* max size SMA communication buffers */ + ompi_free_list_init_ex(&(pcie_btl->pcie_sma_buf_max), + mca_btl_pcie_module.super.btl_max_send_size, + MCA_BTL_PCIE_FRAG_ALIGN, + OBJ_CLASS(mca_btl_pcie_sma_buf_max_t), + mca_btl_pcie_component.pcie_free_list_num, + mca_btl_pcie_component.pcie_free_list_max, + mca_btl_pcie_component.pcie_free_list_inc, + pcie_btl->pcie_mpool, + NULL, + NULL); + + /* User eager fragment buffer */ + ompi_free_list_init_ex(&(pcie_btl->pcie_frag_eager), + mca_btl_pcie_module.super.btl_eager_limit, + MCA_BTL_PCIE_FRAG_ALIGN, + OBJ_CLASS(mca_btl_pcie_frag_eager_t), + mca_btl_pcie_component.pcie_free_list_num, + mca_btl_pcie_component.pcie_free_list_max, + mca_btl_pcie_component.pcie_free_list_inc, + NULL, + NULL, + NULL); + + /* User max size fragment buffer */ + ompi_free_list_init_ex(&(pcie_btl->pcie_frag_max), + mca_btl_pcie_module.super.btl_max_send_size, + MCA_BTL_PCIE_FRAG_ALIGN, + OBJ_CLASS(mca_btl_pcie_frag_max_t), + 
mca_btl_pcie_component.pcie_free_list_num, + mca_btl_pcie_component.pcie_free_list_max, + mca_btl_pcie_component.pcie_free_list_inc, + NULL, + NULL, + NULL); +#endif + + /* dma frags. note that we can only have 16 outstanding memory + handles so we cannot currently support leave_pinned and we must + limit the number of outstanding DMAs via the free list of DMA + frags */ + ompi_free_list_init(&(pcie_btl->pcie_frag_dma), + sizeof(mca_btl_pcie_frag_dma_t), + OBJ_CLASS(mca_btl_pcie_frag_dma_t), + 16, + 16, + 0, + NULL); + + /* recv frag */ + OBJ_CONSTRUCT(&(pcie_btl->pcie_recv_frag), + mca_btl_pcie_frag_recv_t); + + pcie_btl->endpoint = endpoint; + pcie_btl->active = true; + + return OMPI_SUCCESS; +} + +/* + * Finalize an endpoint + */ +int mca_btl_pcie_endpoint_fini(mca_btl_base_endpoint_t* endpoint) +{ + return OMPI_SUCCESS; +} diff --git a/ompi/mca/btl/pcie/btl_pcie_endpoint.h b/ompi/mca/btl/pcie/btl_pcie_endpoint.h new file mode 100644 index 0000000000..e840bef514 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_endpoint.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_BTL_PCIE_ENDPOINT_H +#define MCA_BTL_PCIE_ENDPOINT_H + +#include "ompi_config.h" + +#include "opal/class/opal_list.h" +#include "opal/event/event.h" + +#include "ompi/mca/btl/btl.h" +#include "ompi/mca/pml/pml.h" + +#include "btl_pcie_ddriver.h" +#include "btl_pcie_frag.h" +#include "btl_pcie.h" +#include "btl_pcie_fifo.h" + +BEGIN_C_DECLS + +/** + * An abstraction that represents a connection to a endpoint process. + * An instance of mca_btl_base_endpoint_t is associated w/ each process + * and BTL pair at startup. However, connections to the endpoint + * are established dynamically on an as-needed basis: + */ + +struct mca_btl_base_endpoint_t { + opal_list_item_t super; + + struct mca_btl_pcie_module_t* endpoint_btl; + /**< BTL instance that created this connection */ + + struct mca_btl_pcie_proc_t* endpoint_proc; + /**< proc structure corresponding to endpoint */ + + /** the name of the remote PCIE device */ + char* rem_dev_name; + /** the name of the local PCIE device */ + char* lcl_dev_name; + + /** the pcie adapter - returned by dd_open */ + DD_adapter_handle pcie_adapter; + + /** local pcie SMA memory for this endpoint */ + char *lcl_sma_ptr; + + /** remote pcie SMA memory for this endpoint */ + char *rem_sma_ptr; + + /** remote fragment starting point (in which to + * deliver data via "rdma" write + */ + char *rem_frag_base; + char *lcl_frag_base; + + char *lcl_dma_status; + + btl_pcie_fifo_t recv_fifo; + + btl_pcie_fifo_t send_fifo; + + +}; + +typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t; + +typedef mca_btl_base_endpoint_t mca_btl_pcie_endpoint_t; +OBJ_CLASS_DECLARATION(mca_btl_pcie_endpoint_t); + + +/* + * Initialize an endpoint + */ +int mca_btl_pcie_endpoint_init(mca_btl_base_endpoint_t* endpoint); + +/* + * Finalize an endpoint + */ +int mca_btl_pcie_endpoint_fini(mca_btl_base_endpoint_t* endpoint); + +END_C_DECLS + +#endif /* #ifndef MCA_BTL_PCIE_ENDPOINT_H */ diff --git a/ompi/mca/btl/pcie/btl_pcie_fifo.c b/ompi/mca/btl/pcie/btl_pcie_fifo.c new file mode 100644 index 0000000000..bdf56df4e2 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_fifo.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#include <string.h>
+
+#include "opal/threads/mutex.h"
+#include "opal/types.h"
+#include "ompi/constants.h"
+
+#include "btl_pcie_fifo.h"
+
+static uint32_t
+get_mask(unsigned int len)
+{
+    int pop_count, highest_used_bit, tmp_input_integer;
+    unsigned int pow;
+
+    /* init counters */
+    pop_count = 0;
+    highest_used_bit = -1;
+
+    /* get population count and highest non-zero bit */
+    tmp_input_integer = len;
+    while (tmp_input_integer > 0) {
+        pop_count += (tmp_input_integer & 1);
+        highest_used_bit++;
+        tmp_input_integer >>= 1;
+    }
+    if (1 < pop_count) {
+        /* round up */
+        highest_used_bit++;
+    }
+
+    /* generate power value; only an exact power of two yields a non-zero mask */
+    pow = 1 << highest_used_bit;
+
+    if (pow != len) return 0;
+    return pow - 1;
+}
+
+
+int
+ompi_btl_pcie_fifo_init_send(btl_pcie_fifo_t *fifo,
+                             unsigned int fifo_len,
+                             void *queue_space)
+{
+    fifo->fifo_len = fifo_len;
+    fifo->current_index = 0;
+    fifo->num_outstanding = 0;
+    fifo->mask = get_mask(fifo_len);
+    fifo->queue = queue_space;
+
+    if (fifo->mask == 0) return OMPI_ERROR;
+
+    return OMPI_SUCCESS;
+}
+
+
+int
+ompi_btl_pcie_fifo_init_recv(btl_pcie_fifo_t *fifo,
+                             unsigned int fifo_len,
+                             void *queue_space,
+                             size_t queue_space_len)
+{
+    fifo->fifo_len = fifo_len;
+    fifo->current_index = 1;
+    fifo->num_outstanding = 0;
+    fifo->mask = get_mask(fifo_len);
+    fifo->queue = queue_space;
+
+    if (fifo->mask == 0) return OMPI_ERROR;
+
+    if (fifo_len * sizeof(btl_pcie_fifo_entry_t) > queue_space_len) {
+        return OMPI_ERROR;
+    }
+
+    /* initialize the queue to empty */
+    memset(fifo->queue, 0, fifo_len * sizeof(btl_pcie_fifo_entry_t));
+
+    return OMPI_SUCCESS;
+}
+
+
+int
+ompi_btl_pcie_fifo_finalize(btl_pcie_fifo_t *fifo)
+{
+    return OMPI_SUCCESS;
+}
diff --git a/ompi/mca/btl/pcie/btl_pcie_fifo.h b/ompi/mca/btl/pcie/btl_pcie_fifo.h
new file mode 100644
index 0000000000..c9d0a7b0d7
--- /dev/null
+++ b/ompi/mca/btl/pcie/btl_pcie_fifo.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2007 Los Alamos National Security, LLC.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#ifndef BTL_PCIE_FIFO_H
+#define BTL_PCIE_FIFO_H
+
+#include "ompi_config.h"
+#include "ompi/constants.h"
+
+#include "opal/threads/mutex.h"
+#include "opal/types.h"
+#include "ompi/mca/btl/base/btl_base_error.h"
+
+BEGIN_C_DECLS
+
+typedef uint64_t btl_pcie_fifo_entry_t;
+#define BTL_PCIE_FIFO_TYPE_MASK 0x8000000000000000
+#define BTL_PCIE_FIFO_DATA_MASK 0x7FFFFFFFFFFFFFFF
+#define BTL_PCIE_FIFO_TYPE_ACK 0x0000000000000000
+#define BTL_PCIE_FIFO_TYPE_SEND 0x8000000000000000
+
+struct btl_pcie_fifo_t {
+    /* number of entries in queue */
+    uint32_t fifo_len;
+    /* for sender: next place to write
+     * for receiver: next place to read */
+    uint32_t current_index;
+    /* for sender: number of entries "in flight".  Must always be less
+       than or equal to fifo_len */
+    uint32_t num_outstanding;
+    uint32_t mask;
+    /* the actual buffer */
+    btl_pcie_fifo_entry_t* queue;
+};
+typedef struct btl_pcie_fifo_t btl_pcie_fifo_t;
+
+
+/**
+ * Initialize fifo structure
+ *
+ * Initialize send/recv fifo structure.  The fifo structure does
+ * double duty of maintaining both the sender and receiver.  This
+ * function initializes the send view of the fifo structure, for
+ * use to send messages.  fifo_get_msg() should not be called on
+ * this fifo.
+ *
+ * @note fifo_len must match the value given to the matching
+ * fifo_init_recv(), although there are no checks to verify this.
+ * + * @param[in] fifo A pointer to a fifo structure to be + * initialized + * @param[in] fifo_len Requested length of the fifo queue + * @param[in] queue_space Space for the receive queue (remote pointer) + * + * @retval OMPI_SUCCESS Everything worked + * @retval OMPI_ERROR Good luck! + */ +int ompi_btl_pcie_fifo_init_send(btl_pcie_fifo_t *fifo, + unsigned int fifo_len, + void *queue_space); + + +/** + * Initialize fifo structure + * + * Initialize send/recv fifo structure. The fifo structure does + * double duty of maintaining both the sender and receiver. This + * function initializes the receive view of the fifo structure, for + * use to receive messages. fifo_set_msg() should not be called on + * this fifo. + * + * @note fifo_len must match the value given to the matching + * fifo_init_send(), although there are no checks to verify this. + * + * @param[in] fifo A pointer to a fifo structure to be + * initialized + * @param[in] fifo_len Requested length of the fifo queue + * @param[in] queue_space Space for the receive queue (local pointer) + * @param[in] queue_space_len Length of queue_space + * + * @retval OMPI_SUCCESS Everything worked + * @retval OMPI_ERROR Good luck! + */ +int ompi_btl_pcie_fifo_init_recv(btl_pcie_fifo_t *fifo, + unsigned int fifo_len, + void *queue_space, + size_t queue_space_len); + +int ompi_btl_pcie_fifo_finalize(btl_pcie_fifo_t *fifo); + + +/** + * Read a message from the queue + * + * Read a message from the queue + * + * @param[in] fifo The receive view of the fifo + * + * @return A non-zero message or 0 if no new messages are + * available. + */ +static inline btl_pcie_fifo_entry_t +ompi_btl_pcie_fifo_get_msg(btl_pcie_fifo_t *fifo) +{ + /* BWB - TODO - if we ever want to be multi-threaded, we'll + need to fix this */ + btl_pcie_fifo_entry_t ret = 0; + if (0 != (ret = fifo->queue[fifo->current_index])) { + fifo->queue[fifo->current_index] = 0; + fifo->current_index++; + fifo->current_index &= fifo->mask; + } + + return ret; +} + + +/** + * Write a message pointer into the queue + * + * Write a message pointer into the send queue view of the fifo. + * + * @param[in] fifo The send view of the fifo + * @param[in] msg The index to the payload to deliver + * + * @retval OMPI_SUCCESS Fifo successfully updated + * @retval OMPI_ERR_RESOURCE_BUSY There was no space in the fifo + */ +static inline int +ompi_btl_pcie_fifo_set_msg(btl_pcie_fifo_t *fifo, btl_pcie_fifo_entry_t msg) +{ + uint32_t outstanding; + + /* see if we have a slot */ + outstanding = OPAL_THREAD_ADD32(&fifo->num_outstanding, 1); + if (outstanding > fifo->fifo_len) { + OPAL_THREAD_ADD32(&fifo->num_outstanding, -1); + return OMPI_ERR_RESOURCE_BUSY; + } + + /* now that we have a slot, figure out where it is. Allow the + outstanding to wrap around forever - just mask out the bits we + don't care about. */ + outstanding = OPAL_THREAD_ADD32(&fifo->current_index, 1); + outstanding &= fifo->mask; + + fifo->queue[outstanding] = msg; + + return OMPI_SUCCESS; +} + + +static inline int +ompi_btl_pcie_fifo_complete_msg(btl_pcie_fifo_t *fifo, + unsigned int num_msgs) +{ + OPAL_THREAD_ADD32(&fifo->num_outstanding, -num_msgs); + return OMPI_SUCCESS; +} + + +END_C_DECLS + +#endif /* BTL_PCIE_FIFO_H */ diff --git a/ompi/mca/btl/pcie/btl_pcie_frag.c b/ompi/mca/btl/pcie/btl_pcie_frag.c new file mode 100644 index 0000000000..4053c8e361 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_frag.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. 
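[Editor's note, not part of the patch] For orientation, here is a sketch of the call sequence the FIFO interface above is designed for. In the BTL the two views live in different processes and the send side's queue_space points into the peer's SMA window; the single-process array and names below (SKETCH_QUEUE_LEN, sketch_queue, pcie_fifo_round_trip_sketch) are hypothetical stand-ins, and "btl_pcie_fifo.h" is assumed to be included. Entries must be non-zero, since a zero slot means "empty", and the queue length must be a power of two or both init calls fail.

#include "btl_pcie_fifo.h"

#define SKETCH_QUEUE_LEN 256   /* hypothetical; must be a power of two */

static btl_pcie_fifo_entry_t sketch_queue[SKETCH_QUEUE_LEN];

static int pcie_fifo_round_trip_sketch(void)
{
    btl_pcie_fifo_t send_view, recv_view;
    btl_pcie_fifo_entry_t msg;

    /* the sender writes into memory the receiver polls; here both
     * views share one local array instead of an SMA window */
    if (OMPI_SUCCESS != ompi_btl_pcie_fifo_init_send(&send_view, SKETCH_QUEUE_LEN,
                                                     sketch_queue) ||
        OMPI_SUCCESS != ompi_btl_pcie_fifo_init_recv(&recv_view, SKETCH_QUEUE_LEN,
                                                     sketch_queue,
                                                     sizeof(sketch_queue))) {
        return OMPI_ERROR;   /* non power-of-two length, or queue too small */
    }

    /* post one entry, tagged with the high-bit "send" convention above;
     * OMPI_ERR_RESOURCE_BUSY would mean the ring is full */
    if (OMPI_SUCCESS != ompi_btl_pcie_fifo_set_msg(&send_view,
                                                   1 | BTL_PCIE_FIFO_TYPE_SEND)) {
        return OMPI_ERROR;
    }

    /* the receiver polls until a non-zero entry shows up */
    while (0 == (msg = ompi_btl_pcie_fifo_get_msg(&recv_view))) {
        continue;
    }

    /* the payload index is in the low bits, the type in the high bit */
    if (BTL_PCIE_FIFO_TYPE_SEND != (msg & BTL_PCIE_FIFO_TYPE_MASK)) {
        return OMPI_ERROR;
    }

    /* once the matching ACK comes back, the sender frees its slot */
    return ompi_btl_pcie_fifo_complete_msg(&send_view, 1);
}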
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "btl_pcie_frag.h" +#include "btl_pcie.h" + + +static void +mca_btl_pcie_sma_buf_eager_constructor(mca_btl_pcie_sma_buf_t* buf) +{ + buf->pcie_data.pval = buf + 1; + buf->type = MCA_BTL_PCIE_TYPE_EAGER; +} + +static void +mca_btl_pcie_sma_buf_max_constructor(mca_btl_pcie_sma_buf_t* buf) +{ + buf->pcie_data.pval = buf + 1; + buf->type = MCA_BTL_PCIE_TYPE_MAX; +} + +OBJ_CLASS_INSTANCE(mca_btl_pcie_sma_buf_eager_t, + ompi_free_list_item_t, + mca_btl_pcie_sma_buf_eager_constructor, + NULL); + +OBJ_CLASS_INSTANCE(mca_btl_pcie_sma_buf_max_t, + ompi_free_list_item_t, + mca_btl_pcie_sma_buf_max_constructor, + NULL); + + +static void +mca_btl_pcie_frag_dma_constructor(mca_btl_pcie_frag_t* frag) +{ + frag->base.des_src = NULL; + frag->base.des_src_cnt = 0; + frag->base.des_dst = NULL; + frag->base.des_dst_cnt = 0; + + frag->segment.seg_addr.pval = NULL; + frag->segment.seg_len = 0; + + frag->endpoint = NULL; + frag->hdr = NULL; + frag->size = 0; + frag->registration = NULL; + frag->type = MCA_BTL_PCIE_TYPE_RDMA; + frag->sma_buf = NULL; +} + + +static void +mca_btl_pcie_frag_common_constructor(mca_btl_pcie_frag_t* frag) +{ + frag->base.des_src = &frag->segment; + frag->base.des_src_cnt = 1; + frag->base.des_dst = NULL; + frag->base.des_dst_cnt = 0; + + frag->hdr = (mca_btl_pcie_header_t*) (frag + 1); + frag->hdr->send_frag.pval = frag; + + frag->segment.seg_addr.pval = ((unsigned char*) frag->hdr) + sizeof(mca_btl_pcie_header_t); + frag->segment.seg_len = frag->size; + + frag->endpoint = NULL; + frag->registration = NULL; + frag->sma_buf = NULL; +} + +static void +mca_btl_pcie_frag_eager_constructor(mca_btl_pcie_frag_t* frag) +{ + frag->size = mca_btl_pcie_module.super.btl_eager_limit; + mca_btl_pcie_frag_common_constructor(frag); + frag->type = MCA_BTL_PCIE_TYPE_EAGER; +} + +static void mca_btl_pcie_frag_max_constructor(mca_btl_pcie_frag_t* frag) +{ + frag->size = mca_btl_pcie_module.super.btl_max_send_size; + mca_btl_pcie_frag_common_constructor(frag); + frag->type = MCA_BTL_PCIE_TYPE_MAX; +} + + +static void mca_btl_pcie_frag_recv_constructor(mca_btl_pcie_frag_t *frag) +{ + frag->base.des_src = NULL; + frag->base.des_src_cnt = 0; + frag->base.des_dst = &frag->segment; + frag->base.des_dst_cnt = 1; + + frag->segment.seg_addr.pval = NULL; + frag->segment.seg_len = 0; + + frag->endpoint = NULL; + frag->hdr = NULL; + frag->size = 0; + frag->registration = NULL; + frag->type = MCA_BTL_PCIE_TYPE_RECV; + frag->sma_buf = NULL; +} + + +OBJ_CLASS_INSTANCE( + mca_btl_pcie_frag_eager_t, + mca_btl_base_descriptor_t, + mca_btl_pcie_frag_eager_constructor, + NULL); + +OBJ_CLASS_INSTANCE( + mca_btl_pcie_frag_max_t, + mca_btl_base_descriptor_t, + mca_btl_pcie_frag_max_constructor, + NULL); + + +OBJ_CLASS_INSTANCE( + mca_btl_pcie_frag_recv_t, + mca_btl_base_descriptor_t, + mca_btl_pcie_frag_recv_constructor, + NULL); + +OBJ_CLASS_INSTANCE( + mca_btl_pcie_frag_dma_t, + mca_btl_base_descriptor_t, + mca_btl_pcie_frag_dma_constructor, + NULL); diff --git a/ompi/mca/btl/pcie/btl_pcie_frag.h b/ompi/mca/btl/pcie/btl_pcie_frag.h new file mode 100644 index 0000000000..b31b9e1582 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_frag.h @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_BTL_PCIE_FRAG_H +#define MCA_BTL_PCIE_FRAG_H + +#include "ompi_config.h" + +#include "ompi/mca/btl/btl.h" + +BEGIN_C_DECLS + +#define MCA_BTL_PCIE_FRAG_ALIGN (16) + +/* Header that sits at top of any send message */ +struct mca_btl_pcie_header_t { + mca_btl_base_tag_t tag; + uint8_t pad[3]; + uint32_t length; + ompi_ptr_t send_frag; +}; +typedef struct mca_btl_pcie_header_t mca_btl_pcie_header_t; + +#define OMPI_BTL_PCIE_HEADER_HTON(header) \ +do { \ + (header).length = htonl((header).length); \ + } while (0) + + +#define OMPI_BTL_PCIE_HEADER_NTOH(header) \ +do { \ + (header).length = ntohl((header).length); \ + } while (0) + +struct mca_btl_pcie_frag_t; + +/** Type description for fragments / buffers */ +enum mca_btl_pcie_frag_type_t { + MCA_BTL_PCIE_TYPE_UNKNOWN, + MCA_BTL_PCIE_TYPE_EAGER, + MCA_BTL_PCIE_TYPE_MAX, + MCA_BTL_PCIE_TYPE_RDMA, + MCA_BTL_PCIE_TYPE_RECV +}; +typedef enum mca_btl_pcie_frag_type_t mca_btl_pcie_frag_type_t; + +/** SMA transfer fragment */ +struct mca_btl_pcie_sma_buf_t { + ompi_free_list_item_t super; + /** Pointer to the SMA space available for this copy. An + ompi_ptr_t because in v1.2, this sits in the sma region, + and we need to not have different sizes on each endpoint. */ + ompi_ptr_t pcie_data; + /** type of buffer */ + mca_btl_pcie_frag_type_t type; +}; +typedef struct mca_btl_pcie_sma_buf_t mca_btl_pcie_sma_buf_t; + +typedef mca_btl_pcie_sma_buf_t mca_btl_pcie_sma_buf_eager_t; +OBJ_CLASS_DECLARATION(mca_btl_pcie_sma_buf_eager_t); + +typedef mca_btl_pcie_sma_buf_t mca_btl_pcie_sma_buf_max_t; +OBJ_CLASS_DECLARATION(mca_btl_pcie_sma_buf_max_t); + +#define MCA_BTL_PCIE_SMA_BUF_ALLOC_EAGER(btl, buf, rc) \ +{ \ + ompi_free_list_item_t *item; \ + OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)btl)->pcie_sma_buf_eager, item, rc); \ + buf = (mca_btl_pcie_sma_buf_t*) item; \ +} + +#define MCA_BTL_PCIE_SMA_BUF_ALLOC_MAX(btl, buf, rc) \ +{ \ + ompi_free_list_item_t *item; \ + OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)btl)->pcie_sma_buf_max, item, rc); \ + buf = (mca_btl_pcie_sma_buf_t*) item; \ +} + +#define MCA_BTL_PCIE_SMA_BUF_RETURN(btl, buf, ret) \ +{ \ + ret = OMPI_SUCCESS; \ + switch ((buf)->type) { \ + case MCA_BTL_PCIE_TYPE_EAGER: \ + OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)btl)->pcie_sma_buf_eager, \ + (ompi_free_list_item_t*)(buf)); \ + break; \ + case MCA_BTL_PCIE_TYPE_MAX: \ + OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)btl)->pcie_sma_buf_max, \ + (ompi_free_list_item_t*)(buf)); \ + break; \ + default: \ + BTL_ERROR(("Invalid return type (%d) for frag 0x%lx in SMA_BUF_RETURN", \ + buf->type, buf)); \ + ret = OMPI_ERR_BAD_PARAM; \ + } \ +} + + +/** Fragment description -- used for send/rdma fragments */ +struct mca_btl_pcie_frag_t { + mca_btl_base_descriptor_t base; + mca_btl_base_segment_t segment; + struct mca_btl_base_endpoint_t *endpoint; + mca_btl_pcie_header_t *hdr; + size_t size; + struct mca_btl_pcie_reg_t *registration; + mca_btl_pcie_frag_type_t type; + mca_btl_pcie_sma_buf_t *sma_buf; +}; +typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_t; + +typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_eager_t; +OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_eager_t); + +typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_max_t; +OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_max_t); + +typedef struct mca_btl_pcie_frag_t mca_btl_pcie_frag_recv_t; +OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_recv_t); + +typedef struct mca_btl_pcie_frag_t 
mca_btl_pcie_frag_dma_t; +OBJ_CLASS_DECLARATION(mca_btl_pcie_frag_dma_t); + + +#define MCA_BTL_PCIE_FRAG_ALLOC_EAGER(btl, frag, rc) \ +{ \ + ompi_free_list_item_t *item; \ + OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)btl)->pcie_frag_eager, item, rc); \ + frag = (mca_btl_pcie_frag_t*) item; \ +} + +#define MCA_BTL_PCIE_FRAG_ALLOC_MAX(btl, frag, rc) \ +{ \ + ompi_free_list_item_t *item; \ + OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)btl)->pcie_frag_max, item, rc); \ + frag = (mca_btl_pcie_frag_t*) item; \ +} + +#define MCA_BTL_PCIE_FRAG_ALLOC_DMA(btl, frag, rc) \ +{ \ + \ + ompi_free_list_item_t *item; \ + OMPI_FREE_LIST_GET(&((mca_btl_pcie_module_t*)btl)->pcie_frag_dma, item, rc); \ + frag = (mca_btl_pcie_frag_t*) item; \ +} + +#define MCA_BTL_PCIE_FRAG_RETURN(btl, frag, ret) \ +{ \ + ret = OMPI_SUCCESS; \ + switch ((frag)->type) { \ + case MCA_BTL_PCIE_TYPE_EAGER: \ + OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)btl)->pcie_frag_eager, \ + (ompi_free_list_item_t*)(frag)); \ + break; \ + case MCA_BTL_PCIE_TYPE_MAX: \ + OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)btl)->pcie_frag_max, \ + (ompi_free_list_item_t*)(frag)); \ + break; \ + case MCA_BTL_PCIE_TYPE_RDMA: \ + OMPI_FREE_LIST_RETURN(&((mca_btl_pcie_module_t*)btl)->pcie_frag_dma, \ + (ompi_free_list_item_t*)(frag)); \ + break; \ + default: \ + BTL_ERROR(("Invalid return type (%d) for frag 0x%lx in FRAG_RETURN", \ + frag->type, frag)); \ + ret = OMPI_ERR_BAD_PARAM; \ + } \ +} + +END_C_DECLS + +#endif /* #ifndef MCA_BTL_PCIE_FRAG_H */ diff --git a/ompi/mca/btl/pcie/btl_pcie_lex.c b/ompi/mca/btl/pcie/btl_pcie_lex.c new file mode 100644 index 0000000000..eddb256a70 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_lex.c @@ -0,0 +1,1698 @@ +#define yy_create_buffer btl_pcie_cfg_yy_create_buffer +#define yy_delete_buffer btl_pcie_cfg_yy_delete_buffer +#define yy_scan_buffer btl_pcie_cfg_yy_scan_buffer +#define yy_scan_string btl_pcie_cfg_yy_scan_string +#define yy_scan_bytes btl_pcie_cfg_yy_scan_bytes +#define yy_flex_debug btl_pcie_cfg_yy_flex_debug +#define yy_init_buffer btl_pcie_cfg_yy_init_buffer +#define yy_flush_buffer btl_pcie_cfg_yy_flush_buffer +#define yy_load_buffer_state btl_pcie_cfg_yy_load_buffer_state +#define yy_switch_to_buffer btl_pcie_cfg_yy_switch_to_buffer +#define yyin btl_pcie_cfg_yyin +#define yyleng btl_pcie_cfg_yyleng +#define yylex btl_pcie_cfg_yylex +#define yyout btl_pcie_cfg_yyout +#define yyrestart btl_pcie_cfg_yyrestart +#define yytext btl_pcie_cfg_yytext +#define yywrap btl_pcie_cfg_yywrap + +/* A lexical scanner generated by flex */ + +/* Scanner skeleton version: + * $Header: /home/usr/ddd/openmpi/cvs/pcie/ompi/mca/btl/pcie/btl_pcie_lex.c,v 1.1 2007/08/01 15:10:22 bbarrett Exp $ + */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 5 + +#include +#include + + +/* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */ +#ifdef c_plusplus +#ifndef __cplusplus +#define __cplusplus +#endif +#endif + + +#ifdef __cplusplus + +#include + +/* Use prototypes in function declarations. */ +#define YY_USE_PROTOS + +/* The "const" storage-class-modifier is valid. */ +#define YY_USE_CONST + +#else /* ! __cplusplus */ + +#if __STDC__ + +#define YY_USE_PROTOS +#define YY_USE_CONST + +#endif /* __STDC__ */ +#endif /* ! 
__cplusplus */ + +#ifdef __TURBOC__ + #pragma warn -rch + #pragma warn -use +#include +#include +#define YY_USE_CONST +#define YY_USE_PROTOS +#endif + +#ifdef YY_USE_CONST +#define yyconst const +#else +#define yyconst +#endif + + +#ifdef YY_USE_PROTOS +#define YY_PROTO(proto) proto +#else +#define YY_PROTO(proto) () +#endif + +/* Returned upon end-of-file. */ +#define YY_NULL 0 + +/* Promotes a possibly negative, possibly signed char to an unsigned + * integer for use as an array index. If the signed char is negative, + * we want to instead treat it as an 8-bit unsigned char, hence the + * double cast. + */ +#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) + +/* Enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. + */ +#define BEGIN yy_start = 1 + 2 * + +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. + */ +#define YY_START ((yy_start - 1) / 2) +#define YYSTATE YY_START + +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) + +/* Special action meaning "start processing a new file". */ +#define YY_NEW_FILE yyrestart( yyin ) + +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. */ +#define YY_BUF_SIZE 16384 + +typedef struct yy_buffer_state *YY_BUFFER_STATE; + +extern int yyleng; +extern FILE *yyin, *yyout; + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + +/* The funky do-while in the following #define is used to turn the definition + * int a single C statement (which needs a semi-colon terminator). This + * avoids problems with code like: + * + * if ( condition_holds ) + * yyless( 5 ); + * else + * do_something_else(); + * + * Prior to using the do-while the compiler would get upset at the + * "else" because it interpreted the "if" statement as being all + * done when it reached the ';' after the yyless() call. + */ + +/* Return all but the first 'n' matched characters back to the input stream. */ + +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + *yy_cp = yy_hold_char; \ + YY_RESTORE_YY_MORE_OFFSET \ + yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) + +#define unput(c) yyunput( c, yytext_ptr ) + +/* The following is because we cannot portably get our hands on size_t + * (without autoconf's help, which isn't available because we want + * flex-generated scanners to compile on their own). + */ +typedef unsigned int yy_size_t; + + +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + yy_size_t yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. 
+ */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. + * + * When we actually see the EOF, we change the status to "new" + * (via yyrestart()), so that the user can continue scanning by + * just pointing yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + }; + +static YY_BUFFER_STATE yy_current_buffer = 0; + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + */ +#define YY_CURRENT_BUFFER yy_current_buffer + + +/* yy_hold_char holds the character lost when yytext is formed. */ +static char yy_hold_char; + +static int yy_n_chars; /* number of characters read into yy_ch_buf */ + + +int yyleng; + +/* Points to current character in buffer. */ +static char *yy_c_buf_p = (char *) 0; +static int yy_init = 1; /* whether we need to initialize */ +static int yy_start = 0; /* start state number */ + +/* Flag which is used to allow yywrap()'s to do buffer switches + * instead of setting up a fresh yyin. A bit of a hack ... + */ +static int yy_did_buffer_switch_on_eof; + +void yyrestart YY_PROTO(( FILE *input_file )); + +void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer )); +void yy_load_buffer_state YY_PROTO(( void )); +YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size )); +void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b )); +void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); +void yy_flush_buffer YY_PROTO(( YY_BUFFER_STATE b )); +#define YY_FLUSH_BUFFER yy_flush_buffer( yy_current_buffer ) + +YY_BUFFER_STATE yy_scan_buffer YY_PROTO(( char *base, yy_size_t size )); +YY_BUFFER_STATE yy_scan_string YY_PROTO(( yyconst char *yy_str )); +YY_BUFFER_STATE yy_scan_bytes YY_PROTO(( yyconst char *bytes, int len )); + +static void *yy_flex_alloc YY_PROTO(( yy_size_t )); +static void *yy_flex_realloc YY_PROTO(( void *, yy_size_t )); +static void yy_flex_free YY_PROTO(( void * )); + +#define yy_new_buffer yy_create_buffer + +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! yy_current_buffer ) \ + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ + yy_current_buffer->yy_is_interactive = is_interactive; \ + } + +#define yy_set_bol(at_bol) \ + { \ + if ( ! 
yy_current_buffer ) \ + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ + yy_current_buffer->yy_at_bol = at_bol; \ + } + +#define YY_AT_BOL() (yy_current_buffer->yy_at_bol) + +typedef unsigned char YY_CHAR; +FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; +typedef int yy_state_type; +extern char *yytext; +#define yytext_ptr yytext + +static yy_state_type yy_get_previous_state YY_PROTO(( void )); +static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); +static int yy_get_next_buffer YY_PROTO(( void )); +static void yy_fatal_error YY_PROTO(( yyconst char msg[] )); + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. + */ +#define YY_DO_BEFORE_ACTION \ + yytext_ptr = yy_bp; \ + yyleng = (int) (yy_cp - yy_bp); \ + yy_hold_char = *yy_cp; \ + *yy_cp = '\0'; \ + yy_c_buf_p = yy_cp; + +#define YY_NUM_RULES 13 +#define YY_END_OF_BUFFER 14 +static yyconst short int yy_accept[41] = + { 0, + 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, + 14, 13, 12, 1, 13, 11, 11, 11, 5, 7, + 6, 12, 1, 0, 2, 11, 11, 0, 4, 11, + 5, 6, 6, 8, 10, 9, 0, 3, 11, 0 + } ; + +static yyconst int yy_ec[256] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 1, 1, 4, 1, 1, 1, 1, 1, + 1, 5, 1, 1, 6, 6, 7, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 9, 1, 1, + 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 1, 10, 1, 1, 6, 1, 6, 6, 6, 6, + + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + } ; + +static yyconst int yy_meta[11] = + { 0, + 1, 1, 2, 1, 3, 4, 4, 4, 1, 4 + } ; + +static yyconst short int yy_base[48] = + { 0, + 0, 0, 8, 11, 0, 0, 0, 0, 0, 0, + 44, 73, 15, 73, 38, 13, 19, 23, 0, 73, + 22, 0, 73, 22, 73, 0, 0, 7, 73, 29, + 0, 35, 38, 73, 0, 0, 40, 73, 0, 73, + 50, 54, 58, 60, 64, 68, 8 + } ; + +static yyconst short int yy_def[48] = + { 0, + 40, 1, 41, 41, 42, 42, 42, 42, 42, 42, + 40, 40, 40, 40, 43, 40, 44, 44, 45, 40, + 46, 13, 40, 43, 40, 16, 18, 47, 40, 40, + 45, 46, 46, 40, 47, 47, 30, 40, 30, 0, + 40, 40, 40, 40, 40, 40, 40 + } ; + +static yyconst short int yy_nxt[84] = + { 0, + 12, 13, 14, 15, 12, 16, 17, 16, 12, 18, + 20, 35, 21, 20, 36, 21, 22, 23, 26, 27, + 26, 28, 27, 29, 25, 30, 33, 40, 34, 37, + 37, 38, 37, 37, 39, 39, 39, 37, 39, 40, + 25, 40, 33, 40, 34, 37, 37, 37, 40, 37, + 19, 19, 19, 19, 12, 12, 12, 12, 24, 24, + 24, 24, 27, 27, 31, 40, 40, 31, 32, 40, + 32, 32, 11, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40 + } ; + +static yyconst short int yy_chk[84] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 3, 47, 3, 4, 28, 4, 13, 13, 16, 16, + 16, 16, 16, 17, 24, 17, 21, 18, 21, 30, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 32, + 15, 32, 33, 11, 33, 37, 37, 37, 0, 37, + 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, + 43, 43, 44, 44, 45, 0, 0, 45, 46, 0, + 46, 46, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40 + } ; + +static yy_state_type yy_last_accepting_state; +static char *yy_last_accepting_cpos; + +/* The intent behind this definition is that it'll catch + * any uses of REJECT which flex missed. 
+ */ +#define REJECT reject_used_but_not_detected +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 +#define YY_RESTORE_YY_MORE_OFFSET +char *yytext; +#line 1 "btl_pcie_lex.l" +#define INITIAL 0 +#line 2 "btl_pcie_lex.l" +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include +#if HAVE_UNISTD_H +#include +#endif + +#include "btl_pcie_lex.h" + +/* + * local functions + */ +static int finish_parsing(void) ; +static int btl_pcie_cfg_yywrap(void); + +/* + * global variables + */ +int btl_pcie_cfg_yynewlines = 1; +bool btl_pcie_cfg_parse_done = false; +char *btl_pcie_cfg_string = NULL; + +#define yyterminate() \ + return finish_parsing() + +#define comment 1 + +#define section_name 2 + +#define section_end 3 + +#define value 4 + +#line 461 "lex.btl_pcie_cfg_yy.c" + +/* Macros after this point can all be overridden by user definitions in + * section 1. + */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap YY_PROTO(( void )); +#else +extern int yywrap YY_PROTO(( void )); +#endif +#endif + +#ifndef YY_NO_UNPUT +static void yyunput YY_PROTO(( int c, char *buf_ptr )); +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy YY_PROTO(( char *, yyconst char *, int )); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen YY_PROTO(( yyconst char * )); +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus +static int yyinput YY_PROTO(( void )); +#else +static int input YY_PROTO(( void )); +#endif +#endif + +#if YY_STACK_USED +static int yy_start_stack_ptr = 0; +static int yy_start_stack_depth = 0; +static int *yy_start_stack = 0; +#ifndef YY_NO_PUSH_STATE +static void yy_push_state YY_PROTO(( int new_state )); +#endif +#ifndef YY_NO_POP_STATE +static void yy_pop_state YY_PROTO(( void )); +#endif +#ifndef YY_NO_TOP_STATE +static int yy_top_state YY_PROTO(( void )); +#endif + +#else +#define YY_NO_PUSH_STATE 1 +#define YY_NO_POP_STATE 1 +#define YY_NO_TOP_STATE 1 +#endif + +#ifdef YY_MALLOC_DECL +YY_MALLOC_DECL +#else +#if __STDC__ +#ifndef __cplusplus +#include +#endif +#else +/* Just try to get by without declaring the routines. This will fail + * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int) + * or sizeof(void*) != sizeof(int). + */ +#endif +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#define YY_READ_BUF_SIZE 8192 +#endif + +/* Copy whatever the last rule matched to the standard output. */ + +#ifndef ECHO +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO (void) fwrite( yytext, yyleng, 1, yyout ) +#endif + +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". 
+ */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + if ( yy_current_buffer->yy_is_interactive ) \ + { \ + int c = '*', n; \ + for ( n = 0; n < max_size && \ + (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } \ + else if ( ((result = fread( buf, 1, max_size, yyin )) == 0) \ + && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) +#endif + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +#define YY_DECL int yylex YY_PROTO(( void )) +#endif + +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. + */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. */ +#ifndef YY_BREAK +#define YY_BREAK break; +#endif + +#define YY_RULE_SETUP \ + YY_USER_ACTION + +YY_DECL + { + register yy_state_type yy_current_state; + register char *yy_cp = NULL, *yy_bp = NULL; + register int yy_act; + +#line 57 "btl_pcie_lex.l" + + +#line 615 "lex.btl_pcie_cfg_yy.c" + + if ( yy_init ) + { + yy_init = 0; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + if ( ! yy_start ) + yy_start = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! yyout ) + yyout = stdout; + + if ( ! yy_current_buffer ) + yy_current_buffer = + yy_create_buffer( yyin, YY_BUF_SIZE ); + + yy_load_buffer_state(); + } + + while ( 1 ) /* loops until end-of-file is reached */ + { + yy_cp = yy_c_buf_p; + + /* Support of yytext. */ + *yy_cp = yy_hold_char; + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. + */ + yy_bp = yy_cp; + + yy_current_state = yy_start; +yy_match: + do + { + register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; + if ( yy_accept[yy_current_state] ) + { + yy_last_accepting_state = yy_current_state; + yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 41 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + ++yy_cp; + } + while ( yy_base[yy_current_state] != 73 ); + +yy_find_action: + yy_act = yy_accept[yy_current_state]; + if ( yy_act == 0 ) + { /* have to back up */ + yy_cp = yy_last_accepting_cpos; + yy_current_state = yy_last_accepting_state; + yy_act = yy_accept[yy_current_state]; + } + + YY_DO_BEFORE_ACTION; + + +do_action: /* This label is used only to access EOF actions. 
*/ + + + switch ( yy_act ) + { /* beginning of action switch */ + case 0: /* must back up */ + /* undo the effects of YY_DO_BEFORE_ACTION */ + *yy_cp = yy_hold_char; + yy_cp = yy_last_accepting_cpos; + yy_current_state = yy_last_accepting_state; + goto yy_find_action; + +case 1: +YY_RULE_SETUP +#line 59 "btl_pcie_lex.l" +{ ++btl_pcie_cfg_yynewlines; + return BTL_PCIE_CFG_PARSE_NEWLINE; } + YY_BREAK +case 2: +YY_RULE_SETUP +#line 61 "btl_pcie_lex.l" +{ ++btl_pcie_cfg_yynewlines; + return BTL_PCIE_CFG_PARSE_NEWLINE; } + YY_BREAK +case 3: +YY_RULE_SETUP +#line 63 "btl_pcie_lex.l" +{ ++btl_pcie_cfg_yynewlines; + return BTL_PCIE_CFG_PARSE_NEWLINE; } + YY_BREAK +case 4: +YY_RULE_SETUP +#line 66 "btl_pcie_lex.l" +{ BEGIN(comment); + return BTL_PCIE_CFG_PARSE_NEWLINE; } + YY_BREAK +case 5: +YY_RULE_SETUP +#line 68 "btl_pcie_lex.l" +; /* Eat up non '*'s */ + YY_BREAK +case 6: +YY_RULE_SETUP +#line 69 "btl_pcie_lex.l" +; /* Eat '*'s not followed by a '/' */ + YY_BREAK +case 7: +YY_RULE_SETUP +#line 70 "btl_pcie_lex.l" +{ ++btl_pcie_cfg_yynewlines; + return BTL_PCIE_CFG_PARSE_NEWLINE; } + YY_BREAK +case 8: +YY_RULE_SETUP +#line 72 "btl_pcie_lex.l" +{ BEGIN(INITIAL); /* Done with block comment */ + return BTL_PCIE_CFG_PARSE_NEWLINE; } + YY_BREAK +case 9: +YY_RULE_SETUP +#line 75 "btl_pcie_lex.l" +{ return BTL_PCIE_CFG_PARSE_HOSTNAME_CORE; } + YY_BREAK +case 10: +YY_RULE_SETUP +#line 76 "btl_pcie_lex.l" +{ return BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE; } + YY_BREAK +case 11: +YY_RULE_SETUP +#line 78 "btl_pcie_lex.l" +{ return BTL_PCIE_CFG_PARSE_DEVICE; } + YY_BREAK +case 12: +YY_RULE_SETUP +#line 80 "btl_pcie_lex.l" +; /* whitespace */ + YY_BREAK +case 13: +YY_RULE_SETUP +#line 82 "btl_pcie_lex.l" +ECHO; + YY_BREAK +#line 769 "lex.btl_pcie_cfg_yy.c" +case YY_STATE_EOF(INITIAL): +case YY_STATE_EOF(comment): +case YY_STATE_EOF(section_name): +case YY_STATE_EOF(section_end): +case YY_STATE_EOF(value): + yyterminate(); + + case YY_END_OF_BUFFER: + { + /* Amount of text matched not including the EOB char. */ + int yy_amount_of_matched_text = (int) (yy_cp - yytext_ptr) - 1; + + /* Undo the effects of YY_DO_BEFORE_ACTION. */ + *yy_cp = yy_hold_char; + YY_RESTORE_YY_MORE_OFFSET + + if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW ) + { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed yyin at a new source and called + * yylex(). If so, then we have to assure + * consistency between yy_current_buffer and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. + */ + yy_n_chars = yy_current_buffer->yy_n_chars; + yy_current_buffer->yy_input_file = yyin; + yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL; + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in input(). + */ + if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + { /* This was really a NUL. */ + yy_state_type yy_next_state; + + yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + /* Okay, we're now positioned to make the NUL + * transition. 
We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state ); + + yy_bp = yytext_ptr + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* Consume the NUL. */ + yy_cp = ++yy_c_buf_p; + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { + yy_cp = yy_c_buf_p; + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer() ) + { + case EOB_ACT_END_OF_FILE: + { + yy_did_buffer_switch_on_eof = 0; + + if ( yywrap() ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. + */ + yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF(YY_START); + goto do_action; + } + + else + { + if ( ! yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = + yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext_ptr + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + yy_c_buf_p = + &yy_current_buffer->yy_ch_buf[yy_n_chars]; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext_ptr + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } /* end of action switch */ + } /* end of scanning one token */ + } /* end of yylex */ + + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ + +static int yy_get_next_buffer() + { + register char *dest = yy_current_buffer->yy_ch_buf; + register char *source = yytext_ptr; + register int number_to_move, i; + int ret_val; + + if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + if ( yy_current_buffer->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 ) + { + /* We matched a single character, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. + */ + return EOB_ACT_LAST_MATCH; + } + } + + /* Try to read more data. */ + + /* First move last chars to start of buffer. */ + number_to_move = (int) (yy_c_buf_p - yytext_ptr) - 1; + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + yy_current_buffer->yy_n_chars = yy_n_chars = 0; + + else + { + int num_to_read = + yy_current_buffer->yy_buf_size - number_to_move - 1; + + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. 
*/ +#ifdef YY_USES_REJECT + YY_FATAL_ERROR( +"input buffer overflow, can't enlarge buffer because scanner uses REJECT" ); +#else + + /* just a shorter name for the current buffer */ + YY_BUFFER_STATE b = yy_current_buffer; + + int yy_c_buf_p_offset = + (int) (yy_c_buf_p - b->yy_ch_buf); + + if ( b->yy_is_our_buffer ) + { + int new_size = b->yy_buf_size * 2; + + if ( new_size <= 0 ) + b->yy_buf_size += b->yy_buf_size / 8; + else + b->yy_buf_size *= 2; + + b->yy_ch_buf = (char *) + /* Include room in for 2 EOB chars. */ + yy_flex_realloc( (void *) b->yy_ch_buf, + b->yy_buf_size + 2 ); + } + else + /* Can't grow it, we don't own it. */ + b->yy_ch_buf = 0; + + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( + "fatal error - scanner input buffer overflow" ); + + yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; + + num_to_read = yy_current_buffer->yy_buf_size - + number_to_move - 1; +#endif + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + /* Read in more data. */ + YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]), + yy_n_chars, num_to_read ); + + yy_current_buffer->yy_n_chars = yy_n_chars; + } + + if ( yy_n_chars == 0 ) + { + if ( number_to_move == YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + yyrestart( yyin ); + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + yy_current_buffer->yy_buffer_status = + YY_BUFFER_EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + yy_n_chars += number_to_move; + yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; + yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + + yytext_ptr = &yy_current_buffer->yy_ch_buf[0]; + + return ret_val; + } + + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + +static yy_state_type yy_get_previous_state() + { + register yy_state_type yy_current_state; + register char *yy_cp; + + yy_current_state = yy_start; + + for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp ) + { + register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); + if ( yy_accept[yy_current_state] ) + { + yy_last_accepting_state = yy_current_state; + yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 41 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + } + + return yy_current_state; + } + + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + +#ifdef YY_USE_PROTOS +static yy_state_type yy_try_NUL_trans( yy_state_type yy_current_state ) +#else +static yy_state_type yy_try_NUL_trans( yy_current_state ) +yy_state_type yy_current_state; +#endif + { + register int yy_is_jam; + register char *yy_cp = yy_c_buf_p; + + register YY_CHAR yy_c = 1; + if ( yy_accept[yy_current_state] ) + { + yy_last_accepting_state = yy_current_state; + yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 41 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + yy_is_jam = (yy_current_state == 40); + + return yy_is_jam ? 
0 : yy_current_state; + } + + +#ifndef YY_NO_UNPUT +#ifdef YY_USE_PROTOS +static void yyunput( int c, register char *yy_bp ) +#else +static void yyunput( c, yy_bp ) +int c; +register char *yy_bp; +#endif + { + register char *yy_cp = yy_c_buf_p; + + /* undo effects of setting up yytext */ + *yy_cp = yy_hold_char; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + { /* need to shift things up to make room */ + /* +2 for EOB chars. */ + register int number_to_move = yy_n_chars + 2; + register char *dest = &yy_current_buffer->yy_ch_buf[ + yy_current_buffer->yy_buf_size + 2]; + register char *source = + &yy_current_buffer->yy_ch_buf[number_to_move]; + + while ( source > yy_current_buffer->yy_ch_buf ) + *--dest = *--source; + + yy_cp += (int) (dest - source); + yy_bp += (int) (dest - source); + yy_current_buffer->yy_n_chars = + yy_n_chars = yy_current_buffer->yy_buf_size; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + YY_FATAL_ERROR( "flex scanner push-back overflow" ); + } + + *--yy_cp = (char) c; + + + yytext_ptr = yy_bp; + yy_hold_char = *yy_cp; + yy_c_buf_p = yy_cp; + } +#endif /* ifndef YY_NO_UNPUT */ + + +#ifndef YY_NO_INPUT +#ifdef __cplusplus +static int yyinput() +#else +static int input() +#endif + { + int c; + + *yy_c_buf_p = yy_hold_char; + + if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. + */ + if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + /* This was really a NUL. */ + *yy_c_buf_p = '\0'; + + else + { /* need more input */ + int offset = yy_c_buf_p - yytext_ptr; + ++yy_c_buf_p; + + switch ( yy_get_next_buffer() ) + { + case EOB_ACT_LAST_MATCH: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + yyrestart( yyin ); + + /* fall through */ + + case EOB_ACT_END_OF_FILE: + { + if ( yywrap() ) + return EOF; + + if ( ! yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; +#ifdef __cplusplus + return yyinput(); +#else + return input(); +#endif + } + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = yytext_ptr + offset; + break; + } + } + } + + c = *(unsigned char *) yy_c_buf_p; /* cast for 8-bit char's */ + *yy_c_buf_p = '\0'; /* preserve yytext */ + yy_hold_char = *++yy_c_buf_p; + + + return c; + } +#endif /* YY_NO_INPUT */ + +#ifdef YY_USE_PROTOS +void yyrestart( FILE *input_file ) +#else +void yyrestart( input_file ) +FILE *input_file; +#endif + { + if ( ! yy_current_buffer ) + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); + + yy_init_buffer( yy_current_buffer, input_file ); + yy_load_buffer_state(); + } + + +#ifdef YY_USE_PROTOS +void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) +#else +void yy_switch_to_buffer( new_buffer ) +YY_BUFFER_STATE new_buffer; +#endif + { + if ( yy_current_buffer == new_buffer ) + return; + + if ( yy_current_buffer ) + { + /* Flush out information for old buffer. 
*/ + *yy_c_buf_p = yy_hold_char; + yy_current_buffer->yy_buf_pos = yy_c_buf_p; + yy_current_buffer->yy_n_chars = yy_n_chars; + } + + yy_current_buffer = new_buffer; + yy_load_buffer_state(); + + /* We don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. + */ + yy_did_buffer_switch_on_eof = 1; + } + + +#ifdef YY_USE_PROTOS +void yy_load_buffer_state( void ) +#else +void yy_load_buffer_state() +#endif + { + yy_n_chars = yy_current_buffer->yy_n_chars; + yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos; + yyin = yy_current_buffer->yy_input_file; + yy_hold_char = *yy_c_buf_p; + } + + +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_create_buffer( FILE *file, int size ) +#else +YY_BUFFER_STATE yy_create_buffer( file, size ) +FILE *file; +int size; +#endif + { + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_buf_size = size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 ); + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_is_our_buffer = 1; + + yy_init_buffer( b, file ); + + return b; + } + + +#ifdef YY_USE_PROTOS +void yy_delete_buffer( YY_BUFFER_STATE b ) +#else +void yy_delete_buffer( b ) +YY_BUFFER_STATE b; +#endif + { + if ( ! b ) + return; + + if ( b == yy_current_buffer ) + yy_current_buffer = (YY_BUFFER_STATE) 0; + + if ( b->yy_is_our_buffer ) + yy_flex_free( (void *) b->yy_ch_buf ); + + yy_flex_free( (void *) b ); + } + + + +#ifdef YY_USE_PROTOS +void yy_init_buffer( YY_BUFFER_STATE b, FILE *file ) +#else +void yy_init_buffer( b, file ) +YY_BUFFER_STATE b; +FILE *file; +#endif + + + { + yy_flush_buffer( b ); + + b->yy_input_file = file; + b->yy_fill_buffer = 1; + +#if YY_ALWAYS_INTERACTIVE + b->yy_is_interactive = 1; +#else +#if YY_NEVER_INTERACTIVE + b->yy_is_interactive = 0; +#else + b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0; +#endif +#endif + } + + +#ifdef YY_USE_PROTOS +void yy_flush_buffer( YY_BUFFER_STATE b ) +#else +void yy_flush_buffer( b ) +YY_BUFFER_STATE b; +#endif + + { + if ( ! b ) + return; + + b->yy_n_chars = 0; + + /* We always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. + */ + b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[0]; + + b->yy_at_bol = 1; + b->yy_buffer_status = YY_BUFFER_NEW; + + if ( b == yy_current_buffer ) + yy_load_buffer_state(); + } + + +#ifndef YY_NO_SCAN_BUFFER +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_buffer( char *base, yy_size_t size ) +#else +YY_BUFFER_STATE yy_scan_buffer( base, size ) +char *base; +yy_size_t size; +#endif + { + YY_BUFFER_STATE b; + + if ( size < 2 || + base[size-2] != YY_END_OF_BUFFER_CHAR || + base[size-1] != YY_END_OF_BUFFER_CHAR ) + /* They forgot to leave room for the EOB's. */ + return 0; + + b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); + if ( ! 
b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); + + b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ + b->yy_buf_pos = b->yy_ch_buf = base; + b->yy_is_our_buffer = 0; + b->yy_input_file = 0; + b->yy_n_chars = b->yy_buf_size; + b->yy_is_interactive = 0; + b->yy_at_bol = 1; + b->yy_fill_buffer = 0; + b->yy_buffer_status = YY_BUFFER_NEW; + + yy_switch_to_buffer( b ); + + return b; + } +#endif + + +#ifndef YY_NO_SCAN_STRING +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_string( yyconst char *yy_str ) +#else +YY_BUFFER_STATE yy_scan_string( yy_str ) +yyconst char *yy_str; +#endif + { + int len; + for ( len = 0; yy_str[len]; ++len ) + ; + + return yy_scan_bytes( yy_str, len ); + } +#endif + + +#ifndef YY_NO_SCAN_BYTES +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_bytes( yyconst char *bytes, int len ) +#else +YY_BUFFER_STATE yy_scan_bytes( bytes, len ) +yyconst char *bytes; +int len; +#endif + { + YY_BUFFER_STATE b; + char *buf; + yy_size_t n; + int i; + + /* Get memory for full buffer, including space for trailing EOB's. */ + n = len + 2; + buf = (char *) yy_flex_alloc( n ); + if ( ! buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); + + for ( i = 0; i < len; ++i ) + buf[i] = bytes[i]; + + buf[len] = buf[len+1] = YY_END_OF_BUFFER_CHAR; + + b = yy_scan_buffer( buf, n ); + if ( ! b ) + YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); + + /* It's okay to grow etc. this buffer, and we should throw it + * away when we're done. + */ + b->yy_is_our_buffer = 1; + + return b; + } +#endif + + +#ifndef YY_NO_PUSH_STATE +#ifdef YY_USE_PROTOS +static void yy_push_state( int new_state ) +#else +static void yy_push_state( new_state ) +int new_state; +#endif + { + if ( yy_start_stack_ptr >= yy_start_stack_depth ) + { + yy_size_t new_size; + + yy_start_stack_depth += YY_START_STACK_INCR; + new_size = yy_start_stack_depth * sizeof( int ); + + if ( ! yy_start_stack ) + yy_start_stack = (int *) yy_flex_alloc( new_size ); + + else + yy_start_stack = (int *) yy_flex_realloc( + (void *) yy_start_stack, new_size ); + + if ( ! yy_start_stack ) + YY_FATAL_ERROR( + "out of memory expanding start-condition stack" ); + } + + yy_start_stack[yy_start_stack_ptr++] = YY_START; + + BEGIN(new_state); + } +#endif + + +#ifndef YY_NO_POP_STATE +static void yy_pop_state() + { + if ( --yy_start_stack_ptr < 0 ) + YY_FATAL_ERROR( "start-condition stack underflow" ); + + BEGIN(yy_start_stack[yy_start_stack_ptr]); + } +#endif + + +#ifndef YY_NO_TOP_STATE +static int yy_top_state() + { + return yy_start_stack[yy_start_stack_ptr - 1]; + } +#endif + +#ifndef YY_EXIT_FAILURE +#define YY_EXIT_FAILURE 2 +#endif + +#ifdef YY_USE_PROTOS +static void yy_fatal_error( yyconst char msg[] ) +#else +static void yy_fatal_error( msg ) +char msg[]; +#endif + { + (void) fprintf( stderr, "%s\n", msg ); + exit( YY_EXIT_FAILURE ); + } + + + +/* Redefine yyless() so it works in section 3 code. */ + +#undef yyless +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + yytext[yyleng] = yy_hold_char; \ + yy_c_buf_p = yytext + n; \ + yy_hold_char = *yy_c_buf_p; \ + *yy_c_buf_p = '\0'; \ + yyleng = n; \ + } \ + while ( 0 ) + + +/* Internal utility routines. 
*/ + +#ifndef yytext_ptr +#ifdef YY_USE_PROTOS +static void yy_flex_strncpy( char *s1, yyconst char *s2, int n ) +#else +static void yy_flex_strncpy( s1, s2, n ) +char *s1; +yyconst char *s2; +int n; +#endif + { + register int i; + for ( i = 0; i < n; ++i ) + s1[i] = s2[i]; + } +#endif + +#ifdef YY_NEED_STRLEN +#ifdef YY_USE_PROTOS +static int yy_flex_strlen( yyconst char *s ) +#else +static int yy_flex_strlen( s ) +yyconst char *s; +#endif + { + register int n; + for ( n = 0; s[n]; ++n ) + ; + + return n; + } +#endif + + +#ifdef YY_USE_PROTOS +static void *yy_flex_alloc( yy_size_t size ) +#else +static void *yy_flex_alloc( size ) +yy_size_t size; +#endif + { + return (void *) malloc( size ); + } + +#ifdef YY_USE_PROTOS +static void *yy_flex_realloc( void *ptr, yy_size_t size ) +#else +static void *yy_flex_realloc( ptr, size ) +void *ptr; +yy_size_t size; +#endif + { + /* The cast to (char *) in the following accommodates both + * implementations that use char* generic pointers, and those + * that use void* generic pointers. It works with the latter + * because both ANSI C and C++ allow castless assignment from + * any pointer type to void*, and deal with argument conversions + * as though doing an assignment. + */ + return (void *) realloc( (char *) ptr, size ); + } + +#ifdef YY_USE_PROTOS +static void yy_flex_free( void *ptr ) +#else +static void yy_flex_free( ptr ) +void *ptr; +#endif + { + free( ptr ); + } + +#if YY_MAIN +int main() + { + yylex(); + return 0; + } +#endif +#line 82 "btl_pcie_lex.l" + + + +/* + * This cleans up at the end of the parse (since, in this case, we + * always parse the entire file) and prevents a memory leak. + */ +static int finish_parsing(void) +{ + if (NULL != YY_CURRENT_BUFFER) { + yy_delete_buffer(YY_CURRENT_BUFFER); +#if defined(YY_CURRENT_BUFFER_LVALUE) + YY_CURRENT_BUFFER_LVALUE = NULL; +#else + YY_CURRENT_BUFFER = NULL; +#endif /* YY_CURRENT_BUFFER_LVALUE */ + } + return YY_NULL; +} + + +static int btl_pcie_cfg_yywrap(void) +{ + btl_pcie_cfg_parse_done = true; + return 1; +} + + +/* + * Ensure that we have a valid yybuffer to use. Specifically, if this + * scanner is invoked a second time, finish_parsing() (above) will + * have been executed, and the current buffer will have been freed. + * Flex doesn't recognize this fact because as far as it's concerned, + * its internal state was already initialized, so it thinks it should + * have a valid buffer. Hence, here we ensure to give it a valid + * buffer. + */ +int btl_pcie_cfg_init_buffer(FILE *file) +{ + YY_BUFFER_STATE buf = yy_create_buffer(file, YY_BUF_SIZE); + yy_switch_to_buffer(buf); + + return 0; +} diff --git a/ompi/mca/btl/pcie/btl_pcie_lex.h b/ompi/mca/btl/pcie/btl_pcie_lex.h new file mode 100644 index 0000000000..6de15ece26 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_lex.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef BTL_PCIE_CFG_LEX_H_ +#define BTL_PCIE_CFG_LEX_H_ + +#include "opal_config.h" + +#ifdef malloc +#undef malloc +#endif +#ifdef realloc +#undef realloc +#endif +#ifdef free +#undef free +#endif + +#include + +int btl_pcie_cfg_yylex(void); +int btl_pcie_cfg_init_buffer(FILE *file); + +extern FILE *btl_pcie_cfg_yyin; +extern bool btl_pcie_cfg_parse_done; +extern char *btl_pcie_cfg_yytext; +extern int btl_pcie_cfg_yynewlines; + +/* + * Make lex-generated files not issue compiler warnings + */ +#define YY_STACK_USED 0 +#define YY_ALWAYS_INTERACTIVE 0 +#define YY_NEVER_INTERACTIVE 0 +#define YY_MAIN 0 +#define YY_NO_UNPUT 1 +#define YY_SKIP_YYWRAP 1 + +enum { + BTL_PCIE_CFG_PARSE_DONE = 1, + BTL_PCIE_CFG_PARSE_ERROR, + + BTL_PCIE_CFG_PARSE_NEWLINE, + BTL_PCIE_CFG_PARSE_HOSTNAME_CORE, + BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE, + BTL_PCIE_CFG_PARSE_DEVICE, + + BTL_PCIE_CFG_PARSE_MAX +}; + +#endif /* #ifndef BTL_PCIE_CFG_LEX_H_ */ diff --git a/ompi/mca/btl/pcie/btl_pcie_lex.l b/ompi/mca/btl/pcie/btl_pcie_lex.l new file mode 100644 index 0000000000..8b2db79d64 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_lex.l @@ -0,0 +1,125 @@ +%{ /* -*- C -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include +#if HAVE_UNISTD_H +#include +#endif + +#include "btl_pcie_lex.h" + +/* + * local functions + */ +static int finish_parsing(void) ; +static int btl_pcie_cfg_yywrap(void); + +/* + * global variables + */ +int btl_pcie_cfg_yynewlines = 1; +bool btl_pcie_cfg_parse_done = false; +char *btl_pcie_cfg_string = NULL; + +#define yyterminate() \ + return finish_parsing() + +%} + +WHITE [\f\t\v ] +CHAR [A-Za-z0-9_\-\.] +NAME_CHAR [A-Za-z0-9_\-\.\\\/] + +%x comment +%x section_name +%x section_end +%x value + +%% + +{WHITE}*\n { ++btl_pcie_cfg_yynewlines; + return BTL_PCIE_CFG_PARSE_NEWLINE; } +#.*\n { ++btl_pcie_cfg_yynewlines; + return BTL_PCIE_CFG_PARSE_NEWLINE; } +"//".*\n { ++btl_pcie_cfg_yynewlines; + return BTL_PCIE_CFG_PARSE_NEWLINE; } + +"/*" { BEGIN(comment); + return BTL_PCIE_CFG_PARSE_NEWLINE; } +[^*\n]* ; /* Eat up non '*'s */ +"*"+[^*/\n]* ; /* Eat '*'s not followed by a '/' */ +\n { ++btl_pcie_cfg_yynewlines; + return BTL_PCIE_CFG_PARSE_NEWLINE; } +"*"+"/" { BEGIN(INITIAL); /* Done with block comment */ + return BTL_PCIE_CFG_PARSE_NEWLINE; } + +{CHAR}+":"[0-9] { return BTL_PCIE_CFG_PARSE_HOSTNAME_CORE; } +{CHAR}+":"{NAME_CHAR}+ { return BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE; } + +{NAME_CHAR}+ { return BTL_PCIE_CFG_PARSE_DEVICE; } + +{WHITE}+ ; /* whitespace */ + +%% + + +/* + * This cleans up at the end of the parse (since, in this case, we + * always parse the entire file) and prevents a memory leak. 
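[Editor's note, not part of the patch] The lexer rules above classify whitespace-separated fields of the resources config files into the token values declared in btl_pcie_lex.h; the BTL's device-matching lookup (ompi_btl_pcie_cfg_get_matching_device(), used later in btl_pcie_proc.c) is the real consumer. A minimal, hypothetical driver that just dumps the token stream might look like this; the function name and file path are invented for illustration, while the functions, globals, and token constants are the ones declared above:

#include <stdio.h>

#include "btl_pcie_lex.h"

/* Editor's sketch: print each interesting token of a config file. */
static int dump_cfg_tokens(const char *path)
{
    int tok;
    FILE *fp = fopen(path, "r");

    if (NULL == fp) {
        return -1;
    }

    /* hand the open file to the scanner (also resets its buffer state) */
    btl_pcie_cfg_init_buffer(fp);

    /* 0 (YY_NULL) is returned once the whole file has been consumed */
    while (0 != (tok = btl_pcie_cfg_yylex())) {
        switch (tok) {
        case BTL_PCIE_CFG_PARSE_HOSTNAME_CORE:    /* e.g. "node01:0"      */
        case BTL_PCIE_CFG_PARSE_HOSTNAME_DEVICE:  /* e.g. "node01:axon0"  */
        case BTL_PCIE_CFG_PARSE_DEVICE:           /* e.g. "/dev/axon0"    */
            printf("line %d: token %d, text '%s'\n",
                   btl_pcie_cfg_yynewlines, tok, btl_pcie_cfg_yytext);
            break;
        default:
            break;   /* newlines, comment markers */
        }
    }

    fclose(fp);
    return 0;
}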
+ */ +static int finish_parsing(void) +{ + if (NULL != YY_CURRENT_BUFFER) { + yy_delete_buffer(YY_CURRENT_BUFFER); +#if defined(YY_CURRENT_BUFFER_LVALUE) + YY_CURRENT_BUFFER_LVALUE = NULL; +#else + YY_CURRENT_BUFFER = NULL; +#endif /* YY_CURRENT_BUFFER_LVALUE */ + } + return YY_NULL; +} + + +static int btl_pcie_cfg_yywrap(void) +{ + btl_pcie_cfg_parse_done = true; + return 1; +} + + +/* + * Ensure that we have a valid yybuffer to use. Specifically, if this + * scanner is invoked a second time, finish_parsing() (above) will + * have been executed, and the current buffer will have been freed. + * Flex doesn't recognize this fact because as far as it's concerned, + * its internal state was already initialized, so it thinks it should + * have a valid buffer. Hence, here we ensure to give it a valid + * buffer. + */ +int btl_pcie_cfg_init_buffer(FILE *file) +{ + YY_BUFFER_STATE buf = yy_create_buffer(file, YY_BUF_SIZE); + yy_switch_to_buffer(buf); + + return 0; +} diff --git a/ompi/mca/btl/pcie/btl_pcie_proc.c b/ompi/mca/btl/pcie/btl_pcie_proc.c new file mode 100644 index 0000000000..60f2233782 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_proc.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "opal/class/opal_hash_table.h" +#include "ompi/mca/btl/base/btl_base_error.h" +#include "ompi/mca/mpool/base/base.h" +#include "ompi/runtime/ompi_module_exchange.h" + +#include "btl_pcie.h" +#include "btl_pcie_proc.h" + +static void mca_btl_pcie_proc_construct(mca_btl_pcie_proc_t* proc); +static void mca_btl_pcie_proc_destruct(mca_btl_pcie_proc_t* proc); + +OBJ_CLASS_INSTANCE(mca_btl_pcie_proc_t, + opal_list_item_t, mca_btl_pcie_proc_construct, + mca_btl_pcie_proc_destruct); + +void mca_btl_pcie_proc_construct(mca_btl_pcie_proc_t* proc) +{ + proc->proc_ompi = 0; + proc->proc_addr_count = 0; + proc->proc_endpoint_count = 0; + OBJ_CONSTRUCT(&proc->proc_lock, opal_mutex_t); + /* add to list of all proc instance */ + OPAL_THREAD_LOCK(&mca_btl_pcie_component.pcie_lock); + opal_list_append(&mca_btl_pcie_component.pcie_procs, &proc->super); + OPAL_THREAD_UNLOCK(&mca_btl_pcie_component.pcie_lock); +} + +/* + * Cleanup ib proc instance + */ + +void mca_btl_pcie_proc_destruct(mca_btl_pcie_proc_t* proc) +{ + /* remove from list of all proc instances */ + OPAL_THREAD_LOCK(&mca_btl_pcie_component.pcie_lock); + opal_list_remove_item(&mca_btl_pcie_component.pcie_procs, &proc->super); + OPAL_THREAD_UNLOCK(&mca_btl_pcie_component.pcie_lock); + + OBJ_DESTRUCT(&proc->proc_lock); +} + + +/* + * Look for an existing TEMPLATE process instances based on the associated + * ompi_proc_t instance. + */ +static mca_btl_pcie_proc_t* mca_btl_pcie_proc_lookup_ompi(ompi_proc_t* ompi_proc) +{ + mca_btl_pcie_proc_t* pcie_proc; + + OPAL_THREAD_LOCK(&mca_btl_pcie_component.pcie_lock); + + for(pcie_proc = (mca_btl_pcie_proc_t*) + opal_list_get_first(&mca_btl_pcie_component.pcie_procs); + pcie_proc != (mca_btl_pcie_proc_t*) + opal_list_get_end(&mca_btl_pcie_component.pcie_procs); + pcie_proc = (mca_btl_pcie_proc_t*)opal_list_get_next(pcie_proc)) { + + if(pcie_proc->proc_ompi == ompi_proc) { + OPAL_THREAD_UNLOCK(&mca_btl_pcie_component.pcie_lock); + return pcie_proc; + } + + } + + OPAL_THREAD_UNLOCK(&mca_btl_pcie_component.pcie_lock); + + return NULL; +} + + +/* + * Create a TEMPLATE process structure. 
There is a one-to-one correspondence + * between a ompi_proc_t and a mca_btl_pcie_proc_t instance. We cache + * additional data (specifically the list of mca_btl_pcie_endpoint_t instances, + * and published addresses) associated w/ a given destination on this + * datastructure. + */ + +int mca_btl_pcie_proc_create(ompi_proc_t* ompi_proc, + mca_btl_pcie_module_t* pcie_btl, + mca_btl_pcie_proc_t** ret_proc) +{ + mca_btl_pcie_proc_t* pcie_proc = NULL; + char *rem_dev_name = NULL, *lcl_dev_name = NULL; + char *rem_hostname = NULL; + int rc, num_peers, i; + size_t size; + mca_btl_pcie_modex_info_t *modex_info; + + /* Check if already have proc structure for this ompi process */ + pcie_proc = mca_btl_pcie_proc_lookup_ompi(ompi_proc); + + if(pcie_proc != NULL) { + /* Gotcha! */ + *ret_proc = pcie_proc; + return OMPI_SUCCESS; + } + + /* query for the peer's device name info */ + rc = ompi_modex_recv(&mca_btl_pcie_component.super.btl_version, + ompi_proc, + (void*)&modex_info, + &size); + if (OMPI_SUCCESS != rc) { + opal_output(mca_btl_base_output, "[%s:%d] ompi_modex_recv failed for peer %s", + __FILE__, __LINE__, ORTE_NAME_PRINT(&ompi_proc->proc_name)); + OBJ_RELEASE(pcie_proc); + *ret_proc = NULL; + return OMPI_ERROR; + } + + if (0 == size || 0 != size % sizeof(mca_btl_pcie_modex_info_t)) { + *ret_proc = NULL; + return OMPI_SUCCESS; + } + + num_peers = size / sizeof(mca_btl_pcie_modex_info_t); + + for (i = 0 ; i < num_peers ; ++i) { + MCA_BTL_PCIE_MODEX_INFO_NTOH(modex_info[i]); + rem_hostname = modex_info[i].hostname; + rem_dev_name = modex_info[i].devicename; + lcl_dev_name = ompi_btl_pcie_cfg_get_matching_device(rem_hostname, + rem_dev_name); + if (NULL != lcl_dev_name && + 0 == strcmp(lcl_dev_name, pcie_btl->lcl_dev_name)) { + /* we have a match. continue onward */ + break; + } + } + /* make sure the local device names match */ + if(NULL == lcl_dev_name || + 0 != strcmp(lcl_dev_name, pcie_btl->lcl_dev_name)){ + *ret_proc = NULL; + return OMPI_SUCCESS; + } + + BTL_VERBOSE(("Have matching devices: %s:%s <-> %s:%s", + orte_process_info.nodename, + pcie_btl->lcl_dev_name, + rem_hostname, + rem_dev_name)); + + pcie_proc = OBJ_NEW(mca_btl_pcie_proc_t); + if(NULL == pcie_proc){ + *ret_proc = NULL; + return OMPI_ERR_OUT_OF_RESOURCE; + } + + pcie_proc->proc_ompi = ompi_proc; + + /* build a unique identifier (of arbitrary + * size) to represent the proc */ + pcie_proc->proc_guid = ompi_proc->proc_name; + + /* Initialize number of peer */ + pcie_proc->proc_endpoint_count = 1; + + pcie_proc->endpoint_proc = OBJ_NEW(mca_btl_pcie_endpoint_t); + if(NULL == pcie_proc->endpoint_proc) { + free(rem_dev_name); + *ret_proc = NULL; + return OMPI_ERR_OUT_OF_RESOURCE; + } + + pcie_proc->endpoint_proc->lcl_dev_name = lcl_dev_name; + pcie_proc->endpoint_proc->rem_dev_name = rem_dev_name; + pcie_proc->endpoint_proc->endpoint_proc = pcie_proc; + pcie_proc->endpoint_proc->endpoint_btl = pcie_btl; + + if(OMPI_SUCCESS != mca_btl_pcie_endpoint_init(pcie_proc->endpoint_proc)) { + BTL_ERROR(("Error initializing the PCIE endpoint \n")); + *ret_proc = NULL; + return OMPI_ERROR; + } + + *ret_proc = pcie_proc; + return OMPI_SUCCESS; +} + + diff --git a/ompi/mca/btl/pcie/btl_pcie_proc.h b/ompi/mca/btl/pcie/btl_pcie_proc.h new file mode 100644 index 0000000000..60278ecb45 --- /dev/null +++ b/ompi/mca/btl/pcie/btl_pcie_proc.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_BTL_PCIE_PROC_H +#define MCA_BTL_PCIE_PROC_H + +#include "ompi_config.h" + +#include "opal/class/opal_list.h" +#include "ompi/proc/proc.h" + +#include "btl_pcie.h" +#include "btl_pcie_endpoint.h" + +BEGIN_C_DECLS + +/** + * Represents the state of a remote process and the set of addresses + * that it exports. Also cache an instance of mca_btl_base_endpoint_t for + * each + * BTL instance that attempts to open a connection to the process. + */ +struct mca_btl_pcie_proc_t { + opal_list_item_t super; + /**< allow proc to be placed on a list */ + + ompi_proc_t *proc_ompi; + /**< pointer to corresponding ompi_proc_t */ + + orte_process_name_t proc_guid; + /**< globally unique identifier for the process */ + + size_t proc_addr_count; + /**< number of addresses published by endpoint */ + + struct mca_btl_base_endpoint_t *endpoint_proc; + /**< endpoint that has been created to access this proc */ + + size_t proc_endpoint_count; + /**< number of endpoints */ + + opal_mutex_t proc_lock; + /**< lock to protect against concurrent access to proc state */ + +}; +typedef struct mca_btl_pcie_proc_t mca_btl_pcie_proc_t; +OBJ_CLASS_DECLARATION(mca_btl_pcie_proc_t); + +int mca_btl_pcie_proc_create(ompi_proc_t* ompi_proc, + mca_btl_pcie_module_t* pcie_btl, + mca_btl_pcie_proc_t** ret_proc); + +END_C_DECLS + +#endif /* #ifndef MCA_BTL_PCIE_PROC_H */ diff --git a/ompi/mca/btl/pcie/configure.m4 b/ompi/mca/btl/pcie/configure.m4 new file mode 100644 index 0000000000..e33317f842 --- /dev/null +++ b/ompi/mca/btl/pcie/configure.m4 @@ -0,0 +1,31 @@ +# -*- shell-script -*- +# +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + + +# MCA_btl_pcie_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_btl_pcie_CONFIG],[ + OMPI_CHECK_PCIE([btl_pcie], + [btl_pcie_happy="yes"], + [btl_pcie_happy="no"]) + + AS_IF([test "$btl_pcie_happy" = "yes"], + [btl_pcie_WRAPPER_EXTRA_LDFLAGS="$btl_pcie_LDFLAGS" + btl_pcie_WRAPPER_EXTRA_LIBS="$btl_pcie_LIBS" + $1], + [$2]) + + # substitute in the things needed to build pcie + AC_SUBST([btl_pcie_CPPFLAGS]) + AC_SUBST([btl_pcie_LDFLAGS]) + AC_SUBST([btl_pcie_LIBS]) +])dnl diff --git a/ompi/mca/btl/pcie/configure.params b/ompi/mca/btl/pcie/configure.params new file mode 100644 index 0000000000..3513f8d956 --- /dev/null +++ b/ompi/mca/btl/pcie/configure.params @@ -0,0 +1,24 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Specific to this module + +PARAM_CONFIG_FILES="Makefile" diff --git a/ompi/mca/btl/pcie/help-mpi-btl-pcie.txt b/ompi/mca/btl/pcie/help-mpi-btl-pcie.txt new file mode 100644 index 0000000000..7964b0af7f --- /dev/null +++ b/ompi/mca/btl/pcie/help-mpi-btl-pcie.txt @@ -0,0 +1,20 @@ +# -*- text -*- +# Copyright (c) 2007 Los Alamos National Security, LLC. +# All righs reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English help file for Open MPI's IBM PCIe support + +[initialization:more-than-one-cpu] +The PCIe BTL found that the CPU affinity mask for the current process +includes more than one CPU (%d). When using Open MPI on the Cell +machines with the PCIe driver, the affinity mask must include exactly +one CPU. +[initialization:no-cpus] +The PCIe BTL was unable to find any CPUs in the affinity mask for the +current process. This usually indicates a system issue that must be +resolved by the system administrator. diff --git a/ompi/mca/btl/pcie/mca-btl-pcie-local-resources.cfg b/ompi/mca/btl/pcie/mca-btl-pcie-local-resources.cfg new file mode 100644 index 0000000000..9f4fb43c48 --- /dev/null +++ b/ompi/mca/btl/pcie/mca-btl-pcie-local-resources.cfg @@ -0,0 +1,159 @@ +# hostname:core device +n01-001-0:0 /dev/axon0 +n01-001-0:1 /dev/axon1 +n01-001-0:2 /dev/axon2 +n01-001-0:3 /dev/axon3 + +n01-001-1:0 /dev/axon0 +n01-001-1:1 /dev/axon1 + +n01-001-2:0 /dev/axon0 +n01-001-2:1 /dev/axon1 + +n01-002-0:0 /dev/axon0 +n01-002-0:1 /dev/axon1 +n01-002-0:2 /dev/axon2 +n01-002-0:3 /dev/axon3 + +n01-002-1:0 /dev/axon0 +n01-002-1:1 /dev/axon1 + +n01-002-2:0 /dev/axon0 +n01-002-2:1 /dev/axon1 + +n01-003-0:0 /dev/axon0 +n01-003-0:1 /dev/axon1 +n01-003-0:2 /dev/axon2 +n01-003-0:3 /dev/axon3 + +n01-003-1:0 /dev/axon0 +n01-003-1:1 /dev/axon1 + +n01-004-0:0 /dev/axon0 +n01-004-0:1 /dev/axon1 +n01-004-0:2 /dev/axon2 +n01-004-0:3 /dev/axon3 + +n01-004-1:0 /dev/axon0 +n01-004-1:1 /dev/axon1 + +n01-005-0:0 /dev/axon0 +n01-005-0:1 /dev/axon1 +n01-005-0:2 /dev/axon2 +n01-005-0:3 /dev/axon3 + +n01-005-1:0 /dev/axon0 +n01-005-1:1 /dev/axon1 + +n01-005-2:0 /dev/axon0 +n01-005-2:1 /dev/axon1 + +n01-006-0:0 /dev/axon0 +n01-006-0:1 /dev/axon1 +n01-006-0:2 /dev/axon2 +n01-006-0:3 /dev/axon3 + +n01-006-1:0 /dev/axon0 +n01-006-1:1 /dev/axon1 + +n01-006-2:0 /dev/axon0 +n01-006-2:1 /dev/axon1 + +n01-007-0:0 /dev/axon0 +n01-007-0:1 /dev/axon1 +n01-007-0:2 /dev/axon2 +n01-007-0:3 /dev/axon3 + +n01-007-1:0 /dev/axon0 +n01-007-1:1 /dev/axon1 + +n01-007-2:0 /dev/axon0 +n01-007-2:1 /dev/axon1 + +n01-008-0:0 /dev/axon0 +n01-008-0:1 /dev/axon1 +n01-008-0:2 /dev/axon2 +n01-008-0:3 /dev/axon3 + +n01-008-1:0 /dev/axon0 +n01-008-1:1 /dev/axon1 + +n01-008-2:0 /dev/axon0 +n01-008-2:1 /dev/axon1 + +n01-009-0:0 /dev/axon0 +n01-009-0:1 /dev/axon1 +n01-009-0:2 /dev/axon2 +n01-009-0:3 /dev/axon3 + +n01-009-1:0 /dev/axon0 +n01-009-1:1 /dev/axon1 + +n01-009-2:0 /dev/axon0 +n01-009-2:1 /dev/axon1 + +n01-010-0:0 /dev/axon0 +n01-010-0:1 /dev/axon1 +n01-010-0:2 /dev/axon2 +n01-010-0:3 /dev/axon3 + +n01-010-1:0 /dev/axon0 +n01-010-1:1 /dev/axon1 + +n01-010-2:0 /dev/axon0 +n01-010-2:1 /dev/axon1 + +n01-011-0:0 /dev/axon0 +n01-011-0:1 /dev/axon1 +n01-011-0:2 /dev/axon2 +n01-011-0:3 /dev/axon3 + +n01-011-1:0 /dev/axon0 +n01-011-1:1 /dev/axon1 + +n01-011-2:0 /dev/axon0 +n01-011-2:1 /dev/axon1 + +n01-012-0:0 /dev/axon0 +n01-012-0:1 /dev/axon1 +n01-012-0:2 /dev/axon2 +n01-012-0:3 /dev/axon3 + +n01-012-1:0 /dev/axon0 +n01-012-1:1 
/dev/axon1 + +n01-012-2:0 /dev/axon0 +n01-012-2:1 /dev/axon1 + +n01-013-0:0 /dev/axon0 +n01-013-0:1 /dev/axon1 +n01-013-0:2 /dev/axon2 +n01-013-0:3 /dev/axon3 + +n01-013-1:0 /dev/axon0 +n01-013-1:1 /dev/axon1 + +n01-013-2:0 /dev/axon0 +n01-013-2:1 /dev/axon1 + +n01-014-0:0 /dev/axon0 +n01-014-0:1 /dev/axon1 +n01-014-0:2 /dev/axon2 +n01-014-0:3 /dev/axon3 + +n01-014-1:0 /dev/axon0 +n01-014-1:1 /dev/axon1 + +n01-014-2:0 /dev/axon0 +n01-014-2:1 /dev/axon1 + +n01-015-0:0 /dev/axon0 +n01-015-0:1 /dev/axon1 +n01-015-0:2 /dev/axon2 +n01-015-0:3 /dev/axon3 + +n01-015-1:0 /dev/axon0 +n01-015-1:1 /dev/axon1 + +n01-015-2:0 /dev/axon0 +n01-015-2:1 /dev/axon1 diff --git a/ompi/mca/btl/pcie/mca-btl-pcie-remote-resources.cfg b/ompi/mca/btl/pcie/mca-btl-pcie-remote-resources.cfg new file mode 100644 index 0000000000..3f0531361a --- /dev/null +++ b/ompi/mca/btl/pcie/mca-btl-pcie-remote-resources.cfg @@ -0,0 +1,82 @@ +# opteron_host:device cell_host:device + +n01-001-0:/dev/axon0 n01-001-1:/dev/axon0 +n01-001-0:/dev/axon1 n01-001-1:/dev/axon1 +n01-001-0:/dev/axon2 n01-001-2:/dev/axon0 +n01-001-0:/dev/axon3 n01-001-2:/dev/axon1 + +n01-002-0:/dev/axon0 n01-002-1:/dev/axon0 +n01-002-0:/dev/axon1 n01-002-1:/dev/axon1 +n01-002-0:/dev/axon2 n01-002-2:/dev/axon0 +n01-002-0:/dev/axon3 n01-002-2:/dev/axon1 + +n01-003-0:/dev/axon0 n01-003-1:/dev/axon0 +n01-003-0:/dev/axon1 n01-003-1:/dev/axon1 +n01-003-0:/dev/axon2 n01-003-2:/dev/axon0 +n01-003-0:/dev/axon3 n01-003-2:/dev/axon1 + +n01-004-0:/dev/axon0 n01-004-1:/dev/axon0 +n01-004-0:/dev/axon1 n01-004-1:/dev/axon1 +n01-004-0:/dev/axon2 n01-004-2:/dev/axon0 +n01-004-0:/dev/axon3 n01-004-2:/dev/axon1 + +n01-005-0:/dev/axon0 n01-005-1:/dev/axon0 +n01-005-0:/dev/axon1 n01-005-1:/dev/axon1 +n01-005-0:/dev/axon2 n01-005-2:/dev/axon0 +n01-005-0:/dev/axon3 n01-005-2:/dev/axon1 + +n01-006-0:/dev/axon0 n01-006-1:/dev/axon0 +n01-006-0:/dev/axon1 n01-006-1:/dev/axon1 +n01-006-0:/dev/axon2 n01-006-2:/dev/axon0 +n01-006-0:/dev/axon3 n01-006-2:/dev/axon1 + +n01-007-0:/dev/axon0 n01-007-1:/dev/axon0 +n01-007-0:/dev/axon1 n01-007-1:/dev/axon1 +n01-007-0:/dev/axon2 n01-007-2:/dev/axon0 +n01-007-0:/dev/axon3 n01-007-2:/dev/axon1 + +n01-008-0:/dev/axon0 n01-008-1:/dev/axon0 +n01-008-0:/dev/axon1 n01-008-1:/dev/axon1 +n01-008-0:/dev/axon2 n01-008-2:/dev/axon0 +n01-008-0:/dev/axon3 n01-008-2:/dev/axon1 + +n01-009-0:/dev/axon0 n01-009-1:/dev/axon0 +n01-009-0:/dev/axon1 n01-009-1:/dev/axon1 +n01-009-0:/dev/axon2 n01-009-2:/dev/axon0 +n01-009-0:/dev/axon3 n01-009-2:/dev/axon1 + +n01-010-0:/dev/axon0 n01-010-1:/dev/axon0 +n01-010-0:/dev/axon1 n01-010-1:/dev/axon1 +n01-010-0:/dev/axon2 n01-010-2:/dev/axon0 +n01-010-0:/dev/axon3 n01-010-2:/dev/axon1 + +n01-011-0:/dev/axon0 n01-011-1:/dev/axon0 +n01-011-0:/dev/axon1 n01-011-1:/dev/axon1 +n01-011-0:/dev/axon2 n01-011-2:/dev/axon0 +n01-011-0:/dev/axon3 n01-011-2:/dev/axon1 + +n01-012-0:/dev/axon0 n01-012-1:/dev/axon0 +n01-012-0:/dev/axon1 n01-012-1:/dev/axon1 +n01-012-0:/dev/axon2 n01-012-2:/dev/axon0 +n01-012-0:/dev/axon3 n01-012-2:/dev/axon1 + +n01-013-0:/dev/axon0 n01-013-1:/dev/axon0 +n01-013-0:/dev/axon1 n01-013-1:/dev/axon1 +n01-013-0:/dev/axon2 n01-013-2:/dev/axon0 +n01-013-0:/dev/axon3 n01-013-2:/dev/axon1 + +n01-014-0:/dev/axon0 n01-014-1:/dev/axon0 +n01-014-0:/dev/axon1 n01-014-1:/dev/axon1 +n01-014-0:/dev/axon2 n01-014-2:/dev/axon0 +n01-014-0:/dev/axon3 n01-014-2:/dev/axon1 + +n01-015-0:/dev/axon0 n01-015-1:/dev/axon0 +n01-015-0:/dev/axon1 n01-015-1:/dev/axon1 +n01-015-0:/dev/axon2 n01-015-2:/dev/axon0 +n01-015-0:/dev/axon3 
n01-015-2:/dev/axon1 + +n01-016-0:/dev/axon0 n01-016-1:/dev/axon0 +n01-016-0:/dev/axon1 n01-016-1:/dev/axon1 +n01-016-0:/dev/axon2 n01-016-2:/dev/axon0 +n01-016-0:/dev/axon3 n01-016-2:/dev/axon1 + diff --git a/ompi/mca/mpool/pcie/Makefile.am b/ompi/mca/mpool/pcie/Makefile.am new file mode 100644 index 0000000000..a77b98d692 --- /dev/null +++ b/ompi/mca/mpool/pcie/Makefile.am @@ -0,0 +1,57 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(mpool_pcie_CPPFLAGS) + +sources = \ + mpool_pcie.h \ + mpool_pcie_component.c \ + mpool_pcie_module.c + +if WANT_INSTALL_HEADERS +ompidir = $(includedir)/openmpi/ompi/mca/mpool/pcie +ompi_HEADERS = mpool_pcie.h +else +ompidir = $(includedir) +ompi_HEADERS = +endif + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). + +if OMPI_BUILD_mpool_pcie_DSO +component_noinst = +component_install = mca_mpool_pcie.la +else +component_noinst = libmca_mpool_pcie.la +component_install = +endif + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_mpool_pcie_la_SOURCES = $(sources) +mca_mpool_pcie_la_LDFLAGS = -module -avoid-version +mca_mpool_pcie_la_LIBADD = $(mpool_pcie_LIBS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_mpool_pcie_la_SOURCES = $(sources) +libmca_mpool_pcie_la_LDFLAGS = -module -avoid-version +libmca_mpool_pcie_la_LIBADD = $(mpool_pcie_LIBS) diff --git a/ompi/mca/mpool/pcie/configure.m4 b/ompi/mca/mpool/pcie/configure.m4 new file mode 100644 index 0000000000..f093097c40 --- /dev/null +++ b/ompi/mca/mpool/pcie/configure.m4 @@ -0,0 +1,31 @@ +# -*- shell-script -*- +# +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + + +# MCA_mpool_pcie_CONFIG(action-if-can-compile, +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_mpool_pcie_CONFIG],[ + OMPI_CHECK_PCIE([mpool_pcie], + [mpool_pcie_happy="yes"], + [mpool_pcie_happy="no"]) + + AS_IF([test "$mpool_pcie_happy" = "yes"], + [mpool_pcie_WRAPPER_EXTRA_LDFLAGS="$mpool_pcie_LDFLAGS" + mpool_pcie_WRAPPER_EXTRA_LIBS="$mpool_pcie_LIBS" + $1], + [$2]) + + # substitute in the things needed to build pcie + AC_SUBST([mpool_pcie_CPPFLAGS]) + AC_SUBST([mpool_pcie_LDFLAGS]) + AC_SUBST([mpool_pcie_LIBS]) +])dnl diff --git a/ompi/mca/mpool/pcie/configure.params b/ompi/mca/mpool/pcie/configure.params new file mode 100644 index 0000000000..71d3c8009d --- /dev/null +++ b/ompi/mca/mpool/pcie/configure.params @@ -0,0 +1,26 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. 
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006 Voltaire. All rights reserved. +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. +# +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Specific to this module + +PARAM_CONFIG_FILES="Makefile" diff --git a/ompi/mca/mpool/pcie/mpool_pcie.h b/ompi/mca/mpool/pcie/mpool_pcie.h new file mode 100644 index 0000000000..3d65fe3826 --- /dev/null +++ b/ompi/mca/mpool/pcie/mpool_pcie.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2007 Los Alamos National Security, LLC. + * All righs reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_MPOOL_PCIE_H +#define MCA_MPOOL_PCIE_H + +#include "opal/class/opal_list.h" +#include "opal/event/event.h" +#include "ompi/class/ompi_free_list.h" +#include "ompi/mca/allocator/allocator.h" +#include "ompi/mca/mpool/mpool.h" + +BEGIN_C_DECLS + +struct mca_mpool_pcie_component_t { + mca_mpool_base_component_t super; + int verbose; +}; +typedef struct mca_mpool_pcie_component_t mca_mpool_pcie_component_t; + +OMPI_MODULE_DECLSPEC extern mca_mpool_pcie_component_t mca_mpool_pcie_component; + +struct mca_mpool_pcie_module_t { + mca_mpool_base_module_t super; + void* base; + size_t offset; + size_t len; + +}; typedef struct mca_mpool_pcie_module_t mca_mpool_pcie_module_t; + + +struct mca_mpool_base_resources_t { + void *base; + size_t len; +}; +typedef struct mca_mpool_base_resources_t mca_mpool_base_resources_t; + + +/* + * Initializes the mpool module. + */ +void mca_mpool_pcie_module_init(mca_mpool_pcie_module_t* mpool); + + +/* + * Returns base address of shared memory mapping. + */ +void* mca_mpool_pcie_base(mca_mpool_base_module_t*); + + +/** + * Allocate block of shared memory. + */ +void* mca_mpool_pcie_alloc(mca_mpool_base_module_t* mpool, + size_t size, + size_t align, + uint32_t flags, + mca_mpool_base_registration_t** registration); + + +/** + * realloc function typedef + */ +void* mca_mpool_pcie_realloc(mca_mpool_base_module_t* mpool, + void* addr, + size_t size, + mca_mpool_base_registration_t** registration); + + +/** + * free function typedef + */ +void mca_mpool_pcie_free(mca_mpool_base_module_t* mpool, + void * addr, + mca_mpool_base_registration_t* registration); + + +END_C_DECLS + +#endif diff --git a/ompi/mca/mpool/pcie/mpool_pcie_component.c b/ompi/mca/mpool/pcie/mpool_pcie_component.c new file mode 100644 index 0000000000..7a1bbea7fc --- /dev/null +++ b/ompi/mca/mpool/pcie/mpool_pcie_component.c @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights + * reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "ompi_config.h"
+
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif /* HAVE_UNISTD_H*/
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif /* HAVE_STDLIB_H */
+#include
+
+#include "opal/util/output.h"
+#include "opal/mca/base/base.h"
+#include "opal/mca/base/mca_base_param.h"
+
+#include "orte/util/proc_info.h"
+
+#include "ompi/proc/proc.h"
+#include "ompi/mca/allocator/base/base.h"
+
+#include "mpool_pcie.h"
+
+/*
+ * Local functions
+ */
+static int mca_mpool_pcie_open(void);
+static int mca_mpool_pcie_close( void );
+static mca_mpool_base_module_t* mca_mpool_pcie_init(
+    struct mca_mpool_base_resources_t* resources);
+
+mca_mpool_pcie_component_t mca_mpool_pcie_component = {
+    {
+        /* First, the mca_base_component_t struct containing meta
+           information about the component itself */
+
+        {
+            /* Indicate that we are a mpool v2.0.0 component (which also
+               implies a specific MCA version) */
+
+            MCA_MPOOL_BASE_VERSION_2_0_0,
+
+            "pcie", /* MCA component name */
+            OMPI_MAJOR_VERSION, /* MCA component major version */
+            OMPI_MINOR_VERSION, /* MCA component minor version */
+            OMPI_RELEASE_VERSION, /* MCA component release version */
+            mca_mpool_pcie_open, /* component open */
+            mca_mpool_pcie_close
+        },
+
+        /* Next the MCA v2.0.0 component meta data */
+
+        {
+            /* The component is not checkpoint ready */
+            false
+        },
+
+        mca_mpool_pcie_init
+    }
+};
+
+
+static int
+mca_mpool_pcie_open(void)
+{
+    return OMPI_SUCCESS;
+}
+
+
+static int
+mca_mpool_pcie_close(void)
+{
+    return OMPI_SUCCESS;
+}
+
+
+static mca_mpool_base_module_t*
+mca_mpool_pcie_init(struct mca_mpool_base_resources_t* resources)
+{
+    mca_mpool_pcie_module_t* mpool_module;
+
+    mpool_module = (mca_mpool_pcie_module_t*)malloc(sizeof(mca_mpool_pcie_module_t));
+    if(NULL == mpool_module) return NULL;
+
+    mpool_module->super.mpool_component = &mca_mpool_pcie_component.super;
+    mpool_module->super.mpool_base = NULL; /* no base .. */
+    mpool_module->super.mpool_alloc = mca_mpool_pcie_alloc;
+    mpool_module->super.mpool_realloc = mca_mpool_pcie_realloc;
+    mpool_module->super.mpool_free = mca_mpool_pcie_free;
+    mpool_module->super.mpool_register = NULL;
+    mpool_module->super.mpool_find = NULL;
+    mpool_module->super.mpool_deregister = NULL;
+    mpool_module->super.mpool_release_memory = NULL;
+    mpool_module->super.mpool_finalize = NULL;
+    mpool_module->super.rcache = NULL;
+    mpool_module->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM;
+
+    mpool_module->base = resources->base;
+    mpool_module->len = resources->len;
+    mpool_module->offset = 0;
+
+    return (mca_mpool_base_module_t*) mpool_module;
+}
+
diff --git a/ompi/mca/mpool/pcie/mpool_pcie_module.c b/ompi/mca/mpool/pcie/mpool_pcie_module.c
new file mode 100644
index 0000000000..965c2fa53a
--- /dev/null
+++ b/ompi/mca/mpool/pcie/mpool_pcie_module.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
+ *                         reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" + +#include + +#include "opal/util/output.h" + +#include "mpool_pcie.h" + +void +mca_mpool_pcie_module_init(mca_mpool_pcie_module_t* mpool) +{ + +} + + +void* mca_mpool_pcie_base(mca_mpool_base_module_t* mpool) +{ + return ((mca_mpool_pcie_module_t*) mpool)->base; +} + + +void* +mca_mpool_pcie_alloc(mca_mpool_base_module_t* mpool, + size_t size, + size_t align, + uint32_t flags, + mca_mpool_base_registration_t** registration) +{ + mca_mpool_pcie_module_t* mpool_pcie = + (mca_mpool_pcie_module_t*) mpool; + void *addr; + + if(mpool_pcie->offset + size > mpool_pcie->len) { + addr = NULL; + } else { + addr = (char*)mpool_pcie->base + mpool_pcie->offset; + mpool_pcie->offset += size; + } + + return addr; +} + + +void* +mca_mpool_pcie_realloc(mca_mpool_base_module_t* mpool, + void* addr, + size_t size, + mca_mpool_base_registration_t** registration) +{ + /* we don't need no realloc */ + return NULL; +} + + +void +mca_mpool_pcie_free(mca_mpool_base_module_t* mpool, void * addr, + mca_mpool_base_registration_t* registration) +{ + /* we don't need no free */ +}
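
Editor's note (not part of the patch): the mpool_pcie module above is a plain
pointer-bump allocator over the fixed window handed in through
mca_mpool_base_resources_t (base/len), with no reclamation; mca_mpool_pcie_free
and mca_mpool_pcie_realloc are deliberately no-ops, presumably because the BTL
carves the device window up once at setup and holds it for the life of the job.
The standalone sketch below shows that allocation behavior in isolation, using
the same "offset + size > len" bound check as mca_mpool_pcie_alloc(). The names
pcie_window and window_alloc, and the 1024-byte region, are illustrative
assumptions, not identifiers from the patch.

    /* Minimal sketch of the bump-allocation scheme used by mpool_pcie. */
    #include <stddef.h>
    #include <stdio.h>

    struct pcie_window {
        char  *base;    /* start of the device-visible region */
        size_t len;     /* total length of the region */
        size_t offset;  /* next free byte */
    };

    static void *window_alloc(struct pcie_window *w, size_t size)
    {
        void *addr;
        if (w->offset + size > w->len) {
            return NULL;            /* window exhausted */
        }
        addr = w->base + w->offset;
        w->offset += size;          /* bump; nothing is ever given back */
        return addr;
    }

    int main(void)
    {
        static char region[1024];
        struct pcie_window w = { region, sizeof(region), 0 };

        void *a = window_alloc(&w, 256);
        void *b = window_alloc(&w, 896);   /* fails: only 768 bytes left */

        printf("a=%p b=%p remaining=%zu\n", a, b, w.len - w.offset);
        return 0;
    }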