
Rename the base header file containing the prototypes of the collective functions.
This commit is contained in:
George Bosilca 2015-02-15 14:47:27 -05:00
parent 8fbcdf685d
commit aa019e239e
21 changed files with 1400 additions and 3323 deletions

View file

@ -2,7 +2,7 @@
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# Copyright (c) 2004-2015 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -20,10 +20,25 @@ dist_ompidata_DATA = base/help-mca-coll-base.txt
headers += \
base/base.h \
base/coll_tags.h
base/coll_tags.h \
base/coll_base_topo.h \
base/coll_base_util.h
libmca_coll_la_SOURCES += \
base/coll_base_comm_select.c \
base/coll_base_comm_unselect.c \
base/coll_base_find_available.c \
base/coll_base_frame.c
base/coll_base_frame.c \
base/coll_base_bcast.c \
base/coll_base_scatter.c \
base/coll_base_topo.c \
base/coll_base_allgather.c \
base/coll_base_allgatherv.c \
base/coll_base_util.c \
base/coll_base_allreduce.c \
base/coll_base_alltoall.c \
base/coll_base_gather.c \
base/coll_base_alltoallv.c \
base/coll_base_reduce.c \
base/coll_base_barrier.c \
base/coll_base_reduce_scatter.c

View file

@ -87,7 +87,7 @@ int mca_coll_base_find_available(bool enable_progress_threads,
* coll component needs to be selected for it. It should be invoked
* near the end of the communicator creation process such that
* almost everything else is functional on the communicator (e.g.,
* point-to-point communication).
* point-to-point communication).
*
* Note that new communicators may be created as a result of
* invoking this function. Specifically: this function is called in

View file

@ -1,558 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_COLL_TUNED_EXPORT_H
#define MCA_COLL_TUNED_EXPORT_H
#include "ompi_config.h"
#include "mpi.h"
#include "opal/mca/mca.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/request/request.h"
/* need to include our own topo prototypes so we can malloc data on the comm correctly */
#include "coll_tuned_topo.h"
/* also need the dynamic rule structures */
#include "coll_tuned_dynamic_rules.h"
/* some fixed value index vars to simplify certain operations */
typedef enum COLLTYPE {
ALLGATHER = 0, /* 0 */
ALLGATHERV, /* 1 */
ALLREDUCE, /* 2 */
ALLTOALL, /* 3 */
ALLTOALLV, /* 4 */
ALLTOALLW, /* 5 */
BARRIER, /* 6 */
BCAST, /* 7 */
EXSCAN, /* 8 */
GATHER, /* 9 */
GATHERV, /* 10 */
REDUCE, /* 11 */
REDUCESCATTER, /* 12 */
SCAN, /* 13 */
SCATTER, /* 14 */
SCATTERV, /* 15 */
COLLCOUNT /* 16 end counter keep it as last element */
} COLLTYPE_T;
/* defined arg lists to simplify auto inclusion of user overriding decision functions */
#define ALLGATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLGATHERV_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void * rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLREDUCE_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLTOALL_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLTOALLV_ARGS void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLTOALLW_ARGS void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t **sdtypes, void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t **rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define BARRIER_ARGS struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define BCAST_ARGS void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define EXSCAN_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define GATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define GATHERV_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define REDUCE_ARGS void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define REDUCESCATTER_ARGS void *sbuf, void *rbuf, int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define SCAN_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define SCATTER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define SCATTERV_ARGS void *sbuf, int *scounts, int *disps, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
/* end defined arg lists to simplify auto inclusion of user overriding decision functions */
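/* Illustration (editor's note, not part of the original header): with these
 * macros a declaration such as
 *     int ompi_coll_tuned_allgather_intra_bruck(ALLGATHER_ARGS);
 * expands to the full argument list
 *     int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
 *             struct ompi_datatype_t *sdtype, void *rbuf, int rcount,
 *             struct ompi_datatype_t *rdtype,
 *             struct ompi_communicator_t *comm,
 *             mca_coll_base_module_t *module);
 * so the dozens of prototypes below stay consistent when a common argument
 * changes. */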
BEGIN_C_DECLS
/* these are the same across all modules and are loaded at component query time */
extern int ompi_coll_tuned_stream;
extern int ompi_coll_tuned_priority;
extern int ompi_coll_tuned_preallocate_memory_comm_size_limit;
extern bool ompi_coll_tuned_use_dynamic_rules;
extern char* ompi_coll_tuned_dynamic_rules_filename;
extern int ompi_coll_tuned_init_tree_fanout;
extern int ompi_coll_tuned_init_chain_fanout;
extern int ompi_coll_tuned_init_max_requests;
extern int ompi_coll_tuned_alltoall_small_msg;
extern int ompi_coll_tuned_alltoall_intermediate_msg;
/* forced algorithm choices */
/* this structure is for storing the indexes to the forced algorithm mca params... */
/* we get these at component query (so that registered values appear in ompi_info) */
struct coll_tuned_force_algorithm_mca_param_indices_t {
int algorithm_param_index; /* which algorithm you want to force */
int segsize_param_index; /* segsize to use (if supported), 0 = no segmentation */
int tree_fanout_param_index; /* tree fanout/in to use */
int chain_fanout_param_index; /* K-chain fanout/in to use */
int max_requests_param_index; /* Maximum number of outstanding send or recv requests */
};
typedef struct coll_tuned_force_algorithm_mca_param_indices_t coll_tuned_force_algorithm_mca_param_indices_t;
/* the following type is for storing actual value obtained from the MCA on each tuned module */
/* via their mca param indices lookup in the component */
/* this structure is stored once per collective type per communicator... */
struct coll_tuned_force_algorithm_params_t {
int algorithm; /* which algorithm you want to force */
int segsize; /* segsize to use (if supported), 0 = no segmentation */
int tree_fanout; /* tree fanout/in to use */
int chain_fanout; /* K-chain fanout/in to use */
int max_requests; /* Maximum number of outstanding send or recv requests */
};
typedef struct coll_tuned_force_algorithm_params_t coll_tuned_force_algorithm_params_t;
/* the indices to the MCA params so that modules can look them up at open / comm create time */
extern coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT];
/* the actual max algorithm values (readonly), loaded at component open */
extern int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT];
/*
* coll API functions
*/
/* API functions */
int ompi_coll_tuned_init_query(bool enable_progress_threads,
bool enable_mpi_threads);
mca_coll_base_module_t *
ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority);
/* API functions of decision functions and any implementations */
/*
* Note this gets long, as we need a prototype for each
* MPI collective four times: two for the comm type and two for each decision
* type.
* We might cut down the decision prototypes by conditional compilation.
*/
/* All Gather */
int ompi_coll_tuned_allgather_intra_dec_fixed(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_intra_dec_dynamic(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_intra_do_forced(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_intra_do_this(ALLGATHER_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_allgather_intra_bruck(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_intra_recursivedoubling(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_intra_ring(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_intra_neighborexchange(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_intra_basic_linear(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_intra_two_procs(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_inter_dec_fixed(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_inter_dec_dynamic(ALLGATHER_ARGS);
/* All GatherV */
int ompi_coll_tuned_allgatherv_intra_dec_fixed(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_intra_dec_dynamic(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_intra_do_forced(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_intra_do_this(ALLGATHERV_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_allgatherv_intra_bruck(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_intra_ring(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_intra_neighborexchange(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_intra_basic_default(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_intra_two_procs(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_inter_dec_fixed(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_inter_dec_dynamic(ALLGATHERV_ARGS);
/* All Reduce */
int ompi_coll_tuned_allreduce_intra_dec_fixed(ALLREDUCE_ARGS);
int ompi_coll_tuned_allreduce_intra_dec_dynamic(ALLREDUCE_ARGS);
int ompi_coll_tuned_allreduce_intra_do_forced(ALLREDUCE_ARGS);
int ompi_coll_tuned_allreduce_intra_do_this(ALLREDUCE_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_allreduce_intra_nonoverlapping(ALLREDUCE_ARGS);
int ompi_coll_tuned_allreduce_intra_recursivedoubling(ALLREDUCE_ARGS);
int ompi_coll_tuned_allreduce_intra_ring(ALLREDUCE_ARGS);
int ompi_coll_tuned_allreduce_intra_ring_segmented(ALLREDUCE_ARGS, uint32_t segsize);
int ompi_coll_tuned_allreduce_intra_basic_linear(ALLREDUCE_ARGS);
int ompi_coll_tuned_allreduce_inter_dec_fixed(ALLREDUCE_ARGS);
int ompi_coll_tuned_allreduce_inter_dec_dynamic(ALLREDUCE_ARGS);
/* AlltoAll */
int ompi_coll_tuned_alltoall_intra_dec_fixed(ALLTOALL_ARGS);
int ompi_coll_tuned_alltoall_intra_dec_dynamic(ALLTOALL_ARGS);
int ompi_coll_tuned_alltoall_intra_do_forced(ALLTOALL_ARGS);
int ompi_coll_tuned_alltoall_intra_do_this(ALLTOALL_ARGS, int algorithm, int faninout, int segsize, int max_requests);
int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_alltoall_intra_pairwise(ALLTOALL_ARGS);
int ompi_coll_tuned_alltoall_intra_bruck(ALLTOALL_ARGS);
int ompi_coll_tuned_alltoall_intra_basic_linear(ALLTOALL_ARGS);
int ompi_coll_tuned_alltoall_intra_linear_sync(ALLTOALL_ARGS, int max_requests);
int ompi_coll_tuned_alltoall_intra_two_procs(ALLTOALL_ARGS);
int ompi_coll_tuned_alltoall_inter_dec_fixed(ALLTOALL_ARGS);
int ompi_coll_tuned_alltoall_inter_dec_dynamic(ALLTOALL_ARGS);
/* AlltoAllV */
int ompi_coll_tuned_alltoallv_intra_dec_fixed(ALLTOALLV_ARGS);
int ompi_coll_tuned_alltoallv_intra_dec_dynamic(ALLTOALLV_ARGS);
int ompi_coll_tuned_alltoallv_intra_do_forced(ALLTOALLV_ARGS);
int ompi_coll_tuned_alltoallv_intra_do_this(ALLTOALLV_ARGS, int algorithm);
int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_alltoallv_intra_pairwise(ALLTOALLV_ARGS);
int ompi_coll_tuned_alltoallv_intra_basic_linear(ALLTOALLV_ARGS);
int ompi_coll_tuned_alltoallv_inter_dec_fixed(ALLTOALLV_ARGS);
int ompi_coll_tuned_alltoallv_inter_dec_dynamic(ALLTOALLV_ARGS);
/* AlltoAllW */
int ompi_coll_tuned_alltoallw_intra_dec_fixed(ALLTOALLW_ARGS);
int ompi_coll_tuned_alltoallw_intra_dec_dynamic(ALLTOALLW_ARGS);
int ompi_coll_tuned_alltoallw_inter_dec_fixed(ALLTOALLW_ARGS);
int ompi_coll_tuned_alltoallw_inter_dec_dynamic(ALLTOALLW_ARGS);
/* Barrier */
int ompi_coll_tuned_barrier_intra_dec_fixed(BARRIER_ARGS);
int ompi_coll_tuned_barrier_intra_dec_dynamic(BARRIER_ARGS);
int ompi_coll_tuned_barrier_intra_do_forced(BARRIER_ARGS);
int ompi_coll_tuned_barrier_intra_do_this(BARRIER_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_barrier_inter_dec_fixed(BARRIER_ARGS);
int ompi_coll_tuned_barrier_inter_dec_dynamic(BARRIER_ARGS);
int ompi_coll_tuned_barrier_intra_doublering(BARRIER_ARGS);
int ompi_coll_tuned_barrier_intra_recursivedoubling(BARRIER_ARGS);
int ompi_coll_tuned_barrier_intra_bruck(BARRIER_ARGS);
int ompi_coll_tuned_barrier_intra_two_procs(BARRIER_ARGS);
int ompi_coll_tuned_barrier_intra_linear(BARRIER_ARGS);
int ompi_coll_tuned_barrier_intra_tree(BARRIER_ARGS);
/* Bcast */
int ompi_coll_tuned_bcast_intra_generic( BCAST_ARGS, uint32_t count_by_segment, ompi_coll_tree_t* tree );
int ompi_coll_tuned_bcast_intra_dec_fixed(BCAST_ARGS);
int ompi_coll_tuned_bcast_intra_dec_dynamic(BCAST_ARGS);
int ompi_coll_tuned_bcast_intra_do_forced(BCAST_ARGS);
int ompi_coll_tuned_bcast_intra_do_this(BCAST_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_bcast_intra_basic_linear(BCAST_ARGS);
int ompi_coll_tuned_bcast_intra_chain(BCAST_ARGS, uint32_t segsize, int32_t chains);
int ompi_coll_tuned_bcast_intra_pipeline(BCAST_ARGS, uint32_t segsize);
int ompi_coll_tuned_bcast_intra_binomial(BCAST_ARGS, uint32_t segsize);
int ompi_coll_tuned_bcast_intra_bintree(BCAST_ARGS, uint32_t segsize);
int ompi_coll_tuned_bcast_intra_split_bintree(BCAST_ARGS, uint32_t segsize);
int ompi_coll_tuned_bcast_inter_dec_fixed(BCAST_ARGS);
int ompi_coll_tuned_bcast_inter_dec_dynamic(BCAST_ARGS);
/* Exscan */
int ompi_coll_tuned_exscan_intra_dec_fixed(EXSCAN_ARGS);
int ompi_coll_tuned_exscan_intra_dec_dynamic(EXSCAN_ARGS);
int ompi_coll_tuned_exscan_inter_dec_fixed(EXSCAN_ARGS);
int ompi_coll_tuned_exscan_inter_dec_dynamic(EXSCAN_ARGS);
/* Gather */
int ompi_coll_tuned_gather_intra_dec_fixed(GATHER_ARGS);
int ompi_coll_tuned_gather_intra_dec_dynamic(GATHER_ARGS);
int ompi_coll_tuned_gather_intra_do_forced(GATHER_ARGS);
int ompi_coll_tuned_gather_intra_do_this(GATHER_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_gather_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_gather_intra_basic_linear(GATHER_ARGS);
int ompi_coll_tuned_gather_intra_binomial(GATHER_ARGS);
int ompi_coll_tuned_gather_intra_linear_sync(GATHER_ARGS, int first_segment_size);
int ompi_coll_tuned_gather_inter_dec_fixed(GATHER_ARGS);
int ompi_coll_tuned_gather_inter_dec_dynamic(GATHER_ARGS);
/* GatherV */
int ompi_coll_tuned_gatherv_intra_dec_fixed(GATHERV_ARGS);
int ompi_coll_tuned_gatherv_intra_dec_dynamic(GATHERV_ARGS);
int ompi_coll_tuned_gatherv_inter_dec_fixed(GATHERV_ARGS);
int ompi_coll_tuned_gatherv_inter_dec_dynamic(GATHERV_ARGS);
/* Reduce */
int ompi_coll_tuned_reduce_generic( REDUCE_ARGS, ompi_coll_tree_t* tree, int count_by_segment, int max_outstanding_reqs );
int ompi_coll_tuned_reduce_intra_dec_fixed(REDUCE_ARGS);
int ompi_coll_tuned_reduce_intra_dec_dynamic(REDUCE_ARGS);
int ompi_coll_tuned_reduce_intra_do_forced(REDUCE_ARGS);
int ompi_coll_tuned_reduce_intra_do_this(REDUCE_ARGS, int algorithm, int faninout, int segsize, int max_outstanding_reqs);
int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_reduce_intra_basic_linear(REDUCE_ARGS);
int ompi_coll_tuned_reduce_intra_chain(REDUCE_ARGS, uint32_t segsize, int fanout, int max_outstanding_reqs );
int ompi_coll_tuned_reduce_intra_pipeline(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs );
int ompi_coll_tuned_reduce_intra_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs );
int ompi_coll_tuned_reduce_intra_binomial(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs );
int ompi_coll_tuned_reduce_intra_in_order_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs );
int ompi_coll_tuned_reduce_inter_dec_fixed(REDUCE_ARGS);
int ompi_coll_tuned_reduce_inter_dec_dynamic(REDUCE_ARGS);
/* Reduce_scatter */
int ompi_coll_tuned_reduce_scatter_intra_dec_fixed(REDUCESCATTER_ARGS);
int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(REDUCESCATTER_ARGS);
int ompi_coll_tuned_reduce_scatter_intra_do_forced(REDUCESCATTER_ARGS);
int ompi_coll_tuned_reduce_scatter_intra_do_this(REDUCESCATTER_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(REDUCESCATTER_ARGS);
int ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(REDUCESCATTER_ARGS);
int ompi_coll_tuned_reduce_scatter_intra_ring(REDUCESCATTER_ARGS);
int ompi_coll_tuned_reduce_scatter_inter_dec_fixed(REDUCESCATTER_ARGS);
int ompi_coll_tuned_reduce_scatter_inter_dec_dynamic(REDUCESCATTER_ARGS);
/* Scan */
int ompi_coll_tuned_scan_intra_dec_fixed(SCAN_ARGS);
int ompi_coll_tuned_scan_intra_dec_dynamic(SCAN_ARGS);
int ompi_coll_tuned_scan_inter_dec_fixed(SCAN_ARGS);
int ompi_coll_tuned_scan_inter_dec_dynamic(SCAN_ARGS);
/* Scatter */
int ompi_coll_tuned_scatter_intra_dec_fixed(SCATTER_ARGS);
int ompi_coll_tuned_scatter_intra_dec_dynamic(SCATTER_ARGS);
int ompi_coll_tuned_scatter_intra_do_forced(SCATTER_ARGS);
int ompi_coll_tuned_scatter_intra_do_this(SCATTER_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_scatter_intra_basic_linear(SCATTER_ARGS);
int ompi_coll_tuned_scatter_intra_binomial(SCATTER_ARGS);
int ompi_coll_tuned_scatter_inter_dec_fixed(SCATTER_ARGS);
int ompi_coll_tuned_scatter_inter_dec_dynamic(SCATTER_ARGS);
/* ScatterV */
int ompi_coll_tuned_scatterv_intra_dec_fixed(SCATTERV_ARGS);
int ompi_coll_tuned_scatterv_intra_dec_dynamic(SCATTERV_ARGS);
int ompi_coll_tuned_scatterv_inter_dec_fixed(SCATTERV_ARGS);
int ompi_coll_tuned_scatterv_inter_dec_dynamic(SCATTERV_ARGS);
int mca_coll_tuned_ft_event(int state);
/* Utility functions */
static inline void ompi_coll_tuned_free_reqs(ompi_request_t **reqs, int count)
{
int i;
for (i = 0; i < count; ++i)
ompi_request_free(&reqs[i]);
}
struct mca_coll_tuned_component_t {
/** Base coll component */
mca_coll_base_component_2_0_0_t super;
/** MCA parameter: Priority of this component */
int tuned_priority;
/** global stuff that I need the component to store */
/* MCA parameters first */
/* cached decision table stuff (moved from MCW module) */
ompi_coll_alg_rule_t *all_base_rules;
};
/**
* Convenience typedef
*/
typedef struct mca_coll_tuned_component_t mca_coll_tuned_component_t;
/**
* Global component instance
*/
OMPI_MODULE_DECLSPEC extern mca_coll_tuned_component_t mca_coll_tuned_component;
/*
* Data structure for hanging data off the communicator
* i.e. per module instance
*/
struct mca_coll_tuned_comm_t {
/* standard data for requests and PML usage */
/* Precreate space for requests
* Note this does not affect basic,
* but if in wrong context can confuse a debugger
* this is controlled by an MCA param
*/
ompi_request_t **mcct_reqs;
int mcct_num_reqs;
/*
* tuned topo information caching per communicator
*
* for each communicator we cache the topo information so we can
* reuse without regenerating if we change the root, [or fanout]
* then regenerate and recache this information
*/
/* general tree with n fan out */
ompi_coll_tree_t *cached_ntree;
int cached_ntree_root;
int cached_ntree_fanout;
/* binary tree */
ompi_coll_tree_t *cached_bintree;
int cached_bintree_root;
/* binomial tree */
ompi_coll_tree_t *cached_bmtree;
int cached_bmtree_root;
/* binomial tree */
ompi_coll_tree_t *cached_in_order_bmtree;
int cached_in_order_bmtree_root;
/* chained tree (fanout followed by pipelines) */
ompi_coll_tree_t *cached_chain;
int cached_chain_root;
int cached_chain_fanout;
/* pipeline */
ompi_coll_tree_t *cached_pipeline;
int cached_pipeline_root;
/* in-order binary tree (root of the in-order binary tree is rank 0) */
ompi_coll_tree_t *cached_in_order_bintree;
/* moving to the component */
ompi_coll_com_rule_t *com_rules[COLLCOUNT]; /* the communicator rules for each MPI collective for ONLY my comm size */
/* for forced algorithms we store the information on the module */
/* previously we only had one shared copy, oops, it really is per comm/module */
coll_tuned_force_algorithm_params_t user_forced[COLLCOUNT];
};
typedef struct mca_coll_tuned_comm_t mca_coll_tuned_comm_t;
struct mca_coll_tuned_module_t {
mca_coll_base_module_t super;
mca_coll_tuned_comm_t *tuned_data;
};
typedef struct mca_coll_tuned_module_t mca_coll_tuned_module_t;
OBJ_CLASS_DECLARATION(mca_coll_tuned_module_t);
static inline void mca_coll_tuned_free_reqs(ompi_request_t ** reqs,
int count)
{
int i;
for (i = 0; i < count; ++i)
ompi_request_free(reqs + i);
}
END_C_DECLS
#define COLL_TUNED_UPDATE_BINTREE( OMPI_COMM, TUNED_MODULE, ROOT ) \
do { \
mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \
if( !( (coll_comm->cached_bintree) \
&& (coll_comm->cached_bintree_root == (ROOT)) ) ) { \
if( coll_comm->cached_bintree ) { /* destroy previous binary tree if defined */ \
ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_bintree) ); \
} \
coll_comm->cached_bintree = ompi_coll_tuned_topo_build_tree(2,(OMPI_COMM),(ROOT)); \
coll_comm->cached_bintree_root = (ROOT); \
} \
} while (0)
#define COLL_TUNED_UPDATE_BMTREE( OMPI_COMM, TUNED_MODULE, ROOT ) \
do { \
mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \
if( !( (coll_comm->cached_bmtree) \
&& (coll_comm->cached_bmtree_root == (ROOT)) ) ) { \
if( coll_comm->cached_bmtree ) { /* destroy previous binomial if defined */ \
ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_bmtree) ); \
} \
coll_comm->cached_bmtree = ompi_coll_tuned_topo_build_bmtree( (OMPI_COMM), (ROOT) ); \
coll_comm->cached_bmtree_root = (ROOT); \
} \
} while (0)
#define COLL_TUNED_UPDATE_IN_ORDER_BMTREE( OMPI_COMM, TUNED_MODULE, ROOT ) \
do { \
mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \
if( !( (coll_comm->cached_in_order_bmtree) \
&& (coll_comm->cached_in_order_bmtree_root == (ROOT)) ) ) { \
if( coll_comm->cached_in_order_bmtree ) { /* destroy previous binomial if defined */ \
ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_in_order_bmtree) ); \
} \
coll_comm->cached_in_order_bmtree = ompi_coll_tuned_topo_build_in_order_bmtree( (OMPI_COMM), (ROOT) ); \
coll_comm->cached_in_order_bmtree_root = (ROOT); \
} \
} while (0)
#define COLL_TUNED_UPDATE_PIPELINE( OMPI_COMM, TUNED_MODULE, ROOT ) \
do { \
mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \
if( !( (coll_comm->cached_pipeline) \
&& (coll_comm->cached_pipeline_root == (ROOT)) ) ) { \
if (coll_comm->cached_pipeline) { /* destroy previous pipeline if defined */ \
ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_pipeline) ); \
} \
coll_comm->cached_pipeline = ompi_coll_tuned_topo_build_chain( 1, (OMPI_COMM), (ROOT) ); \
coll_comm->cached_pipeline_root = (ROOT); \
} \
} while (0)
#define COLL_TUNED_UPDATE_CHAIN( OMPI_COMM, TUNED_MODULE, ROOT, FANOUT ) \
do { \
mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \
if( !( (coll_comm->cached_chain) \
&& (coll_comm->cached_chain_root == (ROOT)) \
&& (coll_comm->cached_chain_fanout == (FANOUT)) ) ) { \
if( coll_comm->cached_chain) { /* destroy previous chain if defined */ \
ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_chain) ); \
} \
coll_comm->cached_chain = ompi_coll_tuned_topo_build_chain((FANOUT), (OMPI_COMM), (ROOT)); \
coll_comm->cached_chain_root = (ROOT); \
coll_comm->cached_chain_fanout = (FANOUT); \
} \
} while (0)
#define COLL_TUNED_UPDATE_IN_ORDER_BINTREE( OMPI_COMM, TUNED_MODULE ) \
do { \
mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \
if( !(coll_comm->cached_in_order_bintree) ) { \
/* In-order binary tree topology is defined by communicator size */ \
/* Thus, there is no need to destroy anything */ \
coll_comm->cached_in_order_bintree = \
ompi_coll_tuned_topo_build_in_order_bintree((OMPI_COMM)); \
} \
} while (0)
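/* Usage sketch (editor's illustration, not part of this commit): a collective
 * implementation typically refreshes the cached topology for the current root
 * and then walks the cached tree.  The helper below is hypothetical; only the
 * macro, the module/comm structures and the cached_bmtree field come from this
 * header. */
static inline int example_use_cached_bmtree(int root,
                                            struct ompi_communicator_t *comm,
                                            mca_coll_base_module_t *module)
{
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
    /* Rebuild the binomial tree only if none is cached or the cached one
     * was built for a different root. */
    COLL_TUNED_UPDATE_BMTREE(comm, tuned_module, root);
    ompi_coll_tree_t *bmtree = tuned_module->tuned_data->cached_bmtree;
    /* ... receive from the parent and forward to the children recorded in
     * bmtree (see coll_tuned_topo.h for the tree layout) ... */
    (void) bmtree;
    return MPI_SUCCESS;
}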
/**
* This macro gives a generic way to compute the best count of
* the segment (i.e. the number of complete datatypes that
* can fit in the specified SEGSIZE). Beware, when this macro
* is called, the SEGCOUNT should be initialized to the count as
* expected by the collective call.
*/
#define COLL_TUNED_COMPUTED_SEGCOUNT(SEGSIZE, TYPELNG, SEGCOUNT) \
if( ((SEGSIZE) >= (TYPELNG)) && \
((SEGSIZE) < ((TYPELNG) * (SEGCOUNT))) ) { \
size_t residual; \
(SEGCOUNT) = (int)((SEGSIZE) / (TYPELNG)); \
residual = (SEGSIZE) - (SEGCOUNT) * (TYPELNG); \
if( residual > ((TYPELNG) >> 1) ) \
(SEGCOUNT)++; \
} \
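
/* Worked example (editor's illustration, not part of this commit): segmenting
 * a collective on 100000 elements of an 8-byte datatype into 32 KB segments.
 * SEGCOUNT must start at the full count, as noted above. */
static inline int example_computed_segcount(void)
{
    size_t typelng  = 8;       /* extent of one datatype element        */
    int    segcount = 100000;  /* initialized to the collective's count */
    COLL_TUNED_COMPUTED_SEGCOUNT(32768, typelng, segcount);
    /* segcount is now 4096: 32768 / 8 complete elements fit per segment,
     * and the residual is 0, so no rounding up happens. */
    return segcount;
}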
/**
* This macro gives a generic way to compute well-distributed block counts
* when the count and number of blocks are fixed.
* Macro returns "early-block" count, "late-block" count, and "split-index"
* which is the block at which we switch from "early-block" count to
* the "late-block" count.
* count = split_index * early_block_count +
* (block_count - split_index) * late_block_count
* We do not perform ANY error checks - make sure that the input values
* make sense (eg. count > num_blocks).
*/
#define COLL_TUNED_COMPUTE_BLOCKCOUNT( COUNT, NUM_BLOCKS, SPLIT_INDEX, \
EARLY_BLOCK_COUNT, LATE_BLOCK_COUNT ) \
EARLY_BLOCK_COUNT = LATE_BLOCK_COUNT = COUNT / NUM_BLOCKS; \
SPLIT_INDEX = COUNT % NUM_BLOCKS; \
if (0 != SPLIT_INDEX) { \
EARLY_BLOCK_COUNT = EARLY_BLOCK_COUNT + 1; \
} \
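
/* Worked example (editor's illustration, not part of this commit):
 * distributing COUNT = 10 elements over NUM_BLOCKS = 4 blocks. */
static inline void example_compute_blockcount(void)
{
    int early_count, late_count, split_index;
    COLL_TUNED_COMPUTE_BLOCKCOUNT(10, 4, split_index, early_count, late_count);
    /* early_count = 3, late_count = 2, split_index = 2:
     * 10 = 2 blocks of 3 ("early" blocks) + 2 blocks of 2 ("late" blocks),
     * matching count = split_index * early + (num_blocks - split_index) * late. */
    (void) split_index; (void) early_count; (void) late_count;
}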
#endif /* MCA_COLL_TUNED_EXPORT_H */

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -30,31 +30,12 @@
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/* allgather algorithm variables */
static int coll_tuned_allgather_algorithm_count = 6;
static int coll_tuned_allgather_forced_algorithm = 0;
static int coll_tuned_allgather_segment_size = 0;
static int coll_tuned_allgather_tree_fanout;
static int coll_tuned_allgather_chain_fanout;
/* valid values for coll_tuned_allgather_forced_algorithm */
static mca_base_var_enum_value_t allgather_algorithms[] = {
{0, "ignore"},
{1, "linear"},
{2, "bruck"},
{3, "recursive_doubling"},
{4, "ring"},
{5, "neighbor"},
{6, "two_proc"},
{0, NULL}
};
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"
/*
* ompi_coll_tuned_allgather_intra_bruck
* ompi_coll_base_allgather_intra_bruck
*
* Function: allgather using O(log(N)) steps.
* Accepts: Same arguments as MPI_Allgather
@ -65,7 +46,7 @@ static mca_base_var_enum_value_t allgather_algorithms[] = {
* in Multiport Message-Passing Systems"
* Memory requirements: non-zero ranks require shift buffer to perform final
* step in the algorithm.
*
*
* Example on 6 nodes:
* Initialization: everyone has its own buffer at location 0 in rbuf
* This means if user specified MPI_IN_PLACE for sendbuf
@ -84,7 +65,7 @@ static mca_base_var_enum_value_t allgather_algorithms[] = {
* [2] [3] [4] [5] [0] [1]
* [3] [4] [5] [0] [1] [2]
* Step 2: send message to (rank - 2^2), receive message from (rank + 2^2)
* message size is "all remaining blocks"
* message size is "all remaining blocks"
* # 0 1 2 3 4 5
* [0] [1] [2] [3] [4] [5]
* [1] [2] [3] [4] [5] [0]
@ -101,7 +82,7 @@ static mca_base_var_enum_value_t allgather_algorithms[] = {
* [4] [4] [4] [4] [4] [4]
* [5] [5] [5] [5] [5] [5]
*/
int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
int ompi_coll_base_allgather_intra_bruck(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -115,8 +96,8 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_bruck rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgather_intra_bruck rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &slb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@ -125,7 +106,7 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
/* Initialization step:
- if send buffer is not MPI_IN_PLACE, copy send buffer to block 0 of
- if send buffer is not MPI_IN_PLACE, copy send buffer to block 0 of
receive buffer, else
- if rank r != 0, copy r^th block from receive buffer to block 0.
*/
@ -140,15 +121,15 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
err = ompi_datatype_copy_content_same_ddt(rdtype, rcount, tmprecv, tmpsend);
if (err < 0) { line = __LINE__; goto err_hndl; }
}
/* Communication step:
At every step i, rank r:
- doubles the distance
- sends message which starts at begining of rbuf and has size
- sends message which starts at begining of rbuf and has size
(blockcount * rcount) to rank (r - distance)
- receives message of size blockcount * rcount from rank (r + distance)
at location (rbuf + distance * rcount * rext)
- blockcount doubles until last step when only the remaining data is
- blockcount doubles until last step when only the remaining data is
exchanged.
*/
blockcount = 1;
@ -162,14 +143,14 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
if (distance <= (size >> 1)) {
blockcount = distance;
} else {
} else {
blockcount = size - distance;
}
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(tmpsend, blockcount * rcount, rdtype,
err = ompi_coll_base_sendrecv(tmpsend, blockcount * rcount, rdtype,
sendto, MCA_COLL_BASE_TAG_ALLGATHER,
tmprecv, blockcount * rcount, rdtype,
tmprecv, blockcount * rcount, rdtype,
recvfrom, MCA_COLL_BASE_TAG_ALLGATHER,
comm, MPI_STATUS_IGNORE, rank);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@ -178,8 +159,8 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
/* Finalization step:
On all nodes except 0, data needs to be shifted locally:
- create temporary shift buffer,
see discussion in coll_basic_reduce.c about the size and begining
- create temporary shift buffer,
see discussion in coll_basic_reduce.c about the size and begining
of temporary buffer.
- copy blocks [0 .. (size - rank - 1)] from rbuf to shift buffer
- move blocks [(size - rank) .. size] from rbuf to begining of rbuf
@ -195,8 +176,8 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
free_buf = (char*) calloc(((true_extent +
((ptrdiff_t)(size - rank) * (ptrdiff_t)rcount - 1) * rext)),
sizeof(char));
if (NULL == free_buf) {
line = __LINE__; err = OMPI_ERR_OUT_OF_RESOURCE; goto err_hndl;
if (NULL == free_buf) {
line = __LINE__; err = OMPI_ERR_OUT_OF_RESOURCE; goto err_hndl;
}
shift_buf = free_buf - true_lb;
@ -207,13 +188,13 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
/* 2. move blocks [(size - rank) .. size] from rbuf to the begining of rbuf */
tmpsend = (char*) rbuf + (ptrdiff_t)(size - rank) * (ptrdiff_t)rcount * rext;
err = ompi_datatype_copy_content_same_ddt(rdtype, (ptrdiff_t)rank * (ptrdiff_t)rcount,
err = ompi_datatype_copy_content_same_ddt(rdtype, (ptrdiff_t)rank * (ptrdiff_t)rcount,
rbuf, tmpsend);
if (err < 0) { line = __LINE__; goto err_hndl; }
/* 3. copy blocks from shift buffer back to rbuf starting at block [rank]. */
tmprecv = (char*) rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext;
err = ompi_datatype_copy_content_same_ddt(rdtype, (ptrdiff_t)(size - rank) * (ptrdiff_t)rcount,
err = ompi_datatype_copy_content_same_ddt(rdtype, (ptrdiff_t)(size - rank) * (ptrdiff_t)rcount,
tmprecv, shift_buf);
if (err < 0) { line = __LINE__; goto err_hndl; }
@ -223,13 +204,13 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
return OMPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
/*
* ompi_coll_tuned_allgather_intra_recursivedoubling
* ompi_coll_base_allgather_intra_recursivedoubling
*
* Function: allgather using O(log(N)) steps.
* Accepts: Same arguments as MPI_Allgather
@ -239,29 +220,29 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
* This algorithm is used in MPICH-2 for small- and medium-sized
* messages on power-of-two processes.
*
* Limitation: Current implementation only works on power-of-two number of
* processes.
* Limitation: Current implementation only works on power-of-two number of
* processes.
* In case this algorithm is invoked on non-power-of-two
* processes, Bruck algorithm will be invoked.
*
*
* Memory requirements:
* No additional memory requirements beyond user-supplied buffers.
*
*
* Example on 4 nodes:
* Initialization: everyone has its own buffer at location rank in rbuf
* # 0 1 2 3
* # 0 1 2 3
* [0] [ ] [ ] [ ]
* [ ] [1] [ ] [ ]
* [ ] [ ] [2] [ ]
* [ ] [ ] [ ] [3]
* Step 0: exchange data with (rank ^ 2^0)
* # 0 1 2 3
* # 0 1 2 3
* [0] [0] [ ] [ ]
* [1] [1] [ ] [ ]
* [ ] [ ] [2] [2]
* [ ] [ ] [3] [3]
* Step 1: exchange data with (rank ^ 2^1) (if you can)
* # 0 1 2 3
* # 0 1 2 3
* [0] [0] [0] [0]
* [1] [1] [1] [1]
* [2] [2] [2] [2]
@ -269,12 +250,12 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
*
* TODO: Modify the algorithm to work with any number of nodes.
* We can modify code to use identical implementation like MPICH-2:
* - using recursive-halving algorithm, at the end of each step,
* - using recursive-halving algorithm, at the end of each step,
* determine if there are nodes who did not exchange their data in that
* step, and send them appropriate messages.
*/
int
ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
int
ompi_coll_base_allgather_intra_recursivedoubling(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -293,21 +274,21 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
pow2size >>=1;
/* Current implementation only handles power-of-two number of processes.
If the function was called on non-power-of-two number of processes,
If the function was called on non-power-of-two number of processes,
print warning and call bruck allgather algorithm with same parameters.
*/
if (pow2size != size) {
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_recursivedoubling WARNING: non-pow-2 size %d, switching to bruck algorithm",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgather_intra_recursivedoubling WARNING: non-pow-2 size %d, switching to bruck algorithm",
size));
return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype,
return ompi_coll_base_allgather_intra_bruck(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
}
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_recursivedoubling rank %d, size %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgather_intra_recursivedoubling rank %d, size %d",
rank, size));
err = ompi_datatype_get_extent (sdtype, &slb, &sext);
@ -317,7 +298,7 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
/* Initialization step:
- if send buffer is not MPI_IN_PLACE, copy send buffer to block 0 of
- if send buffer is not MPI_IN_PLACE, copy send buffer to block 0 of
receive buffer
*/
if (MPI_IN_PLACE != sbuf) {
@ -326,8 +307,8 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
err = ompi_datatype_sndrcv(tmpsend, scount, sdtype, tmprecv, rcount, rdtype);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
}
/* Communication step:
At every step i, rank r:
- exchanges message with rank remote = (r ^ 2^i).
@ -347,7 +328,7 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
}
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(tmpsend, (ptrdiff_t)distance * (ptrdiff_t)rcount, rdtype,
err = ompi_coll_base_sendrecv(tmpsend, (ptrdiff_t)distance * (ptrdiff_t)rcount, rdtype,
remote, MCA_COLL_BASE_TAG_ALLGATHER,
tmprecv, (ptrdiff_t)distance * (ptrdiff_t)rcount, rdtype,
remote, MCA_COLL_BASE_TAG_ALLGATHER,
@ -359,7 +340,7 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
return OMPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
@ -367,7 +348,7 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
/*
* ompi_coll_tuned_allgather_intra_ring
* ompi_coll_base_allgather_intra_ring
*
* Function: allgather using O(N) steps.
* Accepts: Same arguments as MPI_Allgather
@ -379,9 +360,9 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
* (r + 1) containing data from rank (r - i), with wrap arounds.
* Memory requirements:
* No additional memory requirements.
*
*
*/
int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount,
int ompi_coll_base_allgather_intra_ring(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -395,8 +376,8 @@ int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_ring rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgather_intra_ring rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &slb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@ -413,15 +394,15 @@ int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount,
tmpsend = (char*) sbuf;
err = ompi_datatype_sndrcv(tmpsend, scount, sdtype, tmprecv, rcount, rdtype);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
}
/* Communication step:
At every step i: 0 .. (P-1), rank r:
- receives message from [(r - 1 + size) % size] containing data from rank
[(r - i - 1 + size) % size]
- sends message to rank [(r + 1) % size] containing data from rank
[(r - i + size) % size]
- sends message which starts at begining of rbuf and has size
- sends message which starts at begining of rbuf and has size
*/
sendto = (rank + 1) % size;
recvfrom = (rank - 1 + size) % size;
@ -434,7 +415,7 @@ int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount,
tmpsend = (char*)rbuf + (ptrdiff_t)senddatafrom * (ptrdiff_t)rcount * rext;
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(tmpsend, rcount, rdtype, sendto,
err = ompi_coll_base_sendrecv(tmpsend, rcount, rdtype, sendto,
MCA_COLL_BASE_TAG_ALLGATHER,
tmprecv, rcount, rdtype, recvfrom,
MCA_COLL_BASE_TAG_ALLGATHER,
@ -446,34 +427,34 @@ int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount,
return OMPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
/*
* ompi_coll_tuned_allgather_intra_neighborexchange
* ompi_coll_base_allgather_intra_neighborexchange
*
* Function: allgather using N/2 steps (O(N))
* Accepts: Same arguments as MPI_Allgather
* Returns: MPI_SUCCESS or error code
*
* Description: Neighbor Exchange algorithm for allgather.
* Described by Chen et.al. in
* "Performance Evaluation of Allgather Algorithms on
* Described by Chen et.al. in
* "Performance Evaluation of Allgather Algorithms on
* Terascale Linux Cluster with Fast Ethernet",
* Proceedings of the Eighth International Conference on
* Proceedings of the Eighth International Conference on
* High-Performance Computing in Asia-Pacific Region
* (HPCASIA'05), 2005
*
*
* Rank r exchanges message with one of its neighbors and
* forwards the data further in the next step.
*
* No additional memory requirements.
*
*
* Limitations: Algorithm works only on even number of processes.
* For odd number of processes we switch to ring algorithm.
*
*
* Example on 6 nodes:
* Initial state
* # 0 1 2 3 4 5
@ -508,8 +489,8 @@ int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount,
* [4] [4] [4] [4] [4] [4]
* [5] [5] [5] [5] [5] [5]
*/
int
ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount,
int
ompi_coll_base_allgather_intra_neighborexchange(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -525,16 +506,16 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount,
rank = ompi_comm_rank(comm);
if (size % 2) {
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgather_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm",
size));
return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype,
return ompi_coll_base_allgather_intra_ring(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
}
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_neighborexchange rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgather_intra_neighborexchange rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &slb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@ -551,7 +532,7 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount,
tmpsend = (char*) sbuf;
err = ompi_datatype_sndrcv(tmpsend, scount, sdtype, tmprecv, rcount, rdtype);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
}
/* Determine neighbors, order in which blocks will arrive, etc. */
even_rank = !(rank % 2);
@ -573,15 +554,15 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount,
/* Communication loop:
- First step is special: exchange a single block with neighbor[0].
- Rest of the steps:
update recv_data_from according to offset, and
- Rest of the steps:
update recv_data_from according to offset, and
exchange two blocks with appropriate neighbor.
the send location becomes the previous receive location.
*/
tmprecv = (char*)rbuf + (ptrdiff_t)neighbor[0] * (ptrdiff_t)rcount * rext;
tmpsend = (char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext;
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(tmpsend, rcount, rdtype, neighbor[0],
err = ompi_coll_base_sendrecv(tmpsend, rcount, rdtype, neighbor[0],
MCA_COLL_BASE_TAG_ALLGATHER,
tmprecv, rcount, rdtype, neighbor[0],
MCA_COLL_BASE_TAG_ALLGATHER,
@ -597,15 +578,15 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount,
for (i = 1; i < (size / 2); i++) {
const int i_parity = i % 2;
recv_data_from[i_parity] =
recv_data_from[i_parity] =
(recv_data_from[i_parity] + offset_at_step[i_parity] + size) % size;
tmprecv = (char*)rbuf + (ptrdiff_t)recv_data_from[i_parity] * (ptrdiff_t)rcount * rext;
tmpsend = (char*)rbuf + (ptrdiff_t)send_data_from * rcount * rext;
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(tmpsend, (ptrdiff_t)2 * (ptrdiff_t)rcount, rdtype,
neighbor[i_parity],
err = ompi_coll_base_sendrecv(tmpsend, (ptrdiff_t)2 * (ptrdiff_t)rcount, rdtype,
neighbor[i_parity],
MCA_COLL_BASE_TAG_ALLGATHER,
tmprecv, (ptrdiff_t)2 * (ptrdiff_t)rcount, rdtype,
neighbor[i_parity],
@ -619,13 +600,13 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount,
return OMPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount,
int ompi_coll_base_allgather_intra_two_procs(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -638,8 +619,8 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount,
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_allgather_intra_two_procs rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_allgather_intra_two_procs rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &lb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@ -661,7 +642,7 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount,
}
tmprecv = (char*)rbuf + (ptrdiff_t)remote * (ptrdiff_t)rcount * rext;
err = ompi_coll_tuned_sendrecv(tmpsend, scount, sdtype, remote,
err = ompi_coll_base_sendrecv(tmpsend, scount, sdtype, remote,
MCA_COLL_BASE_TAG_ALLGATHER,
tmprecv, rcount, rdtype, remote,
MCA_COLL_BASE_TAG_ALLGATHER,
@ -670,7 +651,7 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount,
/* Place your data in correct location if necessary */
if (MPI_IN_PLACE != sbuf) {
err = ompi_datatype_sndrcv((char*)sbuf, scount, sdtype,
err = ompi_datatype_sndrcv((char*)sbuf, scount, sdtype,
(char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext, rcount, rdtype);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
@ -678,7 +659,7 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount,
return MPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
@ -687,13 +668,13 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount,
/*
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
* but for some small number of nodes and/or small data sizes they
* are just as fast as tuned/tree based segmenting operations
* but for some small number of nodes and/or small data sizes they
* are just as fast as base/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
* have to duplicate code.
* JPG following the examples from other coll_tuned implementations. Dec06.
* JPG following the examples from other coll_base implementations. Dec06.
*/
/* copied function (with appropriate renaming) starts here */
@ -706,10 +687,10 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount,
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_tuned_allgather_intra_basic_linear(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
ompi_coll_base_allgather_intra_basic_linear(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf,
int rcount,
int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
@ -727,7 +708,7 @@ ompi_coll_tuned_allgather_intra_basic_linear(void *sbuf, int scount,
sbuf = ((char*) rbuf) + (ompi_comm_rank(comm) * extent * rcount);
sdtype = rdtype;
scount = rcount;
}
}
/* Gather and broadcast. */
@ -755,183 +736,3 @@ ompi_coll_tuned_allgather_intra_basic_linear(void *sbuf, int scount,
}
/* copied function (with appropriate renaming) ends here */
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if its forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map
routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values
and perms */
/* module does not call this they call the forced_getvalues routine instead */
int
ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[ALLGATHER] = coll_tuned_allgather_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgather_algorithm_count",
"Number of allgather algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_allgather_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_allgather_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_allgather_algorithms", allgather_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgather_algorithm",
"Which allallgather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 bruck, 3 recursive doubling, 4 ring, 5 neighbor exchange, 6: two proc only.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgather_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_allgather_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgather_algorithm_segmentsize",
"Segment size in bytes used by default for allgather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgather_segment_size);
coll_tuned_allgather_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgather_algorithm_tree_fanout",
"Fanout for n-tree used for allgather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgather_tree_fanout);
coll_tuned_allgather_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgather_algorithm_chain_fanout",
"Fanout for chains used for allgather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgather_chain_fanout);
return (MPI_SUCCESS);
}
int ompi_coll_tuned_allgather_intra_do_forced(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_do_forced selected algorithm %d",
data->user_forced[ALLGATHER].algorithm));
switch (data->user_forced[ALLGATHER].algorithm) {
case (0):
return ompi_coll_tuned_allgather_intra_dec_fixed (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (1):
return ompi_coll_tuned_allgather_intra_basic_linear (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (2):
return ompi_coll_tuned_allgather_intra_bruck (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (3):
return ompi_coll_tuned_allgather_intra_recursivedoubling (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (4):
return ompi_coll_tuned_allgather_intra_ring (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (5):
return ompi_coll_tuned_allgather_intra_neighborexchange (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (6):
return ompi_coll_tuned_allgather_intra_two_procs (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[ALLGATHER].algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLGATHER]));
return (MPI_ERR_ARG);
} /* switch */
}
int ompi_coll_tuned_allgather_intra_do_this(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0):
return ompi_coll_tuned_allgather_intra_dec_fixed(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (1):
return ompi_coll_tuned_allgather_intra_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (2):
return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (3):
return ompi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (4):
return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (5):
return ompi_coll_tuned_allgather_intra_neighborexchange(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (6):
return ompi_coll_tuned_allgather_intra_two_procs (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLGATHER]));
return (MPI_ERR_ARG);
} /* switch */
}

View file

@ -30,19 +30,12 @@
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"
/* allgatherv algorithm variables */
static int coll_tuned_allgatherv_algorithm_count = 5;
static int coll_tuned_allgatherv_forced_algorithm = 0;
static int coll_tuned_allgatherv_segment_size = 0;
static int coll_tuned_allgatherv_tree_fanout;
static int coll_tuned_allgatherv_chain_fanout;
/* valid values for coll_tuned_allgatherv_forced_algorithm */
static mca_base_var_enum_value_t allgatherv_algorithms[] = {
/* valid values for coll_base_allgatherv_forced_algorithm */
mca_base_var_enum_value_t coll_base_allgatherv_algorithms[] = {
{0, "ignore"},
{1, "default"},
{2, "bruck"},
@ -53,7 +46,7 @@ static mca_base_var_enum_value_t allgatherv_algorithms[] = {
};
/*
* ompi_coll_tuned_allgatherv_intra_bruck
* ompi_coll_base_allgatherv_intra_bruck
*
* Function: allgatherv using O(log(N)) steps.
* Accepts: Same arguments as MPI_Allgatherv
@ -107,7 +100,7 @@ static mca_base_var_enum_value_t allgatherv_algorithms[] = {
* [5] [5] [5] [5] [5] [5] [5]
* [6] [6] [6] [6] [6] [6] [6]
*/
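A minimal standalone sketch of the O(log(N)) Bruck schedule the comment above describes; the peer orientation and per-round block counts are assumptions for illustration only, not a restatement of the exact loop in this routine.

#include <stdio.h>

int main(void)
{
    const int size = 7, rank = 2;   /* example communicator: rank 2 of 7 */
    int gathered = 1;               /* each rank already holds its own block */

    for (int distance = 1; distance < size; distance <<= 1) {
        int sendto   = (rank - distance + size) % size;  /* assumed orientation */
        int recvfrom = (rank + distance) % size;
        /* the last round may be partial */
        int blocks   = (2 * distance <= size) ? distance : size - distance;

        printf("distance %d: exchange %d block(s), send to %d, recv from %d\n",
               distance, blocks, sendto, recvfrom);
        gathered += blocks;
    }
    printf("gathered %d of %d blocks in ceil(log2(size)) rounds\n", gathered, size);
    return 0;
}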
int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount,
int ompi_coll_base_allgatherv_intra_bruck(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts,
int *rdispls,
@ -124,8 +117,8 @@ int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_bruck rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgather_intra_bruck rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &slb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@ -198,7 +191,7 @@ int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount,
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(rbuf, 1, new_sdtype, sendto,
err = ompi_coll_base_sendrecv(rbuf, 1, new_sdtype, sendto,
MCA_COLL_BASE_TAG_ALLGATHERV,
rbuf, 1, new_rdtype, recvfrom,
MCA_COLL_BASE_TAG_ALLGATHERV,
@ -217,14 +210,14 @@ int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount,
err_hndl:
if( NULL != new_rcounts ) free(new_rcounts);
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
/*
* ompi_coll_tuned_allgatherv_intra_ring
* ompi_coll_base_allgatherv_intra_ring
*
* Function: allgatherv using O(N) steps.
* Accepts: Same arguments as MPI_Allgatherv
@ -238,7 +231,7 @@ int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount,
* No additional memory requirements.
*
*/
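A minimal standalone sketch of the ring schedule described above, assuming the usual orientation where every rank always sends to rank+1 and receives from rank-1; the senddatafrom/recvdatafrom indexing mirrors how the function body below picks rdisps entries.

#include <stdio.h>

int main(void)
{
    const int size = 5, rank = 2;              /* example communicator */
    const int sendto   = (rank + 1) % size;
    const int recvfrom = (rank - 1 + size) % size;

    /* size - 1 steps; at step i the block that originated on rank
       (rank - i) mod size is forwarded and the block that originated on
       (rank - i - 1) mod size arrives. */
    for (int i = 0; i < size - 1; i++) {
        int senddatafrom = (rank - i + size) % size;
        int recvdatafrom = (rank - i - 1 + size) % size;
        printf("step %d: send block %d to %d, recv block %d from %d\n",
               i, senddatafrom, sendto, recvdatafrom, recvfrom);
    }
    return 0;
}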
int ompi_coll_tuned_allgatherv_intra_ring(void *sbuf, int scount,
int ompi_coll_base_allgatherv_intra_ring(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int *rcounts, int *rdisps,
struct ompi_datatype_t *rdtype,
@ -252,8 +245,8 @@ int ompi_coll_tuned_allgatherv_intra_ring(void *sbuf, int scount,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_ring rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgatherv_intra_ring rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &slb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@ -292,7 +285,7 @@ int ompi_coll_tuned_allgatherv_intra_ring(void *sbuf, int scount,
tmpsend = (char*)rbuf + rdisps[senddatafrom] * rext;
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(tmpsend, rcounts[senddatafrom], rdtype,
err = ompi_coll_base_sendrecv(tmpsend, rcounts[senddatafrom], rdtype,
sendto, MCA_COLL_BASE_TAG_ALLGATHERV,
tmprecv, rcounts[recvdatafrom], rdtype,
recvfrom, MCA_COLL_BASE_TAG_ALLGATHERV,
@ -304,13 +297,13 @@ int ompi_coll_tuned_allgatherv_intra_ring(void *sbuf, int scount,
return OMPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
/*
* ompi_coll_tuned_allgatherv_intra_neighborexchange
* ompi_coll_base_allgatherv_intra_neighborexchange
*
* Function: allgatherv using N/2 steps (O(N))
* Accepts: Same arguments as MPI_Allgatherv
@ -368,7 +361,7 @@ int ompi_coll_tuned_allgatherv_intra_ring(void *sbuf, int scount,
* [5] [5] [5] [5] [5] [5]
*/
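A minimal standalone sketch of the pairing behind the N/2-step count, assuming even ranks pair to the right first and odd ranks to the left first; odd communicator sizes fall back to the ring version, as the code further down shows.

#include <stdio.h>

int main(void)
{
    const int size = 6;               /* must be even for this algorithm */

    for (int rank = 0; rank < size; rank++) {
        int first, second;
        if (0 == (rank % 2)) {        /* even ranks pair to the right first */
            first  = (rank + 1) % size;
            second = (rank - 1 + size) % size;
        } else {                      /* odd ranks pair to the left first */
            first  = (rank - 1 + size) % size;
            second = (rank + 1) % size;
        }
        /* size/2 steps: one block with the first partner in step 0, then
           two blocks per step while alternating between the two partners. */
        printf("rank %d: partners %d and %d, %d steps\n",
               rank, first, second, size / 2);
    }
    return 0;
}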
int
ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount,
ompi_coll_base_allgatherv_intra_neighborexchange(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int *rcounts, int *rdispls,
struct ompi_datatype_t *rdtype,
@ -386,17 +379,17 @@ ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount,
rank = ompi_comm_rank(comm);
if (size % 2) {
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgatherv_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm",
size));
return ompi_coll_tuned_allgatherv_intra_ring(sbuf, scount, sdtype,
return ompi_coll_base_allgatherv_intra_ring(sbuf, scount, sdtype,
rbuf, rcounts,
rdispls, rdtype,
comm, module);
}
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_neighborexchange rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgatherv_intra_neighborexchange rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &slb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@ -445,7 +438,7 @@ ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount,
*/
tmprecv = (char*)rbuf + (ptrdiff_t)rdispls[neighbor[0]] * rext;
tmpsend = (char*)rbuf + (ptrdiff_t)rdispls[rank] * rext;
err = ompi_coll_tuned_sendrecv(tmpsend, rcounts[rank], rdtype,
err = ompi_coll_base_sendrecv(tmpsend, rcounts[rank], rdtype,
neighbor[0], MCA_COLL_BASE_TAG_ALLGATHERV,
tmprecv, rcounts[neighbor[0]], rdtype,
neighbor[0], MCA_COLL_BASE_TAG_ALLGATHERV,
@ -493,7 +486,7 @@ ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount,
tmpsend = (char*)rbuf;
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(tmpsend, 1, new_sdtype, neighbor[i_parity],
err = ompi_coll_base_sendrecv(tmpsend, 1, new_sdtype, neighbor[i_parity],
MCA_COLL_BASE_TAG_ALLGATHERV,
tmprecv, 1, new_rdtype, neighbor[i_parity],
MCA_COLL_BASE_TAG_ALLGATHERV,
@ -509,13 +502,13 @@ ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount,
return OMPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount,
int ompi_coll_base_allgatherv_intra_two_procs(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int *rcounts,
int *rdispls,
@ -529,8 +522,8 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount,
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_allgatherv_intra_two_procs rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_allgatherv_intra_two_procs rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &lb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@ -552,7 +545,7 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount,
}
tmprecv = (char*)rbuf + (ptrdiff_t)rdispls[remote] * rext;
err = ompi_coll_tuned_sendrecv(tmpsend, scount, sdtype, remote,
err = ompi_coll_base_sendrecv(tmpsend, scount, sdtype, remote,
MCA_COLL_BASE_TAG_ALLGATHERV,
tmprecv, rcounts[remote], rdtype, remote,
MCA_COLL_BASE_TAG_ALLGATHERV,
@ -570,7 +563,7 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount,
return MPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
@ -580,12 +573,12 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount,
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
* but for some small number of nodes and/or small data sizes they
* are just as fast as tuned/tree based segmenting operations
* are just as fast as base/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
* have to duplicate code.
* JPG following the examples from other coll_tuned implementations. Dec06.
* JPG following the examples from other coll_base implementations. Dec06.
*/
/* copied function (with appropriate renaming) starts here */
@ -599,7 +592,7 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount,
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_tuned_allgatherv_intra_basic_default(void *sbuf, int scount,
ompi_coll_base_allgatherv_intra_basic_default(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts,
int *disps,
@ -619,8 +612,8 @@ ompi_coll_tuned_allgatherv_intra_basic_default(void *sbuf, int scount,
* to process with rank 0 (OMPI convention)
*/
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_allgatherv_intra_basic_default rank %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_allgatherv_intra_basic_default rank %d",
rank));
if (MPI_IN_PLACE == sbuf) {
@ -676,177 +669,3 @@ ompi_coll_tuned_allgatherv_intra_basic_default(void *sbuf, int scount,
/* copied function (with appropriate renaming) ends here */
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if its forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map
routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values
and perms */
/* module does not call this they call the forced_getvalues routine instead */
int
ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[ALLGATHERV] = coll_tuned_allgatherv_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_count",
"Number of allgatherv algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_allgatherv_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_allgatherv_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_allgatherv_algorithms", allgatherv_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm",
"Which allallgatherv algorithm is used. Can be locked down to choice of: 0 ignore, 1 default (allgathervv + bcast), 2 bruck, 3 ring, 4 neighbor exchange, 5: two proc only.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgatherv_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_allgatherv_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_segmentsize",
"Segment size in bytes used by default for allgatherv algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgatherv_segment_size);
coll_tuned_allgatherv_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_tree_fanout",
"Fanout for n-tree used for allgatherv algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgatherv_tree_fanout);
coll_tuned_allgatherv_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_chain_fanout",
"Fanout for chains used for allgatherv algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgatherv_chain_fanout);
return (MPI_SUCCESS);
}
int ompi_coll_tuned_allgatherv_intra_do_forced(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts,
int *rdispls,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_do_forced selected algorithm %d",
data->user_forced[ALLGATHERV].algorithm));
switch (data->user_forced[ALLGATHERV].algorithm) {
case (0):
return ompi_coll_tuned_allgatherv_intra_dec_fixed (sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (1):
return ompi_coll_tuned_allgatherv_intra_basic_default (sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (2):
return ompi_coll_tuned_allgatherv_intra_bruck (sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (3):
return ompi_coll_tuned_allgatherv_intra_ring (sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (4):
return ompi_coll_tuned_allgatherv_intra_neighborexchange (sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (5):
return ompi_coll_tuned_allgatherv_intra_two_procs (sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[ALLGATHERV].algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLGATHERV]));
return (MPI_ERR_ARG);
} /* switch */
}
int ompi_coll_tuned_allgatherv_intra_do_this(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts,
int *rdispls,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout,
int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0):
return ompi_coll_tuned_allgatherv_intra_dec_fixed(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (1):
return ompi_coll_tuned_allgatherv_intra_basic_default(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (2):
return ompi_coll_tuned_allgatherv_intra_bruck(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (3):
return ompi_coll_tuned_allgatherv_intra_ring(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (4):
return ompi_coll_tuned_allgatherv_intra_neighborexchange(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (5):
return ompi_coll_tuned_allgatherv_intra_two_procs (sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLGATHERV]));
return (MPI_ERR_ARG);
} /* switch */
}

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -31,41 +31,23 @@
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/op/op.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/* allreduce algorithm variables */
static int coll_tuned_allreduce_algorithm_count = 5;
static int coll_tuned_allreduce_forced_algorithm = 0;
static int coll_tuned_allreduce_segment_size = 0;
static int coll_tuned_allreduce_tree_fanout;
static int coll_tuned_allreduce_chain_fanout;
/* valid values for coll_tuned_allreduce_forced_algorithm */
static mca_base_var_enum_value_t allreduce_algorithms[] = {
{0, "ignore"},
{1, "basic_linear"},
{2, "nonoverlapping"},
{3, "recursive_doubling"},
{4, "ring"},
{5, "segmented_ring"},
{0, NULL}
};
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"
/*
* ompi_coll_tuned_allreduce_intra_nonoverlapping
* ompi_coll_base_allreduce_intra_nonoverlapping
*
* This function just calls a reduce followed by a broadcast
* both called functions are tuned but they complete sequentially,
* both called functions are base but they complete sequentially,
* i.e. no additional overlapping
* meaning if the number of segments used is greater than the topo depth
* meaning if the number of segments used is greater than the topo depth
* then once the first segment of data is fully 'reduced' it is not broadcast
* while the reduce continues (cost = cost-reduce + cost-bcast + decision x 3)
*
*/
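For illustration, the same reduce-then-broadcast composition written against the public MPI API; a sketch only, since the actual routine below dispatches through comm->c_coll rather than calling MPI directly.

#include <mpi.h>

/* Illustrative equivalent of the non-overlapping allreduce: a reduce onto
   rank 0 followed by a broadcast from rank 0, run strictly one after the
   other (no pipelining between the two phases). */
int allreduce_nonoverlapping_sketch(void *sbuf, void *rbuf, int count,
                                    MPI_Datatype dtype, MPI_Op op,
                                    MPI_Comm comm)
{
    int rank, err;
    MPI_Comm_rank(comm, &rank);

    if (MPI_IN_PLACE == sbuf) {
        /* Data starts in rbuf on every rank. */
        err = MPI_Reduce((0 == rank) ? MPI_IN_PLACE : rbuf,
                         (0 == rank) ? rbuf : NULL,
                         count, dtype, op, 0, comm);
    } else {
        err = MPI_Reduce(sbuf, rbuf, count, dtype, op, 0, comm);
    }
    if (MPI_SUCCESS != err) return err;

    return MPI_Bcast(rbuf, count, dtype, 0, comm);
}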
int
ompi_coll_tuned_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count,
ompi_coll_base_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
@ -75,16 +57,16 @@ ompi_coll_tuned_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_nonoverlapping rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:allreduce_intra_nonoverlapping rank %d", rank));
/* Reduce to 0 and broadcast. */
if (MPI_IN_PLACE == sbuf) {
if (0 == rank) {
err = comm->c_coll.coll_reduce (MPI_IN_PLACE, rbuf, count, dtype,
err = comm->c_coll.coll_reduce (MPI_IN_PLACE, rbuf, count, dtype,
op, 0, comm, comm->c_coll.coll_reduce_module);
} else {
err = comm->c_coll.coll_reduce (rbuf, NULL, count, dtype, op, 0,
err = comm->c_coll.coll_reduce (rbuf, NULL, count, dtype, op, 0,
comm, comm->c_coll.coll_reduce_module);
}
} else {
@ -100,21 +82,21 @@ ompi_coll_tuned_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count
}
/*
* ompi_coll_tuned_allreduce_intra_recursivedoubling
* ompi_coll_base_allreduce_intra_recursivedoubling
*
* Function: Recursive doubling algorithm for allreduce operation
* Accepts: Same as MPI_Allreduce()
* Returns: MPI_SUCCESS or error code
*
* Description: Implements recursive doubling algorithm for allreduce.
* Original (non-segmented) implementation is used in MPICH-2
* Description: Implements recursive doubling algorithm for allreduce.
* Original (non-segmented) implementation is used in MPICH-2
* for small and intermediate size messages.
* The algorithm preserves order of operations so it can
* The algorithm preserves order of operations so it can
* be used both by commutative and non-commutative operations.
*
* Example on 7 nodes:
* Initial state
* # 0 1 2 3 4 5 6
* # 0 1 2 3 4 5 6
* [0] [1] [2] [3] [4] [5] [6]
* Initial adjustment step for non-power of two nodes.
* old rank 1 3 5 6
@ -129,24 +111,24 @@ ompi_coll_tuned_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count
* old rank 1 3 5 6
* new rank 0 1 2 3
* [0+1+] [0+1+] [0+1+] [0+1+]
* [2+3+] [2+3+] [2+3+] [2+3+]
* [2+3+] [2+3+] [2+3+] [2+3+]
* [4+5+] [4+5+] [4+5+] [4+5+]
* [6 ] [6 ] [6 ] [6 ]
* Final adjustment step for non-power of two nodes
* # 0 1 2 3 4 5 6
* # 0 1 2 3 4 5 6
* [0+1+] [0+1+] [0+1+] [0+1+] [0+1+] [0+1+] [0+1+]
* [2+3+] [2+3+] [2+3+] [2+3+] [2+3+] [2+3+] [2+3+]
* [2+3+] [2+3+] [2+3+] [2+3+] [2+3+] [2+3+] [2+3+]
* [4+5+] [4+5+] [4+5+] [4+5+] [4+5+] [4+5+] [4+5+]
* [6 ] [6 ] [6 ] [6 ] [6 ] [6 ] [6 ]
*
*/
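A minimal standalone sketch of the non-power-of-two folding illustrated by the 7-node example above; the names adjsize, extra_ranks and newrank mirror the variables used further down in this function, and the printed table reproduces the "old rank / new rank" rows of the comment.

#include <stdio.h>

int main(void)
{
    const int size = 7;                    /* matches the 7-node example */
    int adjsize = 1;
    while (adjsize <= size) adjsize <<= 1;
    adjsize >>= 1;                         /* largest power of two <= size */
    int extra_ranks = size - adjsize;      /* 7 - 4 = 3 */

    for (int rank = 0; rank < size; rank++) {
        int newrank;
        if (rank < 2 * extra_ranks) {
            /* even member of each pair folds its data into the odd one and
               then sits out the doubling loop (newrank == -1) */
            newrank = (rank % 2) ? rank / 2 : -1;
        } else {
            newrank = rank - extra_ranks;
        }
        printf("old rank %d -> new rank %d\n", rank, newrank);
    }
    /* the doubling loop then pairs new ranks at distances 1, 2, 4, ...
       up to adjsize/2, i.e. log2(adjsize) exchange steps */
    return 0;
}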
int
ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
int
ompi_coll_base_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
mca_coll_base_module_t *module)
{
int ret, line, rank, size, adjsize, remote, distance;
int newrank, newremote, extra_ranks;
@ -157,9 +139,9 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allreduce_intra_recursivedoubling rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allreduce_intra_recursivedoubling rank %d", rank));
/* Special case for size == 1 */
if (1 == size) {
if (MPI_IN_PLACE != sbuf) {
@ -194,16 +176,16 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
adjsize >>= 1;
/* Handle non-power-of-two case:
- Even ranks less than 2 * extra_ranks send their data to (rank + 1), and
- Even ranks less than 2 * extra_ranks send their data to (rank + 1), and
sets new rank to -1.
- Odd ranks less than 2 * extra_ranks receive data from (rank - 1),
- Odd ranks less than 2 * extra_ranks receive data from (rank - 1),
apply appropriate operation, and set new rank to rank/2
- Everyone else sets rank to rank - extra_ranks
*/
extra_ranks = size - adjsize;
if (rank < (2 * extra_ranks)) {
if (0 == (rank % 2)) {
ret = MCA_PML_CALL(send(tmpsend, count, dtype, (rank + 1),
ret = MCA_PML_CALL(send(tmpsend, count, dtype, (rank + 1),
MCA_COLL_BASE_TAG_ALLREDUCE,
MCA_PML_BASE_SEND_STANDARD, comm));
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
@ -221,7 +203,7 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
newrank = rank - extra_ranks;
}
/* Communication/Computation loop
/* Communication/Computation loop
- Exchange message with remote node.
- Perform appropriate operation taking in account order of operations:
result = value (op) result
@ -230,14 +212,14 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
if (newrank < 0) break;
/* Determine remote node */
newremote = newrank ^ distance;
remote = (newremote < extra_ranks)?
remote = (newremote < extra_ranks)?
(newremote * 2 + 1):(newremote + extra_ranks);
/* Exchange the data */
ret = MCA_PML_CALL(irecv(tmprecv, count, dtype, remote,
MCA_COLL_BASE_TAG_ALLREDUCE, comm, &reqs[0]));
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
ret = MCA_PML_CALL(isend(tmpsend, count, dtype, remote,
ret = MCA_PML_CALL(isend(tmpsend, count, dtype, remote,
MCA_COLL_BASE_TAG_ALLREDUCE,
MCA_PML_BASE_SEND_STANDARD, comm, &reqs[1]));
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
@ -258,14 +240,14 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
}
/* Handle non-power-of-two case:
- Odd ranks less than 2 * extra_ranks send result from tmpsend to
- Odd ranks less than 2 * extra_ranks send result from tmpsend to
(rank - 1)
- Even ranks less than 2 * extra_ranks receive result from (rank + 1)
*/
if (rank < (2 * extra_ranks)) {
if (0 == (rank % 2)) {
ret = MCA_PML_CALL(recv(rbuf, count, dtype, (rank + 1),
MCA_COLL_BASE_TAG_ALLREDUCE, comm,
MCA_COLL_BASE_TAG_ALLREDUCE, comm,
MPI_STATUS_IGNORE));
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
tmpsend = (char*)rbuf;
@ -287,14 +269,14 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
return MPI_SUCCESS;
error_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tRank %d Error occurred %d\n",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n",
__FILE__, line, rank, ret));
if (NULL != inplacebuf) free(inplacebuf);
return ret;
}
/*
* ompi_coll_tuned_allreduce_intra_ring
* ompi_coll_base_allreduce_intra_ring
*
* Function: Ring algorithm for allreduce operation
* Accepts: Same as MPI_Allreduce()
@ -304,9 +286,9 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
* automatically segmented to segment of size M/N.
* Algorithm requires 2*N - 1 steps.
*
* Limitations: The algorithm DOES NOT preserve order of operations so it
* Limitations: The algorithm DOES NOT preserve order of operations so it
* can be used only for commutative operations.
* In addition, algorithm cannot work if the total count is
* In addition, algorithm cannot work if the total count is
* less than size.
* Example on 5 nodes:
* Initial state
@ -318,7 +300,7 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
* [04] [14] [24] [34] [44]
*
* COMPUTATION PHASE
* Step 0: rank r sends block r to rank (r+1) and receives block (r-1)
* Step 0: rank r sends block r to rank (r+1) and receives block (r-1)
* from rank (r-1) [with wraparound].
* # 0 1 2 3 4
* [00] [00+10] [20] [30] [40]
@ -327,7 +309,7 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
* [03] [13] [23] [33] [33+43]
* [44+04] [14] [24] [34] [44]
*
* Step 1: rank r sends block (r-1) to rank (r+1) and receives block
* Step 1: rank r sends block (r-1) to rank (r+1) and receives block
* (r-2) from rank (r-1) [with wraparound].
* # 0 1 2 3 4
* [00] [00+10] [01+10+20] [30] [40]
@ -336,7 +318,7 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
* [33+43+03] [13] [23] [33] [33+43]
* [44+04] [44+04+14] [24] [34] [44]
*
* Step 2: rank r sends block (r-2) to rank (r+1) and receives block
* Step 2: rank r sends block (r-2) to rank (r+1) and receives block
* (r-3) from rank (r-1) [with wraparound].
* # 0 1 2 3 4
* [00] [00+10] [01+10+20] [01+10+20+30] [40]
@ -345,7 +327,7 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
* [33+43+03] [33+43+03+13] [23] [33] [33+43]
* [44+04] [44+04+14] [44+04+14+24] [34] [44]
*
* Step 3: rank r sends block (r-3) to rank (r+1) and receives block
* Step 3: rank r sends block (r-3) to rank (r+1) and receives block
* (r-4) from rank (r-1) [with wraparound].
* # 0 1 2 3 4
* [00] [00+10] [01+10+20] [01+10+20+30] [FULL]
@ -353,16 +335,16 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
* [22+32+42+02] [FULL] [22] [22+32] [22+32+42]
* [33+43+03] [33+43+03+13] [FULL] [33] [33+43]
* [44+04] [44+04+14] [44+04+14+24] [FULL] [44]
*
*
* DISTRIBUTION PHASE: ring ALLGATHER with ranks shifted by 1.
*
*/
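A minimal standalone sketch, with example numbers only, of the early/late block split the ring relies on; it performs the same arithmetic that the COLL_BASE_COMPUTE_BLOCKCOUNT call and the block_offset computations in the function body below perform.

#include <stdio.h>

/* Split `count` elements into `nblocks` contiguous blocks: the first
   `split` blocks carry one extra element ("early"), the rest are "late". */
static void compute_blockcount(int count, int nblocks,
                               int *split, int *early, int *late)
{
    *early = *late = count / nblocks;
    *split = count % nblocks;
    if (0 != *split) (*early)++;
}

int main(void)
{
    const int count = 23, size = 5;        /* illustrative values */
    int split, early, late;
    compute_blockcount(count, size, &split, &early, &late);

    for (int block = 0; block < size; block++) {
        int offset = (block < split) ? block * early
                                     : block * late + split;
        int len    = (block < split) ? early : late;
        printf("block %d: offset %d, %d element(s)\n", block, offset, len);
    }
    return 0;
}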
int
ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
int
ompi_coll_base_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
mca_coll_base_module_t *module)
{
int ret, line, rank, size, k, recv_from, send_to, block_count, inbi;
int early_segcount, late_segcount, split_rank, max_segcount;
@ -375,9 +357,9 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allreduce_intra_ring rank %d, count %d", rank, count));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allreduce_intra_ring rank %d, count %d", rank, count));
/* Special case for size == 1 */
if (1 == size) {
if (MPI_IN_PLACE != sbuf) {
@ -389,10 +371,10 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
/* Special case for count less than size - use recursive doubling */
if (count < size) {
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:allreduce_ring rank %d/%d, count %d, switching to recursive doubling", rank, size, count));
return (ompi_coll_tuned_allreduce_intra_recursivedoubling(sbuf, rbuf,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:allreduce_ring rank %d/%d, count %d, switching to recursive doubling", rank, size, count));
return (ompi_coll_base_allreduce_intra_recursivedoubling(sbuf, rbuf,
count,
dtype, op,
dtype, op,
comm, module));
}
@ -404,14 +386,14 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
ret = ompi_datatype_type_size( dtype, &typelng);
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
/* Determine the number of elements per block and corresponding
/* Determine the number of elements per block and corresponding
block sizes.
The blocks are divided into "early" and "late" ones:
blocks 0 .. (split_rank - 1) are "early" and
blocks 0 .. (split_rank - 1) are "early" and
blocks (split_rank) .. (size - 1) are "late".
Early blocks are at most 1 element larger than the late ones.
*/
COLL_TUNED_COMPUTE_BLOCKCOUNT( count, size, split_rank,
COLL_BASE_COMPUTE_BLOCKCOUNT( count, size, split_rank,
early_segcount, late_segcount );
max_segcount = early_segcount;
max_real_segsize = true_extent + (max_segcount - 1) * extent;
@ -432,7 +414,7 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
/* Computation loop */
/*
/*
For each of the remote nodes:
- post irecv for block (r-1)
- send block (r)
@ -456,8 +438,8 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
MCA_COLL_BASE_TAG_ALLREDUCE, comm, &reqs[inbi]));
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
/* Send first block (my block) to the neighbor on the right */
block_offset = ((rank < split_rank)?
((ptrdiff_t)rank * (ptrdiff_t)early_segcount) :
block_offset = ((rank < split_rank)?
((ptrdiff_t)rank * (ptrdiff_t)early_segcount) :
((ptrdiff_t)rank * (ptrdiff_t)late_segcount + split_rank));
block_count = ((rank < split_rank)? early_segcount : late_segcount);
tmpsend = ((char*)rbuf) + block_offset * extent;
@ -465,21 +447,21 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
MCA_COLL_BASE_TAG_ALLREDUCE,
MCA_PML_BASE_SEND_STANDARD, comm));
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
for (k = 2; k < size; k++) {
const int prevblock = (rank + size - k + 1) % size;
inbi = inbi ^ 0x1;
/* Post irecv for the current block */
ret = MCA_PML_CALL(irecv(inbuf[inbi], max_segcount, dtype, recv_from,
MCA_COLL_BASE_TAG_ALLREDUCE, comm, &reqs[inbi]));
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
/* Wait on previous block to arrive */
ret = ompi_request_wait(&reqs[inbi ^ 0x1], MPI_STATUS_IGNORE);
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
/* Apply operation on previous block: result goes to rbuf
rbuf[prevblock] = inbuf[inbi ^ 0x1] (op) rbuf[prevblock]
*/
@ -489,7 +471,7 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
block_count = ((prevblock < split_rank)? early_segcount : late_segcount);
tmprecv = ((char*)rbuf) + (ptrdiff_t)block_offset * extent;
ompi_op_reduce(op, inbuf[inbi ^ 0x1], tmprecv, block_count, dtype);
/* send previous block to send_to */
ret = MCA_PML_CALL(send(tmprecv, block_count, dtype, send_to,
MCA_COLL_BASE_TAG_ALLREDUCE,
@ -501,7 +483,7 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
ret = ompi_request_wait(&reqs[inbi], MPI_STATUS_IGNORE);
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
/* Apply operation on the last block (from neighbor (rank + 1)
/* Apply operation on the last block (from neighbor (rank + 1)
rbuf[rank+1] = inbuf[inbi] (op) rbuf[rank + 1] */
recv_from = (rank + 1) % size;
block_offset = ((recv_from < split_rank)?
@ -510,28 +492,28 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
block_count = ((recv_from < split_rank)? early_segcount : late_segcount);
tmprecv = ((char*)rbuf) + (ptrdiff_t)block_offset * extent;
ompi_op_reduce(op, inbuf[inbi], tmprecv, block_count, dtype);
/* Distribution loop - variation of ring allgather */
send_to = (rank + 1) % size;
recv_from = (rank + size - 1) % size;
for (k = 0; k < size - 1; k++) {
const int recv_data_from = (rank + size - k) % size;
const int send_data_from = (rank + 1 + size - k) % size;
const int send_block_offset =
const int send_block_offset =
((send_data_from < split_rank)?
((ptrdiff_t)send_data_from * early_segcount) :
((ptrdiff_t)send_data_from * late_segcount + split_rank));
const int recv_block_offset =
const int recv_block_offset =
((recv_data_from < split_rank)?
((ptrdiff_t)recv_data_from * early_segcount) :
((ptrdiff_t)recv_data_from * late_segcount + split_rank));
block_count = ((send_data_from < split_rank)?
block_count = ((send_data_from < split_rank)?
early_segcount : late_segcount);
tmprecv = (char*)rbuf + (ptrdiff_t)recv_block_offset * extent;
tmpsend = (char*)rbuf + (ptrdiff_t)send_block_offset * extent;
ret = ompi_coll_tuned_sendrecv(tmpsend, block_count, dtype, send_to,
ret = ompi_coll_base_sendrecv(tmpsend, block_count, dtype, send_to,
MCA_COLL_BASE_TAG_ALLREDUCE,
tmprecv, max_segcount, dtype, recv_from,
MCA_COLL_BASE_TAG_ALLREDUCE,
@ -546,7 +528,7 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
return MPI_SUCCESS;
error_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tRank %d Error occurred %d\n",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n",
__FILE__, line, rank, ret));
if (NULL != inbuf[0]) free(inbuf[0]);
if (NULL != inbuf[1]) free(inbuf[1]);
@ -554,30 +536,30 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
}
/*
* ompi_coll_tuned_allreduce_intra_ring_segmented
* ompi_coll_base_allreduce_intra_ring_segmented
*
* Function: Pipelined ring algorithm for allreduce operation
* Accepts: Same as MPI_Allreduce(), segment size
* Returns: MPI_SUCCESS or error code
*
* Description: Implements pipelined ring algorithm for allreduce:
* Description: Implements pipelined ring algorithm for allreduce:
* user supplies suggested segment size for the pipelining of
* reduce operation.
* The segment size determines the number of phases, np, for
* the algorithm execution.
* The message is automatically divided into blocks of
* The segment size determines the number of phases, np, for
* the algorithm execution.
* The message is automatically divided into blocks of
* approximately (count / (np * segcount)) elements.
* At the end of reduction phase, allgather like step is
* At the end of reduction phase, allgather like step is
* executed.
* Algorithm requires (np + 1)*(N - 1) steps.
*
* Limitations: The algorithm DOES NOT preserve order of operations so it
* Limitations: The algorithm DOES NOT preserve order of operations so it
* can be used only for commutative operations.
* In addition, algorithm cannot work if the total size is
* In addition, algorithm cannot work if the total size is
* less than size * segment size.
* Example on 3 nodes with 2 phases
* Initial state
* # 0 1 2
* # 0 1 2
* [00a] [10a] [20a]
* [00b] [10b] [20b]
* [01a] [11a] [21a]
@ -586,9 +568,9 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
* [02b] [12b] [22b]
*
* COMPUTATION PHASE 0 (a)
* Step 0: rank r sends block ra to rank (r+1) and receives block (r-1)a
* Step 0: rank r sends block ra to rank (r+1) and receives block (r-1)a
* from rank (r-1) [with wraparound].
* # 0 1 2
* # 0 1 2
* [00a] [00a+10a] [20a]
* [00b] [10b] [20b]
* [01a] [11a] [11a+21a]
@ -596,20 +578,20 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
* [22a+02a] [12a] [22a]
* [02b] [12b] [22b]
*
* Step 1: rank r sends block (r-1)a to rank (r+1) and receives block
* Step 1: rank r sends block (r-1)a to rank (r+1) and receives block
* (r-2)a from rank (r-1) [with wraparound].
* # 0 1 2
* # 0 1 2
* [00a] [00a+10a] [00a+10a+20a]
* [00b] [10b] [20b]
* [11a+21a+01a] [11a] [11a+21a]
* [01b] [11b] [21b]
* [22a+02a] [22a+02a+12a] [22a]
* [02b] [12b] [22b]
* [02b] [12b] [22b]
*
* COMPUTATION PHASE 1 (b)
* Step 0: rank r sends block rb to rank (r+1) and receives block (r-1)b
* Step 0: rank r sends block rb to rank (r+1) and receives block (r-1)b
* from rank (r-1) [with wraparound].
* # 0 1 2
* # 0 1 2
* [00a] [00a+10a] [20a]
* [00b] [00b+10b] [20b]
* [01a] [11a] [11a+21a]
@ -617,31 +599,31 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
* [22a+02a] [12a] [22a]
* [22b+02b] [12b] [22b]
*
* Step 1: rank r sends block (r-1)b to rank (r+1) and receives block
* Step 1: rank r sends block (r-1)b to rank (r+1) and receives block
* (r-2)b from rank (r-1) [with wraparound].
* # 0 1 2
* # 0 1 2
* [00a] [00a+10a] [00a+10a+20a]
* [00b] [10b] [00b+10b+20b]
* [11a+21a+01a] [11a] [11a+21a]
* [11b+21b+01b] [11b] [21b]
* [22a+02a] [22a+02a+12a] [22a]
* [02b] [22b+01b+12b] [22b]
* [02b] [22b+01b+12b] [22b]
*
*
*
* DISTRIBUTION PHASE: ring ALLGATHER with ranks shifted by 1 (same as
* in regular ring algorithm.
*
*/
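A minimal standalone sketch, with example numbers only, of how the phase count falls out of the requested segment size; the segment-count step is a simplification of the COLL_BASE_COMPUTED_SEGCOUNT macro used in the body below (its rounding refinements are omitted).

#include <stdio.h>

int main(void)
{
    const int count = 1000;          /* elements in the reduction            */
    const int size = 4;              /* communicator size                    */
    const size_t typelng = 8;        /* bytes per element (e.g. double)      */
    const unsigned segsize = 1024;   /* requested segment size in bytes      */

    /* Simplified segment-count computation: elements per segment. */
    int segcount = count;
    if (segsize >= typelng && segsize < typelng * (size_t)segcount)
        segcount = (int)(segsize / typelng);           /* 1024 / 8 = 128 */

    /* Number of pipeline phases, as in the function body below. */
    int num_phases = count / (size * segcount);        /* 1000 / 512 = 1 */
    if ((count % (size * segcount) >= size) &&
        (count % (size * segcount) > ((size * segcount) / 2)))
        num_phases++;                                   /* remainder 488 > 256 */

    printf("segcount = %d, num_phases = %d\n", segcount, num_phases);
    return 0;
}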
int
ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count,
int
ompi_coll_base_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
uint32_t segsize)
uint32_t segsize)
{
int ret, line, rank, size, k, recv_from, send_to;
int early_blockcount, late_blockcount, split_rank;
int early_blockcount, late_blockcount, split_rank;
int segcount, max_segcount, num_phases, phase, block_count, inbi;
size_t typelng;
char *tmpsend = NULL, *tmprecv = NULL, *inbuf[2] = {NULL, NULL};
@ -652,9 +634,9 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allreduce_intra_ring_segmented rank %d, count %d", rank, count));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allreduce_intra_ring_segmented rank %d, count %d", rank, count));
/* Special case for size == 1 */
if (1 == size) {
if (MPI_IN_PLACE != sbuf) {
@ -672,34 +654,34 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
ret = ompi_datatype_type_size( dtype, &typelng);
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
segcount = count;
COLL_TUNED_COMPUTED_SEGCOUNT(segsize, typelng, segcount)
COLL_BASE_COMPUTED_SEGCOUNT(segsize, typelng, segcount)
/* Special case for count less than size * segcount - use regular ring */
if (count < (size * segcount)) {
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:allreduce_ring_segmented rank %d/%d, count %d, switching to regular ring", rank, size, count));
return (ompi_coll_tuned_allreduce_intra_ring(sbuf, rbuf, count, dtype, op,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:allreduce_ring_segmented rank %d/%d, count %d, switching to regular ring", rank, size, count));
return (ompi_coll_base_allreduce_intra_ring(sbuf, rbuf, count, dtype, op,
comm, module));
}
/* Determine the number of phases of the algorithm */
num_phases = count / (size * segcount);
if ((count % (size * segcount) >= size) &&
if ((count % (size * segcount) >= size) &&
(count % (size * segcount) > ((size * segcount) / 2))) {
num_phases++;
}
/* Determine the number of elements per block and corresponding
/* Determine the number of elements per block and corresponding
block sizes.
The blocks are divided into "early" and "late" ones:
blocks 0 .. (split_rank - 1) are "early" and
blocks 0 .. (split_rank - 1) are "early" and
blocks (split_rank) .. (size - 1) are "late".
Early blocks are at most 1 element larger than the late ones.
Note, these blocks will be split into num_phases segments,
out of the largest one will have max_segcount elements.
*/
COLL_TUNED_COMPUTE_BLOCKCOUNT( count, size, split_rank,
COLL_BASE_COMPUTE_BLOCKCOUNT( count, size, split_rank,
early_blockcount, late_blockcount );
COLL_TUNED_COMPUTE_BLOCKCOUNT( early_blockcount, num_phases, inbi,
COLL_BASE_COMPUTE_BLOCKCOUNT( early_blockcount, num_phases, inbi,
max_segcount, k);
max_real_segsize = true_extent + (ptrdiff_t)(max_segcount - 1) * extent;
@ -722,7 +704,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
ptrdiff_t phase_offset;
int early_phase_segcount, late_phase_segcount, split_phase, phase_count;
/*
/*
For each of the remote nodes:
- post irecv for block (r-1)
- send block (r)
@ -741,7 +723,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
*/
send_to = (rank + 1) % size;
recv_from = (rank + size - 1) % size;
inbi = 0;
/* Initialize first receive from the neighbor on the left */
ret = MCA_PML_CALL(irecv(inbuf[inbi], max_segcount, dtype, recv_from,
@ -750,81 +732,81 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
/* Send first block (my block) to the neighbor on the right:
- compute my block and phase offset
- send data */
block_offset = ((rank < split_rank)?
((ptrdiff_t)rank * (ptrdiff_t)early_blockcount) :
block_offset = ((rank < split_rank)?
((ptrdiff_t)rank * (ptrdiff_t)early_blockcount) :
((ptrdiff_t)rank * (ptrdiff_t)late_blockcount + split_rank));
block_count = ((rank < split_rank)? early_blockcount : late_blockcount);
COLL_TUNED_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
COLL_BASE_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
early_phase_segcount, late_phase_segcount)
phase_count = ((phase < split_phase)?
(early_phase_segcount) : (late_phase_segcount));
phase_offset = ((phase < split_phase)?
((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) :
((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) :
((ptrdiff_t)phase * (ptrdiff_t)late_phase_segcount + split_phase));
tmpsend = ((char*)rbuf) + (ptrdiff_t)(block_offset + phase_offset) * extent;
ret = MCA_PML_CALL(send(tmpsend, phase_count, dtype, send_to,
MCA_COLL_BASE_TAG_ALLREDUCE,
MCA_PML_BASE_SEND_STANDARD, comm));
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
for (k = 2; k < size; k++) {
const int prevblock = (rank + size - k + 1) % size;
inbi = inbi ^ 0x1;
/* Post irecv for the current block */
ret = MCA_PML_CALL(irecv(inbuf[inbi], max_segcount, dtype, recv_from,
MCA_COLL_BASE_TAG_ALLREDUCE, comm,
MCA_COLL_BASE_TAG_ALLREDUCE, comm,
&reqs[inbi]));
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
/* Wait on previous block to arrive */
ret = ompi_request_wait(&reqs[inbi ^ 0x1], MPI_STATUS_IGNORE);
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
/* Apply operation on previous block: result goes to rbuf
rbuf[prevblock] = inbuf[inbi ^ 0x1] (op) rbuf[prevblock]
*/
block_offset = ((prevblock < split_rank)?
((ptrdiff_t)prevblock * (ptrdiff_t)early_blockcount) :
((ptrdiff_t)prevblock * (ptrdiff_t)late_blockcount + split_rank));
block_count = ((prevblock < split_rank)?
block_count = ((prevblock < split_rank)?
early_blockcount : late_blockcount);
COLL_TUNED_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
COLL_BASE_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
early_phase_segcount, late_phase_segcount)
phase_count = ((phase < split_phase)?
(early_phase_segcount) : (late_phase_segcount));
phase_offset = ((phase < split_phase)?
((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) :
((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) :
((ptrdiff_t)phase * (ptrdiff_t)late_phase_segcount + split_phase));
tmprecv = ((char*)rbuf) + (ptrdiff_t)(block_offset + phase_offset) * extent;
ompi_op_reduce(op, inbuf[inbi ^ 0x1], tmprecv, phase_count, dtype);
/* send previous block to send_to */
ret = MCA_PML_CALL(send(tmprecv, phase_count, dtype, send_to,
MCA_COLL_BASE_TAG_ALLREDUCE,
MCA_PML_BASE_SEND_STANDARD, comm));
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
}
/* Wait on the last block to arrive */
ret = ompi_request_wait(&reqs[inbi], MPI_STATUS_IGNORE);
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
/* Apply operation on the last block (from neighbor (rank + 1)
/* Apply operation on the last block (from neighbor (rank + 1)
rbuf[rank+1] = inbuf[inbi] (op) rbuf[rank + 1] */
recv_from = (rank + 1) % size;
block_offset = ((recv_from < split_rank)?
((ptrdiff_t)recv_from * (ptrdiff_t)early_blockcount) :
((ptrdiff_t)recv_from * (ptrdiff_t)late_blockcount + split_rank));
block_count = ((recv_from < split_rank)?
block_count = ((recv_from < split_rank)?
early_blockcount : late_blockcount);
COLL_TUNED_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
COLL_BASE_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
early_phase_segcount, late_phase_segcount)
phase_count = ((phase < split_phase)?
(early_phase_segcount) : (late_phase_segcount));
phase_offset = ((phase < split_phase)?
((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) :
((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) :
((ptrdiff_t)phase * (ptrdiff_t)late_phase_segcount + split_phase));
tmprecv = ((char*)rbuf) + (ptrdiff_t)(block_offset + phase_offset) * extent;
ompi_op_reduce(op, inbuf[inbi], tmprecv, phase_count, dtype);
@ -836,21 +818,21 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
for (k = 0; k < size - 1; k++) {
const int recv_data_from = (rank + size - k) % size;
const int send_data_from = (rank + 1 + size - k) % size;
const int send_block_offset =
const int send_block_offset =
((send_data_from < split_rank)?
((ptrdiff_t)send_data_from * (ptrdiff_t)early_blockcount) :
((ptrdiff_t)send_data_from * (ptrdiff_t)late_blockcount + split_rank));
const int recv_block_offset =
const int recv_block_offset =
((recv_data_from < split_rank)?
((ptrdiff_t)recv_data_from * (ptrdiff_t)early_blockcount) :
((ptrdiff_t)recv_data_from * (ptrdiff_t)late_blockcount + split_rank));
block_count = ((send_data_from < split_rank)?
block_count = ((send_data_from < split_rank)?
early_blockcount : late_blockcount);
tmprecv = (char*)rbuf + (ptrdiff_t)recv_block_offset * extent;
tmpsend = (char*)rbuf + (ptrdiff_t)send_block_offset * extent;
ret = ompi_coll_tuned_sendrecv(tmpsend, block_count, dtype, send_to,
ret = ompi_coll_base_sendrecv(tmpsend, block_count, dtype, send_to,
MCA_COLL_BASE_TAG_ALLREDUCE,
tmprecv, early_blockcount, dtype, recv_from,
MCA_COLL_BASE_TAG_ALLREDUCE,
@ -865,7 +847,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
return MPI_SUCCESS;
error_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tRank %d Error occurred %d\n",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n",
__FILE__, line, rank, ret));
if (NULL != inbuf[0]) free(inbuf[0]);
if (NULL != inbuf[1]) free(inbuf[1]);
@ -875,8 +857,8 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
/*
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
* but for some small number of nodes and/or small data sizes they
* are just as fast as tuned/tree based segmenting operations
* but for some small number of nodes and/or small data sizes they
* are just as fast as base/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
@ -895,7 +877,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_tuned_allreduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
ompi_coll_base_allreduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
@ -905,158 +887,28 @@ ompi_coll_tuned_allreduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_basic_linear rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:allreduce_intra_basic_linear rank %d", rank));
/* Reduce to 0 and broadcast. */
if (MPI_IN_PLACE == sbuf) {
if (0 == rank) {
err = ompi_coll_tuned_reduce_intra_basic_linear (MPI_IN_PLACE, rbuf, count, dtype,
err = ompi_coll_base_reduce_intra_basic_linear (MPI_IN_PLACE, rbuf, count, dtype,
op, 0, comm, module);
} else {
err = ompi_coll_tuned_reduce_intra_basic_linear(rbuf, NULL, count, dtype,
err = ompi_coll_base_reduce_intra_basic_linear(rbuf, NULL, count, dtype,
op, 0, comm, module);
}
} else {
err = ompi_coll_tuned_reduce_intra_basic_linear(sbuf, rbuf, count, dtype,
err = ompi_coll_base_reduce_intra_basic_linear(sbuf, rbuf, count, dtype,
op, 0, comm, module);
}
if (MPI_SUCCESS != err) {
return err;
}
return ompi_coll_tuned_bcast_intra_basic_linear(rbuf, count, dtype, 0, comm, module);
return ompi_coll_base_bcast_intra_basic_linear(rbuf, count, dtype, 0, comm, module);
}
/* copied function (with appropriate renaming) ends here */
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if its forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values and perms */
/* module does not call this they call the forced_getvalues routine instead */
int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[ALLREDUCE] = coll_tuned_allreduce_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_count",
"Number of allreduce algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_allreduce_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_allreduce_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_allreduce_algorithms", allreduce_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm",
"Which allreduce algorithm is used. Can be locked down to any of: 0 ignore, 1 basic linear, 2 nonoverlapping (tuned reduce + tuned bcast), 3 recursive doubling, 4 ring, 5 segmented ring",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allreduce_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_allreduce_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_segmentsize",
"Segment size in bytes used by default for allreduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allreduce_segment_size);
coll_tuned_allreduce_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_tree_fanout",
"Fanout for n-tree used for allreduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allreduce_tree_fanout);
coll_tuned_allreduce_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_chain_fanout",
"Fanout for chains used for allreduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allreduce_chain_fanout);
return (MPI_SUCCESS);
}
int ompi_coll_tuned_allreduce_intra_do_forced(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced selected algorithm %d, segment size %d",
data->user_forced[ALLREDUCE].algorithm,
data->user_forced[ALLREDUCE].segsize));
switch (data->user_forced[ALLREDUCE].algorithm) {
case (0): return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm, module);
case (1): return ompi_coll_tuned_allreduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, comm, module);
case (2): return ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm, module);
case (3): return ompi_coll_tuned_allreduce_intra_recursivedoubling (sbuf, rbuf, count, dtype, op, comm, module);
case (4): return ompi_coll_tuned_allreduce_intra_ring (sbuf, rbuf, count, dtype, op, comm, module);
case (5): return ompi_coll_tuned_allreduce_intra_ring_segmented (sbuf, rbuf, count, dtype, op, comm, module, data->user_forced[ALLREDUCE].segsize);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[ALLREDUCE].algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLREDUCE]));
return (MPI_ERR_ARG);
} /* switch */
}
int ompi_coll_tuned_allreduce_intra_do_this(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this algorithm %d topo fan in/out %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0): return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm, module);
case (1): return ompi_coll_tuned_allreduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, comm, module);
case (2): return ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm, module);
case (3): return ompi_coll_tuned_allreduce_intra_recursivedoubling (sbuf, rbuf, count, dtype, op, comm, module);
case (4): return ompi_coll_tuned_allreduce_intra_ring (sbuf, rbuf, count, dtype, op, comm, module);
case (5): return ompi_coll_tuned_allreduce_intra_ring_segmented (sbuf, rbuf, count, dtype, op, comm, module, segsize);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[ALLREDUCE]));
return (MPI_ERR_ARG);
} /* switch */
}

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -30,37 +30,18 @@
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/* alltoall algorithm variables */
static int coll_tuned_alltoall_algorithm_count = 5;
static int coll_tuned_alltoall_forced_algorithm = 0;
static int coll_tuned_alltoall_segment_size = 0;
static int coll_tuned_alltoall_max_requests;
static int coll_tuned_alltoall_tree_fanout;
static int coll_tuned_alltoall_chain_fanout;
/* valid values for coll_tuned_alltoall_forced_algorithm */
static mca_base_var_enum_value_t alltoall_algorithms[] = {
{0, "ignore"},
{1, "linear"},
{2, "pairwise"},
{3, "modified_bruck"},
{4, "linear_sync"},
{5, "two_proc"},
{0, NULL}
};
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"
/* MPI_IN_PLACE all to all algorithm. TODO: implement a better one. */
static int
mca_coll_tuned_alltoall_intra_basic_inplace(void *rbuf, int rcount,
mca_coll_base_alltoall_intra_basic_inplace(void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
int i, j, size, rank, err=MPI_SUCCESS;
MPI_Request *preq;
char *tmp_buffer;
@ -91,7 +72,7 @@ mca_coll_tuned_alltoall_intra_basic_inplace(void *rbuf, int rcount,
for (i = 0 ; i < size ; ++i) {
for (j = i+1 ; j < size ; ++j) {
/* Initiate all send/recv to/from others. */
preq = tuned_module->tuned_data->mcct_reqs;
preq = base_module->base_data->mcct_reqs;
if (i == rank) {
/* Copy the data into the temporary buffer */
@ -128,11 +109,8 @@ mca_coll_tuned_alltoall_intra_basic_inplace(void *rbuf, int rcount,
}
/* Wait for the requests to complete */
err = ompi_request_wait_all (2, tuned_module->tuned_data->mcct_reqs, MPI_STATUSES_IGNORE);
err = ompi_request_wait_all (2, base_module->base_data->mcct_reqs, MPI_STATUSES_IGNORE);
if (MPI_SUCCESS != err) { goto error_hndl; }
/* Free the requests. */
mca_coll_tuned_free_reqs(tuned_module->tuned_data->mcct_reqs, 2);
}
}
@ -145,7 +123,7 @@ mca_coll_tuned_alltoall_intra_basic_inplace(void *rbuf, int rcount,
return err;
}
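
The in-place helper above swaps blocks pairwise through the communicator's preallocated request pair. The same data movement can be expressed much more compactly with plain MPI, at the cost of less overlap; the sketch below is purely illustrative and is not the code path taken here.

#include <mpi.h>

/* Sketch only: in-place alltoall expressed with MPI_Sendrecv_replace.
 * Block p of rbuf (the data destined for rank p) is swapped directly with
 * peer p, so afterwards block p holds the data received from p.  The
 * increasing peer order always leaves a matched pair, so it cannot deadlock. */
static int alltoall_inplace_sketch(void *rbuf, int rcount, MPI_Datatype rdtype,
                                   MPI_Comm comm)
{
    int rank, size, p, err;
    MPI_Aint lb, ext;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    MPI_Type_get_extent(rdtype, &lb, &ext);
    for (p = 0; p < size; p++) {
        if (p == rank) continue;                  /* own block stays in place */
        err = MPI_Sendrecv_replace((char *)rbuf + (MPI_Aint)p * rcount * ext,
                                   rcount, rdtype, p, 0, p, 0,
                                   comm, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS != err) return err;
    }
    return MPI_SUCCESS;
}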
int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
int ompi_coll_base_alltoall_intra_pairwise(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -157,22 +135,22 @@ int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
ptrdiff_t lb, sext, rext;
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
return mca_coll_base_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
comm, module);
}
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoall_intra_pairwise rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:alltoall_intra_pairwise rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &lb, &sext);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
err = ompi_datatype_get_extent (rdtype, &lb, &rext);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* Perform pairwise exchange - starting from 1 so the local copy is last */
for (step = 1; step < size + 1; step++) {
@ -185,25 +163,25 @@ int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
tmprecv = (char*)rbuf + (ptrdiff_t)recvfrom * rext * (ptrdiff_t)rcount;
/* send and receive */
err = ompi_coll_tuned_sendrecv( tmpsend, scount, sdtype, sendto,
err = ompi_coll_base_sendrecv( tmpsend, scount, sdtype, sendto,
MCA_COLL_BASE_TAG_ALLTOALL,
tmprecv, rcount, rdtype, recvfrom,
tmprecv, rcount, rdtype, recvfrom,
MCA_COLL_BASE_TAG_ALLTOALL,
comm, MPI_STATUS_IGNORE, rank);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
}
return MPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"%s:%4d\tError occurred %d, rank %2d", __FILE__, line,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"%s:%4d\tError occurred %d, rank %2d", __FILE__, line,
err, rank));
return err;
}
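
As a standalone illustration of the pairwise schedule used above, the sketch below expresses it with plain MPI_Sendrecv calls; the names and the tag value are arbitrary, and it assumes the usual MPI_Alltoall argument conventions.

#include <mpi.h>

/* Sketch only: pairwise-exchange alltoall.  At step s every rank sends its
 * block for (rank + s) % size and receives the block coming from
 * (rank - s + size) % size, so each step is a shifted permutation. */
static int alltoall_pairwise_sketch(const void *sbuf, int scount, MPI_Datatype sdtype,
                                    void *rbuf, int rcount, MPI_Datatype rdtype,
                                    MPI_Comm comm)
{
    int rank, size, step, err;
    MPI_Aint lb, sext, rext;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    MPI_Type_get_extent(sdtype, &lb, &sext);
    MPI_Type_get_extent(rdtype, &lb, &rext);
    /* step 0 is the local copy (send to and receive from ourselves) */
    for (step = 0; step < size; step++) {
        int sendto   = (rank + step) % size;
        int recvfrom = (rank - step + size) % size;
        err = MPI_Sendrecv((const char *)sbuf + (MPI_Aint)sendto * scount * sext,
                           scount, sdtype, sendto, 0,
                           (char *)rbuf + (MPI_Aint)recvfrom * rcount * rext,
                           rcount, rdtype, recvfrom, 0,
                           comm, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS != err) return err;
    }
    return MPI_SUCCESS;
}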
int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
int ompi_coll_base_alltoall_intra_bruck(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -216,20 +194,20 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
ptrdiff_t rlb, slb, tlb, sext, rext, tsext;
struct ompi_datatype_t *new_ddt;
#ifdef blahblah
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
#endif
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
return mca_coll_base_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
comm, module);
}
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoall_intra_bruck rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:alltoall_intra_bruck rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &slb, &sext);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
@ -242,14 +220,14 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
#ifdef blahblah
/* try and SAVE memory by using the data segment hung off
/* try and SAVE memory by using the data segment hung off
the communicator if possible */
if (data->mcct_num_reqs >= size) {
if (data->mcct_num_reqs >= size) {
/* we have enough preallocated for displacements and lengths */
displs = (int*) data->mcct_reqs;
blen = (int *) (displs + size);
weallocated = 0;
}
}
else { /* allocate the buffers ourself */
#endif
displs = (int *) malloc(size * sizeof(int));
@ -267,9 +245,9 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
tmpbuf = tmpbuf_free - slb;
/* Step 1 - local rotation - shift up by rank */
err = ompi_datatype_copy_content_same_ddt (sdtype,
err = ompi_datatype_copy_content_same_ddt (sdtype,
(int32_t) ((ptrdiff_t)(size - rank) * (ptrdiff_t)scount),
tmpbuf,
tmpbuf,
((char*) sbuf) + (ptrdiff_t)rank * (ptrdiff_t)scount * sext);
if (err<0) {
line = __LINE__; err = -1; goto err_hndl;
@ -277,7 +255,7 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
if (rank != 0) {
err = ompi_datatype_copy_content_same_ddt (sdtype, (ptrdiff_t)rank * (ptrdiff_t)scount,
tmpbuf + (ptrdiff_t)(size - rank) * (ptrdiff_t)scount* sext,
tmpbuf + (ptrdiff_t)(size - rank) * (ptrdiff_t)scount* sext,
(char*) sbuf);
if (err<0) {
line = __LINE__; err = -1; goto err_hndl;
@ -294,7 +272,7 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
/* create indexed datatype */
for (i = 1; i < size; i++) {
if (( i & distance) == distance) {
displs[k] = (ptrdiff_t)i * (ptrdiff_t)scount;
displs[k] = (ptrdiff_t)i * (ptrdiff_t)scount;
blen[k] = scount;
k++;
}
@ -307,7 +285,7 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* Sendreceive */
err = ompi_coll_tuned_sendrecv ( tmpbuf, 1, new_ddt, sendto,
err = ompi_coll_base_sendrecv ( tmpbuf, 1, new_ddt, sendto,
MCA_COLL_BASE_TAG_ALLTOALL,
rbuf, 1, new_ddt, recvfrom,
MCA_COLL_BASE_TAG_ALLTOALL,
@ -327,7 +305,7 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
for (i = 0; i < size; i++) {
err = ompi_datatype_copy_content_same_ddt (rdtype, (int32_t) rcount,
((char*)rbuf) + ((ptrdiff_t)((rank - i + size) % size) * (ptrdiff_t)rcount * rext),
((char*)rbuf) + ((ptrdiff_t)((rank - i + size) % size) * (ptrdiff_t)rcount * rext),
tmpbuf + (ptrdiff_t)i * (ptrdiff_t)rcount * rext);
if (err < 0) { line = __LINE__; err = -1; goto err_hndl; }
}
@ -341,8 +319,8 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
return OMPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err,
rank));
if (tmpbuf != NULL) free(tmpbuf_free);
if (displs != NULL) free(displs);
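
For readers unfamiliar with the Bruck algorithm driven above, the sketch below shows the same three phases (local rotation, log2(size) packed exchanges, inverse rotation) in plain MPI on fixed-size byte blocks; it uses explicit memcpy packing instead of the indexed datatype built above, and every name in it is illustrative only.

#include <mpi.h>
#include <stdlib.h>
#include <string.h>

/* Sketch only: Bruck-style alltoall on fixed-size blocklen-byte blocks.
 * ceil(log2(size)) exchange rounds instead of size-1, paid for with packing
 * and two local rotations. */
static int alltoall_bruck_sketch(const char *sbuf, char *rbuf,
                                 int blocklen, MPI_Comm comm)
{
    int rank, size, i, distance, err = MPI_SUCCESS;
    char *tmp, *packed, *incoming;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    tmp      = (char *)malloc((size_t)size * blocklen);
    packed   = (char *)malloc((size_t)size * blocklen);
    incoming = (char *)malloc((size_t)size * blocklen);
    if (NULL == tmp || NULL == packed || NULL == incoming) {
        err = MPI_ERR_NO_MEM; goto out;
    }
    /* Phase 1: local rotation so tmp[i] holds the block destined for (rank+i)%size */
    for (i = 0; i < size; i++)
        memcpy(tmp + (size_t)i * blocklen,
               sbuf + (size_t)((rank + i) % size) * blocklen, blocklen);
    /* Phase 2: round `distance` ships every block whose index has that bit set
     * to (rank+distance)%size and refills the same slots from (rank-distance+size)%size. */
    for (distance = 1; distance < size; distance <<= 1) {
        int sendto   = (rank + distance) % size;
        int recvfrom = (rank - distance + size) % size;
        int nblocks  = 0;
        for (i = 0; i < size; i++)
            if (i & distance)
                memcpy(packed + (size_t)(nblocks++) * blocklen,
                       tmp + (size_t)i * blocklen, blocklen);
        err = MPI_Sendrecv(packed, nblocks * blocklen, MPI_BYTE, sendto, 0,
                           incoming, nblocks * blocklen, MPI_BYTE, recvfrom, 0,
                           comm, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS != err) goto out;
        nblocks = 0;
        for (i = 0; i < size; i++)
            if (i & distance)
                memcpy(tmp + (size_t)i * blocklen,
                       incoming + (size_t)(nblocks++) * blocklen, blocklen);
    }
    /* Phase 3: inverse rotation; tmp[i] now holds the block received from
     * rank (rank-i+size)%size, which is where it belongs in rbuf. */
    for (i = 0; i < size; i++)
        memcpy(rbuf + (size_t)((rank - i + size) % size) * blocklen,
               tmp + (size_t)i * blocklen, blocklen);
out:
    free(tmp); free(packed); free(incoming);
    return err;
}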
@ -352,10 +330,10 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
/*
* alltoall_intra_linear_sync
*
*
* Function: Linear implementation of alltoall with limited number
* of outstanding requests.
* Accepts: Same as MPI_Alltoall(), and the maximum number of
* Accepts: Same as MPI_Alltoall(), and the maximum number of
* outstanding requests (actual number is 2 * max, since
* we count receive and send requests separately).
* Returns: MPI_SUCCESS or error code
@ -367,7 +345,7 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
* - wait for any request to complete
* - replace that request by the new one of the same type.
*/
int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
int ompi_coll_base_alltoall_intra_linear_sync(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -382,7 +360,7 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
ompi_request_t **reqs = NULL;
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
return mca_coll_base_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
comm, module);
}
@ -391,8 +369,8 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_alltoall_intra_linear_sync rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_alltoall_intra_linear_sync rank %d", rank));
error = ompi_datatype_get_extent(sdtype, &slb, &sext);
if (OMPI_SUCCESS != error) {
@ -423,18 +401,18 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
}
/* Initiate send/recv to/from others. */
total_reqs = (((max_outstanding_reqs > (size - 1)) ||
total_reqs = (((max_outstanding_reqs > (size - 1)) ||
(max_outstanding_reqs <= 0)) ?
(size - 1) : (max_outstanding_reqs));
reqs = (ompi_request_t**) malloc( 2 * total_reqs *
reqs = (ompi_request_t**) malloc( 2 * total_reqs *
sizeof(ompi_request_t*));
if (NULL == reqs) { error = -1; line = __LINE__; goto error_hndl; }
prcv = (char *) rbuf;
psnd = (char *) sbuf;
/* Post first batch or ireceive and isend requests */
for (nreqs = 0, nrreqs = 0, ri = (rank + 1) % size; nreqs < total_reqs;
for (nreqs = 0, nrreqs = 0, ri = (rank + 1) % size; nreqs < total_reqs;
ri = (ri + 1) % size, ++nreqs, ++nrreqs) {
error =
MCA_PML_CALL(irecv
@ -442,7 +420,7 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
MCA_COLL_BASE_TAG_ALLTOALL, comm, &reqs[nreqs]));
if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; }
}
for ( nsreqs = 0, si = (rank + size - 1) % size; nreqs < 2 * total_reqs;
for ( nsreqs = 0, si = (rank + size - 1) % size; nreqs < 2 * total_reqs;
si = (si + size - 1) % size, ++nreqs, ++nsreqs) {
error =
MCA_PML_CALL(isend
@ -457,12 +435,12 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
/* Optimization for the case when all requests have been posted */
error = ompi_request_wait_all(nreqs, reqs, MPI_STATUSES_IGNORE);
if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; }
} else {
/* As requests complete, replace them with corresponding requests:
- wait for any request to complete, mark the request as
- wait for any request to complete, mark the request as
MPI_REQUEST_NULL
- If it was a receive request, replace it with new irecv request
- If it was a receive request, replace it with new irecv request
(if any)
- if it was a send request, replace it with new isend request (if any)
*/
@ -476,10 +454,10 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
ncreqs++;
if (completed < total_reqs) {
if (nrreqs < (size - 1)) {
error =
error =
MCA_PML_CALL(irecv
(prcv + (ptrdiff_t)ri * rext, rcount, rdtype, ri,
MCA_COLL_BASE_TAG_ALLTOALL, comm,
MCA_COLL_BASE_TAG_ALLTOALL, comm,
&reqs[completed]));
if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; }
++nrreqs;
@ -493,7 +471,7 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
MCA_PML_BASE_SEND_STANDARD, comm,
&reqs[completed]));
++nsreqs;
si = (si + size - 1) % size;
si = (si + size - 1) % size;
}
}
}
@ -506,15 +484,15 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
return MPI_SUCCESS;
error_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, error,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, error,
rank));
if (NULL != reqs) free(reqs);
return error;
}
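
To make the wait-any-and-replace scheme described above concrete, here is a minimal standalone sketch in plain MPI on fixed-size byte blocks; the window parameter, the tag, and the peer ordering are arbitrary illustrative choices, not the ones used by the routine above.

#include <mpi.h>
#include <stdlib.h>
#include <string.h>

/* Sketch only: alltoall with at most `window` outstanding receives and
 * `window` outstanding sends.  Each completed request is replaced by the
 * next one of the same kind until all size-1 of each have been issued. */
static int alltoall_limited_reqs_sketch(const char *sbuf, char *rbuf,
                                        int blocklen, int window, MPI_Comm comm)
{
    int rank, size, idx, active;
    int next_recv = 1, next_send = 1;      /* peer offsets 1 .. size-1 */
    MPI_Request *reqs;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    memcpy(rbuf + (size_t)rank * blocklen,
           sbuf + (size_t)rank * blocklen, blocklen);   /* local block */
    if (1 == size) return MPI_SUCCESS;
    if (window <= 0 || window > size - 1) window = size - 1;
    reqs = (MPI_Request *)malloc(2 * (size_t)window * sizeof(MPI_Request));
    if (NULL == reqs) return MPI_ERR_NO_MEM;

    /* First batch: slots [0,window) are receives, [window,2*window) are sends. */
    for (idx = 0; idx < window; idx++, next_recv++) {
        int peer = (rank + next_recv) % size;
        MPI_Irecv(rbuf + (size_t)peer * blocklen, blocklen, MPI_BYTE,
                  peer, 0, comm, &reqs[idx]);
    }
    for (idx = window; idx < 2 * window; idx++, next_send++) {
        int peer = (rank + next_send) % size;
        MPI_Isend(sbuf + (size_t)peer * blocklen, blocklen, MPI_BYTE,
                  peer, 0, comm, &reqs[idx]);
    }
    /* Drain: every completion either re-arms its slot with the next peer of
     * the same kind or, once that kind is exhausted, retires the slot. */
    for (active = 2 * window; active > 0; ) {
        MPI_Waitany(2 * window, reqs, &idx, MPI_STATUS_IGNORE);
        if (idx < window && next_recv < size) {
            int peer = (rank + next_recv++) % size;
            MPI_Irecv(rbuf + (size_t)peer * blocklen, blocklen, MPI_BYTE,
                      peer, 0, comm, &reqs[idx]);
        } else if (idx >= window && next_send < size) {
            int peer = (rank + next_send++) % size;
            MPI_Isend(sbuf + (size_t)peer * blocklen, blocklen, MPI_BYTE,
                      peer, 0, comm, &reqs[idx]);
        } else {
            active--;
        }
    }
    free(reqs);
    return MPI_SUCCESS;
}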
int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
int ompi_coll_base_alltoall_intra_two_procs(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -526,14 +504,14 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
ptrdiff_t sext, rext, lb;
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
return mca_coll_base_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
comm, module);
}
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_alltoall_intra_two_procs rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_alltoall_intra_two_procs rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &lb, &sext);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
@ -548,17 +526,17 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
tmprecv = (char*)rbuf + (ptrdiff_t)remote * rext * (ptrdiff_t)rcount;
/* send and receive */
err = ompi_coll_tuned_sendrecv ( tmpsend, scount, sdtype, remote,
err = ompi_coll_base_sendrecv ( tmpsend, scount, sdtype, remote,
MCA_COLL_BASE_TAG_ALLTOALL,
tmprecv, rcount, rdtype, remote,
tmprecv, rcount, rdtype, remote,
MCA_COLL_BASE_TAG_ALLTOALL,
comm, MPI_STATUS_IGNORE, rank );
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* ddt sendrecv your own data */
err = ompi_datatype_sndrcv((char*) sbuf + (ptrdiff_t)rank * sext * (ptrdiff_t)scount,
(int32_t) scount, sdtype,
(char*) rbuf + (ptrdiff_t)rank * rext * (ptrdiff_t)rcount,
err = ompi_datatype_sndrcv((char*) sbuf + (ptrdiff_t)rank * sext * (ptrdiff_t)scount,
(int32_t) scount, sdtype,
(char*) rbuf + (ptrdiff_t)rank * rext * (ptrdiff_t)rcount,
(int32_t) rcount, rdtype);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
@ -566,7 +544,7 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
return MPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err,
rank));
return err;
@ -577,8 +555,8 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
/*
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
* but for some small number of nodes and/or small data sizes they
* are just as fast as tuned/tree based segmenting operations
* but for some small number of nodes and/or small data sizes they
* are just as fast as base/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
@ -588,7 +566,7 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
/* copied function (with appropriate renaming) starts here */
int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
int ompi_coll_base_alltoall_intra_basic_linear(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -599,11 +577,11 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
char *psnd, *prcv;
MPI_Aint lb, sndinc, rcvinc;
ompi_request_t **req, **sreq, **rreq;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
return mca_coll_base_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
comm, module);
}
@ -612,8 +590,8 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_alltoall_intra_basic_linear rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_alltoall_intra_basic_linear rank %d", rank));
err = ompi_datatype_get_extent(sdtype, &lb, &sndinc);
@ -654,23 +632,23 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
/* Post all receives first -- a simple optimization */
for (nreqs = 0, i = (rank + 1) % size; i != rank;
for (nreqs = 0, i = (rank + 1) % size; i != rank;
i = (i + 1) % size, ++rreq, ++nreqs) {
err =
MCA_PML_CALL(irecv_init
(prcv + (ptrdiff_t)i * rcvinc, rcount, rdtype, i,
MCA_COLL_BASE_TAG_ALLTOALL, comm, rreq));
if (MPI_SUCCESS != err) {
ompi_coll_tuned_free_reqs(req, rreq - req);
ompi_coll_base_free_reqs(req, rreq - req);
return err;
}
}
/* Now post all sends in reverse order
/* Now post all sends in reverse order
- We would like to minimize the search time through message queue
when messages actually arrive in the order in which they were posted.
*/
for (nreqs = 0, i = (rank + size - 1) % size; i != rank;
for (nreqs = 0, i = (rank + size - 1) % size; i != rank;
i = (i + size - 1) % size, ++sreq, ++nreqs) {
err =
MCA_PML_CALL(isend_init
@ -678,7 +656,7 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
MCA_COLL_BASE_TAG_ALLTOALL,
MCA_PML_BASE_SEND_STANDARD, comm, sreq));
if (MPI_SUCCESS != err) {
ompi_coll_tuned_free_reqs(req, sreq - req);
ompi_coll_base_free_reqs(req, sreq - req);
return err;
}
}
@ -698,165 +676,10 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
err = ompi_request_wait_all(nreqs, req, MPI_STATUSES_IGNORE);
/* Free the reqs */
ompi_coll_tuned_free_reqs(req, nreqs);
ompi_coll_base_free_reqs(req, nreqs);
/* All done */
return err;
}
/* copied function (with appropriate renaming) ends here */
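
For comparison, the same linear scheme in plain MPI, without the persistent-request machinery inherited from the basic module, fits in a few lines; the sketch below works on fixed-size byte blocks and is illustrative only.

#include <mpi.h>
#include <stdlib.h>
#include <string.h>

/* Sketch only: basic linear alltoall - post all size-1 receives, then all
 * size-1 sends in reverse order, and wait for everything at once. */
static int alltoall_linear_sketch(const char *sbuf, char *rbuf,
                                  int blocklen, MPI_Comm comm)
{
    int rank, size, i, nreqs = 0;
    MPI_Request *reqs;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    memcpy(rbuf + (size_t)rank * blocklen,
           sbuf + (size_t)rank * blocklen, blocklen);   /* local block */
    if (1 == size) return MPI_SUCCESS;
    reqs = (MPI_Request *)malloc(2 * (size_t)(size - 1) * sizeof(MPI_Request));
    if (NULL == reqs) return MPI_ERR_NO_MEM;
    /* receives first, so matching sends always find a posted receive */
    for (i = (rank + 1) % size; i != rank; i = (i + 1) % size)
        MPI_Irecv(rbuf + (size_t)i * blocklen, blocklen, MPI_BYTE,
                  i, 0, comm, &reqs[nreqs++]);
    /* sends in reverse order to spread the initial load away from rank+1 */
    for (i = (rank + size - 1) % size; i != rank; i = (i + size - 1) % size)
        MPI_Isend(sbuf + (size_t)i * blocklen, blocklen, MPI_BYTE,
                  i, 0, comm, &reqs[nreqs++]);
    MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE);
    free(reqs);
    return MPI_SUCCESS;
}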
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if its forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values and perms */
/* module does not call this they call the forced_getvalues routine instead */
int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t*new_enum;
ompi_coll_tuned_forced_max_algorithms[ALLTOALL] = coll_tuned_alltoall_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_count",
"Number of alltoall algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_alltoall_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_alltoall_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_alltoall_algorithms", alltoall_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm",
"Which alltoall algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 pairwise, 3: modified bruck, 4: linear with sync, 5:two proc only.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_alltoall_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_segmentsize",
"Segment size in bytes used by default for alltoall algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_segment_size);
coll_tuned_alltoall_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_tree_fanout",
"Fanout for n-tree used for alltoall algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_tree_fanout);
coll_tuned_alltoall_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_chain_fanout",
"Fanout for chains used for alltoall algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_chain_fanout);
coll_tuned_alltoall_max_requests = 0; /* no limit for alltoall by default */
mca_param_indices->max_requests_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_max_requests",
"Maximum number of outstanding send or recv requests. Only has meaning for synchronized algorithms.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_max_requests);
if (mca_param_indices->max_requests_param_index < 0) {
return mca_param_indices->max_requests_param_index;
}
if (coll_tuned_alltoall_max_requests < 0) {
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
opal_output( 0, "Maximum outstanding requests must be positive number greater than 1. Switching to system level default %d \n",
ompi_coll_tuned_init_max_requests );
}
coll_tuned_alltoall_max_requests = 0;
}
return (MPI_SUCCESS);
}
int ompi_coll_tuned_alltoall_intra_do_forced(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced selected algorithm %d",
data->user_forced[ALLTOALL].algorithm));
switch (data->user_forced[ALLTOALL].algorithm) {
case (0): return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (1): return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (2): return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (3): return ompi_coll_tuned_alltoall_intra_bruck (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (4): return ompi_coll_tuned_alltoall_intra_linear_sync (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module, data->user_forced[ALLTOALL].max_requests);
case (5): return ompi_coll_tuned_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[ALLTOALL].algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL]));
return (MPI_ERR_ARG);
} /* switch */
}
int ompi_coll_tuned_alltoall_intra_do_this(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize,
int max_requests)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0): return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (1): return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (2): return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (3): return ompi_coll_tuned_alltoall_intra_bruck (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (4): return ompi_coll_tuned_alltoall_intra_linear_sync (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module, max_requests);
case (5): return ompi_coll_tuned_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL]));
return (MPI_ERR_ARG);
} /* switch */
}

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -32,29 +32,17 @@
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/* alltoallv algorithm variables */
static int coll_tuned_alltoallv_algorithm_count = 2;
static int coll_tuned_alltoallv_forced_algorithm = 0;
/* valid values for coll_tuned_alltoallv_forced_algorithm */
static mca_base_var_enum_value_t alltoallv_algorithms[] = {
{0, "ignore"},
{1, "basic_linear"},
{2, "pairwise"},
{0, NULL}
};
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"
static int
mca_coll_tuned_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, const int *rdisps,
mca_coll_base_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, const int *rdisps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
int i, j, size, rank, err=MPI_SUCCESS;
MPI_Request *preq;
char *tmp_buffer;
@ -90,7 +78,7 @@ mca_coll_tuned_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, con
for (i = 0 ; i < size ; ++i) {
for (j = i+1 ; j < size ; ++j) {
/* Initiate all send/recv to/from others. */
preq = tuned_module->tuned_data->mcct_reqs;
preq = base_module->base_data->mcct_reqs;
if (i == rank && rcounts[j]) {
/* Copy the data into the temporary buffer */
@ -127,11 +115,8 @@ mca_coll_tuned_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, con
}
/* Wait for the requests to complete */
err = ompi_request_wait_all (2, tuned_module->tuned_data->mcct_reqs, MPI_STATUSES_IGNORE);
err = ompi_request_wait_all (2, base_module->base_data->mcct_reqs, MPI_STATUSES_IGNORE);
if (MPI_SUCCESS != err) { goto error_hndl; }
/* Free the requests. */
mca_coll_tuned_free_reqs(tuned_module->tuned_data->mcct_reqs, 2);
}
}
@ -145,7 +130,7 @@ mca_coll_tuned_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, con
}
int
ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps,
ompi_coll_base_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps,
struct ompi_datatype_t *sdtype,
void* rbuf, int *rcounts, int *rdisps,
struct ompi_datatype_t *rdtype,
@ -157,15 +142,15 @@ ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps,
ptrdiff_t sext, rext;
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoallv_intra_basic_inplace (rbuf, rcounts, rdisps,
return mca_coll_base_alltoallv_intra_basic_inplace (rbuf, rcounts, rdisps,
rdtype, comm, module);
}
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoallv_intra_pairwise rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:alltoallv_intra_pairwise rank %d", rank));
ompi_datatype_type_extent(sdtype, &sext);
ompi_datatype_type_extent(rdtype, &rext);
@ -182,34 +167,33 @@ ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps,
prcv = (char*)rbuf + (ptrdiff_t)rdisps[recvfrom] * rext;
/* send and receive */
err = ompi_coll_tuned_sendrecv( psnd, scounts[sendto], sdtype, sendto,
err = ompi_coll_base_sendrecv( psnd, scounts[sendto], sdtype, sendto,
MCA_COLL_BASE_TAG_ALLTOALLV,
prcv, rcounts[recvfrom], rdtype, recvfrom,
prcv, rcounts[recvfrom], rdtype, recvfrom,
MCA_COLL_BASE_TAG_ALLTOALLV,
comm, MPI_STATUS_IGNORE, rank);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
return MPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"%s:%4d\tError occurred %d, rank %2d at step %d", __FILE__, line,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"%s:%4d\tError occurred %d, rank %2d at step %d", __FILE__, line,
err, rank, step));
return err;
}
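
The alltoallv variant follows the exact same pairwise schedule, only with per-peer counts and displacements; a plain-MPI sketch, illustrative only and assuming the standard MPI_Alltoallv argument conventions, looks like this:

#include <mpi.h>

/* Sketch only: pairwise-exchange alltoallv with per-peer counts and
 * displacements (displacements are in units of the datatype extent). */
static int alltoallv_pairwise_sketch(const void *sbuf, const int *scounts, const int *sdispls,
                                     MPI_Datatype sdtype,
                                     void *rbuf, const int *rcounts, const int *rdispls,
                                     MPI_Datatype rdtype, MPI_Comm comm)
{
    int rank, size, step, err;
    MPI_Aint lb, sext, rext;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    MPI_Type_get_extent(sdtype, &lb, &sext);
    MPI_Type_get_extent(rdtype, &lb, &rext);
    for (step = 0; step < size; step++) {
        int sendto   = (rank + step) % size;
        int recvfrom = (rank - step + size) % size;
        err = MPI_Sendrecv((const char *)sbuf + (MPI_Aint)sdispls[sendto] * sext,
                           scounts[sendto], sdtype, sendto, 0,
                           (char *)rbuf + (MPI_Aint)rdispls[recvfrom] * rext,
                           rcounts[recvfrom], rdtype, recvfrom, 0,
                           comm, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS != err) return err;
    }
    return MPI_SUCCESS;
}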
/*
/**
* Linear functions are copied from the basic coll module. For
* some small number of nodes and/or small data sizes they are just as
* fast as tuned/tree based segmenting operations and as such may be
* fast as base/tree based segmenting operations and as such may be
* selected by the decision functions. These are copied into this module
* due to the way we select modules in V1. i.e. in V2 we will handle this
* differently and so will not have to duplicate code.
* GEF Oct05 after asking Jeff.
* differently and so will not have to duplicate code.
*/
int
ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdisps,
ompi_coll_base_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdisps,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts, int *rdisps,
struct ompi_datatype_t *rdtype,
@ -220,19 +204,19 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis
char *psnd, *prcv;
ptrdiff_t sext, rext;
MPI_Request *preq;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoallv_intra_basic_inplace (rbuf, rcounts, rdisps,
return mca_coll_base_alltoallv_intra_basic_inplace (rbuf, rcounts, rdisps,
rdtype, comm, module);
}
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoallv_intra_basic_linear rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:alltoallv_intra_basic_linear rank %d", rank));
ompi_datatype_type_extent(sdtype, &sext);
ompi_datatype_type_extent(rdtype, &rext);
@ -269,7 +253,7 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis
preq++));
++nreqs;
if (MPI_SUCCESS != err) {
ompi_coll_tuned_free_reqs(data->mcct_reqs, nreqs);
ompi_coll_base_free_reqs(data->mcct_reqs, nreqs);
return err;
}
}
@ -287,7 +271,7 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis
preq++));
++nreqs;
if (MPI_SUCCESS != err) {
ompi_coll_tuned_free_reqs(data->mcct_reqs, nreqs);
ompi_coll_base_free_reqs(data->mcct_reqs, nreqs);
return err;
}
}
@ -305,128 +289,7 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis
MPI_STATUSES_IGNORE);
/* Free the requests. */
ompi_coll_tuned_free_reqs(data->mcct_reqs, nreqs);
ompi_coll_base_free_reqs(data->mcct_reqs, nreqs);
return err;
}
/*
* The following are used by dynamic and forced rules. Publish
* details of each algorithm and if its forced/fixed/locked in as you add
* methods/algorithms you must update this and the query/map routines.
* This routine is called by the component only. This makes sure that
* the mca parameters are set to their initial values and perms.
* Module does not call this. They call the forced_getvalues routine
* instead.
*/
int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t
*mca_param_indices)
{
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[ALLTOALLV] = coll_tuned_alltoallv_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoallv_algorithm_count",
"Number of alltoallv algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_alltoallv_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_alltoallv_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_alltoallv_algorithms", alltoallv_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoallv_algorithm",
"Which alltoallv algorithm is used. "
"Can be locked down to choice of: 0 ignore, "
"1 basic linear, 2 pairwise.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoallv_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
return (MPI_SUCCESS);
}
int ompi_coll_tuned_alltoallv_intra_do_forced(void *sbuf, int *scounts, int *sdisps,
struct ompi_datatype_t *sdtype,
void* rbuf, int *rcounts, int *rdisps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoallv_intra_do_forced selected algorithm %d",
data->user_forced[ALLTOALLV].algorithm));
switch (data->user_forced[ALLTOALLV].algorithm) {
case (0):
return ompi_coll_tuned_alltoallv_intra_dec_fixed(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
case (1):
return ompi_coll_tuned_alltoallv_intra_basic_linear(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
case (2):
return ompi_coll_tuned_alltoallv_intra_pairwise(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoallv_intra_do_forced attempt to "
"select algorithm %d when only 0-%d is valid.",
data->user_forced[ALLTOALLV].algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLTOALLV]));
return (MPI_ERR_ARG);
}
}
/* If the user selects dynamic rules and specifies the algorithm to
* use, then this function is called. */
int ompi_coll_tuned_alltoallv_intra_do_this(void *sbuf, int *scounts, int *sdisps,
struct ompi_datatype_t *sdtype,
void* rbuf, int *rcounts, int *rdisps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoallv_intra_do_this selected algorithm %d ",
algorithm));
switch (algorithm) {
case (0):
return ompi_coll_tuned_alltoallv_intra_dec_fixed(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
case (1):
return ompi_coll_tuned_alltoallv_intra_basic_linear(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
case (2):
return ompi_coll_tuned_alltoallv_intra_pairwise(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoall_intra_do_this attempt to select "
"algorithm %d when only 0-%d is valid.",
algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALLV]));
return (MPI_ERR_ARG);
}
}

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -31,25 +31,9 @@
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/* barrier algorithm variables */
static int coll_tuned_barrier_algorithm_count = 6;
static int coll_tuned_barrier_forced_algorithm = 0;
/* valid values for coll_tuned_barrier_forced_algorithm */
static mca_base_var_enum_value_t barrier_algorithms[] = {
{0, "ignore"},
{1, "linear"},
{2, "double_ring"},
{3, "recursive_doubling"},
{4, "bruck"},
{5, "two_proc"},
{6, "tree"},
{0, NULL}
};
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"
/**
* A quick version of the MPI_Sendreceive implemented for the barrier.
@ -57,7 +41,7 @@ static mca_base_var_enum_value_t barrier_algorithms[] = {
* signal a two peer synchronization.
*/
static inline int
ompi_coll_tuned_sendrecv_zero(int dest, int stag,
ompi_coll_base_sendrecv_zero(int dest, int stag,
int source, int rtag,
MPI_Comm comm)
@ -87,8 +71,8 @@ ompi_coll_tuned_sendrecv_zero(int dest, int stag,
err_index = 1;
}
err = statuses[err_index].MPI_ERROR;
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred in the %s"
" stage of ompi_coll_tuned_sendrecv_zero\n",
OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred in the %s"
" stage of ompi_coll_base_sendrecv_zero\n",
__FILE__, line, err, (0 == err_index ? "receive" : "send")));
return err;
}
@ -100,21 +84,21 @@ ompi_coll_tuned_sendrecv_zero(int dest, int stag,
/* Error discovered during the posting of the irecv or isend,
* and no status is available.
*/
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",
OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred\n",
__FILE__, line, err));
return err;
}
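
Outside of the PML layer, the equivalent of this two-peer synchronization is simply a zero-byte MPI_Sendrecv, e.g.:

#include <mpi.h>

/* Sketch only: zero-byte exchange used as a two-peer synchronization point. */
static int sendrecv_zero_sketch(int peer, int tag, MPI_Comm comm)
{
    return MPI_Sendrecv(NULL, 0, MPI_BYTE, peer, tag,
                        NULL, 0, MPI_BYTE, peer, tag,
                        comm, MPI_STATUS_IGNORE);
}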
/*
 * Barrier is meant to be a synchronous operation: some BTLs can mark
 * a request done before it is passed to the NIC, and progress might not be
 * made elsewhere, so we cannot allow a process to exit the barrier until its
 * last [round of] sends is completed.
 *
 * It is the last round of sends rather than the 'last' individual send, as
 * each pair of peers can use different channels/devices/btls and the receiver
 * of one of these sends might be forced to wait as the sender
 * leaves the collective and does not make progress until the next MPI call.
*
*/
@ -124,7 +108,7 @@ ompi_coll_tuned_sendrecv_zero(int dest, int stag,
* synchronous guarantee made by last ring of sends are synchronous
*
*/
int ompi_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm,
int ompi_coll_base_barrier_intra_doublering(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int rank, size, err = 0, line = 0, left, right;
@ -132,50 +116,50 @@ int ompi_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm,
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_barrier_intra_doublering rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"ompi_coll_base_barrier_intra_doublering rank %d", rank));
left = ((rank-1)%size);
right = ((rank+1)%size);
if (rank > 0) { /* receive message from the left */
err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left,
MCA_COLL_BASE_TAG_BARRIER, comm,
err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left,
MCA_COLL_BASE_TAG_BARRIER, comm,
MPI_STATUS_IGNORE));
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
}
/* Send message to the right */
err = MCA_PML_CALL(send((void*)NULL, 0, MPI_BYTE, right,
MCA_COLL_BASE_TAG_BARRIER,
err = MCA_PML_CALL(send((void*)NULL, 0, MPI_BYTE, right,
MCA_COLL_BASE_TAG_BARRIER,
MCA_PML_BASE_SEND_STANDARD, comm));
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* root needs to receive from the last node */
if (rank == 0) {
err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left,
MCA_COLL_BASE_TAG_BARRIER, comm,
err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left,
MCA_COLL_BASE_TAG_BARRIER, comm,
MPI_STATUS_IGNORE));
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
}
/* Allow nodes to exit */
if (rank > 0) { /* post Receive from left */
err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left,
MCA_COLL_BASE_TAG_BARRIER, comm,
err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left,
MCA_COLL_BASE_TAG_BARRIER, comm,
MPI_STATUS_IGNORE));
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
}
/* send message to the right one */
err = MCA_PML_CALL(send((void*)NULL, 0, MPI_BYTE, right,
MCA_COLL_BASE_TAG_BARRIER,
err = MCA_PML_CALL(send((void*)NULL, 0, MPI_BYTE, right,
MCA_COLL_BASE_TAG_BARRIER,
MCA_PML_BASE_SEND_SYNCHRONOUS, comm));
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* rank 0 post receive from the last node */
if (rank == 0) {
err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left,
MCA_COLL_BASE_TAG_BARRIER, comm,
err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left,
MCA_COLL_BASE_TAG_BARRIER, comm,
MPI_STATUS_IGNORE));
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
}
@ -183,7 +167,7 @@ int ompi_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm,
return MPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
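
Summarizing the routine above: the token travels around the ring twice, once to tell rank 0 that everyone has arrived and once to release everyone, with the second pass finished by synchronous sends. A compact plain-MPI sketch of the same schedule (illustrative only):

#include <mpi.h>

/* Sketch only: double-ring barrier.  Pass 0 gathers arrival at rank 0,
 * pass 1 (finished with a synchronous send) releases every rank. */
static int barrier_doublering_sketch(MPI_Comm comm)
{
    int rank, size, left, right, pass;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    if (size < 2) return MPI_SUCCESS;
    left  = (rank - 1 + size) % size;
    right = (rank + 1) % size;
    for (pass = 0; pass < 2; pass++) {
        if (rank > 0)        /* wait for the token coming from the left */
            MPI_Recv(NULL, 0, MPI_BYTE, left, 0, comm, MPI_STATUS_IGNORE);
        if (0 == pass)
            MPI_Send(NULL, 0, MPI_BYTE, right, 0, comm);
        else                 /* synchronous send closes the barrier */
            MPI_Ssend(NULL, 0, MPI_BYTE, right, 0, comm);
        if (0 == rank)       /* the ring closes back at rank 0 */
            MPI_Recv(NULL, 0, MPI_BYTE, left, 0, comm, MPI_STATUS_IGNORE);
    }
    return MPI_SUCCESS;
}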
@ -193,15 +177,15 @@ int ompi_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm,
* To make synchronous, uses sync sends and sync sendrecvs
*/
int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *comm,
int ompi_coll_base_barrier_intra_recursivedoubling(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int rank, size, adjsize, err, line, mask, remote;
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_barrier_intra_recursivedoubling rank %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_barrier_intra_recursivedoubling rank %d",
rank));
/* do nearest power of 2 less than size calc */
@ -213,7 +197,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
if (rank >= adjsize) {
/* send message to lower ranked node */
remote = rank - adjsize;
err = ompi_coll_tuned_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER,
err = ompi_coll_base_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER,
remote, MCA_COLL_BASE_TAG_BARRIER,
comm);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;}
@ -222,7 +206,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
/* receive message from high level rank */
err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, rank+adjsize,
MCA_COLL_BASE_TAG_BARRIER, comm,
MCA_COLL_BASE_TAG_BARRIER, comm,
MPI_STATUS_IGNORE));
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;}
@ -238,7 +222,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
if (remote >= adjsize) continue;
/* post receive from the remote node */
err = ompi_coll_tuned_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER,
err = ompi_coll_base_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER,
remote, MCA_COLL_BASE_TAG_BARRIER,
comm);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;}
@ -250,8 +234,8 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
if (rank < (size - adjsize)) {
/* send enter message to higher ranked node */
remote = rank + adjsize;
err = MCA_PML_CALL(send((void*)NULL, 0, MPI_BYTE, remote,
MCA_COLL_BASE_TAG_BARRIER,
err = MCA_PML_CALL(send((void*)NULL, 0, MPI_BYTE, remote,
MCA_COLL_BASE_TAG_BARRIER,
MCA_PML_BASE_SEND_SYNCHRONOUS, comm));
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;}
@ -261,7 +245,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
return MPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
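
For the common power-of-two case, the core of the algorithm above reduces to log2(size) zero-byte exchanges with partner rank ^ mask; the sketch below (plain MPI, illustrative only) omits the extra fold-in/fold-out steps that the routine above performs for non-power-of-two sizes.

#include <mpi.h>

/* Sketch only: recursive-doubling barrier for a power-of-two communicator size. */
static int barrier_recursivedoubling_sketch(MPI_Comm comm)
{
    int rank, size, mask;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    for (mask = 1; mask < size; mask <<= 1) {
        int partner = rank ^ mask;
        MPI_Sendrecv(NULL, 0, MPI_BYTE, partner, 0,
                     NULL, 0, MPI_BYTE, partner, 0,
                     comm, MPI_STATUS_IGNORE);
    }
    return MPI_SUCCESS;
}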
@ -271,23 +255,23 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
* To make synchronous, uses sync sends and sync sendrecvs
*/
int ompi_coll_tuned_barrier_intra_bruck(struct ompi_communicator_t *comm,
int ompi_coll_base_barrier_intra_bruck(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int rank, size, distance, to, from, err, line = 0;
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_barrier_intra_bruck rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_barrier_intra_bruck rank %d", rank));
/* exchange data with rank-2^k and rank+2^k */
for (distance = 1; distance < size; distance <<= 1) {
for (distance = 1; distance < size; distance <<= 1) {
from = (rank + size - distance) % size;
to = (rank + distance) % size;
/* send message to lower ranked node */
err = ompi_coll_tuned_sendrecv_zero(to, MCA_COLL_BASE_TAG_BARRIER,
err = ompi_coll_base_sendrecv_zero(to, MCA_COLL_BASE_TAG_BARRIER,
from, MCA_COLL_BASE_TAG_BARRIER,
comm);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;}
@ -296,7 +280,7 @@ int ompi_coll_tuned_barrier_intra_bruck(struct ompi_communicator_t *comm,
return MPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
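
The Bruck (dissemination) barrier above already works for any communicator size; stripped of the OMPI plumbing it is just the following loop (illustrative plain-MPI sketch):

#include <mpi.h>

/* Sketch only: dissemination barrier, ceil(log2(size)) rounds for any size. */
static int barrier_bruck_sketch(MPI_Comm comm)
{
    int rank, size, distance;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    for (distance = 1; distance < size; distance <<= 1) {
        int to   = (rank + distance) % size;
        int from = (rank + size - distance) % size;
        MPI_Sendrecv(NULL, 0, MPI_BYTE, to, 0,
                     NULL, 0, MPI_BYTE, from, 0,
                     comm, MPI_STATUS_IGNORE);
    }
    return MPI_SUCCESS;
}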
@ -306,17 +290,17 @@ int ompi_coll_tuned_barrier_intra_bruck(struct ompi_communicator_t *comm,
* To make synchronous, uses sync sends and sync sendrecvs
*/
/* special case for two processes */
int ompi_coll_tuned_barrier_intra_two_procs(struct ompi_communicator_t *comm,
int ompi_coll_base_barrier_intra_two_procs(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int remote, err;
remote = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_barrier_intra_two_procs rank %d", remote));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_barrier_intra_two_procs rank %d", remote));
remote = (remote + 1) & 0x1;
err = ompi_coll_tuned_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER,
err = ompi_coll_base_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER,
remote, MCA_COLL_BASE_TAG_BARRIER,
comm);
return (err);
@ -327,7 +311,7 @@ int ompi_coll_tuned_barrier_intra_two_procs(struct ompi_communicator_t *comm,
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
* but for some small number of nodes and/or small data sizes they
* are just as fast as tuned/tree based segmenting operations
* are just as fast as base/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
@ -337,7 +321,7 @@ int ompi_coll_tuned_barrier_intra_two_procs(struct ompi_communicator_t *comm,
/* copied function (with appropriate renaming) starts here */
static int ompi_coll_tuned_barrier_intra_basic_linear(struct ompi_communicator_t *comm,
static int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i, err, rank, size;
@ -347,14 +331,14 @@ static int ompi_coll_tuned_barrier_intra_basic_linear(struct ompi_communicator_t
/* All non-root send & receive zero-length message. */
if (rank > 0) {
err = MCA_PML_CALL(send (NULL, 0, MPI_BYTE, 0,
err = MCA_PML_CALL(send (NULL, 0, MPI_BYTE, 0,
MCA_COLL_BASE_TAG_BARRIER,
MCA_PML_BASE_SEND_STANDARD, comm));
if (MPI_SUCCESS != err) {
return err;
}
err = MCA_PML_CALL(recv (NULL, 0, MPI_BYTE, 0,
err = MCA_PML_CALL(recv (NULL, 0, MPI_BYTE, 0,
MCA_COLL_BASE_TAG_BARRIER,
comm, MPI_STATUS_IGNORE));
if (MPI_SUCCESS != err) {
@ -370,7 +354,7 @@ static int ompi_coll_tuned_barrier_intra_basic_linear(struct ompi_communicator_t
requests = (ompi_request_t**)malloc( size * sizeof(ompi_request_t*) );
for (i = 1; i < size; ++i) {
err = MCA_PML_CALL(irecv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE,
MCA_COLL_BASE_TAG_BARRIER, comm,
MCA_COLL_BASE_TAG_BARRIER, comm,
&(requests[i])));
if (MPI_SUCCESS != err) {
return err;
@ -380,7 +364,7 @@ static int ompi_coll_tuned_barrier_intra_basic_linear(struct ompi_communicator_t
for (i = 1; i < size; ++i) {
err = MCA_PML_CALL(isend(NULL, 0, MPI_BYTE, i,
MCA_COLL_BASE_TAG_BARRIER,
MCA_COLL_BASE_TAG_BARRIER,
MCA_PML_BASE_SEND_STANDARD, comm,
&(requests[i])));
if (MPI_SUCCESS != err) {
@ -400,17 +384,17 @@ static int ompi_coll_tuned_barrier_intra_basic_linear(struct ompi_communicator_t
/*
* Another recursive doubling type algorithm, but in this case
* we go up the tree and back down the tree.
* we go up the tree and back down the tree.
*/
int ompi_coll_tuned_barrier_intra_tree(struct ompi_communicator_t *comm,
int ompi_coll_base_barrier_intra_tree(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int rank, size, depth, err, jump, partner;
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_barrier_intra_tree %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_barrier_intra_tree %d",
rank));
/* Find the nearest power of 2 of the communicator size. */
@ -420,21 +404,21 @@ int ompi_coll_tuned_barrier_intra_tree(struct ompi_communicator_t *comm,
partner = rank ^ jump;
if (!(partner & (jump-1)) && partner < size) {
if (partner > rank) {
err = MCA_PML_CALL(recv (NULL, 0, MPI_BYTE, partner,
err = MCA_PML_CALL(recv (NULL, 0, MPI_BYTE, partner,
MCA_COLL_BASE_TAG_BARRIER, comm,
MPI_STATUS_IGNORE));
if (MPI_SUCCESS != err)
return err;
} else if (partner < rank) {
err = MCA_PML_CALL(send (NULL, 0, MPI_BYTE, partner,
MCA_COLL_BASE_TAG_BARRIER,
MCA_COLL_BASE_TAG_BARRIER,
MCA_PML_BASE_SEND_STANDARD, comm));
if (MPI_SUCCESS != err)
return err;
}
}
}
depth >>= 1;
for (jump = depth; jump>0; jump>>=1) {
partner = rank ^ jump;
@ -446,7 +430,7 @@ int ompi_coll_tuned_barrier_intra_tree(struct ompi_communicator_t *comm,
if (MPI_SUCCESS != err)
return err;
} else if (partner < rank) {
err = MCA_PML_CALL(recv (NULL, 0, MPI_BYTE, partner,
err = MCA_PML_CALL(recv (NULL, 0, MPI_BYTE, partner,
MCA_COLL_BASE_TAG_BARRIER, comm,
MPI_STATUS_IGNORE));
if (MPI_SUCCESS != err)
@ -457,101 +441,3 @@ int ompi_coll_tuned_barrier_intra_tree(struct ompi_communicator_t *comm,
return MPI_SUCCESS;
}
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if its forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map */
/* routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values */
/* and perms */
/* module does not call this they call the forced_getvalues routine instead */
int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[BARRIER] = coll_tuned_barrier_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"barrier_algorithm_count",
"Number of barrier algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_barrier_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_barrier_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_barrier_algorithms", barrier_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"barrier_algorithm",
"Which barrier algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 double ring, 3: recursive doubling 4: bruck, 5: two proc only, 6: tree",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_barrier_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
return (MPI_SUCCESS);
}
int ompi_coll_tuned_barrier_intra_do_forced(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:barrier_intra_do_forced selected algorithm %d",
data->user_forced[BARRIER].algorithm));
switch (data->user_forced[BARRIER].algorithm) {
case (0): return ompi_coll_tuned_barrier_intra_dec_fixed (comm, module);
case (1): return ompi_coll_tuned_barrier_intra_basic_linear (comm, module);
case (2): return ompi_coll_tuned_barrier_intra_doublering (comm, module);
case (3): return ompi_coll_tuned_barrier_intra_recursivedoubling (comm, module);
case (4): return ompi_coll_tuned_barrier_intra_bruck (comm, module);
case (5): return ompi_coll_tuned_barrier_intra_two_procs (comm, module);
case (6): return ompi_coll_tuned_barrier_intra_tree (comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[BARRIER].algorithm,
ompi_coll_tuned_forced_max_algorithms[BARRIER]));
return (MPI_ERR_ARG);
} /* switch */
}
int ompi_coll_tuned_barrier_intra_do_this (struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_this selected algorithm %d topo fanin/out%d", algorithm, faninout));
switch (algorithm) {
case (0): return ompi_coll_tuned_barrier_intra_dec_fixed (comm, module);
case (1): return ompi_coll_tuned_barrier_intra_basic_linear (comm, module);
case (2): return ompi_coll_tuned_barrier_intra_doublering (comm, module);
case (3): return ompi_coll_tuned_barrier_intra_recursivedoubling (comm, module);
case (4): return ompi_coll_tuned_barrier_intra_bruck (comm, module);
case (5): return ompi_coll_tuned_barrier_intra_two_procs (comm, module);
case (6): return ompi_coll_tuned_barrier_intra_tree (comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[BARRIER]));
return (MPI_ERR_ARG);
} /* switch */
}

View file

@ -3,18 +3,18 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/
@ -27,33 +27,14 @@
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/* bcast algorithm variables */
static int coll_tuned_bcast_algorithm_count = 6;
static int coll_tuned_bcast_forced_algorithm = 0;
static int coll_tuned_bcast_segment_size = 0;
static int coll_tuned_bcast_tree_fanout;
static int coll_tuned_bcast_chain_fanout;
/* valid values for coll_tuned_bcast_forced_algorithm */
static mca_base_var_enum_value_t bcast_algorithms[] = {
{0, "ignore"},
{1, "basic_linear"},
{2, "chain"},
{3, "pipeline"},
{4, "split_binary_tree"},
{5, "binary_tree"},
{6, "binomial"},
{0, NULL}
};
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"
int
ompi_coll_tuned_bcast_intra_generic( void* buffer,
int original_count,
struct ompi_datatype_t* datatype,
ompi_coll_base_bcast_intra_generic( void* buffer,
int original_count,
struct ompi_datatype_t* datatype,
int root,
struct ompi_communicator_t* comm,
mca_coll_base_module_t *module,
@ -62,12 +43,12 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
{
int err = 0, line, i, rank, size, segindex, req_index;
int num_segments; /* Number of segments */
int sendcount; /* number of elements sent in this segment */
int sendcount; /* number of elements sent in this segment */
size_t realsegsize, type_size;
char *tmpbuf;
ptrdiff_t extent, lb;
ompi_request_t *recv_reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL};
#if !defined(COLL_TUNED_BCAST_USE_BLOCKING)
#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
ompi_request_t **send_reqs = NULL;
#endif
@ -79,20 +60,20 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
ompi_datatype_type_size( datatype, &type_size );
num_segments = (original_count + count_by_segment - 1) / count_by_segment;
realsegsize = (ptrdiff_t)count_by_segment * extent;
/* Set the buffer pointers */
tmpbuf = (char *) buffer;
#if !defined(COLL_TUNED_BCAST_USE_BLOCKING)
#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
if( tree->tree_nextsize != 0 ) {
send_reqs = (ompi_request_t**)malloc( (ptrdiff_t)tree->tree_nextsize *
send_reqs = (ompi_request_t**)malloc( (ptrdiff_t)tree->tree_nextsize *
sizeof(ompi_request_t*) );
}
#endif
/* Root code */
if( rank == root ) {
/*
/*
For each segment:
- send segment to all children.
The last segment may have fewer elements than other segments.
@ -102,39 +83,39 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
if( segindex == (num_segments - 1) ) {
sendcount = original_count - segindex * count_by_segment;
}
for( i = 0; i < tree->tree_nextsize; i++ ) {
#if defined(COLL_TUNED_BCAST_USE_BLOCKING)
for( i = 0; i < tree->tree_nextsize; i++ ) {
#if defined(COLL_BASE_BCAST_USE_BLOCKING)
err = MCA_PML_CALL(send(tmpbuf, sendcount, datatype,
tree->tree_next[i],
tree->tree_next[i],
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD, comm));
#else
err = MCA_PML_CALL(isend(tmpbuf, sendcount, datatype,
tree->tree_next[i],
tree->tree_next[i],
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD, comm,
MCA_PML_BASE_SEND_STANDARD, comm,
&send_reqs[i]));
#endif /* COLL_TUNED_BCAST_USE_BLOCKING */
#endif /* COLL_BASE_BCAST_USE_BLOCKING */
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
}
}
#if !defined(COLL_TUNED_BCAST_USE_BLOCKING)
#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
/* complete the sends before starting the next sends */
err = ompi_request_wait_all( tree->tree_nextsize, send_reqs,
err = ompi_request_wait_all( tree->tree_nextsize, send_reqs,
MPI_STATUSES_IGNORE );
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
#endif /* not COLL_TUNED_BCAST_USE_BLOCKING */
#endif /* not COLL_BASE_BCAST_USE_BLOCKING */
/* update tmp buffer */
tmpbuf += realsegsize;
}
}
}
/* Intermediate nodes code */
else if( tree->tree_nextsize > 0 ) {
/*
Create the pipeline.
else if( tree->tree_nextsize > 0 ) {
/*
Create the pipeline.
1) Post the first receive
2) For segments 1 .. num_segments
- post new receive
@ -149,49 +130,49 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
comm, &recv_reqs[req_index]));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
for( segindex = 1; segindex < num_segments; segindex++ ) {
req_index = req_index ^ 0x1;
/* post new irecv */
err = MCA_PML_CALL(irecv( tmpbuf + realsegsize, count_by_segment,
datatype, tree->tree_prev,
MCA_COLL_BASE_TAG_BCAST,
datatype, tree->tree_prev,
MCA_COLL_BASE_TAG_BCAST,
comm, &recv_reqs[req_index]));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
/* wait for and forward the previous segment to children */
err = ompi_request_wait( &recv_reqs[req_index ^ 0x1],
err = ompi_request_wait( &recv_reqs[req_index ^ 0x1],
MPI_STATUSES_IGNORE );
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
for( i = 0; i < tree->tree_nextsize; i++ ) {
#if defined(COLL_TUNED_BCAST_USE_BLOCKING)
for( i = 0; i < tree->tree_nextsize; i++ ) {
#if defined(COLL_BASE_BCAST_USE_BLOCKING)
err = MCA_PML_CALL(send(tmpbuf, count_by_segment, datatype,
tree->tree_next[i],
tree->tree_next[i],
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD, comm));
#else
err = MCA_PML_CALL(isend(tmpbuf, count_by_segment, datatype,
tree->tree_next[i],
tree->tree_next[i],
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD, comm,
MCA_PML_BASE_SEND_STANDARD, comm,
&send_reqs[i]));
#endif /* COLL_TUNED_BCAST_USE_BLOCKING */
#endif /* COLL_BASE_BCAST_USE_BLOCKING */
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
}
#if !defined(COLL_TUNED_BCAST_USE_BLOCKING)
}
#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
/* complete the sends before starting the next iteration */
err = ompi_request_wait_all( tree->tree_nextsize, send_reqs,
err = ompi_request_wait_all( tree->tree_nextsize, send_reqs,
MPI_STATUSES_IGNORE );
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
#endif /* COLL_TUNED_BCAST_USE_BLOCKING */
#endif /* COLL_BASE_BCAST_USE_BLOCKING */
/* Update the receive buffer */
tmpbuf += realsegsize;
}
/* Process the last segment */
@ -199,31 +180,31 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
sendcount = original_count - (ptrdiff_t)(num_segments - 1) * count_by_segment;
for( i = 0; i < tree->tree_nextsize; i++ ) {
#if defined(COLL_TUNED_BCAST_USE_BLOCKING)
#if defined(COLL_BASE_BCAST_USE_BLOCKING)
err = MCA_PML_CALL(send(tmpbuf, sendcount, datatype,
tree->tree_next[i],
tree->tree_next[i],
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD, comm));
#else
err = MCA_PML_CALL(isend(tmpbuf, sendcount, datatype,
tree->tree_next[i],
tree->tree_next[i],
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD, comm,
MCA_PML_BASE_SEND_STANDARD, comm,
&send_reqs[i]));
#endif /* COLL_TUNED_BCAST_USE_BLOCKING */
#endif /* COLL_BASE_BCAST_USE_BLOCKING */
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
}
#if !defined(COLL_TUNED_BCAST_USE_BLOCKING)
err = ompi_request_wait_all( tree->tree_nextsize, send_reqs,
#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
err = ompi_request_wait_all( tree->tree_nextsize, send_reqs,
MPI_STATUSES_IGNORE );
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
#endif /* COLL_TUNED_BCAST_USE_BLOCKING */
#endif /* COLL_BASE_BCAST_USE_BLOCKING */
}
/* Leaf nodes */
else {
/*
/*
Receive all segments from parent in a loop:
1) post irecv for the first segment
2) for segments 1 .. num_segments
@ -241,12 +222,12 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
req_index = req_index ^ 0x1;
tmpbuf += realsegsize;
/* post receive for the next segment */
err = MCA_PML_CALL(irecv(tmpbuf, count_by_segment, datatype,
tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
err = MCA_PML_CALL(irecv(tmpbuf, count_by_segment, datatype,
tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
comm, &recv_reqs[req_index]));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
/* wait on the previous segment */
err = ompi_request_wait( &recv_reqs[req_index ^ 0x1],
err = ompi_request_wait( &recv_reqs[req_index ^ 0x1],
MPI_STATUS_IGNORE );
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
}
@ -255,25 +236,25 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
}
#if !defined(COLL_TUNED_BCAST_USE_BLOCKING)
#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
if( NULL != send_reqs ) free(send_reqs);
#endif
return (MPI_SUCCESS);
error_hndl:
OPAL_OUTPUT( (ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank) );
#if !defined(COLL_TUNED_BCAST_USE_BLOCKING)
#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
if( NULL != send_reqs ) free(send_reqs);
#endif
return (err);
}
int
ompi_coll_tuned_bcast_intra_bintree ( void* buffer,
int count,
struct ompi_datatype_t* datatype,
ompi_coll_base_bcast_intra_bintree ( void* buffer,
int count,
struct ompi_datatype_t* datatype,
int root,
struct ompi_communicator_t* comm,
mca_coll_base_module_t *module,
@ -281,28 +262,27 @@ ompi_coll_tuned_bcast_intra_bintree ( void* buffer,
{
int segcount = count;
size_t typelng;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_comm_t *data = module->base_data;
COLL_TUNED_UPDATE_BINTREE( comm, tuned_module, root );
COLL_BASE_UPDATE_BINTREE( comm, module, root );
/**
* Determine number of elements sent per operation.
*/
ompi_datatype_type_size( datatype, &typelng );
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_binary rank %d ss %5d typelng %lu segcount %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_binary rank %d ss %5d typelng %lu segcount %d",
ompi_comm_rank(comm), segsize, (unsigned long)typelng, segcount));
return ompi_coll_tuned_bcast_intra_generic( buffer, count, datatype, root, comm, module,
return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module,
segcount, data->cached_bintree );
}
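/* Illustration only (not part of this commit; the wrapper name is
 * hypothetical and the file's existing includes are assumed): invoking the
 * renamed binary-tree broadcast directly with a fixed 32 KB segment size.
 * COLL_BASE_COMPUTED_SEGCOUNT turns that byte count into a per-segment
 * element count before the generic routine above pipelines the segments down
 * the cached binary tree. */
static int example_bcast_bintree_32k(void *buf, int count,
                                     struct ompi_datatype_t *dtype, int root,
                                     struct ompi_communicator_t *comm,
                                     mca_coll_base_module_t *module)
{
    return ompi_coll_base_bcast_intra_bintree(buf, count, dtype, root, comm,
                                              module, 32768 /* segsize, bytes */);
}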
int
ompi_coll_tuned_bcast_intra_pipeline( void* buffer,
int count,
struct ompi_datatype_t* datatype,
ompi_coll_base_bcast_intra_pipeline( void* buffer,
int count,
struct ompi_datatype_t* datatype,
int root,
struct ompi_communicator_t* comm,
mca_coll_base_module_t *module,
@ -310,28 +290,27 @@ ompi_coll_tuned_bcast_intra_pipeline( void* buffer,
{
int segcount = count;
size_t typelng;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_comm_t *data = module->base_data;
COLL_TUNED_UPDATE_PIPELINE( comm, tuned_module, root );
COLL_BASE_UPDATE_PIPELINE( comm, module, root );
/**
* Determine number of elements sent per operation.
*/
ompi_datatype_type_size( datatype, &typelng );
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_pipeline rank %d ss %5d typelng %lu segcount %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_pipeline rank %d ss %5d typelng %lu segcount %d",
ompi_comm_rank(comm), segsize, (unsigned long)typelng, segcount));
return ompi_coll_tuned_bcast_intra_generic( buffer, count, datatype, root, comm, module,
return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module,
segcount, data->cached_pipeline );
}
int
ompi_coll_tuned_bcast_intra_chain( void* buffer,
int count,
struct ompi_datatype_t* datatype,
ompi_coll_base_bcast_intra_chain( void* buffer,
int count,
struct ompi_datatype_t* datatype,
int root,
struct ompi_communicator_t* comm,
mca_coll_base_module_t *module,
@ -339,28 +318,27 @@ ompi_coll_tuned_bcast_intra_chain( void* buffer,
{
int segcount = count;
size_t typelng;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_comm_t *data = module->base_data;
COLL_TUNED_UPDATE_CHAIN( comm, tuned_module, root, chains );
COLL_BASE_UPDATE_CHAIN( comm, module, root, chains );
/**
* Determine number of elements sent per operation.
*/
ompi_datatype_type_size( datatype, &typelng );
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_chain rank %d fo %d ss %5d typelng %lu segcount %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_chain rank %d fo %d ss %5d typelng %lu segcount %d",
ompi_comm_rank(comm), chains, segsize, (unsigned long)typelng, segcount));
return ompi_coll_tuned_bcast_intra_generic( buffer, count, datatype, root, comm, module,
return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module,
segcount, data->cached_chain );
}
int
ompi_coll_tuned_bcast_intra_binomial( void* buffer,
int count,
struct ompi_datatype_t* datatype,
ompi_coll_base_bcast_intra_binomial( void* buffer,
int count,
struct ompi_datatype_t* datatype,
int root,
struct ompi_communicator_t* comm,
mca_coll_base_module_t *module,
@ -368,28 +346,27 @@ ompi_coll_tuned_bcast_intra_binomial( void* buffer,
{
int segcount = count;
size_t typelng;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_comm_t *data = module->base_data;
COLL_TUNED_UPDATE_BMTREE( comm, tuned_module, root );
COLL_BASE_UPDATE_BMTREE( comm, module, root );
/**
* Determine number of elements sent per operation.
*/
ompi_datatype_type_size( datatype, &typelng );
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_binomial rank %d ss %5d typelng %lu segcount %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_binomial rank %d ss %5d typelng %lu segcount %d",
ompi_comm_rank(comm), segsize, (unsigned long)typelng, segcount));
return ompi_coll_tuned_bcast_intra_generic( buffer, count, datatype, root, comm, module,
return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module,
segcount, data->cached_bmtree );
}
int
ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
int count,
struct ompi_datatype_t* datatype,
ompi_coll_base_bcast_intra_split_bintree ( void* buffer,
int count,
struct ompi_datatype_t* datatype,
int root,
struct ompi_communicator_t* comm,
mca_coll_base_module_t *module,
@ -399,26 +376,25 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
uint32_t counts[2];
int segcount[2]; /* Number of elements sent with each segment */
int num_segments[2]; /* Number of segments */
int sendcount[2]; /* the same as segcount, except for the last segment */
int sendcount[2]; /* the same as segcount, except for the last segment */
size_t realsegsize[2], type_size;
char *tmpbuf[2];
ptrdiff_t type_extent, lb;
ompi_request_t *base_req, *new_req;
ompi_coll_tree_t *tree;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_comm_t *data = module->base_data;
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_bcast_intra_split_bintree rank %d root %d ss %5d", rank, root, segsize));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"ompi_coll_base_bcast_intra_split_bintree rank %d root %d ss %5d", rank, root, segsize));
if (size == 1) {
return MPI_SUCCESS;
}
/* setup the binary tree topology. */
COLL_TUNED_UPDATE_BINTREE( comm, tuned_module, root );
COLL_BASE_UPDATE_BINTREE( comm, module, root );
tree = data->cached_bintree;
err = ompi_datatype_type_size( datatype, &type_size );
@ -431,10 +407,10 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
/* Note that ompi_datatype_type_size() will never return a negative
value in typelng; it returns an int [vs. an unsigned type]
because of the MPI spec. */
if (segsize < ((uint32_t) type_size)) {
if (segsize < ((uint32_t) type_size)) {
segsize = type_size; /* push segsize up to hold one type */
}
segcount[0] = segcount[1] = segsize / type_size;
segcount[0] = segcount[1] = segsize / type_size;
num_segments[0] = counts[0]/segcount[0];
if ((counts[0] % segcount[0]) != 0) num_segments[0]++;
num_segments[1] = counts[1]/segcount[1];
@ -450,17 +426,17 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
(segsize > ((ptrdiff_t)counts[0] * type_size)) ||
(segsize > ((ptrdiff_t)counts[1] * type_size)) ) {
/* call linear version here ! */
return (ompi_coll_tuned_bcast_intra_chain ( buffer, count, datatype,
return (ompi_coll_base_bcast_intra_chain ( buffer, count, datatype,
root, comm, module,
segsize, 1 ));
}
err = ompi_datatype_get_extent (datatype, &lb, &type_extent);
/* Determine real segment size */
realsegsize[0] = (ptrdiff_t)segcount[0] * type_extent;
realsegsize[1] = (ptrdiff_t)segcount[1] * type_extent;
/* set the buffer pointers */
tmpbuf[0] = (char *) buffer;
tmpbuf[1] = (char *) buffer + (ptrdiff_t)counts[0] * type_extent;
@ -473,11 +449,11 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
/* determine if I am left (0) or right (1), (root is right) */
lr = ((rank + size - root)%size + 1)%2;
/* root code */
if( rank == root ) {
/* determine segment count */
sendcount[0] = segcount[0];
sendcount[0] = segcount[0];
sendcount[1] = segcount[1];
/* for each segment */
for (segindex = 0; segindex < num_segments[0]; segindex++) {
@ -487,7 +463,7 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
continue;
}
/* determine how many elements are being sent in this round */
if(segindex == (num_segments[i] - 1))
if(segindex == (num_segments[i] - 1))
sendcount[i] = counts[i] - segindex*segcount[i];
/* send data */
MCA_PML_CALL(send(tmpbuf[i], sendcount[i], datatype,
@ -498,19 +474,19 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
tmpbuf[i] += realsegsize[i];
}
}
}
}
/* intermediate nodes code */
else if( tree->tree_nextsize > 0 ) {
else if( tree->tree_nextsize > 0 ) {
/* Intermediate nodes:
* It will receive segments only from one half of the data.
* Which one is determined by whether the node belongs to the "left" or "right"
* Which one is determined by whether the node belongs to the "left" or "right"
* subtree. The topology-building function builds the binary tree such that
* odd "shifted ranks" ((rank + size - root)%size) are on the left subtree,
* and even on the right subtree.
*
* Create the pipeline. We first post the first receive, then in the loop we
* post the next receive and after that wait for the previous receive to complete
* post the next receive and after that wait for the previous receive to complete
* and disseminate the data to all children.
*/
sendcount[lr] = segcount[lr];
@ -521,11 +497,11 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
for( segindex = 1; segindex < num_segments[lr]; segindex++ ) {
/* determine how many elements to expect in this round */
if( segindex == (num_segments[lr] - 1))
if( segindex == (num_segments[lr] - 1))
sendcount[lr] = counts[lr] - (ptrdiff_t)segindex * (ptrdiff_t)segcount[lr];
/* post new irecv */
err = MCA_PML_CALL(irecv( tmpbuf[lr] + realsegsize[lr], sendcount[lr],
datatype, tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
datatype, tree->tree_prev, MCA_COLL_BASE_TAG_BCAST,
comm, &new_req));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
@ -539,7 +515,7 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
} /* end of for each child */
/* update the base request */
base_req = new_req;
base_req = new_req;
/* go to the next buffer (ie. the one corresponding to the next recv) */
tmpbuf[lr] += realsegsize[lr];
} /* end of for segindex */
@ -552,10 +528,10 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
MCA_PML_BASE_SEND_STANDARD, comm));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
} /* end of for each child */
}
}
/* leaf nodes */
else {
else {
/* Just consume segments as fast as possible */
sendcount[lr] = segcount[lr];
for (segindex = 0; segindex < num_segments[lr]; segindex++) {
@ -577,9 +553,9 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
tmpbuf[1] = (char *) buffer + (ptrdiff_t)counts[0] * type_extent;
/* Step 2:
Find your immediate pair (identical node in opposite subtree) and SendRecv
Find your immediate pair (identical node in opposite subtree) and SendRecv
data buffer with them.
The tree building function ensures that
The tree building function ensures that
if (we are not root)
if we are in the left subtree (lr == 0) our pair is (rank+1)%size.
if we are in the right subtree (lr == 1) our pair is (rank-1)%size
@ -591,9 +567,9 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
pair = (rank+size-1)%size;
}
if ( (size%2) != 0 && rank != root) {
if ( (size%2) != 0 && rank != root) {
err = ompi_coll_tuned_sendrecv( tmpbuf[lr], counts[lr], datatype,
err = ompi_coll_base_sendrecv( tmpbuf[lr], counts[lr], datatype,
pair, MCA_COLL_BASE_TAG_BCAST,
tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype,
pair, MCA_COLL_BASE_TAG_BCAST,
@ -607,28 +583,28 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
MCA_PML_BASE_SEND_STANDARD, comm));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
}
}
/* last node receives right buffer from the root */
else if (rank == (root+size-1)%size) {
err = MCA_PML_CALL(recv(tmpbuf[1], counts[1], datatype,
root, MCA_COLL_BASE_TAG_BCAST,
comm, MPI_STATUS_IGNORE));
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
}
}
/* everyone else exchanges buffers */
else {
err = ompi_coll_tuned_sendrecv( tmpbuf[lr], counts[lr], datatype,
err = ompi_coll_base_sendrecv( tmpbuf[lr], counts[lr], datatype,
pair, MCA_COLL_BASE_TAG_BCAST,
tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype,
pair, MCA_COLL_BASE_TAG_BCAST,
comm, MPI_STATUS_IGNORE, rank);
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
}
}
return (MPI_SUCCESS);
error_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
return (err);
}
@ -636,8 +612,8 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
/*
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
* but for some small number of nodes and/or small data sizes they
* are just as fast as tuned/tree based segmenting operations
* but for some small number of nodes and/or small data sizes they
* are just as fast as base/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
@ -655,21 +631,20 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_tuned_bcast_intra_basic_linear (void *buff, int count,
ompi_coll_base_bcast_intra_basic_linear (void *buff, int count,
struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i, size, rank, err;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_comm_t *data = module->base_data;
ompi_request_t **preq, **reqs = data->mcct_reqs;
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_bcast_intra_basic_linear rank %d root %d", rank, root));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"ompi_coll_base_bcast_intra_basic_linear rank %d root %d", rank, root));
/* Non-root receive the data. */
@ -710,148 +685,11 @@ ompi_coll_tuned_bcast_intra_basic_linear (void *buff, int count,
err = ompi_request_wait_all(i, reqs, MPI_STATUSES_IGNORE);
/* Free the reqs */
ompi_coll_tuned_free_reqs(reqs, i);
ompi_coll_base_free_reqs(reqs, i);
/* All done */
return err;
}
/* copied function (with appropriate renaming) ends here */
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if its forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values and perms */
/* module does not call this they call the forced_getvalues routine instead */
int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[BCAST] = coll_tuned_bcast_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_count",
"Number of bcast algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_bcast_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_bcast_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_bcast_algorithms", bcast_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm",
"Which bcast algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 chain, 3: pipeline, 4: split binary tree, 5: binary tree, 6: binomial tree.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_bcast_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_bcast_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_segmentsize",
"Segment size in bytes used by default for bcast algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_bcast_segment_size);
coll_tuned_bcast_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_tree_fanout",
"Fanout for n-tree used for bcast algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_bcast_tree_fanout);
coll_tuned_bcast_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_chain_fanout",
"Fanout for chains used for bcast algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_bcast_chain_fanout);
return (MPI_SUCCESS);
}
int ompi_coll_tuned_bcast_intra_do_forced(void *buf, int count,
struct ompi_datatype_t *dtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced algorithm %d",
data->user_forced[BCAST].algorithm));
switch (data->user_forced[BCAST].algorithm) {
case (0): return ompi_coll_tuned_bcast_intra_dec_fixed( buf, count, dtype, root, comm, module );
case (1): return ompi_coll_tuned_bcast_intra_basic_linear( buf, count, dtype, root, comm, module );
case (2): return ompi_coll_tuned_bcast_intra_chain( buf, count, dtype, root, comm, module,
data->user_forced[BCAST].segsize,
data->user_forced[BCAST].chain_fanout );
case (3): return ompi_coll_tuned_bcast_intra_pipeline( buf, count, dtype, root, comm, module,
data->user_forced[BCAST].segsize );
case (4): return ompi_coll_tuned_bcast_intra_split_bintree( buf, count, dtype, root, comm, module,
data->user_forced[BCAST].segsize );
case (5): return ompi_coll_tuned_bcast_intra_bintree( buf, count, dtype, root, comm, module,
data->user_forced[BCAST].segsize );
case (6): return ompi_coll_tuned_bcast_intra_binomial( buf, count, dtype, root, comm, module,
data->user_forced[BCAST].segsize );
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[BCAST].algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST]));
} /* switch */
return (MPI_ERR_ARG);
}
int ompi_coll_tuned_bcast_intra_do_this(void *buf, int count,
struct ompi_datatype_t *dtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0): return ompi_coll_tuned_bcast_intra_dec_fixed( buf, count, dtype, root, comm, module );
case (1): return ompi_coll_tuned_bcast_intra_basic_linear( buf, count, dtype, root, comm, module );
case (2): return ompi_coll_tuned_bcast_intra_chain( buf, count, dtype, root, comm, module, segsize, faninout );
case (3): return ompi_coll_tuned_bcast_intra_pipeline( buf, count, dtype, root, comm, module, segsize );
case (4): return ompi_coll_tuned_bcast_intra_split_bintree( buf, count, dtype, root, comm, module, segsize );
case (5): return ompi_coll_tuned_bcast_intra_bintree( buf, count, dtype, root, comm, module, segsize );
case (6): return ompi_coll_tuned_bcast_intra_binomial( buf, count, dtype, root, comm, module, segsize );
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST]));
} /* switch */
return (MPI_ERR_ARG);
}

View file

@ -3,10 +3,10 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
@ -15,9 +15,9 @@
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
*
* Additional copyrights may follow
*
*
* $HEADER$
*/
@ -33,6 +33,7 @@
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
/*
* The following file was created by configure. It contains extern
@ -49,10 +50,55 @@ static void coll_base_module_construct(mca_coll_base_module_t *m)
/* zero out all functions */
memset ((char *) m + sizeof (m->super), 0, sizeof (*m) - sizeof (m->super));
m->coll_module_disable = NULL;
m->base_data = NULL;
}
OBJ_CLASS_INSTANCE(mca_coll_base_module_t, opal_object_t,
coll_base_module_construct, NULL);
static void
coll_base_module_destruct(mca_coll_base_module_t *module)
{
mca_coll_base_comm_t* data = module->base_data;
if (NULL != data) {
if( NULL != data->mcct_reqs ) {
for( int i = 0; i < data->mcct_num_reqs; ++i ) {
if( MPI_REQUEST_NULL != data->mcct_reqs[i] )
ompi_request_free(&data->mcct_reqs[i]);
}
free(data->mcct_reqs);
data->mcct_reqs = NULL;
data->mcct_num_reqs = 0;
}
assert(0 == data->mcct_num_reqs);
/* free any cached information that has been allocated */
if (data->cached_ntree) { /* destroy general tree if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_ntree);
}
if (data->cached_bintree) { /* destroy bintree if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_bintree);
}
if (data->cached_bmtree) { /* destroy bmtree if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_bmtree);
}
if (data->cached_in_order_bmtree) { /* destroy in-order bmtree if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_in_order_bmtree);
}
if (data->cached_chain) { /* destroy general chain if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_chain);
}
if (data->cached_pipeline) { /* destroy pipeline if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_pipeline);
}
if (data->cached_in_order_bintree) { /* destroy in order bintree if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_in_order_bintree);
}
free(data);
}
}
OBJ_CLASS_INSTANCE(mca_coll_base_module_t, opal_object_t,
coll_base_module_construct, coll_base_module_destruct);
MCA_BASE_FRAMEWORK_DECLARE(ompi, coll, "Collectives", NULL, NULL, NULL,
mca_coll_base_static_components, 0);

ompi/mca/coll/base/coll_base_functions.h (new file, 341 lines added)
View file

@ -0,0 +1,341 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_COLL_BASE_EXPORT_H
#define MCA_COLL_BASE_EXPORT_H
#include "ompi_config.h"
#include "ompi/mca/coll/base/base.h"
#include "opal/mca/mca.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/request/request.h"
/* need to include our own topo prototypes so we can malloc data on the comm correctly */
#include "coll_base_topo.h"
/* some fixed value index vars to simplify certain operations */
typedef enum COLLTYPE {
ALLGATHER = 0, /* 0 */
ALLGATHERV, /* 1 */
ALLREDUCE, /* 2 */
ALLTOALL, /* 3 */
ALLTOALLV, /* 4 */
ALLTOALLW, /* 5 */
BARRIER, /* 6 */
BCAST, /* 7 */
EXSCAN, /* 8 */
GATHER, /* 9 */
GATHERV, /* 10 */
REDUCE, /* 11 */
REDUCESCATTER, /* 12 */
SCAN, /* 13 */
SCATTER, /* 14 */
SCATTERV, /* 15 */
COLLCOUNT /* 16 end counter keep it as last element */
} COLLTYPE_T;
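/* Illustration only: these indices are meant for per-collective arrays of
 * length COLLCOUNT, e.g. the tuned component's per-communicator bookkeeping
 * seen earlier in this commit:
 *
 *     data->user_forced[BCAST].algorithm
 *     ompi_coll_tuned_forced_max_algorithms[BARRIER]
 */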
/* defined arg lists to simplify auto-inclusion of user overriding decision functions */
#define ALLGATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLGATHERV_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void * rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLREDUCE_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLTOALL_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLTOALLV_ARGS void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLTOALLW_ARGS void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t **sdtypes, void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t **rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define BARRIER_ARGS struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define BCAST_ARGS void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define EXSCAN_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define GATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define GATHERV_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define REDUCE_ARGS void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define REDUCESCATTER_ARGS void *sbuf, void *rbuf, int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define SCAN_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define SCATTER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define SCATTERV_ARGS void *sbuf, int *scounts, int *disps, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
/* end defined arg lists to simplify auto-inclusion of user overriding decision functions */
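/* Illustration only: with these macros a prototype such as
 *
 *     int ompi_coll_base_bcast_intra_chain(BCAST_ARGS, uint32_t segsize, int32_t chains);
 *
 * expands to the full argument list
 *
 *     int ompi_coll_base_bcast_intra_chain(void *buff, int count,
 *                                          struct ompi_datatype_t *datatype, int root,
 *                                          struct ompi_communicator_t *comm,
 *                                          mca_coll_base_module_t *module,
 *                                          uint32_t segsize, int32_t chains);
 */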
BEGIN_C_DECLS
/* All Gather */
int ompi_coll_base_allgather_intra_bruck(ALLGATHER_ARGS);
int ompi_coll_base_allgather_intra_recursivedoubling(ALLGATHER_ARGS);
int ompi_coll_base_allgather_intra_ring(ALLGATHER_ARGS);
int ompi_coll_base_allgather_intra_neighborexchange(ALLGATHER_ARGS);
int ompi_coll_base_allgather_intra_basic_linear(ALLGATHER_ARGS);
int ompi_coll_base_allgather_intra_two_procs(ALLGATHER_ARGS);
/* All GatherV */
int ompi_coll_base_allgatherv_intra_bruck(ALLGATHERV_ARGS);
int ompi_coll_base_allgatherv_intra_ring(ALLGATHERV_ARGS);
int ompi_coll_base_allgatherv_intra_neighborexchange(ALLGATHERV_ARGS);
int ompi_coll_base_allgatherv_intra_basic_default(ALLGATHERV_ARGS);
int ompi_coll_base_allgatherv_intra_two_procs(ALLGATHERV_ARGS);
/* All Reduce */
int ompi_coll_base_allreduce_intra_nonoverlapping(ALLREDUCE_ARGS);
int ompi_coll_base_allreduce_intra_recursivedoubling(ALLREDUCE_ARGS);
int ompi_coll_base_allreduce_intra_ring(ALLREDUCE_ARGS);
int ompi_coll_base_allreduce_intra_ring_segmented(ALLREDUCE_ARGS, uint32_t segsize);
int ompi_coll_base_allreduce_intra_basic_linear(ALLREDUCE_ARGS);
/* AlltoAll */
int ompi_coll_base_alltoall_intra_pairwise(ALLTOALL_ARGS);
int ompi_coll_base_alltoall_intra_bruck(ALLTOALL_ARGS);
int ompi_coll_base_alltoall_intra_basic_linear(ALLTOALL_ARGS);
int ompi_coll_base_alltoall_intra_linear_sync(ALLTOALL_ARGS, int max_requests);
int ompi_coll_base_alltoall_intra_two_procs(ALLTOALL_ARGS);
/* AlltoAllV */
int ompi_coll_base_alltoallv_intra_pairwise(ALLTOALLV_ARGS);
int ompi_coll_base_alltoallv_intra_basic_linear(ALLTOALLV_ARGS);
/* AlltoAllW */
/* Barrier */
int ompi_coll_base_barrier_intra_doublering(BARRIER_ARGS);
int ompi_coll_base_barrier_intra_recursivedoubling(BARRIER_ARGS);
int ompi_coll_base_barrier_intra_bruck(BARRIER_ARGS);
int ompi_coll_base_barrier_intra_two_procs(BARRIER_ARGS);
int ompi_coll_base_barrier_intra_linear(BARRIER_ARGS);
int ompi_coll_base_barrier_intra_tree(BARRIER_ARGS);
/* Bcast */
int ompi_coll_base_bcast_intra_basic_linear(BCAST_ARGS);
int ompi_coll_base_bcast_intra_chain(BCAST_ARGS, uint32_t segsize, int32_t chains);
int ompi_coll_base_bcast_intra_pipeline(BCAST_ARGS, uint32_t segsize);
int ompi_coll_base_bcast_intra_binomial(BCAST_ARGS, uint32_t segsize);
int ompi_coll_base_bcast_intra_bintree(BCAST_ARGS, uint32_t segsize);
int ompi_coll_base_bcast_intra_split_bintree(BCAST_ARGS, uint32_t segsize);
/* Exscan */
/* Gather */
int ompi_coll_base_gather_intra_basic_linear(GATHER_ARGS);
int ompi_coll_base_gather_intra_binomial(GATHER_ARGS);
int ompi_coll_base_gather_intra_linear_sync(GATHER_ARGS, int first_segment_size);
/* GatherV */
/* Reduce */
int ompi_coll_base_reduce_intra_basic_linear(REDUCE_ARGS);
int ompi_coll_base_reduce_intra_chain(REDUCE_ARGS, uint32_t segsize, int fanout, int max_outstanding_reqs );
int ompi_coll_base_reduce_intra_pipeline(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs );
int ompi_coll_base_reduce_intra_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs );
int ompi_coll_base_reduce_intra_binomial(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs );
int ompi_coll_base_reduce_intra_in_order_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs );
/* Reduce_scatter */
int ompi_coll_base_reduce_scatter_intra_nonoverlapping(REDUCESCATTER_ARGS);
int ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(REDUCESCATTER_ARGS);
int ompi_coll_base_reduce_scatter_intra_ring(REDUCESCATTER_ARGS);
/* Scan */
/* Scatter */
int ompi_coll_base_scatter_intra_basic_linear(SCATTER_ARGS);
int ompi_coll_base_scatter_intra_binomial(SCATTER_ARGS);
/* ScatterV */
END_C_DECLS
#define COLL_BASE_UPDATE_BINTREE( OMPI_COMM, BASE_MODULE, ROOT ) \
do { \
mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \
if( !( (coll_comm->cached_bintree) \
&& (coll_comm->cached_bintree_root == (ROOT)) ) ) { \
if( coll_comm->cached_bintree ) { /* destroy previous bintree if defined */ \
ompi_coll_base_topo_destroy_tree( &(coll_comm->cached_bintree) ); \
} \
coll_comm->cached_bintree = ompi_coll_base_topo_build_tree(2,(OMPI_COMM),(ROOT)); \
coll_comm->cached_bintree_root = (ROOT); \
} \
} while (0)
#define COLL_BASE_UPDATE_BMTREE( OMPI_COMM, BASE_MODULE, ROOT ) \
do { \
mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \
if( !( (coll_comm->cached_bmtree) \
&& (coll_comm->cached_bmtree_root == (ROOT)) ) ) { \
if( coll_comm->cached_bmtree ) { /* destroy previous binomial if defined */ \
ompi_coll_base_topo_destroy_tree( &(coll_comm->cached_bmtree) ); \
} \
coll_comm->cached_bmtree = ompi_coll_base_topo_build_bmtree( (OMPI_COMM), (ROOT) ); \
coll_comm->cached_bmtree_root = (ROOT); \
} \
} while (0)
#define COLL_BASE_UPDATE_IN_ORDER_BMTREE( OMPI_COMM, BASE_MODULE, ROOT ) \
do { \
mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \
if( !( (coll_comm->cached_in_order_bmtree) \
&& (coll_comm->cached_in_order_bmtree_root == (ROOT)) ) ) { \
if( coll_comm->cached_in_order_bmtree ) { /* destroy previous binomial if defined */ \
ompi_coll_base_topo_destroy_tree( &(coll_comm->cached_in_order_bmtree) ); \
} \
coll_comm->cached_in_order_bmtree = ompi_coll_base_topo_build_in_order_bmtree( (OMPI_COMM), (ROOT) ); \
coll_comm->cached_in_order_bmtree_root = (ROOT); \
} \
} while (0)
#define COLL_BASE_UPDATE_PIPELINE( OMPI_COMM, BASE_MODULE, ROOT ) \
do { \
mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \
if( !( (coll_comm->cached_pipeline) \
&& (coll_comm->cached_pipeline_root == (ROOT)) ) ) { \
if (coll_comm->cached_pipeline) { /* destroy previous pipeline if defined */ \
ompi_coll_base_topo_destroy_tree( &(coll_comm->cached_pipeline) ); \
} \
coll_comm->cached_pipeline = ompi_coll_base_topo_build_chain( 1, (OMPI_COMM), (ROOT) ); \
coll_comm->cached_pipeline_root = (ROOT); \
} \
} while (0)
#define COLL_BASE_UPDATE_CHAIN( OMPI_COMM, BASE_MODULE, ROOT, FANOUT ) \
do { \
mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \
if( !( (coll_comm->cached_chain) \
&& (coll_comm->cached_chain_root == (ROOT)) \
&& (coll_comm->cached_chain_fanout == (FANOUT)) ) ) { \
if( coll_comm->cached_chain) { /* destroy previous chain if defined */ \
ompi_coll_base_topo_destroy_tree( &(coll_comm->cached_chain) ); \
} \
coll_comm->cached_chain = ompi_coll_base_topo_build_chain((FANOUT), (OMPI_COMM), (ROOT)); \
coll_comm->cached_chain_root = (ROOT); \
coll_comm->cached_chain_fanout = (FANOUT); \
} \
} while (0)
#define COLL_BASE_UPDATE_IN_ORDER_BINTREE( OMPI_COMM, BASE_MODULE ) \
do { \
mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \
if( !(coll_comm->cached_in_order_bintree) ) { \
/* In-order binary tree topology is defined by communicator size */ \
/* Thus, there is no need to destroy anything */ \
coll_comm->cached_in_order_bintree = \
ompi_coll_base_topo_build_in_order_bintree((OMPI_COMM)); \
} \
} while (0)
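/* Usage sketch (illustrative; it mirrors the bcast code earlier in this
 * commit rather than defining anything new): a collective refreshes the
 * cached topology for the current root and then reads it back from the
 * per-communicator data hanging off the module:
 *
 *     mca_coll_base_comm_t *data = module->base_data;
 *     COLL_BASE_UPDATE_BINTREE( comm, module, root );
 *     tree = data->cached_bintree;
 */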
/**
* This macro gives a generic way to compute the best count of
* the segment (i.e. the number of complete datatypes that
* can fit in the specified SEGSIZE). Beware, when this macro
* is called, the SEGCOUNT should be initialized to the count as
* expected by the collective call.
*/
#define COLL_BASE_COMPUTED_SEGCOUNT(SEGSIZE, TYPELNG, SEGCOUNT) \
if( ((SEGSIZE) >= (TYPELNG)) && \
((SEGSIZE) < ((TYPELNG) * (SEGCOUNT))) ) { \
size_t residual; \
(SEGCOUNT) = (int)((SEGSIZE) / (TYPELNG)); \
residual = (SEGSIZE) - (SEGCOUNT) * (TYPELNG); \
if( residual > ((TYPELNG) >> 1) ) \
(SEGCOUNT)++; \
} \
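/* Worked example (illustrative): with SEGSIZE = 1000 bytes and TYPELNG = 48
 * bytes, SEGCOUNT becomes 1000 / 48 = 20 with a residual of 1000 - 20*48 = 40
 * bytes; since 40 > 48/2, the count is rounded up to 21 elements (~1008 bytes)
 * per segment. */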
/**
* This macro gives a generic way to compute well-distributed block counts
* when the count and number of blocks are fixed.
* Macro returns "early-block" count, "late-block" count, and "split-index"
* which is the block at which we switch from "early-block" count to
* the "late-block" count.
* count = split_index * early_block_count +
* (block_count - split_index) * late_block_count
* We do not perform ANY error checks - make sure that the input values
* make sense (e.g. count > num_blocks).
*/
#define COLL_BASE_COMPUTE_BLOCKCOUNT( COUNT, NUM_BLOCKS, SPLIT_INDEX, \
EARLY_BLOCK_COUNT, LATE_BLOCK_COUNT ) \
EARLY_BLOCK_COUNT = LATE_BLOCK_COUNT = COUNT / NUM_BLOCKS; \
SPLIT_INDEX = COUNT % NUM_BLOCKS; \
if (0 != SPLIT_INDEX) { \
EARLY_BLOCK_COUNT = EARLY_BLOCK_COUNT + 1; \
} \
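/* Worked example (illustrative): COUNT = 10 and NUM_BLOCKS = 4 gives
 * EARLY_BLOCK_COUNT = 3, LATE_BLOCK_COUNT = 2 and SPLIT_INDEX = 2, i.e. two
 * early blocks of 3 followed by two late blocks of 2:
 * 2*3 + (4-2)*2 = 10. */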
/*
* Data structure for hanging data off the communicator
* i.e. per module instance
*/
struct mca_coll_base_comm_t {
opal_object_t super;
/* standard data for requests and PML usage */
/* Precreate space for requests.
* Note this does not affect basic,
* but in the wrong context it can confuse a debugger;
* this is controlled by an MCA param.
*/
ompi_request_t **mcct_reqs;
int mcct_num_reqs;
/*
* base topo information caching per communicator
*
* for each communicator we cache the topo information so we can
* reuse it without regenerating it; if we change the root [or fanout]
* we regenerate and recache this information
*/
/* general tree with n fan out */
ompi_coll_tree_t *cached_ntree;
int cached_ntree_root;
int cached_ntree_fanout;
/* binary tree */
ompi_coll_tree_t *cached_bintree;
int cached_bintree_root;
/* binomial tree */
ompi_coll_tree_t *cached_bmtree;
int cached_bmtree_root;
/* binomial tree */
ompi_coll_tree_t *cached_in_order_bmtree;
int cached_in_order_bmtree_root;
/* chained tree (fanout followed by pipelines) */
ompi_coll_tree_t *cached_chain;
int cached_chain_root;
int cached_chain_fanout;
/* pipeline */
ompi_coll_tree_t *cached_pipeline;
int cached_pipeline_root;
/* in-order binary tree (root of the in-order binary tree is rank 0) */
ompi_coll_tree_t *cached_in_order_bintree;
};
typedef struct mca_coll_base_comm_t mca_coll_base_comm_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_base_comm_t);
static inline void ompi_coll_base_free_reqs(ompi_request_t **reqs, int count)
{
int i;
for (i = 0; i < count; ++i)
ompi_request_free(&reqs[i]);
}
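/* Usage sketch (illustrative): the basic linear bcast earlier in this commit
 * drives this helper with the pre-allocated request array from the module
 * data once the outstanding sends have completed:
 *
 *     ompi_request_t **reqs = data->mcct_reqs;
 *     ...
 *     err = ompi_request_wait_all(i, reqs, MPI_STATUSES_IGNORE);
 *     ompi_coll_base_free_reqs(reqs, i);
 */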
#endif /* MCA_COLL_BASE_EXPORT_H */

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -30,30 +30,14 @@
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/* gather algorithm variables */
static int coll_tuned_gather_algorithm_count = 3;
static int coll_tuned_gather_forced_algorithm = 0;
static int coll_tuned_gather_segment_size = 0;
static int coll_tuned_gather_tree_fanout;
static int coll_tuned_gather_chain_fanout;
/* valid values for coll_tuned_gather_forced_algorithm */
static mca_base_var_enum_value_t gather_algorithms[] = {
{0, "ignore"},
{1, "basic_linear"},
{2, "binomial"},
{3, "linear_sync"},
{0, NULL}
};
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"
/* Todo: gather_intra_generic, gather_intra_binary, gather_intra_chain,
* gather_intra_pipeline, segmentation? */
int
ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
ompi_coll_base_gather_intra_binomial(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -65,19 +49,19 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
char *ptmp = NULL, *tempbuf = NULL;
ompi_coll_tree_t* bmtree;
MPI_Status status;
MPI_Aint sextent, slb, strue_lb, strue_extent;
MPI_Aint sextent, slb, strue_lb, strue_extent;
MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_gather_intra_binomial rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_gather_intra_binomial rank %d", rank));
/* create the binomial tree */
COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root );
COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, base_module, root );
bmtree = data->cached_in_order_bmtree;
ompi_datatype_get_extent(sdtype, &slb, &sextent);
@ -112,7 +96,7 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
} else {
/* copy from rbuf to temp buffer */
err = ompi_datatype_copy_content_same_ddt(rdtype, rcount, ptmp,
err = ompi_datatype_copy_content_same_ddt(rdtype, rcount, ptmp,
(char *)rbuf + (ptrdiff_t)rank * rextent * (ptrdiff_t)rcount);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
@ -157,8 +141,8 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
mycount = size - vkid;
mycount *= rcount;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_gather_intra_binomial rank %d recv %d mycount = %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_gather_intra_binomial rank %d recv %d mycount = %d",
rank, bmtree->tree_next[i], mycount));
err = MCA_PML_CALL(recv(ptmp + total_recv*rextent, (ptrdiff_t)rcount * size - total_recv, rdtype,
@ -172,8 +156,8 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
if (rank != root) {
/* all nodes except root send to parents */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_gather_intra_binomial rank %d send %d count %d\n",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_gather_intra_binomial rank %d send %d count %d\n",
rank, bmtree->tree_prev, total_recv));
err = MCA_PML_CALL(send(ptmp, total_recv, sdtype,
@ -207,7 +191,7 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
if (NULL != tempbuf)
free(tempbuf);
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
@ -220,11 +204,11 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
ompi_coll_base_gather_intra_linear_sync(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int first_segment_size)
@ -237,8 +221,8 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_gather_intra_linear_sync rank %d, segment %d", rank, first_segment_size));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_gather_intra_linear_sync rank %d, segment %d", rank, first_segment_size));
if (rank != root) {
/* Non-root processes:
@ -250,10 +234,10 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
ompi_datatype_type_size(sdtype, &typelng);
ompi_datatype_get_extent(sdtype, &lb, &extent);
first_segment_count = scount;
COLL_TUNED_COMPUTED_SEGCOUNT( (size_t) first_segment_size, typelng,
COLL_BASE_COMPUTED_SEGCOUNT( (size_t) first_segment_size, typelng,
first_segment_count );
ret = MCA_PML_CALL(recv(sbuf, 0, MPI_BYTE, root,
ret = MCA_PML_CALL(recv(sbuf, 0, MPI_BYTE, root,
MCA_COLL_BASE_TAG_GATHER,
comm, MPI_STATUS_IGNORE));
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
@ -263,15 +247,15 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
MCA_PML_BASE_SEND_STANDARD, comm));
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
ret = MCA_PML_CALL(send((char*)sbuf + extent * first_segment_count,
(scount - first_segment_count), sdtype,
ret = MCA_PML_CALL(send((char*)sbuf + extent * first_segment_count,
(scount - first_segment_count), sdtype,
root, MCA_COLL_BASE_TAG_GATHER,
MCA_PML_BASE_SEND_STANDARD, comm));
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
} else {
/* Root process,
/* Root process,
- For every non-root node:
- post irecv for the first segment of the message
- send zero byte message to signal node to send the message
@ -284,20 +268,20 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
ompi_request_t *first_segment_req;
reqs = (ompi_request_t**) calloc(size, sizeof(ompi_request_t*));
if (NULL == reqs) { ret = -1; line = __LINE__; goto error_hndl; }
ompi_datatype_type_size(rdtype, &typelng);
ompi_datatype_get_extent(rdtype, &lb, &extent);
first_segment_count = rcount;
COLL_TUNED_COMPUTED_SEGCOUNT( (size_t)first_segment_size, typelng,
COLL_BASE_COMPUTED_SEGCOUNT( (size_t)first_segment_size, typelng,
first_segment_count );
ptmp = (char *) rbuf;
for (i = 0; i < size; ++i) {
if (i == rank) {
if (i == rank) {
/* skip myself */
reqs[i] = MPI_REQUEST_NULL;
continue;
}
reqs[i] = MPI_REQUEST_NULL;
continue;
}
/* irecv for the first segment from i */
ptmp = (char*)rbuf + (ptrdiff_t)i * (ptrdiff_t)rcount * extent;
@ -305,7 +289,7 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
MCA_COLL_BASE_TAG_GATHER, comm,
&first_segment_req));
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
/* send sync message */
ret = MCA_PML_CALL(send(rbuf, 0, MPI_BYTE, i,
MCA_COLL_BASE_TAG_GATHER,
@ -314,7 +298,7 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
/* irecv for the second segment */
ptmp = (char*)rbuf + ((ptrdiff_t)i * (ptrdiff_t)rcount + first_segment_count) * extent;
ret = MCA_PML_CALL(irecv(ptmp, (rcount - first_segment_count),
ret = MCA_PML_CALL(irecv(ptmp, (rcount - first_segment_count),
rdtype, i, MCA_COLL_BASE_TAG_GATHER, comm,
&reqs[i]));
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
@ -327,11 +311,11 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
/* copy local data if necessary */
if (MPI_IN_PLACE != sbuf) {
ret = ompi_datatype_sndrcv(sbuf, scount, sdtype,
(char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * extent,
(char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * extent,
rcount, rdtype);
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
}
/* wait all second segments to complete */
ret = ompi_request_wait_all(size, reqs, MPI_STATUSES_IGNORE);
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
@ -346,8 +330,8 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
if (NULL != reqs) {
free(reqs);
}
OPAL_OUTPUT (( ompi_coll_tuned_stream,
"ERROR_HNDL: node %d file %s line %d error %d\n",
OPAL_OUTPUT (( ompi_coll_base_framework.framework_output,
"ERROR_HNDL: node %d file %s line %d error %d\n",
rank, __FILE__, line, ret ));
return ret;
}
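/*
 * Editor's sketch (not part of the commit): the synchronized linear gather
 * above boils down to the handshake below, shown with plain MPI point-to-point
 * calls instead of the internal MCA_PML_CALL interface. GATHER_TAG, the use of
 * MPI_INT, and the placement of the first-segment wait are illustrative
 * assumptions; the real code derives first_segment_count from a byte-based
 * segment size and uses MCA_COLL_BASE_TAG_GATHER.
 */
#include <mpi.h>
#include <stdlib.h>

#define GATHER_TAG 4242                 /* hypothetical tag */

static void gather_linear_sync_sketch(const int *sbuf, int scount,
                                      int *rbuf, int rcount, int root,
                                      MPI_Comm comm, int first_segment_count)
{
    int rank, size, i;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    if (rank != root) {
        /* wait for the zero-byte "go" message, then send the two segments */
        MPI_Recv(NULL, 0, MPI_BYTE, root, GATHER_TAG, comm, MPI_STATUS_IGNORE);
        MPI_Send(sbuf, first_segment_count, MPI_INT, root, GATHER_TAG, comm);
        MPI_Send(sbuf + first_segment_count, scount - first_segment_count,
                 MPI_INT, root, GATHER_TAG, comm);
        return;
    }

    MPI_Request *reqs = calloc(size, sizeof(MPI_Request));   /* second segments */
    for (i = 0; i < size; ++i) {
        if (i == rank) { reqs[i] = MPI_REQUEST_NULL; continue; }
        MPI_Request first_req;
        int *slot = rbuf + (size_t)i * rcount;
        /* pre-post the first segment, release the sender, pre-post the rest */
        MPI_Irecv(slot, first_segment_count, MPI_INT, i, GATHER_TAG, comm, &first_req);
        MPI_Send(NULL, 0, MPI_BYTE, i, GATHER_TAG, comm);
        MPI_Irecv(slot + first_segment_count, rcount - first_segment_count,
                  MPI_INT, i, GATHER_TAG, comm, &reqs[i]);
        MPI_Wait(&first_req, MPI_STATUS_IGNORE);
    }
    for (i = 0; i < scount; ++i)          /* root's own contribution */
        rbuf[(size_t)rank * rcount + i] = sbuf[i];
    MPI_Waitall(size, reqs, MPI_STATUSES_IGNORE);
    free(reqs);
}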
@ -355,13 +339,13 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
/*
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
* but for some small number of nodes and/or small data sizes they
* are just as fast as tuned/tree based segmenting operations
* but for some small number of nodes and/or small data sizes they
* are just as fast as base/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
* have to duplicate code.
* JPG following the examples from other coll_tuned implementations. Dec06.
* JPG following the examples from other coll_base implementations. Dec06.
*/
/* copied function (with appropriate renaming) starts here */
@ -373,7 +357,7 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_tuned_gather_intra_basic_linear(void *sbuf, int scount,
ompi_coll_base_gather_intra_basic_linear(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -389,8 +373,8 @@ ompi_coll_tuned_gather_intra_basic_linear(void *sbuf, int scount,
rank = ompi_comm_rank(comm);
/* Everyone but root sends data and returns. */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_gather_intra_basic_linear rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_gather_intra_basic_linear rank %d", rank));
if (rank != root) {
return MCA_PML_CALL(send(sbuf, scount, sdtype, root,
@ -427,164 +411,3 @@ ompi_coll_tuned_gather_intra_basic_linear(void *sbuf, int scount,
/* copied function (with appropriate renaming) ends here */
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if its forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map
routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values
and perms */
/* module does not call this they call the forced_getvalues routine instead */
int
ompi_coll_tuned_gather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[GATHER] = coll_tuned_gather_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_count",
"Number of gather algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_gather_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_gather_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_gather_algorithms", gather_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm",
"Which gather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial, 3 linear with synchronization.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_gather_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_gather_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_segmentsize",
"Segment size in bytes used by default for gather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_gather_segment_size);
coll_tuned_gather_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_tree_fanout",
"Fanout for n-tree used for gather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_gather_tree_fanout);
coll_tuned_gather_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_chain_fanout",
"Fanout for chains used for gather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_gather_chain_fanout);
return (MPI_SUCCESS);
}
int
ompi_coll_tuned_gather_intra_do_forced(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:gather_intra_do_forced selected algorithm %d",
data->user_forced[GATHER].algorithm));
switch (data->user_forced[GATHER].algorithm) {
case (0):
return ompi_coll_tuned_gather_intra_dec_fixed (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (1):
return ompi_coll_tuned_gather_intra_basic_linear (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (2):
return ompi_coll_tuned_gather_intra_binomial(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (3):
return ompi_coll_tuned_gather_intra_linear_sync (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module,
data->user_forced[GATHER].segsize);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:gather_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[GATHER].algorithm,
ompi_coll_tuned_forced_max_algorithms[GATHER]));
return (MPI_ERR_ARG);
} /* switch */
}
int
ompi_coll_tuned_gather_intra_do_this(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:gather_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0):
return ompi_coll_tuned_gather_intra_dec_fixed (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (1):
return ompi_coll_tuned_gather_intra_basic_linear (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (2):
return ompi_coll_tuned_gather_intra_binomial(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (3):
return ompi_coll_tuned_gather_intra_linear_sync (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module,
segsize);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:gather_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm,
ompi_coll_tuned_forced_max_algorithms[GATHER]));
return (MPI_ERR_ARG);
} /* switch */
}
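The do_forced and do_this entry points above are driven by the MCA parameters registered in ompi_coll_tuned_gather_intra_check_forced_init. Assuming the standard Open MPI MCA mechanism (and the coll_tuned_use_dynamic_rules switch, which is outside this diff), a run such as mpirun --mca coll_tuned_use_dynamic_rules 1 --mca coll_tuned_gather_algorithm 3 ./app would route MPI_Gather through the linear-with-synchronization variant, i.e. algorithm 3 in the enum registered above.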

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -31,28 +31,8 @@
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/op/op.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
/* reduce algorithm variables */
static int coll_tuned_reduce_algorithm_count = 6;
static int coll_tuned_reduce_forced_algorithm = 0;
static int coll_tuned_reduce_segment_size = 0;
static int coll_tuned_reduce_max_requests;
static int coll_tuned_reduce_tree_fanout;
static int coll_tuned_reduce_chain_fanout;
/* valid values for coll_tuned_reduce_forced_algorithm */
static mca_base_var_enum_value_t reduce_algorithms[] = {
{0, "ignore"},
{1, "linear"},
{2, "chain"},
{3, "pipeline"},
{4, "binary"},
{5, "binomial"},
{6, "in-order_binary"},
{0, NULL}
};
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
/**
* This is a generic implementation of the reduce protocol. It used the tree
@ -62,10 +42,10 @@ static mca_base_var_enum_value_t reduce_algorithms[] = {
* the number of datatype to the original count (original_count)
*
* Note that for non-commutative operations we cannot save memory copy
* for the first block: thus we must copy sendbuf to accumbuf on intermediate
* for the first block: thus we must copy sendbuf to accumbuf on intermediate
* to keep the optimized loop happy.
*/
int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_count,
int ompi_coll_base_reduce_generic( void* sendbuf, void* recvbuf, int original_count,
ompi_datatype_t* datatype, ompi_op_t* op,
int root, ompi_communicator_t* comm,
mca_coll_base_module_t *module,
@ -90,60 +70,60 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
num_segments = (original_count + count_by_segment - 1) / count_by_segment;
segment_increment = (ptrdiff_t)count_by_segment * extent;
sendtmpbuf = (char*) sendbuf;
if( sendbuf == MPI_IN_PLACE ) {
sendtmpbuf = (char *)recvbuf;
sendtmpbuf = (char*) sendbuf;
if( sendbuf == MPI_IN_PLACE ) {
sendtmpbuf = (char *)recvbuf;
}
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_generic count %d, msg size %ld, segsize %ld, max_requests %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:reduce_generic count %d, msg size %ld, segsize %ld, max_requests %d",
original_count, (unsigned long)((ptrdiff_t)num_segments * (ptrdiff_t)segment_increment),
(unsigned long)segment_increment, max_outstanding_reqs));
rank = ompi_comm_rank(comm);
/* non-leaf nodes - wait for children to send me data & forward up
/* non-leaf nodes - wait for children to send me data & forward up
(if needed) */
if( tree->tree_nextsize > 0 ) {
ptrdiff_t true_lower_bound, true_extent, real_segment_size;
ompi_datatype_get_true_extent( datatype, &true_lower_bound,
ompi_datatype_get_true_extent( datatype, &true_lower_bound,
&true_extent );
/* handle non existant recv buffer (i.e. its NULL) and
/* handle non existant recv buffer (i.e. its NULL) and
protect the recv buffer on non-root nodes */
accumbuf = (char*)recvbuf;
if( (NULL == accumbuf) || (root != rank) ) {
/* Allocate temporary accumulator buffer. */
accumbuf_free = (char*)malloc(true_extent +
accumbuf_free = (char*)malloc(true_extent +
(ptrdiff_t)(original_count - 1) * extent);
if (accumbuf_free == NULL) {
line = __LINE__; ret = -1; goto error_hndl;
if (accumbuf_free == NULL) {
line = __LINE__; ret = -1; goto error_hndl;
}
accumbuf = accumbuf_free - lower_bound;
}
}
/* If this is a non-commutative operation we must copy
sendbuf to the accumbuf, in order to simplify the loops */
if (!ompi_op_is_commute(op)) {
ompi_datatype_copy_content_same_ddt(datatype, original_count,
ompi_datatype_copy_content_same_ddt(datatype, original_count,
(char*)accumbuf,
(char*)sendtmpbuf);
}
/* Allocate two buffers for incoming segments */
real_segment_size = true_extent + (ptrdiff_t)(count_by_segment - 1) * extent;
inbuf_free[0] = (char*) malloc(real_segment_size);
if( inbuf_free[0] == NULL ) {
line = __LINE__; ret = -1; goto error_hndl;
if( inbuf_free[0] == NULL ) {
line = __LINE__; ret = -1; goto error_hndl;
}
inbuf[0] = inbuf_free[0] - lower_bound;
/* if there is chance to overlap communication -
allocate second buffer */
if( (num_segments > 1) || (tree->tree_nextsize > 1) ) {
inbuf_free[1] = (char*) malloc(real_segment_size);
if( inbuf_free[1] == NULL ) {
if( inbuf_free[1] == NULL ) {
line = __LINE__; ret = -1; goto error_hndl;
}
inbuf[1] = inbuf_free[1] - lower_bound;
}
}
/* reset input buffer index and receive count */
inbi = 0;
@ -166,14 +146,14 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
if( segindex < num_segments ) {
void* local_recvbuf = inbuf[inbi];
if( 0 == i ) {
/* for the first step (1st child per segment) and
* commutative operations we might be able to irecv
* directly into the accumulate buffer so that we can
* reduce(op) this with our sendbuf in one step as
* ompi_op_reduce only has two buffer pointers,
/* for the first step (1st child per segment) and
* commutative operations we might be able to irecv
* directly into the accumulate buffer so that we can
* reduce(op) this with our sendbuf in one step as
* ompi_op_reduce only has two buffer pointers,
* this avoids an extra memory copy.
*
* BUT if the operation is non-commutative or
* BUT if the operation is non-commutative or
* we are root and are USING MPI_IN_PLACE this is wrong!
*/
if( (ompi_op_is_commute(op)) &&
@ -183,34 +163,34 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
}
ret = MCA_PML_CALL(irecv(local_recvbuf, recvcount, datatype,
tree->tree_next[i],
MCA_COLL_BASE_TAG_REDUCE, comm,
tree->tree_next[i],
MCA_COLL_BASE_TAG_REDUCE, comm,
&reqs[inbi]));
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl;}
}
/* wait for previous req to complete, if any.
if there are no requests reqs[inbi ^1] will be
if there are no requests reqs[inbi ^1] will be
MPI_REQUEST_NULL. */
/* wait on data from last child for previous segment */
ret = ompi_request_wait_all( 1, &reqs[inbi ^ 1],
ret = ompi_request_wait_all( 1, &reqs[inbi ^ 1],
MPI_STATUSES_IGNORE );
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
local_op_buffer = inbuf[inbi ^ 1];
if( i > 0 ) {
/* our first operation is to combine our own [sendbuf] data
* with the data we recvd from down stream (but only
* the operation is commutative and if we are not root and
/* our first operation is to combine our own [sendbuf] data
* with the data we recvd from down stream (but only
* the operation is commutative and if we are not root and
* not using MPI_IN_PLACE)
*/
if( 1 == i ) {
if( (ompi_op_is_commute(op)) &&
if( (ompi_op_is_commute(op)) &&
!((MPI_IN_PLACE == sendbuf) && (rank == tree->tree_root)) ) {
local_op_buffer = sendtmpbuf + (ptrdiff_t)segindex * (ptrdiff_t)segment_increment;
}
}
/* apply operation */
ompi_op_reduce(op, local_op_buffer,
accumbuf + (ptrdiff_t)segindex * (ptrdiff_t)segment_increment,
ompi_op_reduce(op, local_op_buffer,
accumbuf + (ptrdiff_t)segindex * (ptrdiff_t)segment_increment,
recvcount, datatype );
} else if ( segindex > 0 ) {
void* accumulator = accumbuf + (ptrdiff_t)(segindex-1) * (ptrdiff_t)segment_increment;
@ -220,25 +200,25 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
local_op_buffer = sendtmpbuf + (ptrdiff_t)(segindex-1) * (ptrdiff_t)segment_increment;
}
}
ompi_op_reduce(op, local_op_buffer, accumulator, prevcount,
ompi_op_reduce(op, local_op_buffer, accumulator, prevcount,
datatype );
/* all reduced on available data this step (i) complete,
/* all reduced on available data this step (i) complete,
* pass to the next process unless you are the root.
*/
if (rank != tree->tree_root) {
/* send combined/accumulated data to parent */
ret = MCA_PML_CALL( send( accumulator, prevcount,
datatype, tree->tree_prev,
ret = MCA_PML_CALL( send( accumulator, prevcount,
datatype, tree->tree_prev,
MCA_COLL_BASE_TAG_REDUCE,
MCA_PML_BASE_SEND_STANDARD,
MCA_PML_BASE_SEND_STANDARD,
comm) );
if (ret != MPI_SUCCESS) {
line = __LINE__; goto error_hndl;
if (ret != MPI_SUCCESS) {
line = __LINE__; goto error_hndl;
}
}
/* we stop when segindex = number of segments
/* we stop when segindex = number of segments
(i.e. we do num_segment+1 steps for pipelining */
if (segindex == num_segments) break;
}
@ -254,33 +234,33 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
if( accumbuf_free != NULL ) free(accumbuf_free);
}
/* leaf nodes
Depending on the value of max_outstanding_reqs and
/* leaf nodes
Depending on the value of max_outstanding_reqs and
the number of segments we have two options:
- send all segments using blocking send to the parent, or
- avoid overflooding the parent nodes by limiting the number of
- avoid overflooding the parent nodes by limiting the number of
outstanding requests to max_oustanding_reqs.
TODO/POSSIBLE IMPROVEMENT: If there is a way to determine the eager size
for the current communication, synchronization should be used only
TODO/POSSIBLE IMPROVEMENT: If there is a way to determine the eager size
for the current communication, synchronization should be used only
when the message/segment size is smaller than the eager size.
*/
else {
/* If the number of segments is less than a maximum number of outstanding
requests or there is no limit on the maximum number of outstanding
requests or there is no limit on the maximum number of outstanding
requests, we send data to the parent using blocking send */
if ((0 == max_outstanding_reqs) ||
if ((0 == max_outstanding_reqs) ||
(num_segments <= max_outstanding_reqs)) {
segindex = 0;
while ( original_count > 0) {
if (original_count < count_by_segment) {
count_by_segment = original_count;
}
ret = MCA_PML_CALL( send((char*)sendbuf +
ret = MCA_PML_CALL( send((char*)sendbuf +
(ptrdiff_t)segindex * (ptrdiff_t)segment_increment,
count_by_segment, datatype,
tree->tree_prev,
tree->tree_prev,
MCA_COLL_BASE_TAG_REDUCE,
MCA_PML_BASE_SEND_STANDARD,
comm) );
@ -310,7 +290,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
ret = MCA_PML_CALL( isend((char*)sendbuf +
(ptrdiff_t)segindex * (ptrdiff_t)segment_increment,
count_by_segment, datatype,
tree->tree_prev,
tree->tree_prev,
MCA_COLL_BASE_TAG_REDUCE,
MCA_PML_BASE_SEND_SYNCHRONOUS, comm,
&sreq[segindex]) );
@ -328,12 +308,12 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
if( original_count < count_by_segment ) {
count_by_segment = original_count;
}
ret = MCA_PML_CALL( isend((char*)sendbuf +
(ptrdiff_t)segindex * (ptrdiff_t)segment_increment,
count_by_segment, datatype,
tree->tree_prev,
MCA_COLL_BASE_TAG_REDUCE,
MCA_PML_BASE_SEND_SYNCHRONOUS, comm,
ret = MCA_PML_CALL( isend((char*)sendbuf +
(ptrdiff_t)segindex * (ptrdiff_t)segment_increment,
count_by_segment, datatype,
tree->tree_prev,
MCA_COLL_BASE_TAG_REDUCE,
MCA_PML_BASE_SEND_SYNCHRONOUS, comm,
&sreq[creq]) );
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
creq = (creq + 1) % max_outstanding_reqs;
@ -342,7 +322,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
}
/* Wait on the remaining request to complete */
ret = ompi_request_wait_all( max_outstanding_reqs, sreq,
ret = ompi_request_wait_all( max_outstanding_reqs, sreq,
MPI_STATUSES_IGNORE );
if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
@ -353,8 +333,8 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
return OMPI_SUCCESS;
error_hndl: /* error handler */
OPAL_OUTPUT (( ompi_coll_tuned_stream,
"ERROR_HNDL: node %d file %s line %d error %d\n",
OPAL_OUTPUT (( ompi_coll_base_framework.framework_output,
"ERROR_HNDL: node %d file %s line %d error %d\n",
rank, __FILE__, line, ret ));
if( inbuf_free[0] != NULL ) free(inbuf_free[0]);
if( inbuf_free[1] != NULL ) free(inbuf_free[1]);
@ -369,9 +349,9 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
meaning that at least one datatype must fit in the segment !
*/
int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count,
ompi_datatype_t* datatype,
ompi_op_t* op, int root,
int ompi_coll_base_reduce_intra_chain( void *sendbuf, void *recvbuf, int count,
ompi_datatype_t* datatype,
ompi_op_t* op, int root,
ompi_communicator_t* comm,
mca_coll_base_module_t *module,
uint32_t segsize, int fanout,
@ -379,27 +359,27 @@ int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count,
{
int segcount = count;
size_t typelng;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_chain rank %d fo %d ss %5d", ompi_comm_rank(comm), fanout, segsize));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_chain rank %d fo %d ss %5d", ompi_comm_rank(comm), fanout, segsize));
COLL_TUNED_UPDATE_CHAIN( comm, tuned_module, root, fanout );
COLL_BASE_UPDATE_CHAIN( comm, base_module, root, fanout );
/**
* Determine number of segments and number of elements
* sent per operation
*/
ompi_datatype_type_size( datatype, &typelng );
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype,
return ompi_coll_base_reduce_generic( sendbuf, recvbuf, count, datatype,
op, root, comm, module,
data->cached_chain,
data->cached_chain,
segcount, max_outstanding_reqs );
}
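/*
 * Editor's sketch (illustrative, not part of the commit): the
 * COLL_BASE_COMPUTED_SEGCOUNT macro used by the wrappers above turns a
 * segment size in bytes into a per-segment element count, roughly as below.
 * Treat this as an assumption about the macro's intent, not its exact text.
 */
static int computed_segcount(size_t segsize, size_t typelng, int count)
{
    int segcount = count;   /* segsize == 0 (no segmentation) keeps the whole message */
    if (segsize >= typelng && segsize < typelng * (size_t)count) {
        segcount = (int)(segsize / typelng);
        /* round up when the leftover bytes exceed half an element */
        if (segsize - (size_t)segcount * typelng > typelng / 2) {
            segcount++;
        }
    }
    return segcount;        /* always 0 < segcount <= count */
}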
int ompi_coll_tuned_reduce_intra_pipeline( void *sendbuf, void *recvbuf,
int ompi_coll_base_reduce_intra_pipeline( void *sendbuf, void *recvbuf,
int count, ompi_datatype_t* datatype,
ompi_op_t* op, int root,
ompi_communicator_t* comm,
@ -409,101 +389,101 @@ int ompi_coll_tuned_reduce_intra_pipeline( void *sendbuf, void *recvbuf,
{
int segcount = count;
size_t typelng;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_pipeline rank %d ss %5d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_pipeline rank %d ss %5d",
ompi_comm_rank(comm), segsize));
COLL_TUNED_UPDATE_PIPELINE( comm, tuned_module, root );
COLL_BASE_UPDATE_PIPELINE( comm, base_module, root );
/**
* Determine number of segments and number of elements
* sent per operation
*/
ompi_datatype_type_size( datatype, &typelng );
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype,
return ompi_coll_base_reduce_generic( sendbuf, recvbuf, count, datatype,
op, root, comm, module,
data->cached_pipeline,
data->cached_pipeline,
segcount, max_outstanding_reqs );
}
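/*
 * Editor's sketch (illustrative only): the leaf-node flow control described in
 * ompi_coll_base_reduce_generic above, reduced to its core. A fixed pool of
 * max_reqs requests is recycled so at most max_reqs segment sends are in
 * flight at once, mirroring the max_outstanding_reqs window of synchronous
 * isends (this sketch assumes max_reqs >= 1; the 0 == "no limit" case uses
 * plain blocking sends). Plain MPI calls and REDUCE_TAG are assumptions
 * standing in for the internal PML interface and the real tag.
 */
#include <mpi.h>
#include <stdlib.h>

#define REDUCE_TAG 4243                 /* hypothetical tag */

static void leaf_segmented_send_sketch(const char *sendbuf, int num_segments,
                                       int seg_bytes, int parent, int max_reqs,
                                       MPI_Comm comm)
{
    MPI_Request *reqs = malloc((size_t)max_reqs * sizeof(MPI_Request));
    int s, slot;

    /* fill the window */
    for (s = 0; s < max_reqs && s < num_segments; ++s) {
        MPI_Issend(sendbuf + (size_t)s * seg_bytes, seg_bytes, MPI_BYTE,
                   parent, REDUCE_TAG, comm, &reqs[s]);
    }
    /* steady state: as one send completes, post the next one in its slot */
    for (slot = 0; s < num_segments; ++s, slot = (slot + 1) % max_reqs) {
        MPI_Wait(&reqs[slot], MPI_STATUS_IGNORE);
        MPI_Issend(sendbuf + (size_t)s * seg_bytes, seg_bytes, MPI_BYTE,
                   parent, REDUCE_TAG, comm, &reqs[slot]);
    }
    /* drain whatever is still outstanding */
    MPI_Waitall(s < max_reqs ? s : max_reqs, reqs, MPI_STATUSES_IGNORE);
    free(reqs);
}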
int ompi_coll_tuned_reduce_intra_binary( void *sendbuf, void *recvbuf,
int ompi_coll_base_reduce_intra_binary( void *sendbuf, void *recvbuf,
int count, ompi_datatype_t* datatype,
ompi_op_t* op, int root,
ompi_communicator_t* comm,
ompi_communicator_t* comm,
mca_coll_base_module_t *module,
uint32_t segsize,
uint32_t segsize,
int max_outstanding_reqs )
{
int segcount = count;
size_t typelng;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_binary rank %d ss %5d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_binary rank %d ss %5d",
ompi_comm_rank(comm), segsize));
COLL_TUNED_UPDATE_BINTREE( comm, tuned_module, root );
COLL_BASE_UPDATE_BINTREE( comm, base_module, root );
/**
* Determine number of segments and number of elements
* sent per operation
*/
ompi_datatype_type_size( datatype, &typelng );
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype,
return ompi_coll_base_reduce_generic( sendbuf, recvbuf, count, datatype,
op, root, comm, module,
data->cached_bintree,
data->cached_bintree,
segcount, max_outstanding_reqs );
}
int ompi_coll_tuned_reduce_intra_binomial( void *sendbuf, void *recvbuf,
int ompi_coll_base_reduce_intra_binomial( void *sendbuf, void *recvbuf,
int count, ompi_datatype_t* datatype,
ompi_op_t* op, int root,
ompi_communicator_t* comm,
ompi_communicator_t* comm,
mca_coll_base_module_t *module,
uint32_t segsize,
int max_outstanding_reqs )
{
int segcount = count;
size_t typelng;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_binomial rank %d ss %5d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_binomial rank %d ss %5d",
ompi_comm_rank(comm), segsize));
COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root );
COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, base_module, root );
/**
* Determine number of segments and number of elements
* sent per operation
*/
ompi_datatype_type_size( datatype, &typelng );
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype,
return ompi_coll_base_reduce_generic( sendbuf, recvbuf, count, datatype,
op, root, comm, module,
data->cached_in_order_bmtree,
data->cached_in_order_bmtree,
segcount, max_outstanding_reqs );
}
/*
* reduce_intra_in_order_binary
*
* reduce_intra_in_order_binary
*
* Function: Logarithmic reduce operation for non-commutative operations.
* Accepts: same as MPI_Reduce()
* Returns: MPI_SUCCESS or error code
*/
int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
int count,
int ompi_coll_base_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
int count,
ompi_datatype_t* datatype,
ompi_op_t* op, int root,
ompi_communicator_t* comm,
ompi_communicator_t* comm,
mca_coll_base_module_t *module,
uint32_t segsize,
int max_outstanding_reqs )
@ -511,28 +491,28 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
int ret, rank, size, io_root, segcount = count;
void *use_this_sendbuf = NULL, *use_this_recvbuf = NULL;
size_t typelng;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_in_order_binary rank %d ss %5d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_in_order_binary rank %d ss %5d",
rank, segsize));
COLL_TUNED_UPDATE_IN_ORDER_BINTREE( comm, tuned_module );
COLL_BASE_UPDATE_IN_ORDER_BINTREE( comm, base_module );
/**
* Determine number of segments and number of elements
* sent per operation
*/
ompi_datatype_type_size( datatype, &typelng );
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
/* An in-order binary tree must use root (size-1) to preserve the order of
operations. Thus, if root is not rank (size - 1), then we must handle
1. MPI_IN_PLACE option on real root, and
1. MPI_IN_PLACE option on real root, and
2. we must allocate temporary recvbuf on rank (size - 1).
Note that generic function must be careful not to switch order of
Note that generic function must be careful not to switch order of
operations for non-commutative ops.
*/
io_root = size - 1;
@ -541,7 +521,7 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
if (io_root != root) {
ptrdiff_t tlb, text, lb, ext;
char *tmpbuf = NULL;
ompi_datatype_get_extent(datatype, &lb, &ext);
ompi_datatype_get_true_extent(datatype, &tlb, &text);
@ -550,7 +530,7 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
if (NULL == tmpbuf) {
return MPI_ERR_INTERN;
}
ompi_datatype_copy_content_same_ddt(datatype, count,
ompi_datatype_copy_content_same_ddt(datatype, count,
(char*)tmpbuf,
(char*)recvbuf);
use_this_sendbuf = tmpbuf;
@ -564,9 +544,9 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
}
/* Use generic reduce with in-order binary tree topology and io_root */
ret = ompi_coll_tuned_reduce_generic( use_this_sendbuf, use_this_recvbuf, count, datatype,
op, io_root, comm, module,
data->cached_in_order_bintree,
ret = ompi_coll_base_reduce_generic( use_this_sendbuf, use_this_recvbuf, count, datatype,
op, io_root, comm, module,
data->cached_in_order_bintree,
segcount, max_outstanding_reqs );
if (MPI_SUCCESS != ret) { return ret; }
@ -581,11 +561,11 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
if (MPI_IN_PLACE == sendbuf) {
free(use_this_sendbuf);
}
} else if (io_root == rank) {
/* Send result from use_this_recvbuf to root */
ret = MCA_PML_CALL(send(use_this_recvbuf, count, datatype, root,
MCA_COLL_BASE_TAG_REDUCE,
MCA_COLL_BASE_TAG_REDUCE,
MCA_PML_BASE_SEND_STANDARD, comm));
if (MPI_SUCCESS != ret) { return ret; }
free(use_this_recvbuf);
@ -598,8 +578,8 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
/*
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
* but for some small number of nodes and/or small data sizes they
* are just as fast as tuned/tree based segmenting operations
* but for some small number of nodes and/or small data sizes they
* are just as fast as base/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
@ -617,12 +597,12 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i, rank, err, size;
ptrdiff_t true_lb, true_extent, lb, extent;
@ -634,7 +614,7 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_basic_linear rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_basic_linear rank %d", rank));
/* If not root, send data to the root. */
@ -645,7 +625,7 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
return err;
}
/* see discussion in ompi_coll_basic_reduce_lin_intra about
/* see discussion in ompi_coll_basic_reduce_lin_intra about
extent and true extent */
/* for reducing buffer allocation lengths.... */
@ -673,7 +653,7 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
/* Initialize the receive buffer. */
if (rank == (size - 1)) {
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)rbuf,
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)rbuf,
(char*)sbuf);
} else {
err = MCA_PML_CALL(recv(rbuf, count, dtype, size - 1,
@ -705,7 +685,7 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
}
if (NULL != inplace_temp) {
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf,
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf,
inplace_temp);
} else {
err = MPI_SUCCESS;
@ -724,185 +704,3 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
}
/* copied function (with appropriate renaming) ends here */
/**
* The following are used by dynamic and forced rules
*
* publish details of each algorithm and if its forced/fixed/locked in
* as you add methods/algorithms you must update this and the query/map routines
*
* this routine is called by the component only
* this makes sure that the mca parameters are set to their initial values and
* perms module does not call this they call the forced_getvalues routine
* instead.
*/
int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t*new_enum;
ompi_coll_tuned_forced_max_algorithms[REDUCE] = coll_tuned_reduce_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_count",
"Number of reduce algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_reduce_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_reduce_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_reduce_algorithms", reduce_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm",
"Which reduce algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 chain, 3 pipeline, 4 binary, 5 binomial, 6 in-order binary",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_reduce_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_segmentsize",
"Segment size in bytes used by default for reduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_segment_size);
coll_tuned_reduce_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_tree_fanout",
"Fanout for n-tree used for reduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_tree_fanout);
coll_tuned_reduce_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_chain_fanout",
"Fanout for chains used for reduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_chain_fanout);
coll_tuned_reduce_max_requests = 0; /* no limit for reduce by default */
mca_param_indices->max_requests_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_max_requests",
"Maximum number of outstanding send requests on leaf nodes. 0 means no limit.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_max_requests);
if (mca_param_indices->max_requests_param_index < 0) {
return mca_param_indices->max_requests_param_index;
}
if (coll_tuned_reduce_max_requests < 0) {
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
opal_output( 0, "Maximum outstanding requests must be positive number or 0. Initializing to 0 (no limit).\n" );
}
coll_tuned_reduce_max_requests = 0;
}
return (MPI_SUCCESS);
}
int ompi_coll_tuned_reduce_intra_do_forced(void *sbuf, void* rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
const int segsize = data->user_forced[REDUCE].segsize;
const int chain_fanout = data->user_forced[REDUCE].chain_fanout;
const int max_requests = data->user_forced[REDUCE].max_requests;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced selected algorithm %d",
data->user_forced[REDUCE].algorithm));
switch (data->user_forced[REDUCE].algorithm) {
case (0): return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype,
op, root, comm, module);
case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype,
op, root, comm, module);
case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, chain_fanout, max_requests);
case (3): return ompi_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (4): return ompi_coll_tuned_reduce_intra_binary (sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (5): return ompi_coll_tuned_reduce_intra_binomial (sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (6): return ompi_coll_tuned_reduce_intra_in_order_binary(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[REDUCE].algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE]));
return (MPI_ERR_ARG);
} /* switch */
}
int ompi_coll_tuned_reduce_intra_do_this(void *sbuf, void* rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout,
int segsize, int max_requests )
{
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0): return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype,
op, root, comm, module);
case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype,
op, root, comm, module);
case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, faninout, max_requests);
case (3): return ompi_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (4): return ompi_coll_tuned_reduce_intra_binary (sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (5): return ompi_coll_tuned_reduce_intra_binomial (sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (6): return ompi_coll_tuned_reduce_intra_in_order_binary(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE]));
return (MPI_ERR_ARG);
} /* switch */
}

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -32,37 +32,21 @@
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/op/op.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
/* reduce_scatter algorithm variables */
static int coll_tuned_reduce_scatter_algorithm_count = 2;
static int coll_tuned_reduce_scatter_forced_algorithm = 0;
static int coll_tuned_reduce_scatter_segment_size = 0;
static int coll_tuned_reduce_scatter_tree_fanout;
static int coll_tuned_reduce_scatter_chain_fanout;
/* valid values for coll_tuned_reduce_scatter_forced_algorithm */
static mca_base_var_enum_value_t reduce_scatter_algorithms[] = {
{0, "ignore"},
{1, "non-overlapping"},
{2, "recursive_halfing"},
{3, "ring"},
{0, NULL}
};
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
/*******************************************************************************
* ompi_coll_tuned_reduce_scatter_intra_nonoverlapping
* ompi_coll_base_reduce_scatter_intra_nonoverlapping
*
* This function just calls a reduce to rank 0, followed by an
* This function just calls a reduce to rank 0, followed by an
* appropriate scatterv call.
*/
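/*
 * Editor's sketch (illustrative, not part of the commit): the
 * "non-overlapping" strategy documented above is just a composition of the
 * two standard collectives, shown here with the public MPI API and without
 * the MPI_IN_PLACE and true-extent handling the real function adds.
 */
#include <mpi.h>
#include <stdlib.h>

static int reduce_scatter_nonoverlapping_sketch(const void *sbuf, void *rbuf,
                                                const int *rcounts,
                                                MPI_Datatype dtype, MPI_Op op,
                                                MPI_Comm comm)
{
    int i, rank, size, total = 0, err;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    int *displs = malloc((size_t)size * sizeof(int));
    void *tmp = NULL;
    for (i = 0; i < size; ++i) { displs[i] = total; total += rcounts[i]; }

    if (0 == rank) {                      /* root collects the full reduction */
        MPI_Aint lb, extent;
        MPI_Type_get_extent(dtype, &lb, &extent);
        tmp = malloc((size_t)total * (size_t)extent);
    }
    err = MPI_Reduce(sbuf, tmp, total, dtype, op, 0, comm);
    if (MPI_SUCCESS == err) {             /* then hands each rank its block */
        err = MPI_Scatterv(tmp, rcounts, displs, dtype,
                           rbuf, rcounts[rank], dtype, 0, comm);
    }
    free(tmp);
    free(displs);
    return err;
}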
int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf,
int ompi_coll_base_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf,
int *rcounts,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
mca_coll_base_module_t *module)
{
int err, i, rank, size, total_count, *displs = NULL;
const int root = 0;
@ -71,7 +55,7 @@ int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf,
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_nonoverlapping, rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_scatter_intra_nonoverlapping, rank %d", rank));
for (i = 0, total_count = 0; i < size; i++) { total_count += rcounts[i]; }
@ -80,7 +64,7 @@ int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf,
if (MPI_IN_PLACE == sbuf) {
/* rbuf on root (0) is big enough to hold whole data */
if (root == rank) {
err = comm->c_coll.coll_reduce (MPI_IN_PLACE, tmprbuf, total_count,
err = comm->c_coll.coll_reduce (MPI_IN_PLACE, tmprbuf, total_count,
dtype, op, root, comm, comm->c_coll.coll_reduce_module);
} else {
err = comm->c_coll.coll_reduce(tmprbuf, NULL, total_count,
@ -91,13 +75,13 @@ int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf,
/* We must allocate temporary receive buffer on root to ensure that
rbuf is big enough */
ptrdiff_t lb, extent, tlb, textent;
ompi_datatype_get_extent(dtype, &lb, &extent);
ompi_datatype_get_true_extent(dtype, &tlb, &textent);
tmprbuf_free = (char*) malloc(textent + (ptrdiff_t)(total_count - 1) * extent);
tmprbuf = tmprbuf_free - lb;
}
}
err = comm->c_coll.coll_reduce (sbuf, tmprbuf, total_count,
dtype, op, root, comm, comm->c_coll.coll_reduce_module);
}
@ -105,7 +89,7 @@ int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf,
if (NULL != tmprbuf_free) free(tmprbuf_free);
return err;
}
displs = (int*) malloc(size * sizeof(int));
displs[0] = 0;
for (i = 1; i < size; i++) {
@ -122,7 +106,7 @@ int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf,
/*
* Recursive-halving function is (*mostly*) copied from the BASIC coll module.
* I have removed the part which handles "large" message sizes
* I have removed the part which handles "large" message sizes
* (non-overlapping version of reduce_Scatter).
*/
@ -131,15 +115,15 @@ int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf,
/*
* reduce_scatter_intra_basic_recursivehalving
*
* Function: - reduce scatter implementation using recursive-halving
* Function: - reduce scatter implementation using recursive-halving
* algorithm
* Accepts: - same as MPI_Reduce_scatter()
* Returns: - MPI_SUCCESS or error code
* Limitation: - Works only for commutative operations.
*/
int
ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
void *rbuf,
ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
void *rbuf,
int *rcounts,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
@ -151,12 +135,12 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
ptrdiff_t true_lb, true_extent, lb, extent, buf_size;
char *recv_buf = NULL, *recv_buf_free = NULL;
char *result_buf = NULL, *result_buf_free = NULL;
/* Initialize */
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_basic_recursivehalving, rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_scatter_intra_basic_recursivehalving, rank %d", rank));
/* Find displacements and the like */
disps = (int*) malloc(sizeof(int) * size);
@ -191,43 +175,43 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
err = OMPI_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
/* allocate temporary buffer for results */
result_buf_free = (char*) malloc(buf_size);
result_buf = result_buf_free - true_lb;
/* copy local buffer into the temporary results */
err = ompi_datatype_sndrcv(sbuf, count, dtype, result_buf, count, dtype);
if (OMPI_SUCCESS != err) goto cleanup;
/* figure out power of two mapping: grow until larger than
comm size, then go back one, to get the largest power of
two less than comm size */
tmp_size = opal_next_poweroftwo (size);
tmp_size = opal_next_poweroftwo (size);
tmp_size >>= 1;
remain = size - tmp_size;
/* If comm size is not a power of two, have the first "remain"
procs with an even rank send to rank + 1, leaving a power of
two procs to do the rest of the algorithm */
if (rank < 2 * remain) {
if ((rank & 1) == 0) {
err = MCA_PML_CALL(send(result_buf, count, dtype, rank + 1,
err = MCA_PML_CALL(send(result_buf, count, dtype, rank + 1,
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
MCA_PML_BASE_SEND_STANDARD,
comm));
if (OMPI_SUCCESS != err) goto cleanup;
/* we don't participate from here on out */
tmp_rank = -1;
} else {
err = MCA_PML_CALL(recv(recv_buf, count, dtype, rank - 1,
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
comm, MPI_STATUS_IGNORE));
/* integrate their results into our temp results */
ompi_op_reduce(op, recv_buf, result_buf, count, dtype);
/* adjust rank to be the bottom "remain" ranks */
tmp_rank = rank / 2;
}
@ -236,13 +220,13 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
remain" ranks dropped out */
tmp_rank = rank - remain;
}
/* For ranks not kicked out by the above code, perform the
recursive halving */
if (tmp_rank >= 0) {
int *tmp_disps = NULL, *tmp_rcounts = NULL;
int mask, send_index, recv_index, last_index;
/* recalculate disps and rcounts to account for the
special "remainder" processes that are no longer doing
anything */
@ -317,11 +301,11 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
free(tmp_rcounts);
free(tmp_disps);
goto cleanup;
}
}
}
if (send_count > 0) {
err = MCA_PML_CALL(send(result_buf + (ptrdiff_t)tmp_disps[send_index] * extent,
send_count, dtype, peer,
send_count, dtype, peer,
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
MCA_PML_BASE_SEND_STANDARD,
comm));
@ -329,7 +313,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
free(tmp_rcounts);
free(tmp_disps);
goto cleanup;
}
}
}
/* if we received something on this step, push it into
@ -340,10 +324,10 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
free(tmp_rcounts);
free(tmp_disps);
goto cleanup;
}
}
ompi_op_reduce(op,
recv_buf + (ptrdiff_t)tmp_disps[recv_index] * extent,
ompi_op_reduce(op,
recv_buf + (ptrdiff_t)tmp_disps[recv_index] * extent,
result_buf + (ptrdiff_t)tmp_disps[recv_index] * extent,
recv_count, dtype);
}
@ -357,13 +341,13 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
/* copy local results from results buffer into real receive buffer */
if (0 != rcounts[rank]) {
err = ompi_datatype_sndrcv(result_buf + disps[rank] * extent,
rcounts[rank], dtype,
rcounts[rank], dtype,
rbuf, rcounts[rank], dtype);
if (OMPI_SUCCESS != err) {
free(tmp_rcounts);
free(tmp_disps);
goto cleanup;
}
}
}
free(tmp_rcounts);
@ -389,7 +373,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
comm));
if (OMPI_SUCCESS != err) goto cleanup;
}
}
}
}
cleanup:
@ -404,18 +388,18 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
/*
* ompi_coll_tuned_reduce_scatter_intra_ring
* ompi_coll_base_reduce_scatter_intra_ring
*
* Function: Ring algorithm for reduce_scatter operation
* Accepts: Same as MPI_Reduce_scatter()
* Returns: MPI_SUCCESS or error code
*
* Description: Implements ring algorithm for reduce_scatter:
* the block sizes defined in rcounts are exchanged and
* Description: Implements ring algorithm for reduce_scatter:
* the block sizes defined in rcounts are exchanged and
* updated until they reach proper destination.
* Algorithm requires 2 * max(rcounts) extra buffering
*
* Limitations: The algorithm DOES NOT preserve order of operations so it
* Limitations: The algorithm DOES NOT preserve order of operations so it
* can be used only for commutative operations.
* Example on 5 nodes:
* Initial state
@ -427,7 +411,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
* [04] -> [14] [24] [34] [44]
*
* COMPUTATION PHASE
* Step 0: rank r sends block (r-1) to rank (r+1) and
* Step 0: rank r sends block (r-1) to rank (r+1) and
* receives block (r+1) from rank (r-1) [with wraparound].
* # 0 1 2 3 4
* [00] [10] [10+20] -> [30] [40]
@ -435,12 +419,12 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
* -> [02] [12] [22] [32] [32+42] -->..
* [43+03] -> [13] [23] [33] [43]
* [04] [04+14] -> [24] [34] [44]
*
*
* Step 1:
* # 0 1 2 3 4
* [00] [10] [10+20] [10+20+30] -> [40]
* -> [01] [11] [21] [21+31] [21+31+41] ->
* [32+42+02] -> [12] [22] [32] [32+42]
* [32+42+02] -> [12] [22] [32] [32+42]
* [03] [43+03+13] -> [23] [33] [43]
* [04] [04+14] [04+14+24] -> [34] [44]
*
@ -448,7 +432,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
* # 0 1 2 3 4
* -> [00] [10] [10+20] [10+20+30] [10+20+30+40] ->
* [21+31+41+01]-> [11] [21] [21+31] [21+31+41]
* [32+42+02] [32+42+02+12]-> [22] [32] [32+42]
* [32+42+02] [32+42+02+12]-> [22] [32] [32+42]
* [03] [43+03+13] [43+03+13+23]-> [33] [43]
* [04] [04+14] [04+14+24] [04+14+24+34] -> [44]
*
@ -456,14 +440,14 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
* # 0 1 2 3 4
* [10+20+30+40+00] [10] [10+20] [10+20+30] [10+20+30+40]
* [21+31+41+01] [21+31+41+01+11] [21] [21+31] [21+31+41]
* [32+42+02] [32+42+02+12] [32+42+02+12+22] [32] [32+42]
* [32+42+02] [32+42+02+12] [32+42+02+12+22] [32] [32+42]
* [03] [43+03+13] [43+03+13+23] [43+03+13+23+33] [43]
* [04] [04+14] [04+14+24] [04+14+24+34] [04+14+24+34+44]
* DONE :)
*
*/
int
ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
int
ompi_coll_base_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
@ -480,11 +464,11 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:reduce_scatter_intra_ring rank %d, size %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:reduce_scatter_intra_ring rank %d, size %d",
rank, size));
/* Determine the maximum number of elements per node,
/* Determine the maximum number of elements per node,
corresponding block size, and displacements array.
*/
displs = (int*) malloc(size * sizeof(int));
@ -492,16 +476,16 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
displs[0] = 0;
total_count = rcounts[0];
max_block_count = rcounts[0];
for (i = 1; i < size; i++) {
for (i = 1; i < size; i++) {
displs[i] = total_count;
total_count += rcounts[i];
if (max_block_count < rcounts[i]) max_block_count = rcounts[i];
}
/* Special case for size == 1 */
if (1 == size) {
if (MPI_IN_PLACE != sbuf) {
ret = ompi_datatype_copy_content_same_ddt(dtype, total_count,
ret = ompi_datatype_copy_content_same_ddt(dtype, total_count,
(char*)rbuf, (char*)sbuf);
if (ret < 0) { line = __LINE__; goto error_hndl; }
}
@ -541,13 +525,13 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
sbuf = rbuf;
}
ret = ompi_datatype_copy_content_same_ddt(dtype, total_count,
ret = ompi_datatype_copy_content_same_ddt(dtype, total_count,
accumbuf, (char*)sbuf);
if (ret < 0) { line = __LINE__; goto error_hndl; }
/* Computation loop */
/*
/*
For each of the remote nodes:
- post irecv for block (r-2) from (r-1) with wrap around
- send block (r-1) to (r+1)
@ -568,7 +552,7 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
inbi = 0;
/* Initialize first receive from the neighbor on the left */
ret = MCA_PML_CALL(irecv(inbuf[inbi], max_block_count, dtype, recv_from,
MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm,
MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm,
&reqs[inbi]));
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
tmpsend = accumbuf + (ptrdiff_t)displs[recv_from] * extent;
@ -579,25 +563,25 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
for (k = 2; k < size; k++) {
const int prevblock = (rank + size - k) % size;
inbi = inbi ^ 0x1;
/* Post irecv for the current block */
ret = MCA_PML_CALL(irecv(inbuf[inbi], max_block_count, dtype, recv_from,
MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm,
MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm,
&reqs[inbi]));
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
/* Wait on previous block to arrive */
ret = ompi_request_wait(&reqs[inbi ^ 0x1], MPI_STATUS_IGNORE);
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
/* Apply operation on previous block: result goes to rbuf
rbuf[prevblock] = inbuf[inbi ^ 0x1] (op) rbuf[prevblock]
*/
tmprecv = accumbuf + (ptrdiff_t)displs[prevblock] * extent;
ompi_op_reduce(op, inbuf[inbi ^ 0x1], tmprecv, rcounts[prevblock], dtype);
/* send previous block to send_to */
ret = MCA_PML_CALL(send(tmprecv, rcounts[prevblock], dtype, send_to,
MCA_COLL_BASE_TAG_REDUCE_SCATTER,
@ -613,7 +597,7 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
rbuf[rank] = inbuf[inbi] (op) rbuf[rank] */
tmprecv = accumbuf + (ptrdiff_t)displs[rank] * extent;
ompi_op_reduce(op, inbuf[inbi], tmprecv, rcounts[rank], dtype);
/* Copy result from tmprecv to rbuf */
ret = ompi_datatype_copy_content_same_ddt(dtype, rcounts[rank], (char *)rbuf, tmprecv);
if (ret < 0) { line = __LINE__; goto error_hndl; }
@ -626,7 +610,7 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
return MPI_SUCCESS;
error_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tRank %d Error occurred %d\n",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n",
__FILE__, line, rank, ret));
if (NULL != displs) free(displs);
if (NULL != accumbuf_free) free(accumbuf_free);
@ -634,139 +618,3 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
if (NULL != inbuf_free[1]) free(inbuf_free[1]);
return ret;
}
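For reference, the ring pattern sketched in the comments above can be condensed into a minimal, non-pipelined version using plain MPI point-to-point calls and integer summation. This is only an illustration (the function name, MPI_Sendrecv exchange, and int/sum specialization are assumptions); the routine above instead double-buffers two receive buffers so the irecv for the next block overlaps the reduction of the previous one.

#include <mpi.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative only: each rank ends up with the fully reduced block `rank`. */
int ring_reduce_scatter_sketch(const int *sbuf, int *rbuf, const int *rcounts,
                               MPI_Comm comm)
{
    int rank, size, total = 0, max_block = 0;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    int *displs = malloc(size * sizeof(int));
    for (int i = 0; i < size; i++) {           /* same bookkeeping as above */
        displs[i] = total;
        total += rcounts[i];
        if (rcounts[i] > max_block) max_block = rcounts[i];
    }

    int *accum = malloc(total * sizeof(int));  /* running partial results */
    int *inbuf = malloc(max_block * sizeof(int));
    memcpy(accum, sbuf, total * sizeof(int));

    const int send_to   = (rank + 1) % size;
    const int recv_from = (rank + size - 1) % size;

    /* Step k: forward block (rank-k) to the right neighbor, fold the arriving
       block (rank-k-1) from the left neighbor into the accumulator. */
    for (int k = 1; k < size; k++) {
        const int sb = (rank - k + size) % size;
        const int rb = (rank - k - 1 + 2 * size) % size;
        MPI_Sendrecv(accum + displs[sb], rcounts[sb], MPI_INT, send_to, 0,
                     inbuf, rcounts[rb], MPI_INT, recv_from, 0,
                     comm, MPI_STATUS_IGNORE);
        for (int j = 0; j < rcounts[rb]; j++)
            accum[displs[rb] + j] += inbuf[j];
    }

    memcpy(rbuf, accum + displs[rank], rcounts[rank] * sizeof(int));
    free(inbuf); free(accum); free(displs);
    return MPI_SUCCESS;
}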
/**
* The following are used by dynamic and forced rules
*
* publish details of each algorithm and if its forced/fixed/locked in
* as you add methods/algorithms you must update this and the query/map routines
*
* this routine is called by the component only
* this makes sure that the mca parameters are set to their initial values and
* perms module does not call this they call the forced_getvalues routine
* instead
*/
int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[REDUCESCATTER] = coll_tuned_reduce_scatter_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_count",
"Number of reduce_scatter algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_reduce_scatter_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_reduce_scatter_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_reduce_scatter_algorithms", reduce_scatter_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm",
"Which reduce reduce_scatter algorithm is used. Can be locked down to choice of: 0 ignore, 1 non-overlapping (Reduce + Scatterv), 2 recursive halving, 3 ring",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_scatter_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_reduce_scatter_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_segmentsize",
"Segment size in bytes used by default for reduce_scatter algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_scatter_segment_size);
coll_tuned_reduce_scatter_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_tree_fanout",
"Fanout for n-tree used for reduce_scatter algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_scatter_tree_fanout);
coll_tuned_reduce_scatter_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_chain_fanout",
"Fanout for chains used for reduce_scatter algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_scatter_chain_fanout);
return (MPI_SUCCESS);
}
int ompi_coll_tuned_reduce_scatter_intra_do_forced(void *sbuf, void* rbuf,
int *rcounts,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_forced selected algorithm %d",
data->user_forced[REDUCESCATTER].algorithm));
switch (data->user_forced[REDUCESCATTER].algorithm) {
case (0): return ompi_coll_tuned_reduce_scatter_intra_dec_fixed (sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (1): return ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (2): return ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (3): return ompi_coll_tuned_reduce_scatter_intra_ring (sbuf, rbuf, rcounts,
dtype, op, comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[REDUCESCATTER].algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCESCATTER]));
return (MPI_ERR_ARG);
} /* switch */
}
int ompi_coll_tuned_reduce_scatter_intra_do_this(void *sbuf, void* rbuf,
int *rcounts,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0): return ompi_coll_tuned_reduce_scatter_intra_dec_fixed (sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (1): return ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (2): return ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (3): return ompi_coll_tuned_reduce_scatter_intra_ring (sbuf, rbuf, rcounts,
dtype, op, comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCESCATTER]));
return (MPI_ERR_ARG);
} /* switch */
}

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -28,27 +28,12 @@
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/* scatter algorithm variables */
static int coll_tuned_scatter_algorithm_count = 2;
static int coll_tuned_scatter_forced_algorithm = 0;
static int coll_tuned_scatter_segment_size = 0;
static int coll_tuned_scatter_tree_fanout;
static int coll_tuned_scatter_chain_fanout;
/* valid values for coll_tuned_scatter_forced_algorithm */
static mca_base_var_enum_value_t scatter_algorithms[] = {
{0, "ignore"},
{1, "basic_linear"},
{2, "binomial"},
{0, NULL}
};
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"
int
ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
ompi_coll_base_scatter_intra_binomial(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -60,19 +45,19 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
char *ptmp, *tempbuf = NULL;
ompi_coll_tree_t* bmtree;
MPI_Status status;
MPI_Aint sextent, slb, strue_lb, strue_extent;
MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_scatter_intra_binomial rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_scatter_intra_binomial rank %d", rank));
/* create the binomial tree */
COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root );
COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, base_module, root );
bmtree = data->cached_in_order_bmtree;
ompi_datatype_get_extent(sdtype, &slb, &sextent);
@ -167,7 +152,7 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
total_send += mycount;
}
if (NULL != tempbuf)
free(tempbuf);
} else {
/* recv from parent on leaf nodes */
@ -182,7 +167,7 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
if (NULL != tempbuf)
free(tempbuf);
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
@ -190,13 +175,13 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
/*
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
* but for some small number of nodes and/or small data sizes they
* are just as fast as tuned/tree based segmenting operations
* but for some small number of nodes and/or small data sizes they
* are just as fast as base/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
* have to duplicate code.
* JPG following the examples from other coll_tuned implementations. Dec06.
* JPG following the examples from other coll_base implementations. Dec06.
*/
/* copied function (with appropriate renaming) starts here */
@ -208,7 +193,7 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_tuned_scatter_intra_basic_linear(void *sbuf, int scount,
ompi_coll_base_scatter_intra_basic_linear(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -269,153 +254,3 @@ ompi_coll_tuned_scatter_intra_basic_linear(void *sbuf, int scount,
/* copied function (with appropriate renaming) ends here */
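As a point of comparison with the binomial version above, the "basic linear" pattern the preceding comment refers to amounts to the root sending one block per peer. A minimal sketch with plain MPI calls and contiguous int data follows (the name and simplifications are illustrative, not the copied OMPI routine):

#include <mpi.h>
#include <string.h>

int scatter_basic_linear_sketch(const int *sbuf, int scount,
                                int *rbuf, int rcount,
                                int root, MPI_Comm comm)
{
    int rank, size;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    if (rank != root)                 /* non-root ranks post a single receive */
        return MPI_Recv(rbuf, rcount, MPI_INT, root, 0, comm,
                        MPI_STATUS_IGNORE);

    for (int i = 0; i < size; i++) {  /* root: one send (or local copy) per rank */
        if (i == rank) {
            memcpy(rbuf, sbuf + (size_t)i * scount, rcount * sizeof(int));
            continue;
        }
        int err = MPI_Send(sbuf + (size_t)i * scount, scount, MPI_INT,
                           i, 0, comm);
        if (MPI_SUCCESS != err) return err;
    }
    return MPI_SUCCESS;
}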
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if its forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map
routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values
and perms */
/* module does not call this they call the forced_getvalues routine instead */
int
ompi_coll_tuned_scatter_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[SCATTER] = coll_tuned_scatter_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_count",
"Number of scatter algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_scatter_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_scatter_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_scatter_algorithms", scatter_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm",
"Which scatter algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_scatter_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_scatter_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_segmentsize",
"Segment size in bytes used by default for scatter algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_scatter_segment_size);
coll_tuned_scatter_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_tree_fanout",
"Fanout for n-tree used for scatter algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_scatter_tree_fanout);
coll_tuned_scatter_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index=
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_chain_fanout",
"Fanout for chains used for scatter algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_scatter_chain_fanout);
return (MPI_SUCCESS);
}
int
ompi_coll_tuned_scatter_intra_do_forced(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:scatter_intra_do_forced selected algorithm %d",
data->user_forced[SCATTER].algorithm));
switch (data->user_forced[SCATTER].algorithm) {
case (0):
return ompi_coll_tuned_scatter_intra_dec_fixed (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (1):
return ompi_coll_tuned_scatter_intra_basic_linear (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (2):
return ompi_coll_tuned_scatter_intra_binomial(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:scatter_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[SCATTER].algorithm,
ompi_coll_tuned_forced_max_algorithms[SCATTER]));
return (MPI_ERR_ARG);
} /* switch */
}
int
ompi_coll_tuned_scatter_intra_do_this(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:scatter_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0):
return ompi_coll_tuned_scatter_intra_dec_fixed (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (1):
return ompi_coll_tuned_scatter_intra_basic_linear (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (2):
return ompi_coll_tuned_scatter_intra_binomial(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:scatter_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm,
ompi_coll_tuned_forced_max_algorithms[SCATTER]));
return (MPI_ERR_ARG);
} /* switch */
}

View file

@ -5,16 +5,16 @@
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
@ -25,8 +25,8 @@
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
/*
* Some static helpers.
@ -75,36 +75,36 @@ static int calculate_num_nodes_up_to_level( int fanout, int level )
*/
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_tree( int fanout,
ompi_coll_base_topo_build_tree( int fanout,
struct ompi_communicator_t* comm,
int root )
{
int rank, size, schild, sparent, shiftedrank, i;
int level; /* location of my rank in the tree structure of size */
int delta; /* number of nodes on my level */
int slimit; /* total number of nodes on levels above me */
ompi_coll_tree_t* tree;
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:topo_build_tree Building fo %d rt %d", fanout, root));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:topo_build_tree Building fo %d rt %d", fanout, root));
if (fanout<1) {
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:topo_build_tree invalid fanout %d", fanout));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:topo_build_tree invalid fanout %d", fanout));
return NULL;
}
if (fanout>MAXTREEFANOUT) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo_build_tree invalid fanout %d bigger than max %d", fanout, MAXTREEFANOUT));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo_build_tree invalid fanout %d bigger than max %d", fanout, MAXTREEFANOUT));
return NULL;
}
/*
* Get size and rank of the process in this communicator
*/
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
tree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
if (!tree) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo_build_tree PANIC::out of memory"));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo_build_tree PANIC::out of memory"));
return NULL;
}
@ -115,8 +115,8 @@ ompi_coll_tuned_topo_build_tree( int fanout,
* Set root
*/
tree->tree_root = root;
/*
* Initialize tree
*/
tree->tree_fanout = fanout;
@ -132,11 +132,11 @@ ompi_coll_tuned_topo_build_tree( int fanout,
if( size < 2 ) {
return tree;
}
/*
* Shift all ranks by root, so that the algorithm can be
* designed as if root would be always 0
* shiftedrank should be used in calculating distances
* and position in tree
*/
shiftedrank = rank - root;
@ -158,7 +158,7 @@ ompi_coll_tuned_topo_build_tree( int fanout,
break;
}
}
/* find my parent */
slimit = calculate_num_nodes_up_to_level( fanout, level );
sparent = shiftedrank;
@ -170,12 +170,12 @@ ompi_coll_tuned_topo_build_tree( int fanout,
}
}
tree->tree_prev = (sparent+root)%size;
return tree;
}
/*
* Constructs in-order binary tree which can be used for non-commutative reduce
* operations.
* Root of this tree is always rank (size-1) and fanout is 2.
* Here are some of the examples of this tree:
@ -189,28 +189,28 @@ ompi_coll_tuned_topo_build_tree( int fanout,
* 4 0
*/
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm )
ompi_coll_base_topo_build_in_order_bintree( struct ompi_communicator_t* comm )
{
int rank, size, myrank, rightsize, delta, parent, lchild, rchild;
ompi_coll_tree_t* tree;
/*
* Get size and rank of the process in this communicator
*/
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
tree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
if (!tree) {
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:topo_build_tree PANIC::out of memory"));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:topo_build_tree PANIC::out of memory"));
return NULL;
}
tree->tree_root = MPI_UNDEFINED;
tree->tree_nextsize = MPI_UNDEFINED;
/*
* Initialize tree
*/
tree->tree_fanout = 2;
@ -220,11 +220,11 @@ ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm )
tree->tree_nextsize = 0;
tree->tree_next[0] = -1;
tree->tree_next[1] = -1;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:topo_build_in_order_tree Building fo %d rt %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:topo_build_in_order_tree Building fo %d rt %d",
tree->tree_fanout, tree->tree_root));
/*
* Build the tree
*/
myrank = rank;
@ -240,18 +240,18 @@ ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm )
rchild = -1;
if (size - 1 > 0) {
lchild = parent - 1;
if (lchild > 0) {
rchild = rightsize - 1;
}
}
/* The following cases are possible: myrank can be
- a parent,
- belong to the left subtree, or
- belong to the right subtee
Each of the cases need to be handled differently.
*/
if (myrank == parent) {
/* I am the parent:
- compute real ranks of my children, and exit the loop. */
@ -262,7 +262,7 @@ ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm )
if (myrank > rchild) {
/* I belong to the left subtree:
- If I am the left child, compute real rank of my parent
- Iterate down through tree:
compute new size, shift ranks down, and update delta.
*/
if (myrank == lchild) {
@ -276,8 +276,8 @@ ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm )
} else {
/* I belong to the right subtree:
- If I am the right child, compute real rank of my parent
- Iterate down through tree:
compute new size and parent,
but the delta and rank do not need to change.
*/
if (myrank == rchild) {
@ -287,14 +287,14 @@ ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm )
parent = rchild;
}
}
if (tree->tree_next[0] >= 0) { tree->tree_nextsize = 1; }
if (tree->tree_next[1] >= 0) { tree->tree_nextsize += 1; }
return tree;
}
int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree )
int ompi_coll_base_topo_destroy_tree( ompi_coll_tree_t** tree )
{
ompi_coll_tree_t *ptr;
@ -311,7 +311,7 @@ int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree )
}
/*
*
* Here are some of the examples of this tree:
* size == 2 size = 4 size = 8
* 0 0 0
@ -323,16 +323,16 @@ int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree )
* 7
*/
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
ompi_coll_base_topo_build_bmtree( struct ompi_communicator_t* comm,
int root )
{
int childs = 0, rank, size, mask = 1, index, remote, i;
ompi_coll_tree_t *bmtree;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_bmtree rt %d", root));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree rt %d", root));
/*
* Get size and rank of the process in this communicator
*/
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
@ -341,7 +341,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
bmtree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
if (!bmtree) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_bmtree PANIC out of memory"));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree PANIC out of memory"));
return NULL;
}
@ -372,7 +372,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
remote += root;
if( remote >= size ) remote -= size;
if (childs==MAXTREEFANOUT) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_bmtree max fanout incorrect %d needed %d", MAXTREEFANOUT, childs));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree max fanout incorrect %d needed %d", MAXTREEFANOUT, childs));
free(bmtree);
return NULL;
}
@ -388,7 +388,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
/*
* Constructs in-order binomial tree which can be used for gather/scatter
* operations.
*
* Here are some of the examples of this tree:
* size == 2 size = 4 size = 8
* 0 0 0
@ -400,16 +400,16 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
* 7
*/
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
ompi_coll_base_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
int root )
{
int childs = 0, rank, vrank, size, mask = 1, remote, i;
ompi_coll_tree_t *bmtree;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_in_order_bmtree rt %d", root));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_in_order_bmtree rt %d", root));
/*
* Get size and rank of the process in this communicator
*/
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
@ -418,7 +418,7 @@ ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
bmtree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
if (!bmtree) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_bmtree PANIC out of memory"));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree PANIC out of memory"));
return NULL;
}
@ -442,8 +442,8 @@ ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
bmtree->tree_next[childs] = (remote + root) % size;
childs++;
if (childs==MAXTREEFANOUT) {
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:topo:build_bmtree max fanout incorrect %d needed %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:topo:build_bmtree max fanout incorrect %d needed %d",
MAXTREEFANOUT, childs));
free (bmtree);
return NULL;
@ -459,36 +459,36 @@ ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_chain( int fanout,
ompi_coll_base_topo_build_chain( int fanout,
struct ompi_communicator_t* comm,
int root )
{
int i, maxchainlen, mark, head, len, rank, size, srank /* shifted rank */;
ompi_coll_tree_t *chain;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_chain fo %d rt %d", fanout, root));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain fo %d rt %d", fanout, root));
/*
* Get size and rank of the process in this communicator
*/
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
if( fanout < 1 ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_chain WARNING invalid fanout of ZERO, forcing to 1 (pipeline)!"));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain WARNING invalid fanout of ZERO, forcing to 1 (pipeline)!"));
fanout = 1;
}
if (fanout>MAXTREEFANOUT) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_chain WARNING invalid fanout %d bigger than max %d, forcing to max!", fanout, MAXTREEFANOUT));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain WARNING invalid fanout %d bigger than max %d, forcing to max!", fanout, MAXTREEFANOUT));
fanout = MAXTREEFANOUT;
}
/*
* Allocate space for topology arrays if needed
*/
chain = (ompi_coll_tree_t*)malloc( sizeof(ompi_coll_tree_t) );
if (!chain) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_chain PANIC out of memory"));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain PANIC out of memory"));
fflush(stdout);
return NULL;
}
@ -496,17 +496,17 @@ ompi_coll_tuned_topo_build_chain( int fanout,
chain->tree_nextsize = -1;
for(i=0;i<fanout;i++) chain->tree_next[i] = -1;
/*
* Set root & numchain
*/
chain->tree_root = root;
if( (size - 1) < fanout ) {
chain->tree_nextsize = size-1;
fanout = size-1;
} else {
chain->tree_nextsize = fanout;
}
/*
* Shift ranks
*/
@ -577,7 +577,7 @@ ompi_coll_tuned_topo_build_chain( int fanout,
chain->tree_nextsize = 1;
} else {
chain->tree_next[0] = -1;
chain->tree_nextsize = 0;
}
}
chain->tree_prev = (chain->tree_prev+root)%size;
@ -586,7 +586,7 @@ ompi_coll_tuned_topo_build_chain( int fanout,
}
} else {
/*
* Unshift values
*/
chain->tree_prev = -1;
chain->tree_next[0] = (root+1)%size;
@ -603,17 +603,62 @@ ompi_coll_tuned_topo_build_chain( int fanout,
return chain;
}
int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank)
int ompi_coll_base_topo_dump_tree (ompi_coll_tree_t* tree, int rank)
{
int i;
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:topo:topo_dump_tree %1d tree root %d"
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:topo:topo_dump_tree %1d tree root %d"
" fanout %d BM %1d nextsize %d prev %d",
rank, tree->tree_root, tree->tree_bmtree, tree->tree_fanout,
tree->tree_nextsize, tree->tree_prev));
if( tree->tree_nextsize ) {
for( i = 0; i < tree->tree_nextsize; i++ )
OPAL_OUTPUT((ompi_coll_tuned_stream,"[%1d] %d", i, tree->tree_next[i]));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"[%1d] %d", i, tree->tree_next[i]));
}
return (0);
}
mca_coll_base_comm_t* ompi_coll_base_topo_construct( mca_coll_base_comm_t* data )
{
if( NULL == data ) {
data = (mca_coll_base_comm_t*)calloc(1, sizeof(mca_coll_base_comm_t));
}
return data;
}
void ompi_coll_base_topo_destruct( mca_coll_base_comm_t* data )
{
if(NULL == data) return;
#if OPAL_ENABLE_DEBUG
/* Reset the reqs to NULL/0 -- they'll be freed as part of freeing
the general c_coll_selected_data */
data->mcct_reqs = NULL;
data->mcct_num_reqs = 0;
#endif
/* free any cached information that has been allocated */
if (data->cached_ntree) { /* destroy general tree if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_ntree);
}
if (data->cached_bintree) { /* destroy bintree if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_bintree);
}
if (data->cached_bmtree) { /* destroy bmtree if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_bmtree);
}
if (data->cached_in_order_bmtree) { /* destroy bmtree if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_in_order_bmtree);
}
if (data->cached_chain) { /* destroy general chain if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_chain);
}
if (data->cached_pipeline) { /* destroy pipeline if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_pipeline);
}
if (data->cached_in_order_bintree) { /* destroy in order bintree if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_in_order_bintree);
}
free(data);
}

View file

@ -5,19 +5,19 @@
* Copyright (c) 2004-2012 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_COLL_TUNED_TOPO_H_HAS_BEEN_INCLUDED
#define MCA_COLL_TUNED_TOPO_H_HAS_BEEN_INCLUDED
#ifndef MCA_COLL_BASE_TOPO_H_HAS_BEEN_INCLUDED
#define MCA_COLL_BASE_TOPO_H_HAS_BEEN_INCLUDED
#include "ompi_config.h"
@ -35,29 +35,28 @@ typedef struct ompi_coll_tree_t {
} ompi_coll_tree_t;
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_tree( int fanout,
ompi_coll_base_topo_build_tree( int fanout,
struct ompi_communicator_t* com,
int root );
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm );
ompi_coll_base_topo_build_in_order_bintree( struct ompi_communicator_t* comm );
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
ompi_coll_base_topo_build_bmtree( struct ompi_communicator_t* comm,
int root );
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
ompi_coll_base_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
int root );
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_chain( int fanout,
ompi_coll_base_topo_build_chain( int fanout,
struct ompi_communicator_t* com,
int root );
int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree );
int ompi_coll_base_topo_destroy_tree( ompi_coll_tree_t** tree );
/* debugging stuff, will be removed later */
int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank);
int ompi_coll_base_topo_dump_tree (ompi_coll_tree_t* tree, int rank);
END_C_DECLS
#endif /* MCA_COLL_TUNED_TOPO_H_HAS_BEEN_INCLUDED */
#endif /* MCA_COLL_BASE_TOPO_H_HAS_BEEN_INCLUDED */
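A hedged usage sketch of the renamed helpers declared above, e.g. from inside a rooted collective. Error handling and the caching normally done through the COLL_BASE_UPDATE_* macros are omitted, and the function name is illustrative only:

#include "ompi_config.h"
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "coll_base_topo.h"

static int build_and_dump_example(struct ompi_communicator_t *comm, int root)
{
    ompi_coll_tree_t *bmtree =
        ompi_coll_base_topo_build_in_order_bmtree(comm, root);
    if (NULL == bmtree) {
        return OMPI_ERR_OUT_OF_RESOURCE;        /* allocation or fanout failure */
    }

    /* Print this rank's parent and children to the framework output */
    ompi_coll_base_topo_dump_tree(bmtree, ompi_comm_rank(comm));

    /* ... drive sends/receives from bmtree->tree_prev / tree_next[] ... */

    ompi_coll_base_topo_destroy_tree(&bmtree);  /* frees and resets to NULL */
    return OMPI_SUCCESS;
}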

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -19,17 +19,17 @@
*/
#include "ompi_config.h"
#include "coll_tuned.h"
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned_util.h"
#include "coll_base_util.h"
int ompi_coll_tuned_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
int ompi_coll_base_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
ompi_datatype_t* sdatatype,
int dest, int stag,
void* recvbuf, size_t rcount,
@ -91,14 +91,14 @@ int ompi_coll_tuned_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
*status = statuses[err_index];
}
err = statuses[err_index].MPI_ERROR;
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred in the %s"
" stage of ompi_coll_tuned_sendrecv_zero\n",
OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred in the %s"
" stage of ompi_coll_base_sendrecv_zero\n",
__FILE__, line, err, (0 == err_index ? "receive" : "send")));
} else {
/* Error discovered during the posting of the irecv or isend,
* and no status is available.
*/
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",
OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred\n",
__FILE__, line, err));
if (MPI_STATUS_IGNORE != status) {
status->MPI_ERROR = err;

View file

@ -18,8 +18,8 @@
* $HEADER$
*/
#ifndef MCA_COLL_TUNED_UTIL_EXPORT_H
#define MCA_COLL_TUNED_UTIL_EXPORT_H
#ifndef MCA_COLL_BASE_UTIL_EXPORT_H
#define MCA_COLL_BASE_UTIL_EXPORT_H
#include "ompi_config.h"
@ -36,7 +36,7 @@ BEGIN_C_DECLS
* If one of the communications results in a zero-byte message the
* communication is ignored, and no message will cross to the peer.
*/
int ompi_coll_tuned_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
int ompi_coll_base_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
ompi_datatype_t* sdatatype,
int dest, int stag,
void* recvbuf, size_t rcount,
@ -53,7 +53,7 @@ int ompi_coll_tuned_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
* communications.
*/
static inline int
ompi_coll_tuned_sendrecv( void* sendbuf, size_t scount, ompi_datatype_t* sdatatype,
ompi_coll_base_sendrecv( void* sendbuf, size_t scount, ompi_datatype_t* sdatatype,
int dest, int stag,
void* recvbuf, size_t rcount, ompi_datatype_t* rdatatype,
int source, int rtag,
@ -64,13 +64,13 @@ ompi_coll_tuned_sendrecv( void* sendbuf, size_t scount, ompi_datatype_t* sdataty
return (int) ompi_datatype_sndrcv(sendbuf, (int32_t) scount, sdatatype,
recvbuf, (int32_t) rcount, rdatatype);
}
return ompi_coll_tuned_sendrecv_nonzero_actual (sendbuf, scount, sdatatype,
return ompi_coll_base_sendrecv_nonzero_actual (sendbuf, scount, sdatatype,
dest, stag,
recvbuf, rcount, rdatatype,
source, rtag, comm, status);
}
END_C_DECLS
#endif /* MCA_COLL_TUNED_UTIL_EXPORT_H */
#endif /* MCA_COLL_BASE_UTIL_EXPORT_H */
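A hedged sketch of how a collective might use the non-zero send/receive helper declared above to swap one (possibly empty) block with each ring neighbour; as the comment above notes, a zero count on either side simply suppresses that direction. The wrapper name and the tag/datatype parameters are illustrative assumptions:

#include "ompi_config.h"
#include "mpi.h"
#include "ompi/communicator/communicator.h"
#include "ompi/datatype/ompi_datatype.h"
#include "coll_base_util.h"

static int ring_block_exchange_example(void *sblock, size_t scount,
                                       void *rblock, size_t rcount,
                                       ompi_datatype_t *dtype, int tag,
                                       struct ompi_communicator_t *comm)
{
    const int rank  = ompi_comm_rank(comm);
    const int size  = ompi_comm_size(comm);
    const int right = (rank + 1) % size;
    const int left  = (rank + size - 1) % size;

    /* Send our block to the right neighbour, receive the left neighbour's;
       zero-sized pieces are silently skipped by the helper. */
    return ompi_coll_base_sendrecv_nonzero_actual(sblock, scount, dtype,
                                                  right, tag,
                                                  rblock, rcount, dtype,
                                                  left, tag,
                                                  comm, MPI_STATUS_IGNORE);
}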

View file

@ -470,6 +470,9 @@ struct mca_coll_base_module_2_1_0_t {
be used for the given communicator */
mca_coll_base_module_disable_1_1_0_fn_t coll_module_disable;
/** Data storage for all the algorithms defined in the base. Should
not be used by other modules */
struct mca_coll_base_comm_t* base_data;
};
typedef struct mca_coll_base_module_2_1_0_t mca_coll_base_module_2_1_0_t;
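A hedged sketch of how a base collective reaches the new base_data field, mirroring the pattern visible in the scatter code earlier in this commit; the helper name is illustrative, and the availability of COLL_BASE_UPDATE_IN_ORDER_BMTREE through coll_base_functions.h is assumed:

#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"

static ompi_coll_tree_t *
cached_bmtree_example(struct ompi_communicator_t *comm,
                      mca_coll_base_module_t *module, int root)
{
    mca_coll_base_comm_t *data = module->base_data;   /* per-communicator cache */
    COLL_BASE_UPDATE_IN_ORDER_BMTREE(comm, module, root);
    return data->cached_in_order_bmtree;              /* built lazily above */
}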