From aa019e239e4b408a87f8d2442d0d0476b5363080 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Sun, 15 Feb 2015 14:47:27 -0500 Subject: [PATCH] Rename the base header file containing the prototypes of the collective functions. --- ompi/mca/coll/base/Makefile.am | 21 +- ompi/mca/coll/base/base.h | 2 +- ompi/mca/coll/base/coll_base.h | 558 ------------------ ompi/mca/coll/base/coll_base_allgather.c | 383 +++--------- ompi/mca/coll/base/coll_base_allgatherv.c | 255 ++------ ompi/mca/coll/base/coll_base_allreduce.c | 428 +++++--------- ompi/mca/coll/base/coll_base_alltoall.c | 333 +++-------- ompi/mca/coll/base/coll_base_alltoallv.c | 195 +----- ompi/mca/coll/base/coll_base_barrier.c | 242 ++------ ompi/mca/coll/base/coll_base_bcast.c | 450 +++++--------- ompi/mca/coll/base/coll_base_frame.c | 58 +- ompi/mca/coll/base/coll_base_functions.h | 341 +++++++++++ ompi/mca/coll/base/coll_base_gather.c | 265 ++------- ompi/mca/coll/base/coll_base_reduce.c | 468 +++++---------- ompi/mca/coll/base/coll_base_reduce_scatter.c | 284 +++------ ompi/mca/coll/base/coll_base_scatter.c | 199 +------ ompi/mca/coll/base/coll_base_topo.c | 185 +++--- ompi/mca/coll/base/coll_base_topo.h | 27 +- ompi/mca/coll/base/coll_base_util.c | 14 +- ompi/mca/coll/base/coll_base_util.h | 12 +- ompi/mca/coll/coll.h | 3 + 21 files changed, 1400 insertions(+), 3323 deletions(-) delete mode 100644 ompi/mca/coll/base/coll_base.h create mode 100644 ompi/mca/coll/base/coll_base_functions.h diff --git a/ompi/mca/coll/base/Makefile.am b/ompi/mca/coll/base/Makefile.am index ca608693ca..10524b60b9 100644 --- a/ompi/mca/coll/base/Makefile.am +++ b/ompi/mca/coll/base/Makefile.am @@ -2,7 +2,7 @@ # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University +# Copyright (c) 2004-2015 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -20,10 +20,25 @@ dist_ompidata_DATA = base/help-mca-coll-base.txt headers += \ base/base.h \ - base/coll_tags.h + base/coll_tags.h \ + base/coll_base_topo.h \ + base/coll_base_util.h libmca_coll_la_SOURCES += \ base/coll_base_comm_select.c \ base/coll_base_comm_unselect.c \ base/coll_base_find_available.c \ - base/coll_base_frame.c + base/coll_base_frame.c \ + base/coll_base_bcast.c \ + base/coll_base_scatter.c \ + base/coll_base_topo.c \ + base/coll_base_allgather.c \ + base/coll_base_allgatherv.c \ + base/coll_base_util.c \ + base/coll_base_allreduce.c \ + base/coll_base_alltoall.c \ + base/coll_base_gather.c \ + base/coll_base_alltoallv.c \ + base/coll_base_reduce.c \ + base/coll_base_barrier.c \ + base/coll_base_reduce_scatter.c diff --git a/ompi/mca/coll/base/base.h b/ompi/mca/coll/base/base.h index 1c9a95c180..3d54de22bf 100644 --- a/ompi/mca/coll/base/base.h +++ b/ompi/mca/coll/base/base.h @@ -87,7 +87,7 @@ int mca_coll_base_find_available(bool enable_progress_threads, * coll component needs to be selected for it. It should be invoked * near the end of the communicator creation process such that * almost everything else is functional on the communicator (e.g., - * point-to-point communication). + * point-to-point communication). * * Note that new communicators may be created as a result of * invoking this function. 
Specifically: this function is called in diff --git a/ompi/mca/coll/base/coll_base.h b/ompi/mca/coll/base/coll_base.h deleted file mode 100644 index f044a60375..0000000000 --- a/ompi/mca/coll/base/coll_base.h +++ /dev/null @@ -1,558 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_COLL_TUNED_EXPORT_H -#define MCA_COLL_TUNED_EXPORT_H - -#include "ompi_config.h" - -#include "mpi.h" -#include "opal/mca/mca.h" -#include "ompi/mca/coll/coll.h" -#include "ompi/request/request.h" - -/* need to include our own topo prototypes so we can malloc data on the comm correctly */ -#include "coll_tuned_topo.h" - -/* also need the dynamic rule structures */ -#include "coll_tuned_dynamic_rules.h" - -/* some fixed value index vars to simplify certain operations */ -typedef enum COLLTYPE { - ALLGATHER = 0, /* 0 */ - ALLGATHERV, /* 1 */ - ALLREDUCE, /* 2 */ - ALLTOALL, /* 3 */ - ALLTOALLV, /* 4 */ - ALLTOALLW, /* 5 */ - BARRIER, /* 6 */ - BCAST, /* 7 */ - EXSCAN, /* 8 */ - GATHER, /* 9 */ - GATHERV, /* 10 */ - REDUCE, /* 11 */ - REDUCESCATTER, /* 12 */ - SCAN, /* 13 */ - SCATTER, /* 14 */ - SCATTERV, /* 15 */ - COLLCOUNT /* 16 end counter keep it as last element */ -} COLLTYPE_T; - -/* defined arg lists to simply auto inclusion of user overriding decision functions */ -#define ALLGATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define ALLGATHERV_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void * rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define ALLREDUCE_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define ALLTOALL_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define ALLTOALLV_ARGS void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define ALLTOALLW_ARGS void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t **sdtypes, void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t **rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define BARRIER_ARGS struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define BCAST_ARGS void *buff, int count, struct ompi_datatype_t *datatype, int root, struct 
ompi_communicator_t *comm, mca_coll_base_module_t *module -#define EXSCAN_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define GATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define GATHERV_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define REDUCE_ARGS void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define REDUCESCATTER_ARGS void *sbuf, void *rbuf, int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define SCAN_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define SCATTER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -#define SCATTERV_ARGS void *sbuf, int *scounts, int *disps, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module -/* end defined arg lists to simply auto inclusion of user overriding decision functions */ - -BEGIN_C_DECLS - -/* these are the same across all modules and are loaded at component query time */ -extern int ompi_coll_tuned_stream; -extern int ompi_coll_tuned_priority; -extern int ompi_coll_tuned_preallocate_memory_comm_size_limit; -extern bool ompi_coll_tuned_use_dynamic_rules; -extern char* ompi_coll_tuned_dynamic_rules_filename; -extern int ompi_coll_tuned_init_tree_fanout; -extern int ompi_coll_tuned_init_chain_fanout; -extern int ompi_coll_tuned_init_max_requests; -extern int ompi_coll_tuned_alltoall_small_msg; -extern int ompi_coll_tuned_alltoall_intermediate_msg; - -/* forced algorithm choices */ -/* this structure is for storing the indexes to the forced algorithm mca params... */ -/* we get these at component query (so that registered values appear in ompi_infoi) */ -struct coll_tuned_force_algorithm_mca_param_indices_t { - int algorithm_param_index; /* which algorithm you want to force */ - int segsize_param_index; /* segsize to use (if supported), 0 = no segmentation */ - int tree_fanout_param_index; /* tree fanout/in to use */ - int chain_fanout_param_index; /* K-chain fanout/in to use */ - int max_requests_param_index; /* Maximum number of outstanding send or recv requests */ -}; -typedef struct coll_tuned_force_algorithm_mca_param_indices_t coll_tuned_force_algorithm_mca_param_indices_t; - - -/* the following type is for storing actual value obtained from the MCA on each tuned module */ -/* via their mca param indices lookup in the component */ -/* this structure is stored once per collective type per communicator... 
*/ -struct coll_tuned_force_algorithm_params_t { - int algorithm; /* which algorithm you want to force */ - int segsize; /* segsize to use (if supported), 0 = no segmentation */ - int tree_fanout; /* tree fanout/in to use */ - int chain_fanout; /* K-chain fanout/in to use */ - int max_requests; /* Maximum number of outstanding send or recv requests */ -}; -typedef struct coll_tuned_force_algorithm_params_t coll_tuned_force_algorithm_params_t; - -/* the indices to the MCA params so that modules can look them up at open / comm create time */ -extern coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT]; -/* the actual max algorithm values (readonly), loaded at component open */ -extern int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT]; - -/* - * coll API functions - */ - -/* API functions */ - -int ompi_coll_tuned_init_query(bool enable_progress_threads, - bool enable_mpi_threads); - -mca_coll_base_module_t * -ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority); - -/* API functions of decision functions and any implementations */ - -/* - * Note this gets long as we have to have a prototype for each - * MPI collective 4 times.. 2 for the comm type and 2 for each decision - * type. - * we might cut down the decision prototypes by conditional compiling - */ - -/* All Gather */ -int ompi_coll_tuned_allgather_intra_dec_fixed(ALLGATHER_ARGS); -int ompi_coll_tuned_allgather_intra_dec_dynamic(ALLGATHER_ARGS); -int ompi_coll_tuned_allgather_intra_do_forced(ALLGATHER_ARGS); -int ompi_coll_tuned_allgather_intra_do_this(ALLGATHER_ARGS, int algorithm, int faninout, int segsize); -int ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_allgather_intra_bruck(ALLGATHER_ARGS); -int ompi_coll_tuned_allgather_intra_recursivedoubling(ALLGATHER_ARGS); -int ompi_coll_tuned_allgather_intra_ring(ALLGATHER_ARGS); -int ompi_coll_tuned_allgather_intra_neighborexchange(ALLGATHER_ARGS); -int ompi_coll_tuned_allgather_intra_basic_linear(ALLGATHER_ARGS); -int ompi_coll_tuned_allgather_intra_two_procs(ALLGATHER_ARGS); -int ompi_coll_tuned_allgather_inter_dec_fixed(ALLGATHER_ARGS); -int ompi_coll_tuned_allgather_inter_dec_dynamic(ALLGATHER_ARGS); - -/* All GatherV */ -int ompi_coll_tuned_allgatherv_intra_dec_fixed(ALLGATHERV_ARGS); -int ompi_coll_tuned_allgatherv_intra_dec_dynamic(ALLGATHERV_ARGS); -int ompi_coll_tuned_allgatherv_intra_do_forced(ALLGATHERV_ARGS); -int ompi_coll_tuned_allgatherv_intra_do_this(ALLGATHERV_ARGS, int algorithm, int faninout, int segsize); -int ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_allgatherv_intra_bruck(ALLGATHERV_ARGS); -int ompi_coll_tuned_allgatherv_intra_ring(ALLGATHERV_ARGS); -int ompi_coll_tuned_allgatherv_intra_neighborexchange(ALLGATHERV_ARGS); -int ompi_coll_tuned_allgatherv_intra_basic_default(ALLGATHERV_ARGS); -int ompi_coll_tuned_allgatherv_intra_two_procs(ALLGATHERV_ARGS); -int ompi_coll_tuned_allgatherv_inter_dec_fixed(ALLGATHERV_ARGS); -int ompi_coll_tuned_allgatherv_inter_dec_dynamic(ALLGATHERV_ARGS); - -/* All Reduce */ -int ompi_coll_tuned_allreduce_intra_dec_fixed(ALLREDUCE_ARGS); -int ompi_coll_tuned_allreduce_intra_dec_dynamic(ALLREDUCE_ARGS); -int ompi_coll_tuned_allreduce_intra_do_forced(ALLREDUCE_ARGS); -int ompi_coll_tuned_allreduce_intra_do_this(ALLREDUCE_ARGS, int algorithm, int faninout, int segsize); -int 
ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_allreduce_intra_nonoverlapping(ALLREDUCE_ARGS); -int ompi_coll_tuned_allreduce_intra_recursivedoubling(ALLREDUCE_ARGS); -int ompi_coll_tuned_allreduce_intra_ring(ALLREDUCE_ARGS); -int ompi_coll_tuned_allreduce_intra_ring_segmented(ALLREDUCE_ARGS, uint32_t segsize); -int ompi_coll_tuned_allreduce_intra_basic_linear(ALLREDUCE_ARGS); -int ompi_coll_tuned_allreduce_inter_dec_fixed(ALLREDUCE_ARGS); -int ompi_coll_tuned_allreduce_inter_dec_dynamic(ALLREDUCE_ARGS); - -/* AlltoAll */ -int ompi_coll_tuned_alltoall_intra_dec_fixed(ALLTOALL_ARGS); -int ompi_coll_tuned_alltoall_intra_dec_dynamic(ALLTOALL_ARGS); -int ompi_coll_tuned_alltoall_intra_do_forced(ALLTOALL_ARGS); -int ompi_coll_tuned_alltoall_intra_do_this(ALLTOALL_ARGS, int algorithm, int faninout, int segsize, int max_requests); -int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_alltoall_intra_pairwise(ALLTOALL_ARGS); -int ompi_coll_tuned_alltoall_intra_bruck(ALLTOALL_ARGS); -int ompi_coll_tuned_alltoall_intra_basic_linear(ALLTOALL_ARGS); -int ompi_coll_tuned_alltoall_intra_linear_sync(ALLTOALL_ARGS, int max_requests); -int ompi_coll_tuned_alltoall_intra_two_procs(ALLTOALL_ARGS); -int ompi_coll_tuned_alltoall_inter_dec_fixed(ALLTOALL_ARGS); -int ompi_coll_tuned_alltoall_inter_dec_dynamic(ALLTOALL_ARGS); - -/* AlltoAllV */ -int ompi_coll_tuned_alltoallv_intra_dec_fixed(ALLTOALLV_ARGS); -int ompi_coll_tuned_alltoallv_intra_dec_dynamic(ALLTOALLV_ARGS); -int ompi_coll_tuned_alltoallv_intra_do_forced(ALLTOALLV_ARGS); -int ompi_coll_tuned_alltoallv_intra_do_this(ALLTOALLV_ARGS, int algorithm); -int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_alltoallv_intra_pairwise(ALLTOALLV_ARGS); -int ompi_coll_tuned_alltoallv_intra_basic_linear(ALLTOALLV_ARGS); -int ompi_coll_tuned_alltoallv_inter_dec_fixed(ALLTOALLV_ARGS); -int ompi_coll_tuned_alltoallv_inter_dec_dynamic(ALLTOALLV_ARGS); - -/* AlltoAllW */ -int ompi_coll_tuned_alltoallw_intra_dec_fixed(ALLTOALLW_ARGS); -int ompi_coll_tuned_alltoallw_intra_dec_dynamic(ALLTOALLW_ARGS); -int ompi_coll_tuned_alltoallw_inter_dec_fixed(ALLTOALLW_ARGS); -int ompi_coll_tuned_alltoallw_inter_dec_dynamic(ALLTOALLW_ARGS); - -/* Barrier */ -int ompi_coll_tuned_barrier_intra_dec_fixed(BARRIER_ARGS); -int ompi_coll_tuned_barrier_intra_dec_dynamic(BARRIER_ARGS); -int ompi_coll_tuned_barrier_intra_do_forced(BARRIER_ARGS); -int ompi_coll_tuned_barrier_intra_do_this(BARRIER_ARGS, int algorithm, int faninout, int segsize); -int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_barrier_inter_dec_fixed(BARRIER_ARGS); -int ompi_coll_tuned_barrier_inter_dec_dynamic(BARRIER_ARGS); -int ompi_coll_tuned_barrier_intra_doublering(BARRIER_ARGS); -int ompi_coll_tuned_barrier_intra_recursivedoubling(BARRIER_ARGS); -int ompi_coll_tuned_barrier_intra_bruck(BARRIER_ARGS); -int ompi_coll_tuned_barrier_intra_two_procs(BARRIER_ARGS); -int ompi_coll_tuned_barrier_intra_linear(BARRIER_ARGS); -int ompi_coll_tuned_barrier_intra_tree(BARRIER_ARGS); - -/* Bcast */ -int ompi_coll_tuned_bcast_intra_generic( BCAST_ARGS, uint32_t count_by_segment, ompi_coll_tree_t* tree ); -int ompi_coll_tuned_bcast_intra_dec_fixed(BCAST_ARGS); -int 
ompi_coll_tuned_bcast_intra_dec_dynamic(BCAST_ARGS); -int ompi_coll_tuned_bcast_intra_do_forced(BCAST_ARGS); -int ompi_coll_tuned_bcast_intra_do_this(BCAST_ARGS, int algorithm, int faninout, int segsize); -int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_bcast_intra_basic_linear(BCAST_ARGS); -int ompi_coll_tuned_bcast_intra_chain(BCAST_ARGS, uint32_t segsize, int32_t chains); -int ompi_coll_tuned_bcast_intra_pipeline(BCAST_ARGS, uint32_t segsize); -int ompi_coll_tuned_bcast_intra_binomial(BCAST_ARGS, uint32_t segsize); -int ompi_coll_tuned_bcast_intra_bintree(BCAST_ARGS, uint32_t segsize); -int ompi_coll_tuned_bcast_intra_split_bintree(BCAST_ARGS, uint32_t segsize); -int ompi_coll_tuned_bcast_inter_dec_fixed(BCAST_ARGS); -int ompi_coll_tuned_bcast_inter_dec_dynamic(BCAST_ARGS); - -/* Exscan */ -int ompi_coll_tuned_exscan_intra_dec_fixed(EXSCAN_ARGS); -int ompi_coll_tuned_exscan_intra_dec_dynamic(EXSCAN_ARGS); -int ompi_coll_tuned_exscan_inter_dec_fixed(EXSCAN_ARGS); -int ompi_coll_tuned_exscan_inter_dec_dynamic(EXSCAN_ARGS); - -/* Gather */ -int ompi_coll_tuned_gather_intra_dec_fixed(GATHER_ARGS); -int ompi_coll_tuned_gather_intra_dec_dynamic(GATHER_ARGS); -int ompi_coll_tuned_gather_intra_do_forced(GATHER_ARGS); -int ompi_coll_tuned_gather_intra_do_this(GATHER_ARGS, int algorithm, int faninout, int segsize); -int ompi_coll_tuned_gather_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_gather_intra_basic_linear(GATHER_ARGS); -int ompi_coll_tuned_gather_intra_binomial(GATHER_ARGS); -int ompi_coll_tuned_gather_intra_linear_sync(GATHER_ARGS, int first_segment_size); -int ompi_coll_tuned_gather_inter_dec_fixed(GATHER_ARGS); -int ompi_coll_tuned_gather_inter_dec_dynamic(GATHER_ARGS); - -/* GatherV */ -int ompi_coll_tuned_gatherv_intra_dec_fixed(GATHERV_ARGS); -int ompi_coll_tuned_gatherv_intra_dec_dynamic(GATHER_ARGS); -int ompi_coll_tuned_gatherv_inter_dec_fixed(GATHER_ARGS); -int ompi_coll_tuned_gatherv_inter_dec_dynamic(GATHER_ARGS); - -/* Reduce */ -int ompi_coll_tuned_reduce_generic( REDUCE_ARGS, ompi_coll_tree_t* tree, int count_by_segment, int max_outstanding_reqs ); -int ompi_coll_tuned_reduce_intra_dec_fixed(REDUCE_ARGS); -int ompi_coll_tuned_reduce_intra_dec_dynamic(REDUCE_ARGS); -int ompi_coll_tuned_reduce_intra_do_forced(REDUCE_ARGS); -int ompi_coll_tuned_reduce_intra_do_this(REDUCE_ARGS, int algorithm, int faninout, int segsize, int max_oustanding_reqs); -int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_reduce_intra_basic_linear(REDUCE_ARGS); -int ompi_coll_tuned_reduce_intra_chain(REDUCE_ARGS, uint32_t segsize, int fanout, int max_outstanding_reqs ); -int ompi_coll_tuned_reduce_intra_pipeline(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); -int ompi_coll_tuned_reduce_intra_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); -int ompi_coll_tuned_reduce_intra_binomial(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); -int ompi_coll_tuned_reduce_intra_in_order_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); -int ompi_coll_tuned_reduce_inter_dec_fixed(REDUCE_ARGS); -int ompi_coll_tuned_reduce_inter_dec_dynamic(REDUCE_ARGS); - -/* Reduce_scatter */ -int ompi_coll_tuned_reduce_scatter_intra_dec_fixed(REDUCESCATTER_ARGS); -int 
ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(REDUCESCATTER_ARGS); -int ompi_coll_tuned_reduce_scatter_intra_do_forced(REDUCESCATTER_ARGS); -int ompi_coll_tuned_reduce_scatter_intra_do_this(REDUCESCATTER_ARGS, int algorithm, int faninout, int segsize); -int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(REDUCESCATTER_ARGS); -int ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(REDUCESCATTER_ARGS); -int ompi_coll_tuned_reduce_scatter_intra_ring(REDUCESCATTER_ARGS); - -int ompi_coll_tuned_reduce_scatter_inter_dec_fixed(REDUCESCATTER_ARGS); -int ompi_coll_tuned_reduce_scatter_inter_dec_dynamic(REDUCESCATTER_ARGS); - -/* Scan */ -int ompi_coll_tuned_scan_intra_dec_fixed(SCAN_ARGS); -int ompi_coll_tuned_scan_intra_dec_dynamic(SCAN_ARGS); -int ompi_coll_tuned_scan_inter_dec_fixed(SCAN_ARGS); -int ompi_coll_tuned_scan_inter_dec_dynamic(SCAN_ARGS); - -/* Scatter */ -int ompi_coll_tuned_scatter_intra_dec_fixed(SCATTER_ARGS); -int ompi_coll_tuned_scatter_intra_dec_dynamic(SCATTER_ARGS); -int ompi_coll_tuned_scatter_intra_do_forced(SCATTER_ARGS); -int ompi_coll_tuned_scatter_intra_do_this(SCATTER_ARGS, int algorithm, int faninout, int segsize); -int ompi_coll_tuned_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices); -int ompi_coll_tuned_scatter_intra_basic_linear(SCATTER_ARGS); -int ompi_coll_tuned_scatter_intra_binomial(SCATTER_ARGS); -int ompi_coll_tuned_scatter_inter_dec_fixed(SCATTER_ARGS); -int ompi_coll_tuned_scatter_inter_dec_dynamic(SCATTER_ARGS); - -/* ScatterV */ -int ompi_coll_tuned_scatterv_intra_dec_fixed(SCATTERV_ARGS); -int ompi_coll_tuned_scatterv_intra_dec_dynamic(SCATTERV_ARGS); -int ompi_coll_tuned_scatterv_inter_dec_fixed(SCATTERV_ARGS); -int ompi_coll_tuned_scatterv_inter_dec_dynamic(SCATTERV_ARGS); - -int mca_coll_tuned_ft_event(int state); - - -/* Utility functions */ - -static inline void ompi_coll_tuned_free_reqs(ompi_request_t **reqs, int count) -{ - int i; - for (i = 0; i < count; ++i) - ompi_request_free(&reqs[i]); -} - -struct mca_coll_tuned_component_t { - /** Base coll component */ - mca_coll_base_component_2_0_0_t super; - - /** MCA parameter: Priority of this component */ - int tuned_priority; - - /** global stuff that I need the component to store */ - - /* MCA parameters first */ - - /* cached decision table stuff (moved from MCW module) */ - ompi_coll_alg_rule_t *all_base_rules; -}; -/** - * Convenience typedef - */ -typedef struct mca_coll_tuned_component_t mca_coll_tuned_component_t; - -/** - * Global component instance - */ -OMPI_MODULE_DECLSPEC extern mca_coll_tuned_component_t mca_coll_tuned_component; - -/* - * Data structure for hanging data off the communicator - * i.e. 
per module instance - */ -struct mca_coll_tuned_comm_t { - /* standard data for requests and PML usage */ - - /* Precreate space for requests - * Note this does not effect basic, - * but if in wrong context can confuse a debugger - * this is controlled by an MCA param - */ - - ompi_request_t **mcct_reqs; - int mcct_num_reqs; - - /* - * tuned topo information caching per communicator - * - * for each communicator we cache the topo information so we can - * reuse without regenerating if we change the root, [or fanout] - * then regenerate and recache this information - */ - - /* general tree with n fan out */ - ompi_coll_tree_t *cached_ntree; - int cached_ntree_root; - int cached_ntree_fanout; - - /* binary tree */ - ompi_coll_tree_t *cached_bintree; - int cached_bintree_root; - - /* binomial tree */ - ompi_coll_tree_t *cached_bmtree; - int cached_bmtree_root; - - /* binomial tree */ - ompi_coll_tree_t *cached_in_order_bmtree; - int cached_in_order_bmtree_root; - - /* chained tree (fanout followed by pipelines) */ - ompi_coll_tree_t *cached_chain; - int cached_chain_root; - int cached_chain_fanout; - - /* pipeline */ - ompi_coll_tree_t *cached_pipeline; - int cached_pipeline_root; - - /* in-order binary tree (root of the in-order binary tree is rank 0) */ - ompi_coll_tree_t *cached_in_order_bintree; - - /* moving to the component */ - ompi_coll_com_rule_t *com_rules[COLLCOUNT]; /* the communicator rules for each MPI collective for ONLY my comsize */ - - /* for forced algorithms we store the information on the module */ - /* previously we only had one shared copy, ops, it really is per comm/module */ - coll_tuned_force_algorithm_params_t user_forced[COLLCOUNT]; -}; -typedef struct mca_coll_tuned_comm_t mca_coll_tuned_comm_t; - -struct mca_coll_tuned_module_t { - mca_coll_base_module_t super; - - mca_coll_tuned_comm_t *tuned_data; -}; -typedef struct mca_coll_tuned_module_t mca_coll_tuned_module_t; -OBJ_CLASS_DECLARATION(mca_coll_tuned_module_t); - -static inline void mca_coll_tuned_free_reqs(ompi_request_t ** reqs, - int count) -{ - int i; - for (i = 0; i < count; ++i) - ompi_request_free(reqs + i); -} - -END_C_DECLS - -#define COLL_TUNED_UPDATE_BINTREE( OMPI_COMM, TUNED_MODULE, ROOT ) \ -do { \ - mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \ - if( !( (coll_comm->cached_bintree) \ - && (coll_comm->cached_bintree_root == (ROOT)) ) ) { \ - if( coll_comm->cached_bintree ) { /* destroy previous binomial if defined */ \ - ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_bintree) ); \ - } \ - coll_comm->cached_bintree = ompi_coll_tuned_topo_build_tree(2,(OMPI_COMM),(ROOT)); \ - coll_comm->cached_bintree_root = (ROOT); \ - } \ -} while (0) - -#define COLL_TUNED_UPDATE_BMTREE( OMPI_COMM, TUNED_MODULE, ROOT ) \ -do { \ - mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \ - if( !( (coll_comm->cached_bmtree) \ - && (coll_comm->cached_bmtree_root == (ROOT)) ) ) { \ - if( coll_comm->cached_bmtree ) { /* destroy previous binomial if defined */ \ - ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_bmtree) ); \ - } \ - coll_comm->cached_bmtree = ompi_coll_tuned_topo_build_bmtree( (OMPI_COMM), (ROOT) ); \ - coll_comm->cached_bmtree_root = (ROOT); \ - } \ -} while (0) - -#define COLL_TUNED_UPDATE_IN_ORDER_BMTREE( OMPI_COMM, TUNED_MODULE, ROOT ) \ -do { \ - mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \ - if( !( (coll_comm->cached_in_order_bmtree) \ - && (coll_comm->cached_in_order_bmtree_root == (ROOT)) ) ) { \ - if( coll_comm->cached_in_order_bmtree 
) { /* destroy previous binomial if defined */ \ - ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_in_order_bmtree) ); \ - } \ - coll_comm->cached_in_order_bmtree = ompi_coll_tuned_topo_build_in_order_bmtree( (OMPI_COMM), (ROOT) ); \ - coll_comm->cached_in_order_bmtree_root = (ROOT); \ - } \ -} while (0) - -#define COLL_TUNED_UPDATE_PIPELINE( OMPI_COMM, TUNED_MODULE, ROOT ) \ -do { \ - mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \ - if( !( (coll_comm->cached_pipeline) \ - && (coll_comm->cached_pipeline_root == (ROOT)) ) ) { \ - if (coll_comm->cached_pipeline) { /* destroy previous pipeline if defined */ \ - ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_pipeline) ); \ - } \ - coll_comm->cached_pipeline = ompi_coll_tuned_topo_build_chain( 1, (OMPI_COMM), (ROOT) ); \ - coll_comm->cached_pipeline_root = (ROOT); \ - } \ -} while (0) - -#define COLL_TUNED_UPDATE_CHAIN( OMPI_COMM, TUNED_MODULE, ROOT, FANOUT ) \ -do { \ - mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \ - if( !( (coll_comm->cached_chain) \ - && (coll_comm->cached_chain_root == (ROOT)) \ - && (coll_comm->cached_chain_fanout == (FANOUT)) ) ) { \ - if( coll_comm->cached_chain) { /* destroy previous chain if defined */ \ - ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_chain) ); \ - } \ - coll_comm->cached_chain = ompi_coll_tuned_topo_build_chain((FANOUT), (OMPI_COMM), (ROOT)); \ - coll_comm->cached_chain_root = (ROOT); \ - coll_comm->cached_chain_fanout = (FANOUT); \ - } \ -} while (0) - -#define COLL_TUNED_UPDATE_IN_ORDER_BINTREE( OMPI_COMM, TUNED_MODULE ) \ -do { \ - mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \ - if( !(coll_comm->cached_in_order_bintree) ) { \ - /* In-order binary tree topology is defined by communicator size */ \ - /* Thus, there is no need to destroy anything */ \ - coll_comm->cached_in_order_bintree = \ - ompi_coll_tuned_topo_build_in_order_bintree((OMPI_COMM)); \ - } \ -} while (0) - -/** - * This macro give a generic way to compute the best count of - * the segment (i.e. the number of complete datatypes that - * can fit in the specified SEGSIZE). Beware, when this macro - * is called, the SEGCOUNT should be initialized to the count as - * expected by the collective call. - */ -#define COLL_TUNED_COMPUTED_SEGCOUNT(SEGSIZE, TYPELNG, SEGCOUNT) \ - if( ((SEGSIZE) >= (TYPELNG)) && \ - ((SEGSIZE) < ((TYPELNG) * (SEGCOUNT))) ) { \ - size_t residual; \ - (SEGCOUNT) = (int)((SEGSIZE) / (TYPELNG)); \ - residual = (SEGSIZE) - (SEGCOUNT) * (TYPELNG); \ - if( residual > ((TYPELNG) >> 1) ) \ - (SEGCOUNT)++; \ - } \ - -/** - * This macro gives a generic wait to compute the well distributed block counts - * when the count and number of blocks are fixed. - * Macro returns "early-block" count, "late-block" count, and "split-index" - * which is the block at which we switch from "early-block" count to - * the "late-block" count. - * count = split_index * early_block_count + - * (block_count - split_index) * late_block_count - * We do not perform ANY error checks - make sure that the input values - * make sense (eg. count > num_blocks). 
- */ -#define COLL_TUNED_COMPUTE_BLOCKCOUNT( COUNT, NUM_BLOCKS, SPLIT_INDEX, \ - EARLY_BLOCK_COUNT, LATE_BLOCK_COUNT ) \ - EARLY_BLOCK_COUNT = LATE_BLOCK_COUNT = COUNT / NUM_BLOCKS; \ - SPLIT_INDEX = COUNT % NUM_BLOCKS; \ - if (0 != SPLIT_INDEX) { \ - EARLY_BLOCK_COUNT = EARLY_BLOCK_COUNT + 1; \ - } \ - - -#endif /* MCA_COLL_TUNED_EXPORT_H */ - diff --git a/ompi/mca/coll/base/coll_base_allgather.c b/ompi/mca/coll/base/coll_base_allgather.c index 4fe5c5eca3..6c90b10fa5 100644 --- a/ompi/mca/coll/base/coll_base_allgather.c +++ b/ompi/mca/coll/base/coll_base_allgather.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -30,31 +30,12 @@ #include "ompi/communicator/communicator.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/coll_tags.h" -#include "coll_tuned.h" -#include "coll_tuned_topo.h" -#include "coll_tuned_util.h" - -/* allgather algorithm variables */ -static int coll_tuned_allgather_algorithm_count = 6; -static int coll_tuned_allgather_forced_algorithm = 0; -static int coll_tuned_allgather_segment_size = 0; -static int coll_tuned_allgather_tree_fanout; -static int coll_tuned_allgather_chain_fanout; - -/* valid values for coll_tuned_allgather_forced_algorithm */ -static mca_base_var_enum_value_t allgather_algorithms[] = { - {0, "ignore"}, - {1, "linear"}, - {2, "bruck"}, - {3, "recursive_doubling"}, - {4, "ring"}, - {5, "neighbor"}, - {6, "two_proc"}, - {0, NULL} -}; +#include "ompi/mca/coll/base/coll_base_functions.h" +#include "coll_base_topo.h" +#include "coll_base_util.h" /* - * ompi_coll_tuned_allgather_intra_bruck + * ompi_coll_base_allgather_intra_bruck * * Function: allgather using O(log(N)) steps. * Accepts: Same arguments as MPI_Allgather @@ -65,7 +46,7 @@ static mca_base_var_enum_value_t allgather_algorithms[] = { * in Multiport Message-Passing Systems" * Memory requirements: non-zero ranks require shift buffer to perform final * step in the algorithm. 
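
[Illustrative aside, not part of the patch.] The two helper macros above, COLL_TUNED_COMPUTED_SEGCOUNT and COLL_TUNED_COMPUTE_BLOCKCOUNT, can be exercised in isolation. The following stand-alone sketch rewrites both as functions and runs one worked example each; the sample values (segment size 1000, datatype length 48, count 10 split over 4 blocks) are arbitrary choices, not taken from the patch. The Bruck allgather walk-through resumes below.

/* Stand-alone demo of the two helper macros above, rewritten as
 * functions for readability; values are illustrative only. */
#include <stdio.h>
#include <stddef.h>

/* COLL_TUNED_COMPUTED_SEGCOUNT: how many whole datatypes fit in a
 * segment, rounded up when the leftover exceeds half a datatype.
 * segcount must enter holding the collective's full count. */
static void computed_segcount(size_t segsize, size_t typelng, int *segcount)
{
    if (segsize >= typelng && segsize < typelng * (size_t)*segcount) {
        *segcount = (int)(segsize / typelng);
        if (segsize - (size_t)*segcount * typelng > typelng / 2)
            (*segcount)++;
    }
}

/* COLL_TUNED_COMPUTE_BLOCKCOUNT: split count into num_blocks blocks;
 * the first split blocks carry early elements, the rest carry late. */
static void compute_blockcount(int count, int num_blocks,
                               int *split, int *early, int *late)
{
    *early = *late = count / num_blocks;
    *split = count % num_blocks;
    if (0 != *split)
        (*early)++;
}

int main(void)
{
    int segcount = 100;                 /* the collective's full count */
    computed_segcount(1000, 48, &segcount);
    printf("segcount = %d\n", segcount); /* 1000/48 = 20 rem 40 > 24 -> 21 */

    int split, early, late;
    compute_blockcount(10, 4, &split, &early, &late);
    /* 10 = split*early + (4 - split)*late = 2*3 + 2*2 */
    printf("split=%d early=%d late=%d\n", split, early, late);
    return 0;
}
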
- * + * * Example on 6 nodes: * Initialization: everyone has its own buffer at location 0 in rbuf * This means if user specified MPI_IN_PLACE for sendbuf @@ -84,7 +65,7 @@ static mca_base_var_enum_value_t allgather_algorithms[] = { * [2] [3] [4] [5] [0] [1] * [3] [4] [5] [0] [1] [2] * Step 2: send message to (rank - 2^2), receive message from (rank + 2^2) - * message size is "all remaining blocks" + * message size is "all remaining blocks" * # 0 1 2 3 4 5 * [0] [1] [2] [3] [4] [5] * [1] [2] [3] [4] [5] [0] @@ -101,7 +82,7 @@ static mca_base_var_enum_value_t allgather_algorithms[] = { * [4] [4] [4] [4] [4] [4] * [5] [5] [5] [5] [5] [5] */ -int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount, +int ompi_coll_base_allgather_intra_bruck(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -115,8 +96,8 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount, size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allgather_intra_bruck rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:allgather_intra_bruck rank %d", rank)); err = ompi_datatype_get_extent (sdtype, &slb, &sext); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } @@ -125,7 +106,7 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount, if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } /* Initialization step: - - if send buffer is not MPI_IN_PLACE, copy send buffer to block 0 of + - if send buffer is not MPI_IN_PLACE, copy send buffer to block 0 of receive buffer, else - if rank r != 0, copy r^th block from receive buffer to block 0. */ @@ -140,15 +121,15 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount, err = ompi_datatype_copy_content_same_ddt(rdtype, rcount, tmprecv, tmpsend); if (err < 0) { line = __LINE__; goto err_hndl; } } - + /* Communication step: At every step i, rank r: - doubles the distance - - sends message which starts at begining of rbuf and has size + - sends message which starts at begining of rbuf and has size (blockcount * rcount) to rank (r - distance) - receives message of size blockcount * rcount from rank (r + distance) at location (rbuf + distance * rcount * rext) - - blockcount doubles until last step when only the remaining data is + - blockcount doubles until last step when only the remaining data is exchanged. */ blockcount = 1; @@ -162,14 +143,14 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount, if (distance <= (size >> 1)) { blockcount = distance; - } else { + } else { blockcount = size - distance; } /* Sendreceive */ - err = ompi_coll_tuned_sendrecv(tmpsend, blockcount * rcount, rdtype, + err = ompi_coll_base_sendrecv(tmpsend, blockcount * rcount, rdtype, sendto, MCA_COLL_BASE_TAG_ALLGATHER, - tmprecv, blockcount * rcount, rdtype, + tmprecv, blockcount * rcount, rdtype, recvfrom, MCA_COLL_BASE_TAG_ALLGATHER, comm, MPI_STATUS_IGNORE, rank); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } @@ -178,8 +159,8 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount, /* Finalization step: On all nodes except 0, data needs to be shifted locally: - - create temporary shift buffer, - see discussion in coll_basic_reduce.c about the size and begining + - create temporary shift buffer, + see discussion in coll_basic_reduce.c about the size and begining of temporary buffer. - copy blocks [0 .. 
(size - rank - 1)] from rbuf to shift buffer - move blocks [(size - rank) .. size] from rbuf to begining of rbuf @@ -195,8 +176,8 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount, free_buf = (char*) calloc(((true_extent + ((ptrdiff_t)(size - rank) * (ptrdiff_t)rcount - 1) * rext)), sizeof(char)); - if (NULL == free_buf) { - line = __LINE__; err = OMPI_ERR_OUT_OF_RESOURCE; goto err_hndl; + if (NULL == free_buf) { + line = __LINE__; err = OMPI_ERR_OUT_OF_RESOURCE; goto err_hndl; } shift_buf = free_buf - true_lb; @@ -207,13 +188,13 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount, /* 2. move blocks [(size - rank) .. size] from rbuf to the begining of rbuf */ tmpsend = (char*) rbuf + (ptrdiff_t)(size - rank) * (ptrdiff_t)rcount * rext; - err = ompi_datatype_copy_content_same_ddt(rdtype, (ptrdiff_t)rank * (ptrdiff_t)rcount, + err = ompi_datatype_copy_content_same_ddt(rdtype, (ptrdiff_t)rank * (ptrdiff_t)rcount, rbuf, tmpsend); if (err < 0) { line = __LINE__; goto err_hndl; } /* 3. copy blocks from shift buffer back to rbuf starting at block [rank]. */ tmprecv = (char*) rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext; - err = ompi_datatype_copy_content_same_ddt(rdtype, (ptrdiff_t)(size - rank) * (ptrdiff_t)rcount, + err = ompi_datatype_copy_content_same_ddt(rdtype, (ptrdiff_t)(size - rank) * (ptrdiff_t)rcount, tmprecv, shift_buf); if (err < 0) { line = __LINE__; goto err_hndl; } @@ -223,13 +204,13 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount, return OMPI_SUCCESS; err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); return err; } /* - * ompi_coll_tuned_allgather_intra_recursivedoubling + * ompi_coll_base_allgather_intra_recursivedoubling * * Function: allgather using O(log(N)) steps. * Accepts: Same arguments as MPI_Allgather @@ -239,29 +220,29 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount, * This algorithm is used in MPICH-2 for small- and medium-sized * messages on power-of-two processes. * - * Limitation: Current implementation only works on power-of-two number of - * processes. + * Limitation: Current implementation only works on power-of-two number of + * processes. * In case this algorithm is invoked on non-power-of-two * processes, Bruck algorithm will be invoked. - * + * * Memory requirements: * No additional memory requirements beyond user-supplied buffers. - * + * * Example on 4 nodes: * Initialization: everyone has its own buffer at location rank in rbuf - * # 0 1 2 3 + * # 0 1 2 3 * [0] [ ] [ ] [ ] * [ ] [1] [ ] [ ] * [ ] [ ] [2] [ ] * [ ] [ ] [ ] [3] * Step 0: exchange data with (rank ^ 2^0) - * # 0 1 2 3 + * # 0 1 2 3 * [0] [0] [ ] [ ] * [1] [1] [ ] [ ] * [ ] [ ] [2] [2] * [ ] [ ] [3] [3] * Step 1: exchange data with (rank ^ 2^1) (if you can) - * # 0 1 2 3 + * # 0 1 2 3 * [0] [0] [0] [0] * [1] [1] [1] [1] * [2] [2] [2] [2] @@ -269,12 +250,12 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount, * * TODO: Modify the algorithm to work with any number of nodes. * We can modify code to use identical implementation like MPICH-2: - * - using recursive-halving algorithm, at the end of each step, + * - using recursive-halving algorithm, at the end of each step, * determine if there are nodes who did not exchange their data in that * step, and send them appropriate messages. 
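
[Illustrative aside, not part of the patch.] The Bruck schedule documented above — own block rotated to offset 0, log2(P) doubling exchanges, then a local shift — can be checked with a minimal stand-alone simulation before the recursive-doubling implementation below. Plain arrays stand in for each rank's receive buffer, all ranks advance in lock step, and the final rotation has the same effect as the shift-buffer finalization in the patched code; P = 6 matches the example above but is otherwise arbitrary. This sketches the data movement only, it is not MPI code.

/* Stand-alone simulation of the Bruck allgather schedule. */
#include <stdio.h>
#include <string.h>

#define P 6                          /* number of simulated ranks */

int main(void)
{
    int buf[P][P], next[P][P];

    /* Initialization: every rank places its own block at offset 0. */
    for (int r = 0; r < P; r++)
        for (int i = 0; i < P; i++)
            buf[r][i] = (0 == i) ? r : -1;

    /* Communication: at each step, rank r receives the first blockcount
       blocks of rank (r + distance) at offset distance, mirroring the
       send of its own leading blocks to (r - distance). */
    for (int distance = 1; distance < P; distance <<= 1) {
        int blockcount = (distance <= P / 2) ? distance : P - distance;
        memcpy(next, buf, sizeof(buf));
        for (int r = 0; r < P; r++) {
            int recvfrom = (r + distance) % P;
            memcpy(&next[r][distance], &buf[recvfrom][0],
                   blockcount * sizeof(int));
        }
        memcpy(buf, next, sizeof(buf));
    }

    /* Finalization: offset i now holds the block of rank (r + i) % P;
       rotate by r so block i lands at offset i (same effect as the
       shift-buffer finalization in the patched code). */
    for (int r = 0; r < P; r++) {
        int tmp[P];
        for (int i = 0; i < P; i++)
            tmp[(i + r) % P] = buf[r][i];
        memcpy(buf[r], tmp, sizeof(tmp));
    }

    for (int r = 0; r < P; r++) {
        printf("rank %d:", r);
        for (int i = 0; i < P; i++)
            printf(" %d", buf[r][i]);
        printf("\n");
    }
    return 0;
}

Every rank should print the fully gathered sequence 0..5.
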
*/ -int -ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount, +int +ompi_coll_base_allgather_intra_recursivedoubling(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -293,21 +274,21 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount, pow2size >>=1; /* Current implementation only handles power-of-two number of processes. - If the function was called on non-power-of-two number of processes, + If the function was called on non-power-of-two number of processes, print warning and call bruck allgather algorithm with same parameters. */ if (pow2size != size) { - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allgather_intra_recursivedoubling WARNING: non-pow-2 size %d, switching to bruck algorithm", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:allgather_intra_recursivedoubling WARNING: non-pow-2 size %d, switching to bruck algorithm", size)); - return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype, + return ompi_coll_base_allgather_intra_bruck(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module); } - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allgather_intra_recursivedoubling rank %d, size %d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:allgather_intra_recursivedoubling rank %d, size %d", rank, size)); err = ompi_datatype_get_extent (sdtype, &slb, &sext); @@ -317,7 +298,7 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount, if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } /* Initialization step: - - if send buffer is not MPI_IN_PLACE, copy send buffer to block 0 of + - if send buffer is not MPI_IN_PLACE, copy send buffer to block 0 of receive buffer */ if (MPI_IN_PLACE != sbuf) { @@ -326,8 +307,8 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount, err = ompi_datatype_sndrcv(tmpsend, scount, sdtype, tmprecv, rcount, rdtype); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } - } - + } + /* Communication step: At every step i, rank r: - exchanges message with rank remote = (r ^ 2^i). @@ -347,7 +328,7 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount, } /* Sendreceive */ - err = ompi_coll_tuned_sendrecv(tmpsend, (ptrdiff_t)distance * (ptrdiff_t)rcount, rdtype, + err = ompi_coll_base_sendrecv(tmpsend, (ptrdiff_t)distance * (ptrdiff_t)rcount, rdtype, remote, MCA_COLL_BASE_TAG_ALLGATHER, tmprecv, (ptrdiff_t)distance * (ptrdiff_t)rcount, rdtype, remote, MCA_COLL_BASE_TAG_ALLGATHER, @@ -359,7 +340,7 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount, return OMPI_SUCCESS; err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); return err; } @@ -367,7 +348,7 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount, /* - * ompi_coll_tuned_allgather_intra_ring + * ompi_coll_base_allgather_intra_ring * * Function: allgather using O(N) steps. * Accepts: Same arguments as MPI_Allgather @@ -379,9 +360,9 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount, * (r + 1) containing data from rank (r - i), with wrap arounds. * Memory requirements: * No additional memory requirements. 
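
[Illustrative aside, not part of the patch.] The recursive-doubling exchange pattern just implemented can be simulated the same way: at the step with distance d, rank r owns the d contiguous blocks starting at (r & ~(d - 1)) and swaps that chunk with partner (r ^ d). A minimal sketch, assuming a power-of-two P (here 8) and one int block per rank; the ring implementation follows below.

/* Stand-alone simulation of recursive-doubling allgather. */
#include <stdio.h>
#include <string.h>

#define P 8                          /* must be a power of two */

int main(void)
{
    int buf[P][P], next[P][P];

    for (int r = 0; r < P; r++)
        for (int i = 0; i < P; i++)
            buf[r][i] = (i == r) ? r : -1;   /* own block at offset r */

    /* Before the step at distance d, rank r owns the d contiguous
       blocks starting at (r & ~(d - 1)); it swaps that chunk with
       rank (r ^ d), so afterwards both own 2d contiguous blocks. */
    for (int d = 1; d < P; d <<= 1) {
        memcpy(next, buf, sizeof(buf));
        for (int r = 0; r < P; r++) {
            int remote = r ^ d;
            int chunk  = remote & ~(d - 1);  /* partner's owned chunk */
            memcpy(&next[r][chunk], &buf[remote][chunk], d * sizeof(int));
        }
        memcpy(buf, next, sizeof(buf));
    }

    for (int r = 0; r < P; r++) {
        printf("rank %d:", r);
        for (int i = 0; i < P; i++)
            printf(" %d", buf[r][i]);
        printf("\n");
    }
    return 0;
}
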
- * + * */ -int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount, +int ompi_coll_base_allgather_intra_ring(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -395,8 +376,8 @@ int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount, size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allgather_intra_ring rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:allgather_intra_ring rank %d", rank)); err = ompi_datatype_get_extent (sdtype, &slb, &sext); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } @@ -413,15 +394,15 @@ int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount, tmpsend = (char*) sbuf; err = ompi_datatype_sndrcv(tmpsend, scount, sdtype, tmprecv, rcount, rdtype); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } - } - + } + /* Communication step: At every step i: 0 .. (P-1), rank r: - receives message from [(r - 1 + size) % size] containing data from rank [(r - i - 1 + size) % size] - sends message to rank [(r + 1) % size] containing data from rank [(r - i + size) % size] - - sends message which starts at begining of rbuf and has size + - sends message which starts at begining of rbuf and has size */ sendto = (rank + 1) % size; recvfrom = (rank - 1 + size) % size; @@ -434,7 +415,7 @@ int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount, tmpsend = (char*)rbuf + (ptrdiff_t)senddatafrom * (ptrdiff_t)rcount * rext; /* Sendreceive */ - err = ompi_coll_tuned_sendrecv(tmpsend, rcount, rdtype, sendto, + err = ompi_coll_base_sendrecv(tmpsend, rcount, rdtype, sendto, MCA_COLL_BASE_TAG_ALLGATHER, tmprecv, rcount, rdtype, recvfrom, MCA_COLL_BASE_TAG_ALLGATHER, @@ -446,34 +427,34 @@ int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount, return OMPI_SUCCESS; err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); return err; } /* - * ompi_coll_tuned_allgather_intra_neighborexchange + * ompi_coll_base_allgather_intra_neighborexchange * * Function: allgather using N/2 steps (O(N)) * Accepts: Same arguments as MPI_Allgather * Returns: MPI_SUCCESS or error code * * Description: Neighbor Exchange algorithm for allgather. - * Described by Chen et.al. in - * "Performance Evaluation of Allgather Algorithms on + * Described by Chen et.al. in + * "Performance Evaluation of Allgather Algorithms on * Terascale Linux Cluster with Fast Ethernet", - * Proceedings of the Eighth International Conference on + * Proceedings of the Eighth International Conference on * High-Performance Computing inn Asia-Pacific Region * (HPCASIA'05), 2005 - * + * * Rank r exchanges message with one of its neighbors and * forwards the data further in the next step. * * No additional memory requirements. - * + * * Limitations: Algorithm works only on even number of processes. * For odd number of processes we switch to ring algorithm. 
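
[Illustrative aside, not part of the patch.] The ring schedule above — P - 1 steps, each rank forwarding the block it received in the previous step to its right neighbor — admits the same kind of stand-alone check. In-place updates are safe here because the slot read from the left neighbor was filled in an earlier step, never the current one; P = 5 is arbitrary. The neighbor-exchange walk-through continues below.

/* Stand-alone simulation of the ring allgather. */
#include <stdio.h>

#define P 5

int main(void)
{
    int buf[P][P];

    for (int r = 0; r < P; r++)
        for (int i = 0; i < P; i++)
            buf[r][i] = (i == r) ? r : -1;

    /* At step i, rank r receives block (r - i - 1) mod P from rank
       (r - 1) mod P, i.e. the block that neighbor received last step. */
    for (int step = 0; step < P - 1; step++) {
        for (int r = 0; r < P; r++) {
            int recvfrom     = (r - 1 + P) % P;
            int recvdatafrom = (r - step - 1 + 2 * P) % P;
            buf[r][recvdatafrom] = buf[recvfrom][recvdatafrom];
        }
    }

    for (int r = 0; r < P; r++) {
        printf("rank %d:", r);
        for (int i = 0; i < P; i++)
            printf(" %d", buf[r][i]);
        printf("\n");
    }
    return 0;
}
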
- * + * * Example on 6 nodes: * Initial state * # 0 1 2 3 4 5 @@ -508,8 +489,8 @@ int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount, * [4] [4] [4] [4] [4] [4] * [5] [5] [5] [5] [5] [5] */ -int -ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount, +int +ompi_coll_base_allgather_intra_neighborexchange(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -525,16 +506,16 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount, rank = ompi_comm_rank(comm); if (size % 2) { - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allgather_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:allgather_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm", size)); - return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype, + return ompi_coll_base_allgather_intra_ring(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module); } - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allgather_intra_neighborexchange rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:allgather_intra_neighborexchange rank %d", rank)); err = ompi_datatype_get_extent (sdtype, &slb, &sext); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } @@ -551,7 +532,7 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount, tmpsend = (char*) sbuf; err = ompi_datatype_sndrcv(tmpsend, scount, sdtype, tmprecv, rcount, rdtype); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } - } + } /* Determine neighbors, order in which blocks will arrive, etc. */ even_rank = !(rank % 2); @@ -573,15 +554,15 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount, /* Communication loop: - First step is special: exchange a single block with neighbor[0]. - - Rest of the steps: - update recv_data_from according to offset, and + - Rest of the steps: + update recv_data_from according to offset, and exchange two blocks with appropriate neighbor. the send location becomes previous receve location. 
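
[Illustrative aside, not part of the patch; the patched loop body continues below.] The full neighbor-exchange schedule — one single-block exchange, then P/2 - 1 two-block exchanges with alternating left/right partners — can be traced with a stand-alone simulation. The neighbor / recv_data_from / offset_at_step setup mirrors the patched code; P = 6 matches the example above and must be even; this sketches the schedule only.

/* Stand-alone simulation of neighbor-exchange allgather (even P). */
#include <stdio.h>
#include <string.h>

#define P 6                          /* must be even */

int main(void)
{
    int buf[P][P], next[P][P];
    int neighbor[P][2], recv_from[P][2], offset[P][2], send_from[P];

    for (int r = 0; r < P; r++) {
        for (int i = 0; i < P; i++)
            buf[r][i] = (i == r) ? r : -1;
        if (0 == r % 2) {            /* even rank: first partner right */
            neighbor[r][0] = (r + 1) % P;
            neighbor[r][1] = (r - 1 + P) % P;
            recv_from[r][0] = recv_from[r][1] = r;
            offset[r][0] = +2;  offset[r][1] = -2;
        } else {                     /* odd rank: first partner left */
            neighbor[r][0] = (r - 1 + P) % P;
            neighbor[r][1] = (r + 1) % P;
            recv_from[r][0] = recv_from[r][1] = neighbor[r][0];
            offset[r][0] = -2;  offset[r][1] = +2;
        }
    }

    /* Step 0: exchange a single block with neighbor[0]. */
    memcpy(next, buf, sizeof(buf));
    for (int r = 0; r < P; r++) {
        int n = neighbor[r][0];
        next[r][n] = buf[n][n];
        send_from[r] = (0 == r % 2) ? r : recv_from[r][0];
    }
    memcpy(buf, next, sizeof(buf));

    /* Steps 1 .. P/2 - 1: forward the pair of blocks received last
       step, alternating between the two neighbors. */
    for (int i = 1; i < P / 2; i++) {
        int par = i % 2;
        memcpy(next, buf, sizeof(buf));
        for (int r = 0; r < P; r++) {
            int n = neighbor[r][par];
            recv_from[r][par] = (recv_from[r][par] + offset[r][par] + P) % P;
            int dst = recv_from[r][par];   /* where the pair lands */
            int src = send_from[n];        /* what the partner forwards */
            next[r][dst]           = buf[n][src];
            next[r][(dst + 1) % P] = buf[n][(src + 1) % P];
        }
        for (int r = 0; r < P; r++)
            send_from[r] = recv_from[r][par];
        memcpy(buf, next, sizeof(buf));
    }

    for (int r = 0; r < P; r++) {
        printf("rank %d:", r);
        for (int i = 0; i < P; i++)
            printf(" %d", buf[r][i]);
        printf("\n");
    }
    return 0;
}
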
*/ tmprecv = (char*)rbuf + (ptrdiff_t)neighbor[0] * (ptrdiff_t)rcount * rext; tmpsend = (char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext; /* Sendreceive */ - err = ompi_coll_tuned_sendrecv(tmpsend, rcount, rdtype, neighbor[0], + err = ompi_coll_base_sendrecv(tmpsend, rcount, rdtype, neighbor[0], MCA_COLL_BASE_TAG_ALLGATHER, tmprecv, rcount, rdtype, neighbor[0], MCA_COLL_BASE_TAG_ALLGATHER, @@ -597,15 +578,15 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount, for (i = 1; i < (size / 2); i++) { const int i_parity = i % 2; - recv_data_from[i_parity] = + recv_data_from[i_parity] = (recv_data_from[i_parity] + offset_at_step[i_parity] + size) % size; tmprecv = (char*)rbuf + (ptrdiff_t)recv_data_from[i_parity] * (ptrdiff_t)rcount * rext; tmpsend = (char*)rbuf + (ptrdiff_t)send_data_from * rcount * rext; - + /* Sendreceive */ - err = ompi_coll_tuned_sendrecv(tmpsend, (ptrdiff_t)2 * (ptrdiff_t)rcount, rdtype, - neighbor[i_parity], + err = ompi_coll_base_sendrecv(tmpsend, (ptrdiff_t)2 * (ptrdiff_t)rcount, rdtype, + neighbor[i_parity], MCA_COLL_BASE_TAG_ALLGATHER, tmprecv, (ptrdiff_t)2 * (ptrdiff_t)rcount, rdtype, neighbor[i_parity], @@ -619,13 +600,13 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount, return OMPI_SUCCESS; err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); return err; } -int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount, +int ompi_coll_base_allgather_intra_two_procs(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -638,8 +619,8 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount, rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_allgather_intra_two_procs rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "ompi_coll_base_allgather_intra_two_procs rank %d", rank)); err = ompi_datatype_get_extent (sdtype, &lb, &sext); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } @@ -661,7 +642,7 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount, } tmprecv = (char*)rbuf + (ptrdiff_t)remote * (ptrdiff_t)rcount * rext; - err = ompi_coll_tuned_sendrecv(tmpsend, scount, sdtype, remote, + err = ompi_coll_base_sendrecv(tmpsend, scount, sdtype, remote, MCA_COLL_BASE_TAG_ALLGATHER, tmprecv, rcount, rdtype, remote, MCA_COLL_BASE_TAG_ALLGATHER, @@ -670,7 +651,7 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount, /* Place your data in correct location if necessary */ if (MPI_IN_PLACE != sbuf) { - err = ompi_datatype_sndrcv((char*)sbuf, scount, sdtype, + err = ompi_datatype_sndrcv((char*)sbuf, scount, sdtype, (char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext, rcount, rdtype); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } } @@ -678,7 +659,7 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount, return MPI_SUCCESS; err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); return err; } @@ -687,13 +668,13 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount, /* * Linear functions are copied from the BASIC coll module * they do not segment the message and are simple 
implementations - * but for some small number of nodes and/or small data sizes they - * are just as fast as tuned/tree based segmenting operations + * but for some small number of nodes and/or small data sizes they + * are just as fast as base/tree based segmenting operations * and as such may be selected by the decision functions * These are copied into this module due to the way we select modules * in V1. i.e. in V2 we will handle this differently and so will not * have to duplicate code. - * JPG following the examples from other coll_tuned implementations. Dec06. + * JPG following the examples from other coll_base implementations. Dec06. */ /* copied function (with appropriate renaming) starts here */ @@ -706,10 +687,10 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount, * Returns: - MPI_SUCCESS or error code */ int -ompi_coll_tuned_allgather_intra_basic_linear(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, +ompi_coll_base_allgather_intra_basic_linear(void *sbuf, int scount, + struct ompi_datatype_t *sdtype, void *rbuf, - int rcount, + int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) @@ -727,7 +708,7 @@ ompi_coll_tuned_allgather_intra_basic_linear(void *sbuf, int scount, sbuf = ((char*) rbuf) + (ompi_comm_rank(comm) * extent * rcount); sdtype = rdtype; scount = rcount; - } + } /* Gather and broadcast. */ @@ -755,183 +736,3 @@ ompi_coll_tuned_allgather_intra_basic_linear(void *sbuf, int scount, } /* copied function (with appropriate renaming) ends here */ - -/* The following are used by dynamic and forced rules */ - -/* publish details of each algorithm and if its forced/fixed/locked in */ -/* as you add methods/algorithms you must update this and the query/map - routines */ - -/* this routine is called by the component only */ -/* this makes sure that the mca parameters are set to their initial values - and perms */ -/* module does not call this they call the forced_getvalues routine instead */ - -int -ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices) -{ - mca_base_var_enum_t *new_enum; - - ompi_coll_tuned_forced_max_algorithms[ALLGATHER] = coll_tuned_allgather_algorithm_count; - - (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "allgather_algorithm_count", - "Number of allgather algorithms available", - MCA_BASE_VAR_TYPE_INT, NULL, 0, - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_CONSTANT, - &coll_tuned_allgather_algorithm_count); - - /* MPI_T: This variable should eventually be bound to a communicator */ - coll_tuned_allgather_forced_algorithm = 0; - (void) mca_base_var_enum_create("coll_tuned_allgather_algorithms", allgather_algorithms, &new_enum); - mca_param_indices->algorithm_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "allgather_algorithm", - "Which allallgather algorithm is used. 
Can be locked down to choice of: 0 ignore, 1 basic linear, 2 bruck, 3 recursive doubling, 4 ring, 5 neighbor exchange, 6: two proc only.", - MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_allgather_forced_algorithm); - OBJ_RELEASE(new_enum); - if (mca_param_indices->algorithm_param_index < 0) { - return mca_param_indices->algorithm_param_index; - } - - coll_tuned_allgather_segment_size = 0; - mca_param_indices->segsize_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "allgather_algorithm_segmentsize", - "Segment size in bytes used by default for allgather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_allgather_segment_size); - - coll_tuned_allgather_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */ - mca_param_indices->tree_fanout_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "allgather_algorithm_tree_fanout", - "Fanout for n-tree used for allgather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_allgather_tree_fanout); - - coll_tuned_allgather_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */ - mca_param_indices->chain_fanout_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "allgather_algorithm_chain_fanout", - "Fanout for chains used for allgather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. 
Currently, available algorithms do not support chain topologies.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_allgather_chain_fanout); - - return (MPI_SUCCESS); -} - -int ompi_coll_tuned_allgather_intra_do_forced(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; - - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allgather_intra_do_forced selected algorithm %d", - data->user_forced[ALLGATHER].algorithm)); - - switch (data->user_forced[ALLGATHER].algorithm) { - case (0): - return ompi_coll_tuned_allgather_intra_dec_fixed (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - case (1): - return ompi_coll_tuned_allgather_intra_basic_linear (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - case (2): - return ompi_coll_tuned_allgather_intra_bruck (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - case (3): - return ompi_coll_tuned_allgather_intra_recursivedoubling (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - case (4): - return ompi_coll_tuned_allgather_intra_ring (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - case (5): - return ompi_coll_tuned_allgather_intra_neighborexchange (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - case (6): - return ompi_coll_tuned_allgather_intra_two_procs (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allgather_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", - data->user_forced[ALLGATHER].algorithm, - ompi_coll_tuned_forced_max_algorithms[ALLGATHER])); - return (MPI_ERR_ARG); - } /* switch */ - -} - - -int ompi_coll_tuned_allgather_intra_do_this(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module, - int algorithm, int faninout, int segsize) -{ - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allgather_intra_do_this selected algorithm %d topo faninout %d segsize %d", - algorithm, faninout, segsize)); - - switch (algorithm) { - case (0): - return ompi_coll_tuned_allgather_intra_dec_fixed(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - case (1): - return ompi_coll_tuned_allgather_intra_basic_linear(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - case (2): - return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - case (3): - return ompi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - case (4): - return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - case (5): - return ompi_coll_tuned_allgather_intra_neighborexchange(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - case (6): - return ompi_coll_tuned_allgather_intra_two_procs (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - comm, module); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allgather_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", - algorithm, - 
ompi_coll_tuned_forced_max_algorithms[ALLGATHER])); - return (MPI_ERR_ARG); - } /* switch */ -} diff --git a/ompi/mca/coll/base/coll_base_allgatherv.c b/ompi/mca/coll/base/coll_base_allgatherv.c index 4edb141c00..b884dc8591 100644 --- a/ompi/mca/coll/base/coll_base_allgatherv.c +++ b/ompi/mca/coll/base/coll_base_allgatherv.c @@ -30,19 +30,12 @@ #include "ompi/communicator/communicator.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/coll_tags.h" -#include "coll_tuned.h" -#include "coll_tuned_topo.h" -#include "coll_tuned_util.h" +#include "ompi/mca/coll/base/coll_base_functions.h" +#include "coll_base_topo.h" +#include "coll_base_util.h" -/* allgatherv algorithm variables */ -static int coll_tuned_allgatherv_algorithm_count = 5; -static int coll_tuned_allgatherv_forced_algorithm = 0; -static int coll_tuned_allgatherv_segment_size = 0; -static int coll_tuned_allgatherv_tree_fanout; -static int coll_tuned_allgatherv_chain_fanout; - -/* valid values for coll_tuned_allgatherv_forced_algorithm */ -static mca_base_var_enum_value_t allgatherv_algorithms[] = { +/* valid values for coll_base_allgatherv_forced_algorithm */ +mca_base_var_enum_value_t coll_base_allgatherv_algorithms[] = { {0, "ignore"}, {1, "default"}, {2, "bruck"}, @@ -53,7 +46,7 @@ static mca_base_var_enum_value_t allgatherv_algorithms[] = { }; /* - * ompi_coll_tuned_allgatherv_intra_bruck + * ompi_coll_base_allgatherv_intra_bruck * * Function: allgather using O(log(N)) steps. * Accepts: Same arguments as MPI_Allgather @@ -107,7 +100,7 @@ static mca_base_var_enum_value_t allgatherv_algorithms[] = { * [5] [5] [5] [5] [5] [5] [5] * [6] [6] [6] [6] [6] [6] [6] */ -int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount, +int ompi_coll_base_allgatherv_intra_bruck(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *rdispls, @@ -124,8 +117,8 @@ int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount, size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allgather_intra_bruck rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:allgather_intra_bruck rank %d", rank)); err = ompi_datatype_get_extent (sdtype, &slb, &sext); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } @@ -198,7 +191,7 @@ int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount, if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } /* Sendreceive */ - err = ompi_coll_tuned_sendrecv(rbuf, 1, new_sdtype, sendto, + err = ompi_coll_base_sendrecv(rbuf, 1, new_sdtype, sendto, MCA_COLL_BASE_TAG_ALLGATHERV, rbuf, 1, new_rdtype, recvfrom, MCA_COLL_BASE_TAG_ALLGATHERV, @@ -217,14 +210,14 @@ int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount, err_hndl: if( NULL != new_rcounts ) free(new_rcounts); - OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); return err; } /* - * ompi_coll_tuned_allgatherv_intra_ring + * ompi_coll_base_allgatherv_intra_ring * * Function: allgatherv using O(N) steps. * Accepts: Same arguments as MPI_Allgatherv @@ -238,7 +231,7 @@ int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount, * No additional memory requirements. 
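For readers following the rename, the ring schedule this comment describes boils down to a short loop. A minimal stand-alone illustration against the public MPI API (the helper name and the use of MPI_Sendrecv are assumptions for the sketch; the patched code uses ompi_coll_base_sendrecv and OMPI-internal datatype calls):

    #include <mpi.h>

    /* Ring allgatherv (sketch): at step k every rank forwards the block it
     * received at step k-1 to its right neighbor and receives the block that
     * originated one rank further to the left. After size-1 steps every rank
     * holds all blocks, and no scratch buffer is required. Assumes the caller
     * has already placed its own contribution at rdispls[rank]. */
    static int ring_allgatherv_sketch(void *rbuf, const int *rcounts,
                                      const int *rdispls, MPI_Datatype rdtype,
                                      MPI_Comm comm)
    {
        int rank, size;
        MPI_Aint lb, ext;

        MPI_Comm_rank(comm, &rank);
        MPI_Comm_size(comm, &size);
        MPI_Type_get_extent(rdtype, &lb, &ext);

        const int sendto   = (rank + 1) % size;
        const int recvfrom = (rank + size - 1) % size;

        for (int k = 0; k < size - 1; k++) {
            /* Origin ranks of the outgoing and incoming blocks at step k;
             * at k == 0 the rank sends its own contribution. */
            const int senddatafrom = (rank + size - k) % size;
            const int recvdatafrom = (rank + size - k - 1) % size;
            char *tmpsend = (char *)rbuf + (MPI_Aint)rdispls[senddatafrom] * ext;
            char *tmprecv = (char *)rbuf + (MPI_Aint)rdispls[recvdatafrom] * ext;
            int err = MPI_Sendrecv(tmpsend, rcounts[senddatafrom], rdtype, sendto, 0,
                                   tmprecv, rcounts[recvdatafrom], rdtype, recvfrom, 0,
                                   comm, MPI_STATUS_IGNORE);
            if (MPI_SUCCESS != err) return err;
        }
        return MPI_SUCCESS;
    }

Only blocks already present locally are forwarded, which is why the comment can claim no additional memory requirements.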
* */ -int ompi_coll_tuned_allgatherv_intra_ring(void *sbuf, int scount, +int ompi_coll_base_allgatherv_intra_ring(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t *rdtype, @@ -252,8 +245,8 @@ int ompi_coll_tuned_allgatherv_intra_ring(void *sbuf, int scount, size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allgatherv_intra_ring rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:allgatherv_intra_ring rank %d", rank)); err = ompi_datatype_get_extent (sdtype, &slb, &sext); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } @@ -292,7 +285,7 @@ int ompi_coll_tuned_allgatherv_intra_ring(void *sbuf, int scount, tmpsend = (char*)rbuf + rdisps[senddatafrom] * rext; /* Sendreceive */ - err = ompi_coll_tuned_sendrecv(tmpsend, rcounts[senddatafrom], rdtype, + err = ompi_coll_base_sendrecv(tmpsend, rcounts[senddatafrom], rdtype, sendto, MCA_COLL_BASE_TAG_ALLGATHERV, tmprecv, rcounts[recvdatafrom], rdtype, recvfrom, MCA_COLL_BASE_TAG_ALLGATHERV, @@ -304,13 +297,13 @@ int ompi_coll_tuned_allgatherv_intra_ring(void *sbuf, int scount, return OMPI_SUCCESS; err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); return err; } /* - * ompi_coll_tuned_allgatherv_intra_neighborexchange + * ompi_coll_base_allgatherv_intra_neighborexchange * * Function: allgatherv using N/2 steps (O(N)) * Accepts: Same arguments as MPI_Allgatherv @@ -368,7 +361,7 @@ int ompi_coll_tuned_allgatherv_intra_ring(void *sbuf, int scount, * [5] [5] [5] [5] [5] [5] */ int -ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount, +ompi_coll_base_allgatherv_intra_neighborexchange(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int *rcounts, int *rdispls, struct ompi_datatype_t *rdtype, @@ -386,17 +379,17 @@ ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount, rank = ompi_comm_rank(comm); if (size % 2) { - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allgatherv_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:allgatherv_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm", size)); - return ompi_coll_tuned_allgatherv_intra_ring(sbuf, scount, sdtype, + return ompi_coll_base_allgatherv_intra_ring(sbuf, scount, sdtype, rbuf, rcounts, rdispls, rdtype, comm, module); } - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allgatherv_intra_neighborexchange rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:allgatherv_intra_neighborexchange rank %d", rank)); err = ompi_datatype_get_extent (sdtype, &slb, &sext); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } @@ -445,7 +438,7 @@ ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount, */ tmprecv = (char*)rbuf + (ptrdiff_t)rdispls[neighbor[0]] * rext; tmpsend = (char*)rbuf + (ptrdiff_t)rdispls[rank] * rext; - err = ompi_coll_tuned_sendrecv(tmpsend, rcounts[rank], rdtype, + err = ompi_coll_base_sendrecv(tmpsend, rcounts[rank], rdtype, neighbor[0], MCA_COLL_BASE_TAG_ALLGATHERV, tmprecv, rcounts[neighbor[0]], rdtype, neighbor[0], MCA_COLL_BASE_TAG_ALLGATHERV, @@ -493,7 +486,7 @@ ompi_coll_tuned_allgatherv_intra_neighborexchange(void 
*sbuf, int scount, tmpsend = (char*)rbuf; /* Sendreceive */ - err = ompi_coll_tuned_sendrecv(tmpsend, 1, new_sdtype, neighbor[i_parity], + err = ompi_coll_base_sendrecv(tmpsend, 1, new_sdtype, neighbor[i_parity], MCA_COLL_BASE_TAG_ALLGATHERV, tmprecv, 1, new_rdtype, neighbor[i_parity], MCA_COLL_BASE_TAG_ALLGATHERV, @@ -509,13 +502,13 @@ ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount, return OMPI_SUCCESS; err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); return err; } -int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount, +int ompi_coll_base_allgatherv_intra_two_procs(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int *rcounts, int *rdispls, @@ -529,8 +522,8 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount, rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_allgatherv_intra_two_procs rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "ompi_coll_base_allgatherv_intra_two_procs rank %d", rank)); err = ompi_datatype_get_extent (sdtype, &lb, &sext); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } @@ -552,7 +545,7 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount, } tmprecv = (char*)rbuf + (ptrdiff_t)rdispls[remote] * rext; - err = ompi_coll_tuned_sendrecv(tmpsend, scount, sdtype, remote, + err = ompi_coll_base_sendrecv(tmpsend, scount, sdtype, remote, MCA_COLL_BASE_TAG_ALLGATHERV, tmprecv, rcounts[remote], rdtype, remote, MCA_COLL_BASE_TAG_ALLGATHERV, @@ -570,7 +563,7 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount, return MPI_SUCCESS; err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); return err; } @@ -580,12 +573,12 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount, * Linear functions are copied from the BASIC coll module * they do not segment the message and are simple implementations * but for some small number of nodes and/or small data sizes they - * are just as fast as tuned/tree based segmenting operations + * are just as fast as base/tree based segmenting operations * and as such may be selected by the decision functions * These are copied into this module due to the way we select modules * in V1. i.e. in V2 we will handle this differently and so will not * have to duplicate code. - * JPG following the examples from other coll_tuned implementations. Dec06. + * JPG following the examples from other coll_base implementations. Dec06. 
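The two-process variant above reduces to a single exchange plus a local placement. A hedged sketch in public MPI terms (helper name illustrative; MPI_IN_PLACE and the packed send/recv datatypes of the real code are omitted):

    #include <mpi.h>

    /* Two-process allgatherv (sketch): exchange with the single remote rank
     * (rank ^ 1), then place the local contribution. Assumes sbuf is not
     * MPI_IN_PLACE; the real code also copes with that case. */
    static int two_procs_allgatherv_sketch(const void *sbuf, int scount,
                                           MPI_Datatype sdtype, void *rbuf,
                                           const int *rcounts, const int *rdispls,
                                           MPI_Datatype rdtype, MPI_Comm comm)
    {
        int rank, err;
        MPI_Aint lb, rext;

        MPI_Comm_rank(comm, &rank);
        MPI_Type_get_extent(rdtype, &lb, &rext);

        const int remote = rank ^ 1;
        char *tmprecv = (char *)rbuf + (MPI_Aint)rdispls[remote] * rext;
        err = MPI_Sendrecv(sbuf, scount, sdtype, remote, 0,
                           tmprecv, rcounts[remote], rdtype, remote, 0,
                           comm, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS != err) return err;

        /* Local placement via a matched send/receive to self keeps the sketch
         * datatype-agnostic; a send to self paired with its own receive inside
         * one MPI_Sendrecv call is legal MPI. */
        char *tmpsend = (char *)rbuf + (MPI_Aint)rdispls[rank] * rext;
        return MPI_Sendrecv(sbuf, scount, sdtype, rank, 1,
                            tmpsend, rcounts[rank], rdtype, rank, 1,
                            comm, MPI_STATUS_IGNORE);
    }
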
*/ /* copied function (with appropriate renaming) starts here */ @@ -599,7 +592,7 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount, * Returns: - MPI_SUCCESS or error code */ int -ompi_coll_tuned_allgatherv_intra_basic_default(void *sbuf, int scount, +ompi_coll_base_allgatherv_intra_basic_default(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *disps, @@ -619,8 +612,8 @@ ompi_coll_tuned_allgatherv_intra_basic_default(void *sbuf, int scount, * to process with rank 0 (OMPI convention) */ - OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_allgatherv_intra_basic_default rank %d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "ompi_coll_base_allgatherv_intra_basic_default rank %d", rank)); if (MPI_IN_PLACE == sbuf) { @@ -676,177 +669,3 @@ ompi_coll_tuned_allgatherv_intra_basic_default(void *sbuf, int scount, /* copied function (with appropriate renaming) ends here */ -/* The following are used by dynamic and forced rules */ - -/* publish details of each algorithm and if its forced/fixed/locked in */ -/* as you add methods/algorithms you must update this and the query/map - routines */ - -/* this routine is called by the component only */ -/* this makes sure that the mca parameters are set to their initial values - and perms */ -/* module does not call this they call the forced_getvalues routine instead */ - -int -ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices) -{ - mca_base_var_enum_t *new_enum; - - ompi_coll_tuned_forced_max_algorithms[ALLGATHERV] = coll_tuned_allgatherv_algorithm_count; - - (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "allgatherv_algorithm_count", - "Number of allgatherv algorithms available", - MCA_BASE_VAR_TYPE_INT, NULL, 0, - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_CONSTANT, - &coll_tuned_allgatherv_algorithm_count); - - /* MPI_T: This variable should eventually be bound to a communicator */ - coll_tuned_allgatherv_forced_algorithm = 0; - (void) mca_base_var_enum_create("coll_tuned_allgatherv_algorithms", allgatherv_algorithms, &new_enum); - mca_param_indices->algorithm_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "allgatherv_algorithm", - "Which allallgatherv algorithm is used. Can be locked down to choice of: 0 ignore, 1 default (allgathervv + bcast), 2 bruck, 3 ring, 4 neighbor exchange, 5: two proc only.", - MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_allgatherv_forced_algorithm); - OBJ_RELEASE(new_enum); - if (mca_param_indices->algorithm_param_index < 0) { - return mca_param_indices->algorithm_param_index; - } - - coll_tuned_allgatherv_segment_size = 0; - mca_param_indices->segsize_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "allgatherv_algorithm_segmentsize", - "Segment size in bytes used by default for allgatherv algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. 
Currently, available algorithms do not support segmentation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_allgatherv_segment_size); - - coll_tuned_allgatherv_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */ - mca_param_indices->tree_fanout_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "allgatherv_algorithm_tree_fanout", - "Fanout for n-tree used for allgatherv algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_allgatherv_tree_fanout); - - coll_tuned_allgatherv_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */ - mca_param_indices->chain_fanout_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "allgatherv_algorithm_chain_fanout", - "Fanout for chains used for allgatherv algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_allgatherv_chain_fanout); - - return (MPI_SUCCESS); -} - -int ompi_coll_tuned_allgatherv_intra_do_forced(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, - int *rdispls, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; - - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allgatherv_intra_do_forced selected algorithm %d", - data->user_forced[ALLGATHERV].algorithm)); - - switch (data->user_forced[ALLGATHERV].algorithm) { - case (0): - return ompi_coll_tuned_allgatherv_intra_dec_fixed (sbuf, scount, sdtype, - rbuf, rcounts, rdispls, rdtype, - comm, module); - case (1): - return ompi_coll_tuned_allgatherv_intra_basic_default (sbuf, scount, sdtype, - rbuf, rcounts, rdispls, rdtype, - comm, module); - case (2): - return ompi_coll_tuned_allgatherv_intra_bruck (sbuf, scount, sdtype, - rbuf, rcounts, rdispls, rdtype, - comm, module); - case (3): - return ompi_coll_tuned_allgatherv_intra_ring (sbuf, scount, sdtype, - rbuf, rcounts, rdispls, rdtype, - comm, module); - case (4): - return ompi_coll_tuned_allgatherv_intra_neighborexchange (sbuf, scount, sdtype, - rbuf, rcounts, rdispls, rdtype, - comm, module); - case (5): - return ompi_coll_tuned_allgatherv_intra_two_procs (sbuf, scount, sdtype, - rbuf, rcounts, rdispls, rdtype, - comm, module); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allgatherv_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", - data->user_forced[ALLGATHERV].algorithm, - ompi_coll_tuned_forced_max_algorithms[ALLGATHERV])); - return (MPI_ERR_ARG); - } /* switch */ - -} - - -int ompi_coll_tuned_allgatherv_intra_do_this(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void *rbuf, int *rcounts, - int *rdispls, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module, - int algorithm, int faninout, - int segsize) -{ - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allgatherv_intra_do_this selected algorithm %d topo 
faninout %d segsize %d", - algorithm, faninout, segsize)); - - switch (algorithm) { - case (0): - return ompi_coll_tuned_allgatherv_intra_dec_fixed(sbuf, scount, sdtype, - rbuf, rcounts, rdispls, rdtype, - comm, module); - case (1): - return ompi_coll_tuned_allgatherv_intra_basic_default(sbuf, scount, sdtype, - rbuf, rcounts, rdispls, rdtype, - comm, module); - case (2): - return ompi_coll_tuned_allgatherv_intra_bruck(sbuf, scount, sdtype, - rbuf, rcounts, rdispls, rdtype, - comm, module); - case (3): - return ompi_coll_tuned_allgatherv_intra_ring(sbuf, scount, sdtype, - rbuf, rcounts, rdispls, rdtype, - comm, module); - case (4): - return ompi_coll_tuned_allgatherv_intra_neighborexchange(sbuf, scount, sdtype, - rbuf, rcounts, rdispls, rdtype, - comm, module); - case (5): - return ompi_coll_tuned_allgatherv_intra_two_procs (sbuf, scount, sdtype, - rbuf, rcounts, rdispls, rdtype, - comm, module); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allgatherv_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", - algorithm, - ompi_coll_tuned_forced_max_algorithms[ALLGATHERV])); - return (MPI_ERR_ARG); - } /* switch */ -} diff --git a/ompi/mca/coll/base/coll_base_allreduce.c b/ompi/mca/coll/base/coll_base_allreduce.c index b67dbee466..54f444b6cf 100644 --- a/ompi/mca/coll/base/coll_base_allreduce.c +++ b/ompi/mca/coll/base/coll_base_allreduce.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -31,41 +31,23 @@ #include "ompi/mca/coll/base/coll_tags.h" #include "ompi/mca/pml/pml.h" #include "ompi/op/op.h" -#include "coll_tuned.h" -#include "coll_tuned_topo.h" -#include "coll_tuned_util.h" - -/* allreduce algorithm variables */ -static int coll_tuned_allreduce_algorithm_count = 5; -static int coll_tuned_allreduce_forced_algorithm = 0; -static int coll_tuned_allreduce_segment_size = 0; -static int coll_tuned_allreduce_tree_fanout; -static int coll_tuned_allreduce_chain_fanout; - -/* valid values for coll_tuned_allreduce_forced_algorithm */ -static mca_base_var_enum_value_t allreduce_algorithms[] = { - {0, "ignore"}, - {1, "basic_linear"}, - {2, "nonoverlapping"}, - {3, "recursive_doubling"}, - {4, "ring"}, - {5, "segmented_ring"}, - {0, NULL} -}; +#include "ompi/mca/coll/base/coll_base_functions.h" +#include "coll_base_topo.h" +#include "coll_base_util.h" /* - * ompi_coll_tuned_allreduce_intra_nonoverlapping + * ompi_coll_base_allreduce_intra_nonoverlapping * * This function just calls a reduce followed by a broadcast - * both called functions are tuned but they complete sequentially, + * both called functions are base but they complete sequentially, * i.e. 
no additional overlapping - * meaning if the number of segments used is greater than the topo depth + * meaning if the number of segments used is greater than the topo depth * then once the first segment of data is fully 'reduced' it is not broadcast * while the reduce continues (cost = cost-reduce + cost-bcast + decision x 3) * */ int -ompi_coll_tuned_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count, +ompi_coll_base_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -75,16 +57,16 @@ ompi_coll_tuned_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_nonoverlapping rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:allreduce_intra_nonoverlapping rank %d", rank)); /* Reduce to 0 and broadcast. */ if (MPI_IN_PLACE == sbuf) { if (0 == rank) { - err = comm->c_coll.coll_reduce (MPI_IN_PLACE, rbuf, count, dtype, + err = comm->c_coll.coll_reduce (MPI_IN_PLACE, rbuf, count, dtype, op, 0, comm, comm->c_coll.coll_reduce_module); } else { - err = comm->c_coll.coll_reduce (rbuf, NULL, count, dtype, op, 0, + err = comm->c_coll.coll_reduce (rbuf, NULL, count, dtype, op, 0, comm, comm->c_coll.coll_reduce_module); } } else { @@ -100,21 +82,21 @@ ompi_coll_tuned_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count } /* - * ompi_coll_tuned_allreduce_intra_recursivedoubling + * ompi_coll_base_allreduce_intra_recursivedoubling * * Function: Recursive doubling algorithm for allreduce operation * Accepts: Same as MPI_Allreduce() * Returns: MPI_SUCCESS or error code * - * Description: Implements recursive doubling algorithm for allreduce. - * Original (non-segmented) implementation is used in MPICH-2 + * Description: Implements recursive doubling algorithm for allreduce. + * Original (non-segmented) implementation is used in MPICH-2 * for small and intermediate size messages. - * The algorithm preserves order of operations so it can + * The algorithm preserves order of operations so it can * be used both by commutative and non-commutative operations. * * Example on 7 nodes: * Initial state - * # 0 1 2 3 4 5 6 + * # 0 1 2 3 4 5 6 * [0] [1] [2] [3] [4] [5] [6] * Initial adjustment step for non-power of two nodes. 
* old rank 1 3 5 6 @@ -129,24 +111,24 @@ ompi_coll_tuned_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count * old rank 1 3 5 6 * new rank 0 1 2 3 * [0+1+] [0+1+] [0+1+] [0+1+] - * [2+3+] [2+3+] [2+3+] [2+3+] + * [2+3+] [2+3+] [2+3+] [2+3+] * [4+5+] [4+5+] [4+5+] [4+5+] * [6 ] [6 ] [6 ] [6 ] * Final adjustment step for non-power of two nodes - * # 0 1 2 3 4 5 6 + * # 0 1 2 3 4 5 6 * [0+1+] [0+1+] [0+1+] [0+1+] [0+1+] [0+1+] [0+1+] - * [2+3+] [2+3+] [2+3+] [2+3+] [2+3+] [2+3+] [2+3+] + * [2+3+] [2+3+] [2+3+] [2+3+] [2+3+] [2+3+] [2+3+] * [4+5+] [4+5+] [4+5+] [4+5+] [4+5+] [4+5+] [4+5+] * [6 ] [6 ] [6 ] [6 ] [6 ] [6 ] [6 ] * */ -int -ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, +int +ompi_coll_base_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { int ret, line, rank, size, adjsize, remote, distance; int newrank, newremote, extra_ranks; @@ -157,9 +139,9 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allreduce_intra_recursivedoubling rank %d", rank)); - + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:allreduce_intra_recursivedoubling rank %d", rank)); + /* Special case for size == 1 */ if (1 == size) { if (MPI_IN_PLACE != sbuf) { @@ -194,16 +176,16 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, adjsize >>= 1; /* Handle non-power-of-two case: - - Even ranks less than 2 * extra_ranks send their data to (rank + 1), and + - Even ranks less than 2 * extra_ranks send their data to (rank + 1), and sets new rank to -1. - - Odd ranks less than 2 * extra_ranks receive data from (rank - 1), + - Odd ranks less than 2 * extra_ranks receive data from (rank - 1), apply appropriate operation, and set new rank to rank/2 - Everyone else sets rank to rank - extra_ranks */ extra_ranks = size - adjsize; if (rank < (2 * extra_ranks)) { if (0 == (rank % 2)) { - ret = MCA_PML_CALL(send(tmpsend, count, dtype, (rank + 1), + ret = MCA_PML_CALL(send(tmpsend, count, dtype, (rank + 1), MCA_COLL_BASE_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD, comm)); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } @@ -221,7 +203,7 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, newrank = rank - extra_ranks; } - /* Communication/Computation loop + /* Communication/Computation loop - Exchange message with remote node. - Perform appropriate operation taking in account order of operations: result = value (op) result @@ -230,14 +212,14 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, if (newrank < 0) break; /* Determine remote node */ newremote = newrank ^ distance; - remote = (newremote < extra_ranks)? + remote = (newremote < extra_ranks)? 
(newremote * 2 + 1):(newremote + extra_ranks); /* Exchange the data */ ret = MCA_PML_CALL(irecv(tmprecv, count, dtype, remote, MCA_COLL_BASE_TAG_ALLREDUCE, comm, &reqs[0])); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - ret = MCA_PML_CALL(isend(tmpsend, count, dtype, remote, + ret = MCA_PML_CALL(isend(tmpsend, count, dtype, remote, MCA_COLL_BASE_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD, comm, &reqs[1])); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } @@ -258,14 +240,14 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, } /* Handle non-power-of-two case: - - Odd ranks less than 2 * extra_ranks send result from tmpsend to + - Odd ranks less than 2 * extra_ranks send result from tmpsend to (rank - 1) - Even ranks less than 2 * extra_ranks receive result from (rank + 1) */ if (rank < (2 * extra_ranks)) { if (0 == (rank % 2)) { ret = MCA_PML_CALL(recv(rbuf, count, dtype, (rank + 1), - MCA_COLL_BASE_TAG_ALLREDUCE, comm, + MCA_COLL_BASE_TAG_ALLREDUCE, comm, MPI_STATUS_IGNORE)); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } tmpsend = (char*)rbuf; @@ -287,14 +269,14 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, return MPI_SUCCESS; error_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tRank %d Error occurred %d\n", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n", __FILE__, line, rank, ret)); if (NULL != inplacebuf) free(inplacebuf); return ret; } /* - * ompi_coll_tuned_allreduce_intra_ring + * ompi_coll_base_allreduce_intra_ring * * Function: Ring algorithm for allreduce operation * Accepts: Same as MPI_Allreduce() @@ -304,9 +286,9 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, * automatically segmented to segment of size M/N. * Algorithm requires 2*N - 1 steps. * - * Limitations: The algorithm DOES NOT preserve order of operations so it + * Limitations: The algorithm DOES NOT preserve order of operations so it * can be used only for commutative operations. - * In addition, algorithm cannot work if the total count is + * In addition, algorithm cannot work if the total count is * less than size. * Example on 5 nodes: * Initial state @@ -318,7 +300,7 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, * [04] [14] [24] [34] [44] * * COMPUTATION PHASE - * Step 0: rank r sends block r to rank (r+1) and receives bloc (r-1) + * Step 0: rank r sends block r to rank (r+1) and receives bloc (r-1) * from rank (r-1) [with wraparound]. * # 0 1 2 3 4 * [00] [00+10] [20] [30] [40] @@ -327,7 +309,7 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, * [03] [13] [23] [33] [33+43] * [44+04] [14] [24] [34] [44] * - * Step 1: rank r sends block (r-1) to rank (r+1) and receives bloc + * Step 1: rank r sends block (r-1) to rank (r+1) and receives bloc * (r-2) from rank (r-1) [with wraparound]. * # 0 1 2 3 4 * [00] [00+10] [01+10+20] [30] [40] @@ -336,7 +318,7 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, * [33+43+03] [13] [23] [33] [33+43] * [44+04] [44+04+14] [24] [34] [44] * - * Step 2: rank r sends block (r-2) to rank (r+1) and receives bloc + * Step 2: rank r sends block (r-2) to rank (r+1) and receives bloc * (r-2) from rank (r-1) [with wraparound]. 
* # 0 1 2 3 4 * [00] [00+10] [01+10+20] [01+10+20+30] [40] @@ -345,7 +327,7 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, * [33+43+03] [33+43+03+13] [23] [33] [33+43] * [44+04] [44+04+14] [44+04+14+24] [34] [44] * - * Step 3: rank r sends block (r-3) to rank (r+1) and receives bloc + * Step 3: rank r sends block (r-3) to rank (r+1) and receives bloc * (r-3) from rank (r-1) [with wraparound]. * # 0 1 2 3 4 * [00] [00+10] [01+10+20] [01+10+20+30] [FULL] @@ -353,16 +335,16 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf, * [22+32+42+02] [FULL] [22] [22+32] [22+32+42] * [33+43+03] [33+43+03+13] [FULL] [33] [33+43] * [44+04] [44+04+14] [44+04+14+24] [FULL] [44] - * + * * DISTRIBUTION PHASE: ring ALLGATHER with ranks shifted by 1. * */ -int -ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count, +int +ompi_coll_base_allreduce_intra_ring(void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { int ret, line, rank, size, k, recv_from, send_to, block_count, inbi; int early_segcount, late_segcount, split_rank, max_segcount; @@ -375,9 +357,9 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count, size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allreduce_intra_ring rank %d, count %d", rank, count)); - + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:allreduce_intra_ring rank %d, count %d", rank, count)); + /* Special case for size == 1 */ if (1 == size) { if (MPI_IN_PLACE != sbuf) { @@ -389,10 +371,10 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count, /* Special case for count less than size - use recursive doubling */ if (count < size) { - OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:allreduce_ring rank %d/%d, count %d, switching to recursive doubling", rank, size, count)); - return (ompi_coll_tuned_allreduce_intra_recursivedoubling(sbuf, rbuf, + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:allreduce_ring rank %d/%d, count %d, switching to recursive doubling", rank, size, count)); + return (ompi_coll_base_allreduce_intra_recursivedoubling(sbuf, rbuf, count, - dtype, op, + dtype, op, comm, module)); } @@ -404,14 +386,14 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count, ret = ompi_datatype_type_size( dtype, &typelng); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - /* Determine the number of elements per block and corresponding + /* Determine the number of elements per block and corresponding block sizes. The blocks are divided into "early" and "late" ones: - blocks 0 .. (split_rank - 1) are "early" and + blocks 0 .. (split_rank - 1) are "early" and blocks (split_rank) .. (size - 1) are "late". Early blocks are at most 1 element larger than the late ones. 
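The early/late split referenced in this comment is a plain quotient/remainder computation. A hedged reconstruction of what COLL_BASE_COMPUTE_BLOCKCOUNT computes, written as a function for readability (the real implementation is a macro):

    /* Split `count` elements over `num_blocks` blocks so that the first
     * `split_index` blocks ("early") carry one extra element each. */
    static void compute_blockcount(int count, int num_blocks, int *split_index,
                                   int *early_count, int *late_count)
    {
        *late_count  = count / num_blocks;
        *split_index = count % num_blocks;
        *early_count = *late_count + (0 != *split_index ? 1 : 0);
    }
    /* Example: count = 10, num_blocks = 4 yields split_index = 2,
     * early_count = 3, late_count = 2, i.e. block sizes 3, 3, 2, 2. */
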
*/ - COLL_TUNED_COMPUTE_BLOCKCOUNT( count, size, split_rank, + COLL_BASE_COMPUTE_BLOCKCOUNT( count, size, split_rank, early_segcount, late_segcount ); max_segcount = early_segcount; max_real_segsize = true_extent + (max_segcount - 1) * extent; @@ -432,7 +414,7 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count, /* Computation loop */ - /* + /* For each of the remote nodes: - post irecv for block (r-1) - send block (r) @@ -456,8 +438,8 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count, MCA_COLL_BASE_TAG_ALLREDUCE, comm, &reqs[inbi])); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } /* Send first block (my block) to the neighbor on the right */ - block_offset = ((rank < split_rank)? - ((ptrdiff_t)rank * (ptrdiff_t)early_segcount) : + block_offset = ((rank < split_rank)? + ((ptrdiff_t)rank * (ptrdiff_t)early_segcount) : ((ptrdiff_t)rank * (ptrdiff_t)late_segcount + split_rank)); block_count = ((rank < split_rank)? early_segcount : late_segcount); tmpsend = ((char*)rbuf) + block_offset * extent; @@ -465,21 +447,21 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count, MCA_COLL_BASE_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD, comm)); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - + for (k = 2; k < size; k++) { const int prevblock = (rank + size - k + 1) % size; - + inbi = inbi ^ 0x1; - + /* Post irecv for the current block */ ret = MCA_PML_CALL(irecv(inbuf[inbi], max_segcount, dtype, recv_from, MCA_COLL_BASE_TAG_ALLREDUCE, comm, &reqs[inbi])); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - + /* Wait on previous block to arrive */ ret = ompi_request_wait(&reqs[inbi ^ 0x1], MPI_STATUS_IGNORE); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - + /* Apply operation on previous block: result goes to rbuf rbuf[prevblock] = inbuf[inbi ^ 0x1] (op) rbuf[prevblock] */ @@ -489,7 +471,7 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count, block_count = ((prevblock < split_rank)? early_segcount : late_segcount); tmprecv = ((char*)rbuf) + (ptrdiff_t)block_offset * extent; ompi_op_reduce(op, inbuf[inbi ^ 0x1], tmprecv, block_count, dtype); - + /* send previous block to send_to */ ret = MCA_PML_CALL(send(tmprecv, block_count, dtype, send_to, MCA_COLL_BASE_TAG_ALLREDUCE, @@ -501,7 +483,7 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count, ret = ompi_request_wait(&reqs[inbi], MPI_STATUS_IGNORE); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - /* Apply operation on the last block (from neighbor (rank + 1) + /* Apply operation on the last block (from neighbor (rank + 1) rbuf[rank+1] = inbuf[inbi] (op) rbuf[rank + 1] */ recv_from = (rank + 1) % size; block_offset = ((recv_from < split_rank)? @@ -510,28 +492,28 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count, block_count = ((recv_from < split_rank)? early_segcount : late_segcount); tmprecv = ((char*)rbuf) + (ptrdiff_t)block_offset * extent; ompi_op_reduce(op, inbuf[inbi], tmprecv, block_count, dtype); - + /* Distribution loop - variation of ring allgather */ send_to = (rank + 1) % size; recv_from = (rank + size - 1) % size; for (k = 0; k < size - 1; k++) { const int recv_data_from = (rank + size - k) % size; const int send_data_from = (rank + 1 + size - k) % size; - const int send_block_offset = + const int send_block_offset = ((send_data_from < split_rank)? 
((ptrdiff_t)send_data_from * early_segcount) : ((ptrdiff_t)send_data_from * late_segcount + split_rank)); - const int recv_block_offset = + const int recv_block_offset = ((recv_data_from < split_rank)? ((ptrdiff_t)recv_data_from * early_segcount) : ((ptrdiff_t)recv_data_from * late_segcount + split_rank)); - block_count = ((send_data_from < split_rank)? + block_count = ((send_data_from < split_rank)? early_segcount : late_segcount); tmprecv = (char*)rbuf + (ptrdiff_t)recv_block_offset * extent; tmpsend = (char*)rbuf + (ptrdiff_t)send_block_offset * extent; - ret = ompi_coll_tuned_sendrecv(tmpsend, block_count, dtype, send_to, + ret = ompi_coll_base_sendrecv(tmpsend, block_count, dtype, send_to, MCA_COLL_BASE_TAG_ALLREDUCE, tmprecv, max_segcount, dtype, recv_from, MCA_COLL_BASE_TAG_ALLREDUCE, @@ -546,7 +528,7 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count, return MPI_SUCCESS; error_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tRank %d Error occurred %d\n", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n", __FILE__, line, rank, ret)); if (NULL != inbuf[0]) free(inbuf[0]); if (NULL != inbuf[1]) free(inbuf[1]); @@ -554,30 +536,30 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count, } /* - * ompi_coll_tuned_allreduce_intra_ring_segmented + * ompi_coll_base_allreduce_intra_ring_segmented * * Function: Pipelined ring algorithm for allreduce operation * Accepts: Same as MPI_Allreduce(), segment size * Returns: MPI_SUCCESS or error code * - * Description: Implements pipelined ring algorithm for allreduce: + * Description: Implements pipelined ring algorithm for allreduce: * user supplies suggested segment size for the pipelining of * reduce operation. - * The segment size determines the number of phases, np, for - * the algorithm execution. - * The message is automatically divided into blocks of + * The segment size determines the number of phases, np, for + * the algorithm execution. + * The message is automatically divided into blocks of * approximately (count / (np * segcount)) elements. - * At the end of reduction phase, allgather like step is + * At the end of reduction phase, allgather like step is * executed. * Algorithm requires (np + 1)*(N - 1) steps. * - * Limitations: The algorithm DOES NOT preserve order of operations so it + * Limitations: The algorithm DOES NOT preserve order of operations so it * can be used only for commutative operations. - * In addition, algorithm cannot work if the total size is + * In addition, algorithm cannot work if the total size is * less than size * segment size. * Example on 3 nodes with 2 phases * Initial state - * # 0 1 2 + * # 0 1 2 * [00a] [10a] [20a] * [00b] [10b] [20b] * [01a] [11a] [21a] @@ -586,9 +568,9 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count, * [02b] [12b] [22b] * * COMPUTATION PHASE 0 (a) - * Step 0: rank r sends block ra to rank (r+1) and receives bloc (r-1)a + * Step 0: rank r sends block ra to rank (r+1) and receives bloc (r-1)a * from rank (r-1) [with wraparound]. - * # 0 1 2 + * # 0 1 2 * [00a] [00a+10a] [20a] * [00b] [10b] [20b] * [01a] [11a] [11a+21a] @@ -596,20 +578,20 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count, * [22a+02a] [12a] [22a] * [02b] [12b] [22b] * - * Step 1: rank r sends block (r-1)a to rank (r+1) and receives bloc + * Step 1: rank r sends block (r-1)a to rank (r+1) and receives bloc * (r-2)a from rank (r-1) [with wraparound]. 
- * # 0 1 2 + * # 0 1 2 * [00a] [00a+10a] [00a+10a+20a] * [00b] [10b] [20b] * [11a+21a+01a] [11a] [11a+21a] * [01b] [11b] [21b] * [22a+02a] [22a+02a+12a] [22a] - * [02b] [12b] [22b] + * [02b] [12b] [22b] * * COMPUTATION PHASE 1 (b) - * Step 0: rank r sends block rb to rank (r+1) and receives bloc (r-1)b + * Step 0: rank r sends block rb to rank (r+1) and receives bloc (r-1)b * from rank (r-1) [with wraparound]. - * # 0 1 2 + * # 0 1 2 * [00a] [00a+10a] [20a] * [00b] [00b+10b] [20b] * [01a] [11a] [11a+21a] @@ -617,31 +599,31 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count, * [22a+02a] [12a] [22a] * [22b+02b] [12b] [22b] * - * Step 1: rank r sends block (r-1)b to rank (r+1) and receives bloc + * Step 1: rank r sends block (r-1)b to rank (r+1) and receives bloc * (r-2)b from rank (r-1) [with wraparound]. - * # 0 1 2 + * # 0 1 2 * [00a] [00a+10a] [00a+10a+20a] * [00b] [10b] [0bb+10b+20b] * [11a+21a+01a] [11a] [11a+21a] * [11b+21b+01b] [11b] [21b] * [22a+02a] [22a+02a+12a] [22a] - * [02b] [22b+01b+12b] [22b] + * [02b] [22b+01b+12b] [22b] + * * - * * DISTRIBUTION PHASE: ring ALLGATHER with ranks shifted by 1 (same as * in regular ring algorithm. * */ -int -ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count, +int +ompi_coll_base_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module, - uint32_t segsize) + uint32_t segsize) { int ret, line, rank, size, k, recv_from, send_to; - int early_blockcount, late_blockcount, split_rank; + int early_blockcount, late_blockcount, split_rank; int segcount, max_segcount, num_phases, phase, block_count, inbi; size_t typelng; char *tmpsend = NULL, *tmprecv = NULL, *inbuf[2] = {NULL, NULL}; @@ -652,9 +634,9 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:allreduce_intra_ring_segmented rank %d, count %d", rank, count)); - + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:allreduce_intra_ring_segmented rank %d, count %d", rank, count)); + /* Special case for size == 1 */ if (1 == size) { if (MPI_IN_PLACE != sbuf) { @@ -672,34 +654,34 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count ret = ompi_datatype_type_size( dtype, &typelng); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } segcount = count; - COLL_TUNED_COMPUTED_SEGCOUNT(segsize, typelng, segcount) + COLL_BASE_COMPUTED_SEGCOUNT(segsize, typelng, segcount) /* Special case for count less than size * segcount - use regular ring */ if (count < (size * segcount)) { - OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:allreduce_ring_segmented rank %d/%d, count %d, switching to regular ring", rank, size, count)); - return (ompi_coll_tuned_allreduce_intra_ring(sbuf, rbuf, count, dtype, op, + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:allreduce_ring_segmented rank %d/%d, count %d, switching to regular ring", rank, size, count)); + return (ompi_coll_base_allreduce_intra_ring(sbuf, rbuf, count, dtype, op, comm, module)); } /* Determine the number of phases of the algorithm */ num_phases = count / (size * segcount); - if ((count % (size * segcount) >= size) && + if ((count % (size * segcount) >= size) && (count % (size * segcount) > ((size * segcount) / 2))) { num_phases++; } - /* Determine the number of 
elements per block and corresponding + /* Determine the number of elements per block and corresponding block sizes. The blocks are divided into "early" and "late" ones: - blocks 0 .. (split_rank - 1) are "early" and + blocks 0 .. (split_rank - 1) are "early" and blocks (split_rank) .. (size - 1) are "late". Early blocks are at most 1 element larger than the late ones. Note, these blocks will be split into num_phases segments, out of the largest one will have max_segcount elements. */ - COLL_TUNED_COMPUTE_BLOCKCOUNT( count, size, split_rank, + COLL_BASE_COMPUTE_BLOCKCOUNT( count, size, split_rank, early_blockcount, late_blockcount ); - COLL_TUNED_COMPUTE_BLOCKCOUNT( early_blockcount, num_phases, inbi, + COLL_BASE_COMPUTE_BLOCKCOUNT( early_blockcount, num_phases, inbi, max_segcount, k); max_real_segsize = true_extent + (ptrdiff_t)(max_segcount - 1) * extent; @@ -722,7 +704,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count ptrdiff_t phase_offset; int early_phase_segcount, late_phase_segcount, split_phase, phase_count; - /* + /* For each of the remote nodes: - post irecv for block (r-1) - send block (r) @@ -741,7 +723,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count */ send_to = (rank + 1) % size; recv_from = (rank + size - 1) % size; - + inbi = 0; /* Initialize first receive from the neighbor on the left */ ret = MCA_PML_CALL(irecv(inbuf[inbi], max_segcount, dtype, recv_from, @@ -750,81 +732,81 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count /* Send first block (my block) to the neighbor on the right: - compute my block and phase offset - send data */ - block_offset = ((rank < split_rank)? - ((ptrdiff_t)rank * (ptrdiff_t)early_blockcount) : + block_offset = ((rank < split_rank)? + ((ptrdiff_t)rank * (ptrdiff_t)early_blockcount) : ((ptrdiff_t)rank * (ptrdiff_t)late_blockcount + split_rank)); block_count = ((rank < split_rank)? early_blockcount : late_blockcount); - COLL_TUNED_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase, + COLL_BASE_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase, early_phase_segcount, late_phase_segcount) phase_count = ((phase < split_phase)? (early_phase_segcount) : (late_phase_segcount)); phase_offset = ((phase < split_phase)? - ((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) : + ((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) : ((ptrdiff_t)phase * (ptrdiff_t)late_phase_segcount + split_phase)); tmpsend = ((char*)rbuf) + (ptrdiff_t)(block_offset + phase_offset) * extent; ret = MCA_PML_CALL(send(tmpsend, phase_count, dtype, send_to, MCA_COLL_BASE_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD, comm)); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - + for (k = 2; k < size; k++) { const int prevblock = (rank + size - k + 1) % size; - + inbi = inbi ^ 0x1; - + /* Post irecv for the current block */ ret = MCA_PML_CALL(irecv(inbuf[inbi], max_segcount, dtype, recv_from, - MCA_COLL_BASE_TAG_ALLREDUCE, comm, + MCA_COLL_BASE_TAG_ALLREDUCE, comm, &reqs[inbi])); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - + /* Wait on previous block to arrive */ ret = ompi_request_wait(&reqs[inbi ^ 0x1], MPI_STATUS_IGNORE); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - + /* Apply operation on previous block: result goes to rbuf rbuf[prevblock] = inbuf[inbi ^ 0x1] (op) rbuf[prevblock] */ block_offset = ((prevblock < split_rank)? 
((ptrdiff_t)prevblock * (ptrdiff_t)early_blockcount) : ((ptrdiff_t)prevblock * (ptrdiff_t)late_blockcount + split_rank)); - block_count = ((prevblock < split_rank)? + block_count = ((prevblock < split_rank)? early_blockcount : late_blockcount); - COLL_TUNED_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase, + COLL_BASE_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase, early_phase_segcount, late_phase_segcount) phase_count = ((phase < split_phase)? (early_phase_segcount) : (late_phase_segcount)); phase_offset = ((phase < split_phase)? - ((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) : + ((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) : ((ptrdiff_t)phase * (ptrdiff_t)late_phase_segcount + split_phase)); tmprecv = ((char*)rbuf) + (ptrdiff_t)(block_offset + phase_offset) * extent; ompi_op_reduce(op, inbuf[inbi ^ 0x1], tmprecv, phase_count, dtype); - + /* send previous block to send_to */ ret = MCA_PML_CALL(send(tmprecv, phase_count, dtype, send_to, MCA_COLL_BASE_TAG_ALLREDUCE, MCA_PML_BASE_SEND_STANDARD, comm)); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } } - + /* Wait on the last block to arrive */ ret = ompi_request_wait(&reqs[inbi], MPI_STATUS_IGNORE); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - - /* Apply operation on the last block (from neighbor (rank + 1) + + /* Apply operation on the last block (from neighbor (rank + 1) rbuf[rank+1] = inbuf[inbi] (op) rbuf[rank + 1] */ recv_from = (rank + 1) % size; block_offset = ((recv_from < split_rank)? ((ptrdiff_t)recv_from * (ptrdiff_t)early_blockcount) : ((ptrdiff_t)recv_from * (ptrdiff_t)late_blockcount + split_rank)); - block_count = ((recv_from < split_rank)? + block_count = ((recv_from < split_rank)? early_blockcount : late_blockcount); - COLL_TUNED_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase, + COLL_BASE_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase, early_phase_segcount, late_phase_segcount) phase_count = ((phase < split_phase)? (early_phase_segcount) : (late_phase_segcount)); phase_offset = ((phase < split_phase)? - ((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) : + ((ptrdiff_t)phase * (ptrdiff_t)early_phase_segcount) : ((ptrdiff_t)phase * (ptrdiff_t)late_phase_segcount + split_phase)); tmprecv = ((char*)rbuf) + (ptrdiff_t)(block_offset + phase_offset) * extent; ompi_op_reduce(op, inbuf[inbi], tmprecv, phase_count, dtype); @@ -836,21 +818,21 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count for (k = 0; k < size - 1; k++) { const int recv_data_from = (rank + size - k) % size; const int send_data_from = (rank + 1 + size - k) % size; - const int send_block_offset = + const int send_block_offset = ((send_data_from < split_rank)? ((ptrdiff_t)send_data_from * (ptrdiff_t)early_blockcount) : ((ptrdiff_t)send_data_from * (ptrdiff_t)late_blockcount + split_rank)); - const int recv_block_offset = + const int recv_block_offset = ((recv_data_from < split_rank)? ((ptrdiff_t)recv_data_from * (ptrdiff_t)early_blockcount) : ((ptrdiff_t)recv_data_from * (ptrdiff_t)late_blockcount + split_rank)); - block_count = ((send_data_from < split_rank)? + block_count = ((send_data_from < split_rank)? 
early_blockcount : late_blockcount); tmprecv = (char*)rbuf + (ptrdiff_t)recv_block_offset * extent; tmpsend = (char*)rbuf + (ptrdiff_t)send_block_offset * extent; - ret = ompi_coll_tuned_sendrecv(tmpsend, block_count, dtype, send_to, + ret = ompi_coll_base_sendrecv(tmpsend, block_count, dtype, send_to, MCA_COLL_BASE_TAG_ALLREDUCE, tmprecv, early_blockcount, dtype, recv_from, MCA_COLL_BASE_TAG_ALLREDUCE, @@ -865,7 +847,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count return MPI_SUCCESS; error_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tRank %d Error occurred %d\n", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n", __FILE__, line, rank, ret)); if (NULL != inbuf[0]) free(inbuf[0]); if (NULL != inbuf[1]) free(inbuf[1]); @@ -875,8 +857,8 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count /* * Linear functions are copied from the BASIC coll module * they do not segment the message and are simple implementations - * but for some small number of nodes and/or small data sizes they - * are just as fast as tuned/tree based segmenting operations + * but for some small number of nodes and/or small data sizes they + * are just as fast as base/tree based segmenting operations * and as such may be selected by the decision functions * These are copied into this module due to the way we select modules * in V1. i.e. in V2 we will handle this differently and so will not @@ -895,7 +877,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count * Returns: - MPI_SUCCESS or error code */ int -ompi_coll_tuned_allreduce_intra_basic_linear(void *sbuf, void *rbuf, int count, +ompi_coll_base_allreduce_intra_basic_linear(void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -905,158 +887,28 @@ ompi_coll_tuned_allreduce_intra_basic_linear(void *sbuf, void *rbuf, int count, rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_basic_linear rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:allreduce_intra_basic_linear rank %d", rank)); /* Reduce to 0 and broadcast. 
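Both the nonoverlapping allreduce earlier in this file and this basic linear version share the same reduce-then-broadcast shape. In public-API terms the whole function is equivalent to the following sketch (illustrative only; the patched code dispatches to the renamed linear reduce and bcast routines and handles MPI_IN_PLACE explicitly):

    #include <mpi.h>

    /* Allreduce as reduce-to-root-0 followed by broadcast (sketch). MPI_Reduce
     * accepts MPI_IN_PLACE at the root, which mirrors the in-place branch
     * handled just below. */
    static int allreduce_linear_sketch(const void *sbuf, void *rbuf, int count,
                                       MPI_Datatype dtype, MPI_Op op, MPI_Comm comm)
    {
        int err = MPI_Reduce(sbuf, rbuf, count, dtype, op, 0, comm);
        if (MPI_SUCCESS != err) return err;
        return MPI_Bcast(rbuf, count, dtype, 0, comm);
    }
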
*/ if (MPI_IN_PLACE == sbuf) { if (0 == rank) { - err = ompi_coll_tuned_reduce_intra_basic_linear (MPI_IN_PLACE, rbuf, count, dtype, + err = ompi_coll_base_reduce_intra_basic_linear (MPI_IN_PLACE, rbuf, count, dtype, op, 0, comm, module); } else { - err = ompi_coll_tuned_reduce_intra_basic_linear(rbuf, NULL, count, dtype, + err = ompi_coll_base_reduce_intra_basic_linear(rbuf, NULL, count, dtype, op, 0, comm, module); } } else { - err = ompi_coll_tuned_reduce_intra_basic_linear(sbuf, rbuf, count, dtype, + err = ompi_coll_base_reduce_intra_basic_linear(sbuf, rbuf, count, dtype, op, 0, comm, module); } if (MPI_SUCCESS != err) { return err; } - return ompi_coll_tuned_bcast_intra_basic_linear(rbuf, count, dtype, 0, comm, module); + return ompi_coll_base_bcast_intra_basic_linear(rbuf, count, dtype, 0, comm, module); } /* copied function (with appropriate renaming) ends here */ - -/* The following are used by dynamic and forced rules */ - -/* publish details of each algorithm and if its forced/fixed/locked in */ -/* as you add methods/algorithms you must update this and the query/map routines */ - -/* this routine is called by the component only */ -/* this makes sure that the mca parameters are set to their initial values and perms */ -/* module does not call this they call the forced_getvalues routine instead */ - -int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices) -{ - mca_base_var_enum_t *new_enum; - - ompi_coll_tuned_forced_max_algorithms[ALLREDUCE] = coll_tuned_allreduce_algorithm_count; - - (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "allreduce_algorithm_count", - "Number of allreduce algorithms available", - MCA_BASE_VAR_TYPE_INT, NULL, 0, - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_CONSTANT, - &coll_tuned_allreduce_algorithm_count); - - /* MPI_T: This variable should eventually be bound to a communicator */ - coll_tuned_allreduce_forced_algorithm = 0; - (void) mca_base_var_enum_create("coll_tuned_allreduce_algorithms", allreduce_algorithms, &new_enum); - mca_param_indices->algorithm_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "allreduce_algorithm", - "Which allreduce algorithm is used. Can be locked down to any of: 0 ignore, 1 basic linear, 2 nonoverlapping (tuned reduce + tuned bcast), 3 recursive doubling, 4 ring, 5 segmented ring", - MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_allreduce_forced_algorithm); - OBJ_RELEASE(new_enum); - if (mca_param_indices->algorithm_param_index < 0) { - return mca_param_indices->algorithm_param_index; - } - - coll_tuned_allreduce_segment_size = 0; - mca_param_indices->segsize_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "allreduce_algorithm_segmentsize", - "Segment size in bytes used by default for allreduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 
0 bytes means no segmentation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_allreduce_segment_size); - - coll_tuned_allreduce_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */ - mca_param_indices->tree_fanout_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "allreduce_algorithm_tree_fanout", - "Fanout for n-tree used for allreduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_allreduce_tree_fanout); - - coll_tuned_allreduce_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */ - mca_param_indices->chain_fanout_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "allreduce_algorithm_chain_fanout", - "Fanout for chains used for allreduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_allreduce_chain_fanout); - - return (MPI_SUCCESS); -} - - -int ompi_coll_tuned_allreduce_intra_do_forced(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; - - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced selected algorithm %d, segment size %d", - data->user_forced[ALLREDUCE].algorithm, - data->user_forced[ALLREDUCE].segsize)); - - switch (data->user_forced[ALLREDUCE].algorithm) { - case (0): return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm, module); - case (1): return ompi_coll_tuned_allreduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, comm, module); - case (2): return ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm, module); - case (3): return ompi_coll_tuned_allreduce_intra_recursivedoubling (sbuf, rbuf, count, dtype, op, comm, module); - case (4): return ompi_coll_tuned_allreduce_intra_ring (sbuf, rbuf, count, dtype, op, comm, module); - case (5): return ompi_coll_tuned_allreduce_intra_ring_segmented (sbuf, rbuf, count, dtype, op, comm, module, data->user_forced[ALLREDUCE].segsize); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", - data->user_forced[ALLREDUCE].algorithm, - ompi_coll_tuned_forced_max_algorithms[ALLREDUCE])); - return (MPI_ERR_ARG); - } /* switch */ - -} - - -int ompi_coll_tuned_allreduce_intra_do_this(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module, - int algorithm, int faninout, int segsize) -{ - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this algorithm %d topo fan in/out %d segsize %d", - algorithm, faninout, segsize)); - - switch (algorithm) { - case (0): return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm, module); - case (1): return ompi_coll_tuned_allreduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, comm, module); - case (2): return 
ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm, module); - case (3): return ompi_coll_tuned_allreduce_intra_recursivedoubling (sbuf, rbuf, count, dtype, op, comm, module); - case (4): return ompi_coll_tuned_allreduce_intra_ring (sbuf, rbuf, count, dtype, op, comm, module); - case (5): return ompi_coll_tuned_allreduce_intra_ring_segmented (sbuf, rbuf, count, dtype, op, comm, module, segsize); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", - algorithm, ompi_coll_tuned_forced_max_algorithms[ALLREDUCE])); - return (MPI_ERR_ARG); - } /* switch */ - -} - diff --git a/ompi/mca/coll/base/coll_base_alltoall.c b/ompi/mca/coll/base/coll_base_alltoall.c index 3bd1ecfa04..fe71c5345f 100644 --- a/ompi/mca/coll/base/coll_base_alltoall.c +++ b/ompi/mca/coll/base/coll_base_alltoall.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2012 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -30,37 +30,18 @@ #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/coll_tags.h" #include "ompi/mca/pml/pml.h" -#include "coll_tuned.h" -#include "coll_tuned_topo.h" -#include "coll_tuned_util.h" - -/* alltoall algorithm variables */ -static int coll_tuned_alltoall_algorithm_count = 5; -static int coll_tuned_alltoall_forced_algorithm = 0; -static int coll_tuned_alltoall_segment_size = 0; -static int coll_tuned_alltoall_max_requests; -static int coll_tuned_alltoall_tree_fanout; -static int coll_tuned_alltoall_chain_fanout; - -/* valid values for coll_tuned_alltoall_forced_algorithm */ -static mca_base_var_enum_value_t alltoall_algorithms[] = { - {0, "ignore"}, - {1, "linear"}, - {2, "pairwise"}, - {3, "modified_bruck"}, - {4, "linear_sync"}, - {5, "two_proc"}, - {0, NULL} -}; +#include "ompi/mca/coll/base/coll_base_functions.h" +#include "coll_base_topo.h" +#include "coll_base_util.h" /* MPI_IN_PLACE all to all algorithm. TODO: implement a better one. */ static int -mca_coll_tuned_alltoall_intra_basic_inplace(void *rbuf, int rcount, +mca_coll_base_alltoall_intra_basic_inplace(void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; + mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; int i, j, size, rank, err=MPI_SUCCESS; MPI_Request *preq; char *tmp_buffer; @@ -91,7 +72,7 @@ mca_coll_tuned_alltoall_intra_basic_inplace(void *rbuf, int rcount, for (i = 0 ; i < size ; ++i) { for (j = i+1 ; j < size ; ++j) { /* Initiate all send/recv to/from others. */ - preq = tuned_module->tuned_data->mcct_reqs; + preq = base_module->base_data->mcct_reqs; if (i == rank) { /* Copy the data into the temporary buffer */ @@ -128,11 +109,8 @@ mca_coll_tuned_alltoall_intra_basic_inplace(void *rbuf, int rcount, } /* Wait for the requests to complete */ - err = ompi_request_wait_all (2, tuned_module->tuned_data->mcct_reqs, MPI_STATUSES_IGNORE); + err = ompi_request_wait_all (2, base_module->base_data->mcct_reqs, MPI_STATUSES_IGNORE); if (MPI_SUCCESS != err) { goto error_hndl; } - - /* Free the requests. 
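*/

/* A hypothetical standalone sketch of the in-place block-exchange pattern
 * implemented above, using MPI_Sendrecv_replace instead of the temporary
 * buffer and request pair the in-tree code manages. Assumes a zero lower
 * bound on rdtype; tag 0 stands in for the real collective tag. */
#include <mpi.h>

static int alltoall_inplace_sketch(void *rbuf, int rcount,
                                   MPI_Datatype rdtype, MPI_Comm comm)
{
    int rank, size, peer, err = MPI_SUCCESS;
    MPI_Aint lb, ext;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    MPI_Type_get_extent(rdtype, &lb, &ext);
    for (peer = 0; peer < size; peer++) {
        if (peer == rank) continue;   /* my own block stays in place */
        /* Swap the block I hold for 'peer' with the block 'peer' holds for me. */
        err = MPI_Sendrecv_replace((char *)rbuf + (MPI_Aint)peer * rcount * ext,
                                   rcount, rdtype, peer, 0, peer, 0,
                                   comm, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS != err) break;
    }
    return err;
}

/* End of sketch.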
*/ - mca_coll_tuned_free_reqs(tuned_module->tuned_data->mcct_reqs, 2); } } @@ -145,7 +123,7 @@ mca_coll_tuned_alltoall_intra_basic_inplace(void *rbuf, int rcount, return err; } -int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount, +int ompi_coll_base_alltoall_intra_pairwise(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -157,22 +135,22 @@ int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount, ptrdiff_t lb, sext, rext; if (MPI_IN_PLACE == sbuf) { - return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype, + return mca_coll_base_alltoall_intra_basic_inplace (rbuf, rcount, rdtype, comm, module); } size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:alltoall_intra_pairwise rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:alltoall_intra_pairwise rank %d", rank)); err = ompi_datatype_get_extent (sdtype, &lb, &sext); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } err = ompi_datatype_get_extent (rdtype, &lb, &rext); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } - + /* Perform pairwise exchange - starting from 1 so the local copy is last */ for (step = 1; step < size + 1; step++) { @@ -185,25 +163,25 @@ int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount, tmprecv = (char*)rbuf + (ptrdiff_t)recvfrom * rext * (ptrdiff_t)rcount; /* send and receive */ - err = ompi_coll_tuned_sendrecv( tmpsend, scount, sdtype, sendto, + err = ompi_coll_base_sendrecv( tmpsend, scount, sdtype, sendto, MCA_COLL_BASE_TAG_ALLTOALL, - tmprecv, rcount, rdtype, recvfrom, + tmprecv, rcount, rdtype, recvfrom, MCA_COLL_BASE_TAG_ALLTOALL, comm, MPI_STATUS_IGNORE, rank); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } } return MPI_SUCCESS; - + err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream, - "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); return err; } -int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount, +int ompi_coll_base_alltoall_intra_bruck(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -216,20 +194,20 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount, ptrdiff_t rlb, slb, tlb, sext, rext, tsext; struct ompi_datatype_t *new_ddt; #ifdef blahblah - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; + mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; + mca_coll_base_comm_t *data = base_module->base_data; #endif if (MPI_IN_PLACE == sbuf) { - return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype, + return mca_coll_base_alltoall_intra_basic_inplace (rbuf, rcount, rdtype, comm, module); } size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:alltoall_intra_bruck rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:alltoall_intra_bruck rank %d", rank)); err = ompi_datatype_get_extent (sdtype, &slb, &sext); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } @@ -242,14 +220,14 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount, #ifdef blahblah - /* try and SAVE memory by using the data segment hung off + /* try 
and SAVE memory by using the data segment hung off the communicator if possible */ - if (data->mcct_num_reqs >= size) { + if (data->mcct_num_reqs >= size) { /* we have enought preallocated for displments and lengths */ displs = (int*) data->mcct_reqs; blen = (int *) (displs + size); weallocated = 0; - } + } else { /* allocate the buffers ourself */ #endif displs = (int *) malloc(size * sizeof(int)); @@ -267,9 +245,9 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount, tmpbuf = tmpbuf_free - slb; /* Step 1 - local rotation - shift up by rank */ - err = ompi_datatype_copy_content_same_ddt (sdtype, + err = ompi_datatype_copy_content_same_ddt (sdtype, (int32_t) ((ptrdiff_t)(size - rank) * (ptrdiff_t)scount), - tmpbuf, + tmpbuf, ((char*) sbuf) + (ptrdiff_t)rank * (ptrdiff_t)scount * sext); if (err<0) { line = __LINE__; err = -1; goto err_hndl; @@ -277,7 +255,7 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount, if (rank != 0) { err = ompi_datatype_copy_content_same_ddt (sdtype, (ptrdiff_t)rank * (ptrdiff_t)scount, - tmpbuf + (ptrdiff_t)(size - rank) * (ptrdiff_t)scount* sext, + tmpbuf + (ptrdiff_t)(size - rank) * (ptrdiff_t)scount* sext, (char*) sbuf); if (err<0) { line = __LINE__; err = -1; goto err_hndl; @@ -294,7 +272,7 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount, /* create indexed datatype */ for (i = 1; i < size; i++) { if (( i & distance) == distance) { - displs[k] = (ptrdiff_t)i * (ptrdiff_t)scount; + displs[k] = (ptrdiff_t)i * (ptrdiff_t)scount; blen[k] = scount; k++; } @@ -307,7 +285,7 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount, if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } /* Sendreceive */ - err = ompi_coll_tuned_sendrecv ( tmpbuf, 1, new_ddt, sendto, + err = ompi_coll_base_sendrecv ( tmpbuf, 1, new_ddt, sendto, MCA_COLL_BASE_TAG_ALLTOALL, rbuf, 1, new_ddt, recvfrom, MCA_COLL_BASE_TAG_ALLTOALL, @@ -327,7 +305,7 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount, for (i = 0; i < size; i++) { err = ompi_datatype_copy_content_same_ddt (rdtype, (int32_t) rcount, - ((char*)rbuf) + ((ptrdiff_t)((rank - i + size) % size) * (ptrdiff_t)rcount * rext), + ((char*)rbuf) + ((ptrdiff_t)((rank - i + size) % size) * (ptrdiff_t)rcount * rext), tmpbuf + (ptrdiff_t)i * (ptrdiff_t)rcount * rext); if (err < 0) { line = __LINE__; err = -1; goto err_hndl; } } @@ -341,8 +319,8 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount, return OMPI_SUCCESS; err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream, - "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); if (tmpbuf != NULL) free(tmpbuf_free); if (displs != NULL) free(displs); @@ -352,10 +330,10 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount, /* * alltoall_intra_linear_sync - * + * * Function: Linear implementation of alltoall with limited number * of outstanding requests. - * Accepts: Same as MPI_Alltoall(), and the maximum number of + * Accepts: Same as MPI_Alltoall(), and the maximum number of * outstanding requests (actual number is 2 * max, since * we count receive and send requests separately). * Returns: MPI_SUCCESS or error code @@ -367,7 +345,7 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount, * - wait for any request to complete * - replace that request by the new one of the same type. 
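*/

/* A hypothetical standalone sketch of the request-replacement loop described
 * above, reduced to receives only: keep at most 'window' requests in flight
 * and, as each one completes, reuse its slot for the next. The window cap
 * and tag 0 are assumptions of the sketch. */
#include <mpi.h>

#define SKETCH_MAX_WINDOW 64

static int windowed_recvs(char *rbuf, int blockcount, MPI_Datatype dtype,
                          int nblocks, int window, MPI_Comm comm)
{
    MPI_Request reqs[SKETCH_MAX_WINDOW];
    MPI_Aint lb, ext;
    int posted, done = 0, idx, err;

    if (window > nblocks) window = nblocks;
    if (window > SKETCH_MAX_WINDOW) window = SKETCH_MAX_WINDOW;
    MPI_Type_get_extent(dtype, &lb, &ext);

    for (posted = 0; posted < window; posted++) {   /* first batch */
        err = MPI_Irecv(rbuf + (MPI_Aint)posted * blockcount * ext, blockcount,
                        dtype, MPI_ANY_SOURCE, 0, comm, &reqs[posted]);
        if (MPI_SUCCESS != err) return err;
    }
    while (done < nblocks) {
        err = MPI_Waitany(window, reqs, &idx, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS != err) return err;
        done++;
        if (posted < nblocks) {   /* replace the completed slot */
            err = MPI_Irecv(rbuf + (MPI_Aint)posted * blockcount * ext,
                            blockcount, dtype, MPI_ANY_SOURCE, 0, comm,
                            &reqs[idx]);
            if (MPI_SUCCESS != err) return err;
            posted++;
        }
    }
    return MPI_SUCCESS;
}

/* End of sketch; the real linear_sync below interleaves sends and receives
 * the same way.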
*/ -int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount, +int ompi_coll_base_alltoall_intra_linear_sync(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -382,7 +360,7 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount, ompi_request_t **reqs = NULL; if (MPI_IN_PLACE == sbuf) { - return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype, + return mca_coll_base_alltoall_intra_basic_inplace (rbuf, rcount, rdtype, comm, module); } @@ -391,8 +369,8 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount, size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_alltoall_intra_linear_sync rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "ompi_coll_base_alltoall_intra_linear_sync rank %d", rank)); error = ompi_datatype_get_extent(sdtype, &slb, &sext); if (OMPI_SUCCESS != error) { @@ -423,18 +401,18 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount, } /* Initiate send/recv to/from others. */ - total_reqs = (((max_outstanding_reqs > (size - 1)) || + total_reqs = (((max_outstanding_reqs > (size - 1)) || (max_outstanding_reqs <= 0)) ? (size - 1) : (max_outstanding_reqs)); - reqs = (ompi_request_t**) malloc( 2 * total_reqs * + reqs = (ompi_request_t**) malloc( 2 * total_reqs * sizeof(ompi_request_t*)); if (NULL == reqs) { error = -1; line = __LINE__; goto error_hndl; } - + prcv = (char *) rbuf; psnd = (char *) sbuf; /* Post first batch or ireceive and isend requests */ - for (nreqs = 0, nrreqs = 0, ri = (rank + 1) % size; nreqs < total_reqs; + for (nreqs = 0, nrreqs = 0, ri = (rank + 1) % size; nreqs < total_reqs; ri = (ri + 1) % size, ++nreqs, ++nrreqs) { error = MCA_PML_CALL(irecv @@ -442,7 +420,7 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount, MCA_COLL_BASE_TAG_ALLTOALL, comm, &reqs[nreqs])); if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; } } - for ( nsreqs = 0, si = (rank + size - 1) % size; nreqs < 2 * total_reqs; + for ( nsreqs = 0, si = (rank + size - 1) % size; nreqs < 2 * total_reqs; si = (si + size - 1) % size, ++nreqs, ++nsreqs) { error = MCA_PML_CALL(isend @@ -457,12 +435,12 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount, /* Optimization for the case when all requests have been posted */ error = ompi_request_wait_all(nreqs, reqs, MPI_STATUSES_IGNORE); if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; } - + } else { /* As requests complete, replace them with corresponding requests: - - wait for any request to complete, mark the request as + - wait for any request to complete, mark the request as MPI_REQUEST_NULL - - If it was a receive request, replace it with new irecv request + - If it was a receive request, replace it with new irecv request (if any) - if it was a send request, replace it with new isend request (if any) */ @@ -476,10 +454,10 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount, ncreqs++; if (completed < total_reqs) { if (nrreqs < (size - 1)) { - error = + error = MCA_PML_CALL(irecv (prcv + (ptrdiff_t)ri * rext, rcount, rdtype, ri, - MCA_COLL_BASE_TAG_ALLTOALL, comm, + MCA_COLL_BASE_TAG_ALLTOALL, comm, &reqs[completed])); if (MPI_SUCCESS != error) { line = __LINE__; goto error_hndl; } ++nrreqs; @@ -493,7 +471,7 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount, MCA_PML_BASE_SEND_STANDARD, comm, 
&reqs[completed])); ++nsreqs; - si = (si + size - 1) % size; + si = (si + size - 1) % size; } } } @@ -506,15 +484,15 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount, return MPI_SUCCESS; error_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream, - "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, error, + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, error, rank)); if (NULL != reqs) free(reqs); return error; } -int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount, +int ompi_coll_base_alltoall_intra_two_procs(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -526,14 +504,14 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount, ptrdiff_t sext, rext, lb; if (MPI_IN_PLACE == sbuf) { - return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype, + return mca_coll_base_alltoall_intra_basic_inplace (rbuf, rcount, rdtype, comm, module); } rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_alltoall_intra_two_procs rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "ompi_coll_base_alltoall_intra_two_procs rank %d", rank)); err = ompi_datatype_get_extent (sdtype, &lb, &sext); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } @@ -548,17 +526,17 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount, tmprecv = (char*)rbuf + (ptrdiff_t)remote * rext * (ptrdiff_t)rcount; /* send and receive */ - err = ompi_coll_tuned_sendrecv ( tmpsend, scount, sdtype, remote, + err = ompi_coll_base_sendrecv ( tmpsend, scount, sdtype, remote, MCA_COLL_BASE_TAG_ALLTOALL, - tmprecv, rcount, rdtype, remote, + tmprecv, rcount, rdtype, remote, MCA_COLL_BASE_TAG_ALLTOALL, comm, MPI_STATUS_IGNORE, rank ); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } /* ddt sendrecv your own data */ - err = ompi_datatype_sndrcv((char*) sbuf + (ptrdiff_t)rank * sext * (ptrdiff_t)scount, - (int32_t) scount, sdtype, - (char*) rbuf + (ptrdiff_t)rank * rext * (ptrdiff_t)rcount, + err = ompi_datatype_sndrcv((char*) sbuf + (ptrdiff_t)rank * sext * (ptrdiff_t)scount, + (int32_t) scount, sdtype, + (char*) rbuf + (ptrdiff_t)rank * rext * (ptrdiff_t)rcount, (int32_t) rcount, rdtype); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } @@ -566,7 +544,7 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount, return MPI_SUCCESS; err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream, + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); return err; @@ -577,8 +555,8 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount, /* * Linear functions are copied from the BASIC coll module * they do not segment the message and are simple implementations - * but for some small number of nodes and/or small data sizes they - * are just as fast as tuned/tree based segmenting operations + * but for some small number of nodes and/or small data sizes they + * are just as fast as base/tree based segmenting operations * and as such may be selected by the decision functions * These are copied into this module due to the way we select modules * in V1. i.e. 
in V2 we will handle this differently and so will not @@ -588,7 +566,7 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount, /* copied function (with appropriate renaming) starts here */ -int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount, +int ompi_coll_base_alltoall_intra_basic_linear(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -599,11 +577,11 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount, char *psnd, *prcv; MPI_Aint lb, sndinc, rcvinc; ompi_request_t **req, **sreq, **rreq; - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; + mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; + mca_coll_base_comm_t *data = base_module->base_data; if (MPI_IN_PLACE == sbuf) { - return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype, + return mca_coll_base_alltoall_intra_basic_inplace (rbuf, rcount, rdtype, comm, module); } @@ -612,8 +590,8 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount, size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_alltoall_intra_basic_linear rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "ompi_coll_base_alltoall_intra_basic_linear rank %d", rank)); err = ompi_datatype_get_extent(sdtype, &lb, &sndinc); @@ -654,23 +632,23 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount, /* Post all receives first -- a simple optimization */ - for (nreqs = 0, i = (rank + 1) % size; i != rank; + for (nreqs = 0, i = (rank + 1) % size; i != rank; i = (i + 1) % size, ++rreq, ++nreqs) { err = MCA_PML_CALL(irecv_init (prcv + (ptrdiff_t)i * rcvinc, rcount, rdtype, i, MCA_COLL_BASE_TAG_ALLTOALL, comm, rreq)); if (MPI_SUCCESS != err) { - ompi_coll_tuned_free_reqs(req, rreq - req); + ompi_coll_base_free_reqs(req, rreq - req); return err; } } - /* Now post all sends in reverse order + /* Now post all sends in reverse order - We would like to minimize the search time through message queue when messages actually arrive in the order in which they were posted. 
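*/

/* A small hypothetical illustration of the posting order used here: receives
 * ascend from rank+1 while sends descend from rank-1, so the first receive a
 * rank posts (from rank+1) pairs with the first send its neighbour posts
 * (rank+1's first send targets rank). The function name is invented. */
#include <stdio.h>

static void show_post_order(int rank, int size)
{
    int i;

    printf("rank %d posts irecv from:", rank);
    for (i = (rank + 1) % size; i != rank; i = (i + 1) % size)
        printf(" %d", i);
    printf("\nrank %d posts isend to:  ", rank);
    for (i = (rank + size - 1) % size; i != rank; i = (i + size - 1) % size)
        printf(" %d", i);
    printf("\n");
}

/* For size = 4, rank 1 receives from 2 3 0 and sends to 0 3 2; its first
 * receive (from 2) matches rank 2's first send (to 1).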
*/ - for (nreqs = 0, i = (rank + size - 1) % size; i != rank; + for (nreqs = 0, i = (rank + size - 1) % size; i != rank; i = (i + size - 1) % size, ++sreq, ++nreqs) { err = MCA_PML_CALL(isend_init @@ -678,7 +656,7 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD, comm, sreq)); if (MPI_SUCCESS != err) { - ompi_coll_tuned_free_reqs(req, sreq - req); + ompi_coll_base_free_reqs(req, sreq - req); return err; } } @@ -698,165 +676,10 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount, err = ompi_request_wait_all(nreqs, req, MPI_STATUSES_IGNORE); /* Free the reqs */ - - ompi_coll_tuned_free_reqs(req, nreqs); + ompi_coll_base_free_reqs(req, nreqs); /* All done */ - return err; } /* copied function (with appropriate renaming) ends here */ - -/* The following are used by dynamic and forced rules */ - -/* publish details of each algorithm and if its forced/fixed/locked in */ -/* as you add methods/algorithms you must update this and the query/map routines */ - -/* this routine is called by the component only */ -/* this makes sure that the mca parameters are set to their initial values and perms */ -/* module does not call this they call the forced_getvalues routine instead */ - -int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices) -{ - mca_base_var_enum_t*new_enum; - - ompi_coll_tuned_forced_max_algorithms[ALLTOALL] = coll_tuned_alltoall_algorithm_count; - - (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "alltoall_algorithm_count", - "Number of alltoall algorithms available", - MCA_BASE_VAR_TYPE_INT, NULL, 0, - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_CONSTANT, - &coll_tuned_alltoall_algorithm_count); - - /* MPI_T: This variable should eventually be bound to a communicator */ - coll_tuned_alltoall_forced_algorithm = 0; - (void) mca_base_var_enum_create("coll_tuned_alltoall_algorithms", alltoall_algorithms, &new_enum); - mca_param_indices->algorithm_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "alltoall_algorithm", - "Which alltoall algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 pairwise, 3: modified bruck, 4: linear with sync, 5:two proc only.", - MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_alltoall_forced_algorithm); - OBJ_RELEASE(new_enum); - if (mca_param_indices->algorithm_param_index < 0) { - return mca_param_indices->algorithm_param_index; - } - - coll_tuned_alltoall_segment_size = 0; - mca_param_indices->segsize_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "alltoall_algorithm_segmentsize", - "Segment size in bytes used by default for alltoall algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_alltoall_segment_size); - - coll_tuned_alltoall_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */ - mca_param_indices->tree_fanout_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "alltoall_algorithm_tree_fanout", - "Fanout for n-tree used for alltoall algorithms. 
Only has meaning if algorithm is forced and supports n-tree topo based operation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_alltoall_tree_fanout); - - coll_tuned_alltoall_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */ - mca_param_indices->chain_fanout_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "alltoall_algorithm_chain_fanout", - "Fanout for chains used for alltoall algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_alltoall_chain_fanout); - - coll_tuned_alltoall_max_requests = 0; /* no limit for alltoall by default */ - mca_param_indices->max_requests_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "alltoall_algorithm_max_requests", - "Maximum number of outstanding send or recv requests. Only has meaning for synchronized algorithms.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_alltoall_max_requests); - if (mca_param_indices->max_requests_param_index < 0) { - return mca_param_indices->max_requests_param_index; - } - - if (coll_tuned_alltoall_max_requests < 0) { - if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) { - opal_output( 0, "Maximum outstanding requests must be positive number greater than 1. Switching to system level default %d \n", - ompi_coll_tuned_init_max_requests ); - } - coll_tuned_alltoall_max_requests = 0; - } - - return (MPI_SUCCESS); -} - - - -int ompi_coll_tuned_alltoall_intra_do_forced(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; - - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced selected algorithm %d", - data->user_forced[ALLTOALL].algorithm)); - - switch (data->user_forced[ALLTOALL].algorithm) { - case (0): return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module); - case (1): return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module); - case (2): return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module); - case (3): return ompi_coll_tuned_alltoall_intra_bruck (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module); - case (4): return ompi_coll_tuned_alltoall_intra_linear_sync (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module, data->user_forced[ALLTOALL].max_requests); - case (5): return ompi_coll_tuned_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", - data->user_forced[ALLTOALL].algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL])); - return (MPI_ERR_ARG); - } /* switch */ - -} - - -int ompi_coll_tuned_alltoall_intra_do_this(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module, - int algorithm, int 
faninout, int segsize, - int max_requests) -{ - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this selected algorithm %d topo faninout %d segsize %d", - algorithm, faninout, segsize)); - - switch (algorithm) { - case (0): return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module); - case (1): return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module); - case (2): return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module); - case (3): return ompi_coll_tuned_alltoall_intra_bruck (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module); - case (4): return ompi_coll_tuned_alltoall_intra_linear_sync (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module, max_requests); - case (5): return ompi_coll_tuned_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", - algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL])); - return (MPI_ERR_ARG); - } /* switch */ - -} - diff --git a/ompi/mca/coll/base/coll_base_alltoallv.c b/ompi/mca/coll/base/coll_base_alltoallv.c index 412fb4366c..19f71674dd 100644 --- a/ompi/mca/coll/base/coll_base_alltoallv.c +++ b/ompi/mca/coll/base/coll_base_alltoallv.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2012 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -32,29 +32,17 @@ #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/coll_tags.h" #include "ompi/mca/pml/pml.h" -#include "coll_tuned.h" -#include "coll_tuned_topo.h" -#include "coll_tuned_util.h" - -/* alltoallv algorithm variables */ -static int coll_tuned_alltoallv_algorithm_count = 2; -static int coll_tuned_alltoallv_forced_algorithm = 0; - -/* valid values for coll_tuned_alltoallv_forced_algorithm */ -static mca_base_var_enum_value_t alltoallv_algorithms[] = { - {0, "ignore"}, - {1, "basic_linear"}, - {2, "pairwise"}, - {0, NULL} -}; +#include "ompi/mca/coll/base/coll_base_functions.h" +#include "coll_base_topo.h" +#include "coll_base_util.h" static int -mca_coll_tuned_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, const int *rdisps, +mca_coll_base_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, const int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; + mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; int i, j, size, rank, err=MPI_SUCCESS; MPI_Request *preq; char *tmp_buffer; @@ -90,7 +78,7 @@ mca_coll_tuned_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, con for (i = 0 ; i < size ; ++i) { for (j = i+1 ; j < size ; ++j) { /* Initiate all send/recv to/from others. 
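*/

/* A hypothetical standalone sketch of the exchange performed below for one
 * (i, j) pair, from one rank's point of view: stage the outgoing block in a
 * scratch buffer, receive the peer's block into the original slot, and send
 * the staged copy. Assumes the block is contiguous and nbytes long; tag 0
 * is arbitrary for the sketch. */
#include <string.h>
#include <mpi.h>

static int swap_one_block(char *myblock, char *scratch, size_t nbytes,
                          int count, MPI_Datatype dtype, int peer,
                          MPI_Comm comm)
{
    MPI_Request reqs[2];
    int err;

    memcpy(scratch, myblock, nbytes);           /* stage the outgoing block */
    err = MPI_Irecv(myblock, count, dtype, peer, 0, comm, &reqs[0]);
    if (MPI_SUCCESS != err) return err;
    err = MPI_Isend(scratch, count, dtype, peer, 0, comm, &reqs[1]);
    if (MPI_SUCCESS != err) return err;
    return MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
}

/* End of sketch.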
*/ - preq = tuned_module->tuned_data->mcct_reqs; + preq = base_module->base_data->mcct_reqs; if (i == rank && rcounts[j]) { /* Copy the data into the temporary buffer */ @@ -127,11 +115,8 @@ mca_coll_tuned_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, con } /* Wait for the requests to complete */ - err = ompi_request_wait_all (2, tuned_module->tuned_data->mcct_reqs, MPI_STATUSES_IGNORE); + err = ompi_request_wait_all (2, base_module->base_data->mcct_reqs, MPI_STATUSES_IGNORE); if (MPI_SUCCESS != err) { goto error_hndl; } - - /* Free the requests. */ - mca_coll_tuned_free_reqs(tuned_module->tuned_data->mcct_reqs, 2); } } @@ -145,7 +130,7 @@ mca_coll_tuned_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, con } int -ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps, +ompi_coll_base_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t *sdtype, void* rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t *rdtype, @@ -157,15 +142,15 @@ ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps, ptrdiff_t sext, rext; if (MPI_IN_PLACE == sbuf) { - return mca_coll_tuned_alltoallv_intra_basic_inplace (rbuf, rcounts, rdisps, + return mca_coll_base_alltoallv_intra_basic_inplace (rbuf, rcounts, rdisps, rdtype, comm, module); } size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:alltoallv_intra_pairwise rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:alltoallv_intra_pairwise rank %d", rank)); ompi_datatype_type_extent(sdtype, &sext); ompi_datatype_type_extent(rdtype, &rext); @@ -182,34 +167,33 @@ ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps, prcv = (char*)rbuf + (ptrdiff_t)rdisps[recvfrom] * rext; /* send and receive */ - err = ompi_coll_tuned_sendrecv( psnd, scounts[sendto], sdtype, sendto, + err = ompi_coll_base_sendrecv( psnd, scounts[sendto], sdtype, sendto, MCA_COLL_BASE_TAG_ALLTOALLV, - prcv, rcounts[recvfrom], rdtype, recvfrom, + prcv, rcounts[recvfrom], rdtype, recvfrom, MCA_COLL_BASE_TAG_ALLTOALLV, comm, MPI_STATUS_IGNORE, rank); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } } return MPI_SUCCESS; - + err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream, - "%s:%4d\tError occurred %d, rank %2d at step %d", __FILE__, line, + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "%s:%4d\tError occurred %d, rank %2d at step %d", __FILE__, line, err, rank, step)); return err; } -/* +/** * Linear functions are copied from the basic coll module. For * some small number of nodes and/or small data sizes they are just as - * fast as tuned/tree based segmenting operations and as such may be + * fast as base/tree based segmenting operations and as such may be * selected by the decision functions. These are copied into this module * due to the way we select modules in V1. i.e. in V2 we will handle this - * differently and so will not have to duplicate code. - * GEF Oct05 after asking Jeff. + * differently and so will not have to duplicate code. 
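*/

/* A hypothetical MPI-API-level sketch of the linear algorithm that follows:
 * post a nonblocking receive and send for every peer (self included, which
 * MPI permits) and wait for everything at once. Displacements are in units
 * of the datatype extent, as in MPI_Alltoallv; error paths are simplified. */
#include <stdlib.h>
#include <mpi.h>

static int alltoallv_linear_sketch(const void *sbuf, const int scounts[],
                                   const int sdispls[], MPI_Datatype sdtype,
                                   void *rbuf, const int rcounts[],
                                   const int rdispls[], MPI_Datatype rdtype,
                                   MPI_Comm comm)
{
    int i, size, nreqs = 0, err = MPI_SUCCESS;
    MPI_Aint lb, sext, rext;
    MPI_Request *reqs;

    MPI_Comm_size(comm, &size);
    MPI_Type_get_extent(sdtype, &lb, &sext);
    MPI_Type_get_extent(rdtype, &lb, &rext);
    reqs = (MPI_Request *)malloc(2 * size * sizeof(MPI_Request));
    if (NULL == reqs) return MPI_ERR_OTHER;

    for (i = 0; i < size && MPI_SUCCESS == err; i++)   /* receives first */
        err = MPI_Irecv((char *)rbuf + rdispls[i] * rext, rcounts[i], rdtype,
                        i, 0, comm, &reqs[nreqs++]);
    for (i = 0; i < size && MPI_SUCCESS == err; i++)
        err = MPI_Isend((const char *)sbuf + sdispls[i] * sext, scounts[i],
                        sdtype, i, 0, comm, &reqs[nreqs++]);
    if (MPI_SUCCESS == err)
        err = MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE);
    free(reqs);
    return err;
}

/* End of sketch; the in-tree version below additionally handles MPI_IN_PLACE
 * and reuses the requests preallocated on the communicator.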
*/ int -ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdisps, +ompi_coll_base_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t *rdtype, @@ -220,19 +204,19 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis char *psnd, *prcv; ptrdiff_t sext, rext; MPI_Request *preq; - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; + mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; + mca_coll_base_comm_t *data = base_module->base_data; if (MPI_IN_PLACE == sbuf) { - return mca_coll_tuned_alltoallv_intra_basic_inplace (rbuf, rcounts, rdisps, + return mca_coll_base_alltoallv_intra_basic_inplace (rbuf, rcounts, rdisps, rdtype, comm, module); } size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:alltoallv_intra_basic_linear rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:alltoallv_intra_basic_linear rank %d", rank)); ompi_datatype_type_extent(sdtype, &sext); ompi_datatype_type_extent(rdtype, &rext); @@ -269,7 +253,7 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis preq++)); ++nreqs; if (MPI_SUCCESS != err) { - ompi_coll_tuned_free_reqs(data->mcct_reqs, nreqs); + ompi_coll_base_free_reqs(data->mcct_reqs, nreqs); return err; } } @@ -287,7 +271,7 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis preq++)); ++nreqs; if (MPI_SUCCESS != err) { - ompi_coll_tuned_free_reqs(data->mcct_reqs, nreqs); + ompi_coll_base_free_reqs(data->mcct_reqs, nreqs); return err; } } @@ -305,128 +289,7 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis MPI_STATUSES_IGNORE); /* Free the requests. */ - ompi_coll_tuned_free_reqs(data->mcct_reqs, nreqs); + ompi_coll_base_free_reqs(data->mcct_reqs, nreqs); return err; } - -/* - * The following are used by dynamic and forced rules. Publish - * details of each algorithm and if its forced/fixed/locked in as you add - * methods/algorithms you must update this and the query/map routines. - * This routine is called by the component only. This makes sure that - * the mca parameters are set to their initial values and perms. - * Module does not call this. They call the forced_getvalues routine - * instead. - */ -int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t - *mca_param_indices) -{ - mca_base_var_enum_t *new_enum; - - ompi_coll_tuned_forced_max_algorithms[ALLTOALLV] = coll_tuned_alltoallv_algorithm_count; - - (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "alltoallv_algorithm_count", - "Number of alltoallv algorithms available", - MCA_BASE_VAR_TYPE_INT, NULL, 0, - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_CONSTANT, - &coll_tuned_alltoallv_algorithm_count); - - /* MPI_T: This variable should eventually be bound to a communicator */ - coll_tuned_alltoallv_forced_algorithm = 0; - (void) mca_base_var_enum_create("coll_tuned_alltoallv_algorithms", alltoallv_algorithms, &new_enum); - mca_param_indices->algorithm_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "alltoallv_algorithm", - "Which alltoallv algorithm is used. 
" - "Can be locked down to choice of: 0 ignore, " - "1 basic linear, 2 pairwise.", - MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_alltoallv_forced_algorithm); - OBJ_RELEASE(new_enum); - if (mca_param_indices->algorithm_param_index < 0) { - return mca_param_indices->algorithm_param_index; - } - - return (MPI_SUCCESS); -} - - - -int ompi_coll_tuned_alltoallv_intra_do_forced(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t *sdtype, - void* rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; - - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:alltoallv_intra_do_forced selected algorithm %d", - data->user_forced[ALLTOALLV].algorithm)); - - switch (data->user_forced[ALLTOALLV].algorithm) { - case (0): - return ompi_coll_tuned_alltoallv_intra_dec_fixed(sbuf, scounts, sdisps, sdtype, - rbuf, rcounts, rdisps, rdtype, - comm, module); - case (1): - return ompi_coll_tuned_alltoallv_intra_basic_linear(sbuf, scounts, sdisps, sdtype, - rbuf, rcounts, rdisps, rdtype, - comm, module); - case (2): - return ompi_coll_tuned_alltoallv_intra_pairwise(sbuf, scounts, sdisps, sdtype, - rbuf, rcounts, rdisps, rdtype, - comm, module); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:alltoallv_intra_do_forced attempt to " - "select algorithm %d when only 0-%d is valid.", - data->user_forced[ALLTOALLV].algorithm, - ompi_coll_tuned_forced_max_algorithms[ALLTOALLV])); - return (MPI_ERR_ARG); - } -} - -/* If the user selects dynamic rules and specifies the algorithm to - * use, then this function is called. */ -int ompi_coll_tuned_alltoallv_intra_do_this(void *sbuf, int *scounts, int *sdisps, - struct ompi_datatype_t *sdtype, - void* rbuf, int *rcounts, int *rdisps, - struct ompi_datatype_t *rdtype, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module, - int algorithm) -{ - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:alltoallv_intra_do_this selected algorithm %d ", - algorithm)); - - switch (algorithm) { - case (0): - return ompi_coll_tuned_alltoallv_intra_dec_fixed(sbuf, scounts, sdisps, sdtype, - rbuf, rcounts, rdisps, rdtype, - comm, module); - case (1): - return ompi_coll_tuned_alltoallv_intra_basic_linear(sbuf, scounts, sdisps, sdtype, - rbuf, rcounts, rdisps, rdtype, - comm, module); - case (2): - return ompi_coll_tuned_alltoallv_intra_pairwise(sbuf, scounts, sdisps, sdtype, - rbuf, rcounts, rdisps, rdtype, - comm, module); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:alltoall_intra_do_this attempt to select " - "algorithm %d when only 0-%d is valid.", - algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALLV])); - return (MPI_ERR_ARG); - } -} diff --git a/ompi/mca/coll/base/coll_base_barrier.c b/ompi/mca/coll/base/coll_base_barrier.c index ca9d143f62..15b3f4883d 100644 --- a/ompi/mca/coll/base/coll_base_barrier.c +++ b/ompi/mca/coll/base/coll_base_barrier.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -31,25 +31,9 @@ #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/coll_tags.h" #include "ompi/mca/pml/pml.h" -#include "coll_tuned.h" -#include "coll_tuned_topo.h" -#include "coll_tuned_util.h" - -/* barrier algorithm variables */ -static int coll_tuned_barrier_algorithm_count = 6; -static int coll_tuned_barrier_forced_algorithm = 0; - -/* valid values for coll_tuned_barrier_forced_algorithm */ -static mca_base_var_enum_value_t barrier_algorithms[] = { - {0, "ignore"}, - {1, "linear"}, - {2, "double_ring"}, - {3, "recursive_doubling"}, - {4, "bruck"}, - {5, "two_proc"}, - {6, "tree"}, - {0, NULL} -}; +#include "ompi/mca/coll/base/coll_base_functions.h" +#include "coll_base_topo.h" +#include "coll_base_util.h" /** * A quick version of the MPI_Sendreceive implemented for the barrier. @@ -57,7 +41,7 @@ static mca_base_var_enum_value_t barrier_algorithms[] = { * signal a two peer synchronization. */ static inline int -ompi_coll_tuned_sendrecv_zero(int dest, int stag, +ompi_coll_base_sendrecv_zero(int dest, int stag, int source, int rtag, MPI_Comm comm) @@ -87,8 +71,8 @@ ompi_coll_tuned_sendrecv_zero(int dest, int stag, err_index = 1; } err = statuses[err_index].MPI_ERROR; - OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred in the %s" - " stage of ompi_coll_tuned_sendrecv_zero\n", + OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred in the %s" + " stage of ompi_coll_base_sendrecv_zero\n", __FILE__, line, err, (0 == err_index ? "receive" : "send"))); return err; } @@ -100,21 +84,21 @@ ompi_coll_tuned_sendrecv_zero(int dest, int stag, /* Error discovered during the posting of the irecv or isend, * and no status is available. */ - OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n", + OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred\n", __FILE__, line, err)); return err; } /* - * Barrier is ment to be a synchronous operation, as some BTLs can mark - * a request done before its passed to the NIC and progress might not be made - * elsewhere we cannot allow a process to exit the barrier until its last + * Barrier is meant to be a synchronous operation: as some BTLs can mark + * a request done before it is passed to the NIC, and progress might not be made + * elsewhere, we cannot allow a process to exit the barrier until its last * [round of] sends are completed.
* - * It is last round of sends rather than 'last' individual send as each pair of - * peers can use different channels/devices/btls and the receiver of one of + * It is the last round of sends, rather than the 'last' individual send, as each pair of + * peers can use different channels/devices/BTLs, and the receiver of one of these sends might be forced to wait as the sender - * leaves the collective and does not make progress until the next mpi call + * leaves the collective and does not make progress until the next MPI call * */ @@ -124,7 +108,7 @@ ompi_coll_tuned_sendrecv_zero(int dest, int stag, * synchronous guarantee made by the last ring of sends, which are synchronous * */ -int ompi_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm, +int ompi_coll_base_barrier_intra_doublering(struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { int rank, size, err = 0, line = 0, left, right; @@ -132,50 +116,50 @@ int ompi_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm, rank = ompi_comm_rank(comm); size = ompi_comm_size(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_barrier_intra_doublering rank %d", rank)); - + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"ompi_coll_base_barrier_intra_doublering rank %d", rank)); + left = ((rank-1)%size); right = ((rank+1)%size); if (rank > 0) { /* receive message from the left */ - err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left, - MCA_COLL_BASE_TAG_BARRIER, comm, + err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left, + MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } } /* Send message to the right */ - err = MCA_PML_CALL(send((void*)NULL, 0, MPI_BYTE, right, - MCA_COLL_BASE_TAG_BARRIER, + err = MCA_PML_CALL(send((void*)NULL, 0, MPI_BYTE, right, + MCA_COLL_BASE_TAG_BARRIER, MCA_PML_BASE_SEND_STANDARD, comm)); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } /* root needs to receive from the last node */ if (rank == 0) { - err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left, - MCA_COLL_BASE_TAG_BARRIER, comm, + err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left, + MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } } /* Allow nodes to exit */ if (rank > 0) { /* post Receive from left */ - err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left, - MCA_COLL_BASE_TAG_BARRIER, comm, + err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left, + MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } } /* send message to the right one */ - err = MCA_PML_CALL(send((void*)NULL, 0, MPI_BYTE, right, - MCA_COLL_BASE_TAG_BARRIER, + err = MCA_PML_CALL(send((void*)NULL, 0, MPI_BYTE, right, + MCA_COLL_BASE_TAG_BARRIER, MCA_PML_BASE_SEND_SYNCHRONOUS, comm)); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } - + /* rank 0 post receive from the last node */ if (rank == 0) { - err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left, - MCA_COLL_BASE_TAG_BARRIER, comm, + err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, left, + MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; } } @@ -183,7 +167,7 @@ int ompi_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm, return MPI_SUCCESS; err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", +
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); return err; } @@ -193,15 +177,15 @@ int ompi_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm, * To make synchronous, uses sync sends and sync sendrecvs */ -int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *comm, +int ompi_coll_base_barrier_intra_recursivedoubling(struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { int rank, size, adjsize, err, line, mask, remote; rank = ompi_comm_rank(comm); size = ompi_comm_size(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_barrier_intra_recursivedoubling rank %d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "ompi_coll_base_barrier_intra_recursivedoubling rank %d", rank)); /* do nearest power of 2 less than size calc */ @@ -213,7 +197,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t * if (rank >= adjsize) { /* send message to lower ranked node */ remote = rank - adjsize; - err = ompi_coll_tuned_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER, + err = ompi_coll_base_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER, remote, MCA_COLL_BASE_TAG_BARRIER, comm); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} @@ -222,7 +206,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t * /* receive message from high level rank */ err = MCA_PML_CALL(recv((void*)NULL, 0, MPI_BYTE, rank+adjsize, - MCA_COLL_BASE_TAG_BARRIER, comm, + MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} @@ -238,7 +222,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t * if (remote >= adjsize) continue; /* post receive from the remote node */ - err = ompi_coll_tuned_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER, + err = ompi_coll_base_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER, remote, MCA_COLL_BASE_TAG_BARRIER, comm); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} @@ -250,8 +234,8 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t * if (rank < (size - adjsize)) { /* send enter message to higher ranked node */ remote = rank + adjsize; - err = MCA_PML_CALL(send((void*)NULL, 0, MPI_BYTE, remote, - MCA_COLL_BASE_TAG_BARRIER, + err = MCA_PML_CALL(send((void*)NULL, 0, MPI_BYTE, remote, + MCA_COLL_BASE_TAG_BARRIER, MCA_PML_BASE_SEND_SYNCHRONOUS, comm)); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} @@ -261,7 +245,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t * return MPI_SUCCESS; err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); return err; } @@ -271,23 +255,23 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t * * To make synchronous, uses sync sends and sync sendrecvs */ -int ompi_coll_tuned_barrier_intra_bruck(struct ompi_communicator_t *comm, +int ompi_coll_base_barrier_intra_bruck(struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { int rank, size, distance, to, from, err, line = 0; rank = ompi_comm_rank(comm); size = ompi_comm_size(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_barrier_intra_bruck rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + 
"ompi_coll_base_barrier_intra_bruck rank %d", rank)); /* exchange data with rank-2^k and rank+2^k */ - for (distance = 1; distance < size; distance <<= 1) { + for (distance = 1; distance < size; distance <<= 1) { from = (rank + size - distance) % size; to = (rank + distance) % size; /* send message to lower ranked node */ - err = ompi_coll_tuned_sendrecv_zero(to, MCA_COLL_BASE_TAG_BARRIER, + err = ompi_coll_base_sendrecv_zero(to, MCA_COLL_BASE_TAG_BARRIER, from, MCA_COLL_BASE_TAG_BARRIER, comm); if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;} @@ -296,7 +280,7 @@ int ompi_coll_tuned_barrier_intra_bruck(struct ompi_communicator_t *comm, return MPI_SUCCESS; err_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); return err; } @@ -306,17 +290,17 @@ int ompi_coll_tuned_barrier_intra_bruck(struct ompi_communicator_t *comm, * To make synchronous, uses sync sends and sync sendrecvs */ /* special case for two processes */ -int ompi_coll_tuned_barrier_intra_two_procs(struct ompi_communicator_t *comm, +int ompi_coll_base_barrier_intra_two_procs(struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { int remote, err; remote = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_barrier_intra_two_procs rank %d", remote)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "ompi_coll_base_barrier_intra_two_procs rank %d", remote)); remote = (remote + 1) & 0x1; - err = ompi_coll_tuned_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER, + err = ompi_coll_base_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER, remote, MCA_COLL_BASE_TAG_BARRIER, comm); return (err); @@ -327,7 +311,7 @@ int ompi_coll_tuned_barrier_intra_two_procs(struct ompi_communicator_t *comm, * Linear functions are copied from the BASIC coll module * they do not segment the message and are simple implementations * but for some small number of nodes and/or small data sizes they - * are just as fast as tuned/tree based segmenting operations + * are just as fast as base/tree based segmenting operations * and as such may be selected by the decision functions * These are copied into this module due to the way we select modules * in V1. i.e. in V2 we will handle this differently and so will not @@ -337,7 +321,7 @@ int ompi_coll_tuned_barrier_intra_two_procs(struct ompi_communicator_t *comm, /* copied function (with appropriate renaming) starts here */ -static int ompi_coll_tuned_barrier_intra_basic_linear(struct ompi_communicator_t *comm, +static int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { int i, err, rank, size; @@ -347,14 +331,14 @@ static int ompi_coll_tuned_barrier_intra_basic_linear(struct ompi_communicator_t /* All non-root send & receive zero-length message. 
*/ if (rank > 0) { - err = MCA_PML_CALL(send (NULL, 0, MPI_BYTE, 0, + err = MCA_PML_CALL(send (NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER, MCA_PML_BASE_SEND_STANDARD, comm)); if (MPI_SUCCESS != err) { return err; } - err = MCA_PML_CALL(recv (NULL, 0, MPI_BYTE, 0, + err = MCA_PML_CALL(recv (NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); if (MPI_SUCCESS != err) { @@ -370,7 +354,7 @@ static int ompi_coll_tuned_barrier_intra_basic_linear(struct ompi_communicator_t requests = (ompi_request_t**)malloc( size * sizeof(ompi_request_t*) ); for (i = 1; i < size; ++i) { err = MCA_PML_CALL(irecv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE, - MCA_COLL_BASE_TAG_BARRIER, comm, + MCA_COLL_BASE_TAG_BARRIER, comm, &(requests[i]))); if (MPI_SUCCESS != err) { return err; @@ -380,7 +364,7 @@ static int ompi_coll_tuned_barrier_intra_basic_linear(struct ompi_communicator_t for (i = 1; i < size; ++i) { err = MCA_PML_CALL(isend(NULL, 0, MPI_BYTE, i, - MCA_COLL_BASE_TAG_BARRIER, + MCA_COLL_BASE_TAG_BARRIER, MCA_PML_BASE_SEND_STANDARD, comm, &(requests[i]))); if (MPI_SUCCESS != err) { @@ -400,17 +384,17 @@ static int ompi_coll_tuned_barrier_intra_basic_linear(struct ompi_communicator_t /* * Another recursive doubling type algorithm, but in this case - * we go up the tree and back down the tree. + * we go up the tree and back down the tree. */ -int ompi_coll_tuned_barrier_intra_tree(struct ompi_communicator_t *comm, +int ompi_coll_base_barrier_intra_tree(struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { int rank, size, depth, err, jump, partner; rank = ompi_comm_rank(comm); size = ompi_comm_size(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_barrier_intra_tree %d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "ompi_coll_base_barrier_intra_tree %d", rank)); /* Find the nearest power of 2 of the communicator size. 
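*/

/* A hypothetical sketch of the computation referred to above: the tree
 * barrier rounds the communicator size up to the next power of two, which
 * bounds the depth of the fan-in/fan-out phases below. */
static int next_power_of_two(int size)
{
    int depth = 1;

    while (depth < size)
        depth <<= 1;          /* e.g. size = 6 -> depth = 8 */
    return depth;
}

/* With partner = rank ^ jump for jump = 1, 2, ..., depth/2, each round pairs
 * ranks whose ids differ in exactly one bit.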
*/ @@ -420,21 +404,21 @@ int ompi_coll_tuned_barrier_intra_tree(struct ompi_communicator_t *comm, partner = rank ^ jump; if (!(partner & (jump-1)) && partner < size) { if (partner > rank) { - err = MCA_PML_CALL(recv (NULL, 0, MPI_BYTE, partner, + err = MCA_PML_CALL(recv (NULL, 0, MPI_BYTE, partner, MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); if (MPI_SUCCESS != err) return err; } else if (partner < rank) { err = MCA_PML_CALL(send (NULL, 0, MPI_BYTE, partner, - MCA_COLL_BASE_TAG_BARRIER, + MCA_COLL_BASE_TAG_BARRIER, MCA_PML_BASE_SEND_STANDARD, comm)); if (MPI_SUCCESS != err) return err; } } } - + depth >>= 1; for (jump = depth; jump>0; jump>>=1) { partner = rank ^ jump; @@ -446,7 +430,7 @@ int ompi_coll_tuned_barrier_intra_tree(struct ompi_communicator_t *comm, if (MPI_SUCCESS != err) return err; } else if (partner < rank) { - err = MCA_PML_CALL(recv (NULL, 0, MPI_BYTE, partner, + err = MCA_PML_CALL(recv (NULL, 0, MPI_BYTE, partner, MCA_COLL_BASE_TAG_BARRIER, comm, MPI_STATUS_IGNORE)); if (MPI_SUCCESS != err) @@ -457,101 +441,3 @@ int ompi_coll_tuned_barrier_intra_tree(struct ompi_communicator_t *comm, return MPI_SUCCESS; } - - -/* The following are used by dynamic and forced rules */ - -/* publish details of each algorithm and if its forced/fixed/locked in */ -/* as you add methods/algorithms you must update this and the query/map */ -/* routines */ - -/* this routine is called by the component only */ -/* this makes sure that the mca parameters are set to their initial values */ -/* and perms */ -/* module does not call this they call the forced_getvalues routine instead */ - -int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices) -{ - mca_base_var_enum_t *new_enum; - - ompi_coll_tuned_forced_max_algorithms[BARRIER] = coll_tuned_barrier_algorithm_count; - - (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "barrier_algorithm_count", - "Number of barrier algorithms available", - MCA_BASE_VAR_TYPE_INT, NULL, 0, - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_CONSTANT, - &coll_tuned_barrier_algorithm_count); - - /* MPI_T: This variable should eventually be bound to a communicator */ - coll_tuned_barrier_forced_algorithm = 0; - (void) mca_base_var_enum_create("coll_tuned_barrier_algorithms", barrier_algorithms, &new_enum); - mca_param_indices->algorithm_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "barrier_algorithm", - "Which barrier algorithm is used. 
Can be locked down to choice of: 0 ignore, 1 linear, 2 double ring, 3: recursive doubling 4: bruck, 5: two proc only, 6: tree", - MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_barrier_forced_algorithm); - OBJ_RELEASE(new_enum); - if (mca_param_indices->algorithm_param_index < 0) { - return mca_param_indices->algorithm_param_index; - } - - return (MPI_SUCCESS); -} - - - -int ompi_coll_tuned_barrier_intra_do_forced(struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; - - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:barrier_intra_do_forced selected algorithm %d", - data->user_forced[BARRIER].algorithm)); - - switch (data->user_forced[BARRIER].algorithm) { - case (0): return ompi_coll_tuned_barrier_intra_dec_fixed (comm, module); - case (1): return ompi_coll_tuned_barrier_intra_basic_linear (comm, module); - case (2): return ompi_coll_tuned_barrier_intra_doublering (comm, module); - case (3): return ompi_coll_tuned_barrier_intra_recursivedoubling (comm, module); - case (4): return ompi_coll_tuned_barrier_intra_bruck (comm, module); - case (5): return ompi_coll_tuned_barrier_intra_two_procs (comm, module); - case (6): return ompi_coll_tuned_barrier_intra_tree (comm, module); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", - data->user_forced[BARRIER].algorithm, - ompi_coll_tuned_forced_max_algorithms[BARRIER])); - return (MPI_ERR_ARG); - } /* switch */ - -} - - -int ompi_coll_tuned_barrier_intra_do_this (struct ompi_communicator_t *comm, - mca_coll_base_module_t *module, - int algorithm, int faninout, int segsize) -{ - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_this selected algorithm %d topo fanin/out%d", algorithm, faninout)); - - switch (algorithm) { - case (0): return ompi_coll_tuned_barrier_intra_dec_fixed (comm, module); - case (1): return ompi_coll_tuned_barrier_intra_basic_linear (comm, module); - case (2): return ompi_coll_tuned_barrier_intra_doublering (comm, module); - case (3): return ompi_coll_tuned_barrier_intra_recursivedoubling (comm, module); - case (4): return ompi_coll_tuned_barrier_intra_bruck (comm, module); - case (5): return ompi_coll_tuned_barrier_intra_two_procs (comm, module); - case (6): return ompi_coll_tuned_barrier_intra_tree (comm, module); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", - algorithm, ompi_coll_tuned_forced_max_algorithms[BARRIER])); - return (MPI_ERR_ARG); - } /* switch */ -} - diff --git a/ompi/mca/coll/base/coll_base_bcast.c b/ompi/mca/coll/base/coll_base_bcast.c index 079dbbcacc..8f7fe1b3e4 100644 --- a/ompi/mca/coll/base/coll_base_bcast.c +++ b/ompi/mca/coll/base/coll_base_bcast.c @@ -3,18 +3,18 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2012 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. 
All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -27,33 +27,14 @@ #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/coll_tags.h" #include "ompi/mca/pml/pml.h" -#include "coll_tuned.h" -#include "coll_tuned_topo.h" -#include "coll_tuned_util.h" - -/* bcast algorithm variables */ -static int coll_tuned_bcast_algorithm_count = 6; -static int coll_tuned_bcast_forced_algorithm = 0; -static int coll_tuned_bcast_segment_size = 0; -static int coll_tuned_bcast_tree_fanout; -static int coll_tuned_bcast_chain_fanout; - -/* valid values for coll_tuned_bcast_forced_algorithm */ -static mca_base_var_enum_value_t bcast_algorithms[] = { - {0, "ignore"}, - {1, "basic_linear"}, - {2, "chain"}, - {3, "pipeline"}, - {4, "split_binary_tree"}, - {5, "binary_tree"}, - {6, "binomial"}, - {0, NULL} -}; +#include "ompi/mca/coll/base/coll_base_functions.h" +#include "coll_base_topo.h" +#include "coll_base_util.h" int -ompi_coll_tuned_bcast_intra_generic( void* buffer, - int original_count, - struct ompi_datatype_t* datatype, +ompi_coll_base_bcast_intra_generic( void* buffer, + int original_count, + struct ompi_datatype_t* datatype, int root, struct ompi_communicator_t* comm, mca_coll_base_module_t *module, @@ -62,12 +43,12 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer, { int err = 0, line, i, rank, size, segindex, req_index; int num_segments; /* Number of segments */ - int sendcount; /* number of elements sent in this segment */ + int sendcount; /* number of elements sent in this segment */ size_t realsegsize, type_size; char *tmpbuf; ptrdiff_t extent, lb; ompi_request_t *recv_reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL}; -#if !defined(COLL_TUNED_BCAST_USE_BLOCKING) +#if !defined(COLL_BASE_BCAST_USE_BLOCKING) ompi_request_t **send_reqs = NULL; #endif @@ -79,20 +60,20 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer, ompi_datatype_type_size( datatype, &type_size ); num_segments = (original_count + count_by_segment - 1) / count_by_segment; realsegsize = (ptrdiff_t)count_by_segment * extent; - + /* Set the buffer pointers */ tmpbuf = (char *) buffer; -#if !defined(COLL_TUNED_BCAST_USE_BLOCKING) +#if !defined(COLL_BASE_BCAST_USE_BLOCKING) if( tree->tree_nextsize != 0 ) { - send_reqs = (ompi_request_t**)malloc( (ptrdiff_t)tree->tree_nextsize * + send_reqs = (ompi_request_t**)malloc( (ptrdiff_t)tree->tree_nextsize * sizeof(ompi_request_t*) ); } #endif /* Root code */ if( rank == root ) { - /* + /* For each segment: - send segment to all children. The last segment may have less elements than other segments. 
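To make the bookkeeping above concrete, here is a worked example with illustrative values (not code from this changeset): original_count = 1000 and count_by_segment = 128 give eight pipelined segments, the last one short.

#include <assert.h>

/* Worked example of the segmentation arithmetic used by
 * ompi_coll_base_bcast_intra_generic. */
static void segment_bookkeeping_example(void)
{
    int original_count = 1000, count_by_segment = 128;
    int num_segments = (original_count + count_by_segment - 1)
                     / count_by_segment;                       /* 8 */
    int last_count = original_count
                   - (num_segments - 1) * count_by_segment;    /* 104 */
    assert(8 == num_segments && 104 == last_count);
}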
@@ -102,39 +83,39 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer, if( segindex == (num_segments - 1) ) { sendcount = original_count - segindex * count_by_segment; } - for( i = 0; i < tree->tree_nextsize; i++ ) { -#if defined(COLL_TUNED_BCAST_USE_BLOCKING) + for( i = 0; i < tree->tree_nextsize; i++ ) { +#if defined(COLL_BASE_BCAST_USE_BLOCKING) err = MCA_PML_CALL(send(tmpbuf, sendcount, datatype, - tree->tree_next[i], + tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, MCA_PML_BASE_SEND_STANDARD, comm)); #else err = MCA_PML_CALL(isend(tmpbuf, sendcount, datatype, - tree->tree_next[i], + tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, - MCA_PML_BASE_SEND_STANDARD, comm, + MCA_PML_BASE_SEND_STANDARD, comm, &send_reqs[i])); -#endif /* COLL_TUNED_BCAST_USE_BLOCKING */ +#endif /* COLL_BASE_BCAST_USE_BLOCKING */ if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - } + } -#if !defined(COLL_TUNED_BCAST_USE_BLOCKING) +#if !defined(COLL_BASE_BCAST_USE_BLOCKING) /* complete the sends before starting the next sends */ - err = ompi_request_wait_all( tree->tree_nextsize, send_reqs, + err = ompi_request_wait_all( tree->tree_nextsize, send_reqs, MPI_STATUSES_IGNORE ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } -#endif /* not COLL_TUNED_BCAST_USE_BLOCKING */ +#endif /* not COLL_BASE_BCAST_USE_BLOCKING */ /* update tmp buffer */ tmpbuf += realsegsize; } - } - + } + /* Intermediate nodes code */ - else if( tree->tree_nextsize > 0 ) { - /* - Create the pipeline. + else if( tree->tree_nextsize > 0 ) { + /* + Create the pipeline. 1) Post the first receive 2) For segments 1 .. num_segments - post new receive @@ -149,49 +130,49 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer, tree->tree_prev, MCA_COLL_BASE_TAG_BCAST, comm, &recv_reqs[req_index])); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - + for( segindex = 1; segindex < num_segments; segindex++ ) { - + req_index = req_index ^ 0x1; - + /* post new irecv */ err = MCA_PML_CALL(irecv( tmpbuf + realsegsize, count_by_segment, - datatype, tree->tree_prev, - MCA_COLL_BASE_TAG_BCAST, + datatype, tree->tree_prev, + MCA_COLL_BASE_TAG_BCAST, comm, &recv_reqs[req_index])); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - + /* wait for and forward the previous segment to children */ - err = ompi_request_wait( &recv_reqs[req_index ^ 0x1], + err = ompi_request_wait( &recv_reqs[req_index ^ 0x1], MPI_STATUSES_IGNORE ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - - for( i = 0; i < tree->tree_nextsize; i++ ) { -#if defined(COLL_TUNED_BCAST_USE_BLOCKING) + + for( i = 0; i < tree->tree_nextsize; i++ ) { +#if defined(COLL_BASE_BCAST_USE_BLOCKING) err = MCA_PML_CALL(send(tmpbuf, count_by_segment, datatype, - tree->tree_next[i], + tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, MCA_PML_BASE_SEND_STANDARD, comm)); #else err = MCA_PML_CALL(isend(tmpbuf, count_by_segment, datatype, - tree->tree_next[i], + tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, - MCA_PML_BASE_SEND_STANDARD, comm, + MCA_PML_BASE_SEND_STANDARD, comm, &send_reqs[i])); -#endif /* COLL_TUNED_BCAST_USE_BLOCKING */ +#endif /* COLL_BASE_BCAST_USE_BLOCKING */ if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - } - -#if !defined(COLL_TUNED_BCAST_USE_BLOCKING) + } + +#if !defined(COLL_BASE_BCAST_USE_BLOCKING) /* complete the sends before starting the next iteration */ - err = ompi_request_wait_all( tree->tree_nextsize, send_reqs, + err = ompi_request_wait_all( tree->tree_nextsize, send_reqs, MPI_STATUSES_IGNORE ); if (err != 
MPI_SUCCESS) { line = __LINE__; goto error_hndl; } -#endif /* COLL_TUNED_BCAST_USE_BLOCKING */ - +#endif /* COLL_BASE_BCAST_USE_BLOCKING */ + /* Update the receive buffer */ tmpbuf += realsegsize; - + } /* Process the last segment */ @@ -199,31 +180,31 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer, if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } sendcount = original_count - (ptrdiff_t)(num_segments - 1) * count_by_segment; for( i = 0; i < tree->tree_nextsize; i++ ) { -#if defined(COLL_TUNED_BCAST_USE_BLOCKING) +#if defined(COLL_BASE_BCAST_USE_BLOCKING) err = MCA_PML_CALL(send(tmpbuf, sendcount, datatype, - tree->tree_next[i], + tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, MCA_PML_BASE_SEND_STANDARD, comm)); #else err = MCA_PML_CALL(isend(tmpbuf, sendcount, datatype, - tree->tree_next[i], + tree->tree_next[i], MCA_COLL_BASE_TAG_BCAST, - MCA_PML_BASE_SEND_STANDARD, comm, + MCA_PML_BASE_SEND_STANDARD, comm, &send_reqs[i])); -#endif /* COLL_TUNED_BCAST_USE_BLOCKING */ +#endif /* COLL_BASE_BCAST_USE_BLOCKING */ if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } - -#if !defined(COLL_TUNED_BCAST_USE_BLOCKING) - err = ompi_request_wait_all( tree->tree_nextsize, send_reqs, + +#if !defined(COLL_BASE_BCAST_USE_BLOCKING) + err = ompi_request_wait_all( tree->tree_nextsize, send_reqs, MPI_STATUSES_IGNORE ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } -#endif /* COLL_TUNED_BCAST_USE_BLOCKING */ +#endif /* COLL_BASE_BCAST_USE_BLOCKING */ } - + /* Leaf nodes */ else { - /* + /* Receive all segments from parent in a loop: 1) post irecv for the first segment 2) for segments 1 .. num_segments @@ -241,12 +222,12 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer, req_index = req_index ^ 0x1; tmpbuf += realsegsize; /* post receive for the next segment */ - err = MCA_PML_CALL(irecv(tmpbuf, count_by_segment, datatype, - tree->tree_prev, MCA_COLL_BASE_TAG_BCAST, + err = MCA_PML_CALL(irecv(tmpbuf, count_by_segment, datatype, + tree->tree_prev, MCA_COLL_BASE_TAG_BCAST, comm, &recv_reqs[req_index])); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } /* wait on the previous segment */ - err = ompi_request_wait( &recv_reqs[req_index ^ 0x1], + err = ompi_request_wait( &recv_reqs[req_index ^ 0x1], MPI_STATUS_IGNORE ); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } @@ -255,25 +236,25 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer, if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } -#if !defined(COLL_TUNED_BCAST_USE_BLOCKING) +#if !defined(COLL_BASE_BCAST_USE_BLOCKING) if( NULL != send_reqs ) free(send_reqs); #endif return (MPI_SUCCESS); - + error_hndl: - OPAL_OUTPUT( (ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", + OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank) ); -#if !defined(COLL_TUNED_BCAST_USE_BLOCKING) +#if !defined(COLL_BASE_BCAST_USE_BLOCKING) if( NULL != send_reqs ) free(send_reqs); #endif return (err); } int -ompi_coll_tuned_bcast_intra_bintree ( void* buffer, - int count, - struct ompi_datatype_t* datatype, +ompi_coll_base_bcast_intra_bintree ( void* buffer, + int count, + struct ompi_datatype_t* datatype, int root, struct ompi_communicator_t* comm, mca_coll_base_module_t *module, @@ -281,28 +262,27 @@ ompi_coll_tuned_bcast_intra_bintree ( void* buffer, { int segcount = count; size_t typelng; - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = 
tuned_module->tuned_data; + mca_coll_base_comm_t *data = module->base_data; - COLL_TUNED_UPDATE_BINTREE( comm, tuned_module, root ); + COLL_BASE_UPDATE_BINTREE( comm, module, root ); /** * Determine number of elements sent per operation. */ ompi_datatype_type_size( datatype, &typelng ); - COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); + COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_binary rank %d ss %5d typelng %lu segcount %d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_binary rank %d ss %5d typelng %lu segcount %d", ompi_comm_rank(comm), segsize, (unsigned long)typelng, segcount)); - return ompi_coll_tuned_bcast_intra_generic( buffer, count, datatype, root, comm, module, + return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module, segcount, data->cached_bintree ); } int -ompi_coll_tuned_bcast_intra_pipeline( void* buffer, - int count, - struct ompi_datatype_t* datatype, +ompi_coll_base_bcast_intra_pipeline( void* buffer, + int count, + struct ompi_datatype_t* datatype, int root, struct ompi_communicator_t* comm, mca_coll_base_module_t *module, @@ -310,28 +290,27 @@ ompi_coll_tuned_bcast_intra_pipeline( void* buffer, { int segcount = count; size_t typelng; - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; + mca_coll_base_comm_t *data = module->base_data; - COLL_TUNED_UPDATE_PIPELINE( comm, tuned_module, root ); + COLL_BASE_UPDATE_PIPELINE( comm, module, root ); /** * Determine number of elements sent per operation. */ ompi_datatype_type_size( datatype, &typelng ); - COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); + COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_pipeline rank %d ss %5d typelng %lu segcount %d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_pipeline rank %d ss %5d typelng %lu segcount %d", ompi_comm_rank(comm), segsize, (unsigned long)typelng, segcount)); - return ompi_coll_tuned_bcast_intra_generic( buffer, count, datatype, root, comm, module, + return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module, segcount, data->cached_pipeline ); } int -ompi_coll_tuned_bcast_intra_chain( void* buffer, - int count, - struct ompi_datatype_t* datatype, +ompi_coll_base_bcast_intra_chain( void* buffer, + int count, + struct ompi_datatype_t* datatype, int root, struct ompi_communicator_t* comm, mca_coll_base_module_t *module, @@ -339,28 +318,27 @@ ompi_coll_tuned_bcast_intra_chain( void* buffer, { int segcount = count; size_t typelng; - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; + mca_coll_base_comm_t *data = module->base_data; - COLL_TUNED_UPDATE_CHAIN( comm, tuned_module, root, chains ); + COLL_BASE_UPDATE_CHAIN( comm, module, root, chains ); /** * Determine number of elements sent per operation. 
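Each of these wrappers derives the per-segment element count from the requested segment size and the datatype length via COLL_BASE_COMPUTED_SEGCOUNT (defined in coll_base_functions.h further down). For illustration only, the macro rewritten as a plain function:

/* Whole datatypes per segment, rounded up when the leftover bytes
 * exceed half a datatype (mirrors COLL_BASE_COMPUTED_SEGCOUNT). */
static int computed_segcount(size_t segsize, size_t typelng, int segcount)
{
    if ((segsize >= typelng) && (segsize < typelng * (size_t)segcount)) {
        size_t residual;
        segcount = (int)(segsize / typelng);
        residual = segsize - (size_t)segcount * typelng;
        if (residual > (typelng >> 1)) {
            segcount++;
        }
    }
    return segcount;
}

For example, segsize = 1000 with typelng = 13 and an initial count of 4096 leaves 76 whole datatypes plus 12 residual bytes; 12 > 13/2, so the result rounds up to 77.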
*/ ompi_datatype_type_size( datatype, &typelng ); - COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); + COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_chain rank %d fo %d ss %5d typelng %lu segcount %d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_chain rank %d fo %d ss %5d typelng %lu segcount %d", ompi_comm_rank(comm), chains, segsize, (unsigned long)typelng, segcount)); - return ompi_coll_tuned_bcast_intra_generic( buffer, count, datatype, root, comm, module, + return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module, segcount, data->cached_chain ); } int -ompi_coll_tuned_bcast_intra_binomial( void* buffer, - int count, - struct ompi_datatype_t* datatype, +ompi_coll_base_bcast_intra_binomial( void* buffer, + int count, + struct ompi_datatype_t* datatype, int root, struct ompi_communicator_t* comm, mca_coll_base_module_t *module, @@ -368,28 +346,27 @@ ompi_coll_tuned_bcast_intra_binomial( void* buffer, { int segcount = count; size_t typelng; - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; + mca_coll_base_comm_t *data = module->base_data; - COLL_TUNED_UPDATE_BMTREE( comm, tuned_module, root ); + COLL_BASE_UPDATE_BMTREE( comm, module, root ); /** * Determine number of elements sent per operation. */ ompi_datatype_type_size( datatype, &typelng ); - COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); + COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_binomial rank %d ss %5d typelng %lu segcount %d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_binomial rank %d ss %5d typelng %lu segcount %d", ompi_comm_rank(comm), segsize, (unsigned long)typelng, segcount)); - return ompi_coll_tuned_bcast_intra_generic( buffer, count, datatype, root, comm, module, + return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module, segcount, data->cached_bmtree ); } int -ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, - int count, - struct ompi_datatype_t* datatype, +ompi_coll_base_bcast_intra_split_bintree ( void* buffer, + int count, + struct ompi_datatype_t* datatype, int root, struct ompi_communicator_t* comm, mca_coll_base_module_t *module, @@ -399,26 +376,25 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, uint32_t counts[2]; int segcount[2]; /* Number of elements sent with each segment */ int num_segments[2]; /* Number of segmenets */ - int sendcount[2]; /* the same like segcount, except for the last segment */ + int sendcount[2]; /* the same like segcount, except for the last segment */ size_t realsegsize[2], type_size; char *tmpbuf[2]; ptrdiff_t type_extent, lb; ompi_request_t *base_req, *new_req; ompi_coll_tree_t *tree; - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; + mca_coll_base_comm_t *data = module->base_data; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_bcast_intra_split_bintree rank %d root %d ss %5d", rank, root, segsize)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"ompi_coll_base_bcast_intra_split_bintree rank %d root %d ss %5d", rank, root, segsize)); if (size == 1) { return MPI_SUCCESS; } /* setup the binary tree topology. 
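In this split variant each rank forwards only one half of the message down its own subtree and recovers the other half in the pairwise exchange of Step 2 further below. The side a rank serves follows from its shifted rank; as a sketch (hypothetical helper, same formula as the lr computation below):

/* 0 = left subtree, 1 = right subtree; the root counts as right. */
static int subtree_side(int rank, int root, int size)
{
    return (((rank + size - root) % size) + 1) % 2;
}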
*/ - COLL_TUNED_UPDATE_BINTREE( comm, tuned_module, root ); + COLL_BASE_UPDATE_BINTREE( comm, module, root ); tree = data->cached_bintree; err = ompi_datatype_type_size( datatype, &type_size ); @@ -431,10 +407,10 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, /* Note that ompi_datatype_type_size() will never return a negative value in typelng; it returns an int [vs. an unsigned type] because of the MPI spec. */ - if (segsize < ((uint32_t) type_size)) { + if (segsize < ((uint32_t) type_size)) { segsize = type_size; /* push segsize up to hold one type */ } - segcount[0] = segcount[1] = segsize / type_size; + segcount[0] = segcount[1] = segsize / type_size; num_segments[0] = counts[0]/segcount[0]; if ((counts[0] % segcount[0]) != 0) num_segments[0]++; num_segments[1] = counts[1]/segcount[1]; @@ -450,17 +426,17 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, (segsize > ((ptrdiff_t)counts[0] * type_size)) || (segsize > ((ptrdiff_t)counts[1] * type_size)) ) { /* call linear version here ! */ - return (ompi_coll_tuned_bcast_intra_chain ( buffer, count, datatype, + return (ompi_coll_base_bcast_intra_chain ( buffer, count, datatype, root, comm, module, segsize, 1 )); } err = ompi_datatype_get_extent (datatype, &lb, &type_extent); - + /* Determine real segment size */ realsegsize[0] = (ptrdiff_t)segcount[0] * type_extent; realsegsize[1] = (ptrdiff_t)segcount[1] * type_extent; - + /* set the buffer pointers */ tmpbuf[0] = (char *) buffer; tmpbuf[1] = (char *) buffer + (ptrdiff_t)counts[0] * type_extent; @@ -473,11 +449,11 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, /* determine if I am left (0) or right (1), (root is right) */ lr = ((rank + size - root)%size + 1)%2; - + /* root code */ if( rank == root ) { /* determine segment count */ - sendcount[0] = segcount[0]; + sendcount[0] = segcount[0]; sendcount[1] = segcount[1]; /* for each segment */ for (segindex = 0; segindex < num_segments[0]; segindex++) { @@ -487,7 +463,7 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, continue; } /* determine how many elements are being sent in this round */ - if(segindex == (num_segments[i] - 1)) + if(segindex == (num_segments[i] - 1)) sendcount[i] = counts[i] - segindex*segcount[i]; /* send data */ MCA_PML_CALL(send(tmpbuf[i], sendcount[i], datatype, @@ -498,19 +474,19 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, tmpbuf[i] += realsegsize[i]; } } - } - + } + /* intermediate nodes code */ - else if( tree->tree_nextsize > 0 ) { + else if( tree->tree_nextsize > 0 ) { /* Intermediate nodes: * It will receive segments only from one half of the data. - * Which one is determined by whether the node belongs to the "left" or "right" + * Which one is determined by whether the node belongs to the "left" or "right" * subtree. The topology building function builds the binary tree such that * odd "shifted ranks" ((rank + size - root)%size) are on the left subtree, * and even on the right subtree. * * Create the pipeline. We first post the first receive, then in the loop we - * post the next receive and after that wait for the previous receive to complete + * post the next receive and after that wait for the previous receive to complete * and we disseminate the data to all children.
*/ sendcount[lr] = segcount[lr]; @@ -521,11 +497,11 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, for( segindex = 1; segindex < num_segments[lr]; segindex++ ) { /* determine how many elements to expect in this round */ - if( segindex == (num_segments[lr] - 1)) + if( segindex == (num_segments[lr] - 1)) sendcount[lr] = counts[lr] - (ptrdiff_t)segindex * (ptrdiff_t)segcount[lr]; /* post new irecv */ err = MCA_PML_CALL(irecv( tmpbuf[lr] + realsegsize[lr], sendcount[lr], - datatype, tree->tree_prev, MCA_COLL_BASE_TAG_BCAST, + datatype, tree->tree_prev, MCA_COLL_BASE_TAG_BCAST, comm, &new_req)); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } @@ -539,7 +515,7 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, } /* end of for each child */ /* update the base request */ - base_req = new_req; + base_req = new_req; /* go to the next buffer (i.e. the one corresponding to the next recv) */ tmpbuf[lr] += realsegsize[lr]; } /* end of for segindex */ @@ -552,10 +528,10 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, MCA_PML_BASE_SEND_STANDARD, comm)); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } /* end of for each child */ - } - + } + /* leaf nodes */ - else { + else { /* Just consume segments as fast as possible */ sendcount[lr] = segcount[lr]; for (segindex = 0; segindex < num_segments[lr]; segindex++) { @@ -577,9 +553,9 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, tmpbuf[1] = (char *) buffer + (ptrdiff_t)counts[0] * type_extent; /* Step 2: - Find your immediate pair (identical node in opposite subtree) and SendRecv + Find your immediate pair (identical node in opposite subtree) and SendRecv data buffer with them. - The tree building function ensures that + The tree building function ensures that if (we are not root) if we are in the left subtree (lr == 0) our pair is (rank+1)%size.
if we are in the right subtree (lr == 1) our pair is (rank-1)%size @@ -591,9 +567,9 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, pair = (rank+size-1)%size; } - if ( (size%2) != 0 && rank != root) { + if ( (size%2) != 0 && rank != root) { - err = ompi_coll_tuned_sendrecv( tmpbuf[lr], counts[lr], datatype, + err = ompi_coll_base_sendrecv( tmpbuf[lr], counts[lr], datatype, pair, MCA_COLL_BASE_TAG_BCAST, tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype, pair, MCA_COLL_BASE_TAG_BCAST, @@ -607,28 +583,28 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, MCA_PML_BASE_SEND_STANDARD, comm)); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - } + } /* last node receives right buffer from the root */ else if (rank == (root+size-1)%size) { err = MCA_PML_CALL(recv(tmpbuf[1], counts[1], datatype, root, MCA_COLL_BASE_TAG_BCAST, comm, MPI_STATUS_IGNORE)); if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - } + } /* everyone else exchanges buffers */ else { - err = ompi_coll_tuned_sendrecv( tmpbuf[lr], counts[lr], datatype, + err = ompi_coll_base_sendrecv( tmpbuf[lr], counts[lr], datatype, pair, MCA_COLL_BASE_TAG_BCAST, tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype, pair, MCA_COLL_BASE_TAG_BCAST, comm, MPI_STATUS_IGNORE, rank); - if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } + if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } } return (MPI_SUCCESS); - + error_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank)); return (err); } @@ -636,8 +612,8 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, /* * Linear functions are copied from the BASIC coll module * they do not segment the message and are simple implementations - * but for some small number of nodes and/or small data sizes they - * are just as fast as tuned/tree based segmenting operations + * but for some small number of nodes and/or small data sizes they + * are just as fast as base/tree based segmenting operations * and as such may be selected by the decision functions * These are copied into this module due to the way we select modules * in V1. i.e. in V2 we will handle this differently and so will not @@ -655,21 +631,20 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer, * Returns: - MPI_SUCCESS or error code */ int -ompi_coll_tuned_bcast_intra_basic_linear (void *buff, int count, +ompi_coll_base_bcast_intra_basic_linear (void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module) { int i, size, rank, err; - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; + mca_coll_base_comm_t *data = module->base_data; ompi_request_t **preq, **reqs = data->mcct_reqs; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_bcast_intra_basic_linear rank %d root %d", rank, root)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"ompi_coll_base_bcast_intra_basic_linear rank %d root %d", rank, root)); /* Non-root receive the data. 
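At the MPI level the same flat pattern can be sketched as below (tag and error handling simplified; the actual code that follows posts non-blocking sends and releases them through ompi_coll_base_free_reqs):

#include <mpi.h>

/* Flat broadcast: every non-root posts one receive, the root sends
 * once per peer. */
static int linear_bcast_sketch(void *buf, int count, MPI_Datatype dtype,
                               int root, MPI_Comm comm)
{
    int i, rank, size;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    if (rank != root) {
        return MPI_Recv(buf, count, dtype, root, 0, comm,
                        MPI_STATUS_IGNORE);
    }
    for (i = 0; i < size; ++i) {
        if (i != root) {
            MPI_Send(buf, count, dtype, i, 0, comm);
        }
    }
    return MPI_SUCCESS;
}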
*/ @@ -710,148 +685,11 @@ ompi_coll_tuned_bcast_intra_basic_linear (void *buff, int count, err = ompi_request_wait_all(i, reqs, MPI_STATUSES_IGNORE); /* Free the reqs */ - - ompi_coll_tuned_free_reqs(reqs, i); + ompi_coll_base_free_reqs(reqs, i); /* All done */ - return err; } /* copied function (with appropriate renaming) ends here */ - -/* The following are used by dynamic and forced rules */ - -/* publish details of each algorithm and if its forced/fixed/locked in */ -/* as you add methods/algorithms you must update this and the query/map routines */ - -/* this routine is called by the component only */ -/* this makes sure that the mca parameters are set to their initial values and perms */ -/* module does not call this they call the forced_getvalues routine instead */ - -int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices) -{ - mca_base_var_enum_t *new_enum; - - ompi_coll_tuned_forced_max_algorithms[BCAST] = coll_tuned_bcast_algorithm_count; - - (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "bcast_algorithm_count", - "Number of bcast algorithms available", - MCA_BASE_VAR_TYPE_INT, NULL, 0, - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_CONSTANT, - &coll_tuned_bcast_algorithm_count); - - /* MPI_T: This variable should eventually be bound to a communicator */ - coll_tuned_bcast_forced_algorithm = 0; - (void) mca_base_var_enum_create("coll_tuned_bcast_algorithms", bcast_algorithms, &new_enum); - mca_param_indices->algorithm_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "bcast_algorithm", - "Which bcast algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 chain, 3: pipeline, 4: split binary tree, 5: binary tree, 6: binomial tree.", - MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_bcast_forced_algorithm); - OBJ_RELEASE(new_enum); - if (mca_param_indices->algorithm_param_index < 0) { - return mca_param_indices->algorithm_param_index; - } - - coll_tuned_bcast_segment_size = 0; - mca_param_indices->segsize_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "bcast_algorithm_segmentsize", - "Segment size in bytes used by default for bcast algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_bcast_segment_size); - - coll_tuned_bcast_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */ - mca_param_indices->tree_fanout_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "bcast_algorithm_tree_fanout", - "Fanout for n-tree used for bcast algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_bcast_tree_fanout); - - coll_tuned_bcast_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */ - mca_param_indices->chain_fanout_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "bcast_algorithm_chain_fanout", - "Fanout for chains used for bcast algorithms. 
Only has meaning if algorithm is forced and supports chain topo based operation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_bcast_chain_fanout); - - return (MPI_SUCCESS); -} - - -int ompi_coll_tuned_bcast_intra_do_forced(void *buf, int count, - struct ompi_datatype_t *dtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; - - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced algorithm %d", - data->user_forced[BCAST].algorithm)); - - switch (data->user_forced[BCAST].algorithm) { - case (0): return ompi_coll_tuned_bcast_intra_dec_fixed( buf, count, dtype, root, comm, module ); - case (1): return ompi_coll_tuned_bcast_intra_basic_linear( buf, count, dtype, root, comm, module ); - case (2): return ompi_coll_tuned_bcast_intra_chain( buf, count, dtype, root, comm, module, - data->user_forced[BCAST].segsize, - data->user_forced[BCAST].chain_fanout ); - case (3): return ompi_coll_tuned_bcast_intra_pipeline( buf, count, dtype, root, comm, module, - data->user_forced[BCAST].segsize ); - case (4): return ompi_coll_tuned_bcast_intra_split_bintree( buf, count, dtype, root, comm, module, - data->user_forced[BCAST].segsize ); - case (5): return ompi_coll_tuned_bcast_intra_bintree( buf, count, dtype, root, comm, module, - data->user_forced[BCAST].segsize ); - case (6): return ompi_coll_tuned_bcast_intra_binomial( buf, count, dtype, root, comm, module, - data->user_forced[BCAST].segsize ); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", - data->user_forced[BCAST].algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST])); - } /* switch */ - return (MPI_ERR_ARG); -} - - -int ompi_coll_tuned_bcast_intra_do_this(void *buf, int count, - struct ompi_datatype_t *dtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module, - int algorithm, int faninout, int segsize) - -{ - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this algorithm %d topo faninout %d segsize %d", - algorithm, faninout, segsize)); - - switch (algorithm) { - case (0): return ompi_coll_tuned_bcast_intra_dec_fixed( buf, count, dtype, root, comm, module ); - case (1): return ompi_coll_tuned_bcast_intra_basic_linear( buf, count, dtype, root, comm, module ); - case (2): return ompi_coll_tuned_bcast_intra_chain( buf, count, dtype, root, comm, module, segsize, faninout ); - case (3): return ompi_coll_tuned_bcast_intra_pipeline( buf, count, dtype, root, comm, module, segsize ); - case (4): return ompi_coll_tuned_bcast_intra_split_bintree( buf, count, dtype, root, comm, module, segsize ); - case (5): return ompi_coll_tuned_bcast_intra_bintree( buf, count, dtype, root, comm, module, segsize ); - case (6): return ompi_coll_tuned_bcast_intra_binomial( buf, count, dtype, root, comm, module, segsize ); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", - algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST])); - } /* switch */ - return (MPI_ERR_ARG); -} - diff --git a/ompi/mca/coll/base/coll_base_frame.c b/ompi/mca/coll/base/coll_base_frame.c index 311f31bdf9..6159d1a94b 100644 --- a/ompi/mca/coll/base/coll_base_frame.c +++ b/ompi/mca/coll/base/coll_base_frame.c @@ -3,10 +3,10 @@ * 
Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -33,6 +33,7 @@ #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/base.h" +#include "ompi/mca/coll/base/coll_base_functions.h" /* * The following file was created by configure. It contains extern @@ -49,10 +50,55 @@ static void coll_base_module_construct(mca_coll_base_module_t *m) /* zero out all functions */ memset ((char *) m + sizeof (m->super), 0, sizeof (*m) - sizeof (m->super)); m->coll_module_disable = NULL; + m->base_data = NULL; } -OBJ_CLASS_INSTANCE(mca_coll_base_module_t, opal_object_t, - coll_base_module_construct, NULL); +static void +coll_base_module_destruct(mca_coll_base_module_t *module) +{ + mca_coll_base_comm_t* data = module->base_data; + + if (NULL != data) { + if( NULL != data->mcct_reqs ) { + for( int i = 0; i < data->mcct_num_reqs; ++i ) { + if( MPI_REQUEST_NULL != data->mcct_reqs[i] ) + ompi_request_free(&data->mcct_reqs[i]); + } + free(data->mcct_reqs); + data->mcct_reqs = NULL; + data->mcct_num_reqs = 0; + } + assert(0 == data->mcct_num_reqs); + + /* free any cached information that has been allocated */ + if (data->cached_ntree) { /* destroy general tree if defined */ + ompi_coll_base_topo_destroy_tree (&data->cached_ntree); + } + if (data->cached_bintree) { /* destroy bintree if defined */ + ompi_coll_base_topo_destroy_tree (&data->cached_bintree); + } + if (data->cached_bmtree) { /* destroy bmtree if defined */ + ompi_coll_base_topo_destroy_tree (&data->cached_bmtree); + } + if (data->cached_in_order_bmtree) { /* destroy bmtree if defined */ + ompi_coll_base_topo_destroy_tree (&data->cached_in_order_bmtree); + } + if (data->cached_chain) { /* destroy general chain if defined */ + ompi_coll_base_topo_destroy_tree (&data->cached_chain); + } + if (data->cached_pipeline) { /* destroy pipeline if defined */ + ompi_coll_base_topo_destroy_tree (&data->cached_pipeline); + } + if (data->cached_in_order_bintree) { /* destroy in order bintree if defined */ + ompi_coll_base_topo_destroy_tree (&data->cached_in_order_bintree); + } + + free(data); + } +} + +OBJ_CLASS_INSTANCE(mca_coll_base_module_t, opal_object_t, + coll_base_module_construct, coll_base_module_destruct); MCA_BASE_FRAMEWORK_DECLARE(ompi, coll, "Collectives", NULL, NULL, NULL, mca_coll_base_static_components, 0); diff --git a/ompi/mca/coll/base/coll_base_functions.h b/ompi/mca/coll/base/coll_base_functions.h new file mode 100644 index 0000000000..5291f8725e --- /dev/null +++ b/ompi/mca/coll/base/coll_base_functions.h @@ -0,0 +1,341 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2015 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef MCA_COLL_BASE_EXPORT_H +#define MCA_COLL_BASE_EXPORT_H + +#include "ompi_config.h" + +#include "ompi/mca/coll/base/base.h" +#include "opal/mca/mca.h" +#include "ompi/mca/coll/coll.h" +#include "ompi/request/request.h" + +/* need to include our own topo prototypes so we can malloc data on the comm correctly */ +#include "coll_base_topo.h" + +/* some fixed value index vars to simplify certain operations */ +typedef enum COLLTYPE { + ALLGATHER = 0, /* 0 */ + ALLGATHERV, /* 1 */ + ALLREDUCE, /* 2 */ + ALLTOALL, /* 3 */ + ALLTOALLV, /* 4 */ + ALLTOALLW, /* 5 */ + BARRIER, /* 6 */ + BCAST, /* 7 */ + EXSCAN, /* 8 */ + GATHER, /* 9 */ + GATHERV, /* 10 */ + REDUCE, /* 11 */ + REDUCESCATTER, /* 12 */ + SCAN, /* 13 */ + SCATTER, /* 14 */ + SCATTERV, /* 15 */ + COLLCOUNT /* 16 end counter keep it as last element */ +} COLLTYPE_T; + +/* defined arg lists to simply auto inclusion of user overriding decision functions */ +#define ALLGATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define ALLGATHERV_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void * rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define ALLREDUCE_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define ALLTOALL_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define ALLTOALLV_ARGS void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define ALLTOALLW_ARGS void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t **sdtypes, void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t **rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define BARRIER_ARGS struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define BCAST_ARGS void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define EXSCAN_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define GATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define GATHERV_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, 
int *rcounts, int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define REDUCE_ARGS void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define REDUCESCATTER_ARGS void *sbuf, void *rbuf, int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define SCAN_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define SCATTER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +#define SCATTERV_ARGS void *sbuf, int *scounts, int *disps, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module +/* end defined arg lists to simply auto inclusion of user overriding decision functions */ + +BEGIN_C_DECLS + +/* All Gather */ +int ompi_coll_base_allgather_intra_bruck(ALLGATHER_ARGS); +int ompi_coll_base_allgather_intra_recursivedoubling(ALLGATHER_ARGS); +int ompi_coll_base_allgather_intra_ring(ALLGATHER_ARGS); +int ompi_coll_base_allgather_intra_neighborexchange(ALLGATHER_ARGS); +int ompi_coll_base_allgather_intra_basic_linear(ALLGATHER_ARGS); +int ompi_coll_base_allgather_intra_two_procs(ALLGATHER_ARGS); + +/* All GatherV */ +int ompi_coll_base_allgatherv_intra_bruck(ALLGATHERV_ARGS); +int ompi_coll_base_allgatherv_intra_ring(ALLGATHERV_ARGS); +int ompi_coll_base_allgatherv_intra_neighborexchange(ALLGATHERV_ARGS); +int ompi_coll_base_allgatherv_intra_basic_default(ALLGATHERV_ARGS); +int ompi_coll_base_allgatherv_intra_two_procs(ALLGATHERV_ARGS); + +/* All Reduce */ +int ompi_coll_base_allreduce_intra_nonoverlapping(ALLREDUCE_ARGS); +int ompi_coll_base_allreduce_intra_recursivedoubling(ALLREDUCE_ARGS); +int ompi_coll_base_allreduce_intra_ring(ALLREDUCE_ARGS); +int ompi_coll_base_allreduce_intra_ring_segmented(ALLREDUCE_ARGS, uint32_t segsize); +int ompi_coll_base_allreduce_intra_basic_linear(ALLREDUCE_ARGS); + +/* AlltoAll */ +int ompi_coll_base_alltoall_intra_pairwise(ALLTOALL_ARGS); +int ompi_coll_base_alltoall_intra_bruck(ALLTOALL_ARGS); +int ompi_coll_base_alltoall_intra_basic_linear(ALLTOALL_ARGS); +int ompi_coll_base_alltoall_intra_linear_sync(ALLTOALL_ARGS, int max_requests); +int ompi_coll_base_alltoall_intra_two_procs(ALLTOALL_ARGS); + +/* AlltoAllV */ +int ompi_coll_base_alltoallv_intra_pairwise(ALLTOALLV_ARGS); +int ompi_coll_base_alltoallv_intra_basic_linear(ALLTOALLV_ARGS); + +/* AlltoAllW */ + +/* Barrier */ +int ompi_coll_base_barrier_intra_doublering(BARRIER_ARGS); +int ompi_coll_base_barrier_intra_recursivedoubling(BARRIER_ARGS); +int ompi_coll_base_barrier_intra_bruck(BARRIER_ARGS); +int ompi_coll_base_barrier_intra_two_procs(BARRIER_ARGS); +int ompi_coll_base_barrier_intra_linear(BARRIER_ARGS); +int ompi_coll_base_barrier_intra_tree(BARRIER_ARGS); + +/* Bcast */ +int ompi_coll_base_bcast_intra_basic_linear(BCAST_ARGS); +int ompi_coll_base_bcast_intra_chain(BCAST_ARGS, uint32_t segsize, int32_t chains); +int ompi_coll_base_bcast_intra_pipeline(BCAST_ARGS, uint32_t segsize); +int ompi_coll_base_bcast_intra_binomial(BCAST_ARGS, uint32_t segsize); +int 
ompi_coll_base_bcast_intra_bintree(BCAST_ARGS, uint32_t segsize); +int ompi_coll_base_bcast_intra_split_bintree(BCAST_ARGS, uint32_t segsize); + +/* Exscan */ + +/* Gather */ +int ompi_coll_base_gather_intra_basic_linear(GATHER_ARGS); +int ompi_coll_base_gather_intra_binomial(GATHER_ARGS); +int ompi_coll_base_gather_intra_linear_sync(GATHER_ARGS, int first_segment_size); + +/* GatherV */ + +/* Reduce */ +int ompi_coll_base_reduce_intra_basic_linear(REDUCE_ARGS); +int ompi_coll_base_reduce_intra_chain(REDUCE_ARGS, uint32_t segsize, int fanout, int max_outstanding_reqs ); +int ompi_coll_base_reduce_intra_pipeline(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); +int ompi_coll_base_reduce_intra_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); +int ompi_coll_base_reduce_intra_binomial(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); +int ompi_coll_base_reduce_intra_in_order_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs ); + +/* Reduce_scatter */ +int ompi_coll_base_reduce_scatter_intra_nonoverlapping(REDUCESCATTER_ARGS); +int ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(REDUCESCATTER_ARGS); +int ompi_coll_base_reduce_scatter_intra_ring(REDUCESCATTER_ARGS); + +/* Scan */ + +/* Scatter */ +int ompi_coll_base_scatter_intra_basic_linear(SCATTER_ARGS); +int ompi_coll_base_scatter_intra_binomial(SCATTER_ARGS); + +/* ScatterV */ + +END_C_DECLS + +#define COLL_BASE_UPDATE_BINTREE( OMPI_COMM, BASE_MODULE, ROOT ) \ +do { \ + mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \ + if( !( (coll_comm->cached_bintree) \ + && (coll_comm->cached_bintree_root == (ROOT)) ) ) { \ + if( coll_comm->cached_bintree ) { /* destroy previous binomial if defined */ \ + ompi_coll_base_topo_destroy_tree( &(coll_comm->cached_bintree) ); \ + } \ + coll_comm->cached_bintree = ompi_coll_base_topo_build_tree(2,(OMPI_COMM),(ROOT)); \ + coll_comm->cached_bintree_root = (ROOT); \ + } \ +} while (0) + +#define COLL_BASE_UPDATE_BMTREE( OMPI_COMM, BASE_MODULE, ROOT ) \ +do { \ + mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \ + if( !( (coll_comm->cached_bmtree) \ + && (coll_comm->cached_bmtree_root == (ROOT)) ) ) { \ + if( coll_comm->cached_bmtree ) { /* destroy previous binomial if defined */ \ + ompi_coll_base_topo_destroy_tree( &(coll_comm->cached_bmtree) ); \ + } \ + coll_comm->cached_bmtree = ompi_coll_base_topo_build_bmtree( (OMPI_COMM), (ROOT) ); \ + coll_comm->cached_bmtree_root = (ROOT); \ + } \ +} while (0) + +#define COLL_BASE_UPDATE_IN_ORDER_BMTREE( OMPI_COMM, BASE_MODULE, ROOT ) \ +do { \ + mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \ + if( !( (coll_comm->cached_in_order_bmtree) \ + && (coll_comm->cached_in_order_bmtree_root == (ROOT)) ) ) { \ + if( coll_comm->cached_in_order_bmtree ) { /* destroy previous binomial if defined */ \ + ompi_coll_base_topo_destroy_tree( &(coll_comm->cached_in_order_bmtree) ); \ + } \ + coll_comm->cached_in_order_bmtree = ompi_coll_base_topo_build_in_order_bmtree( (OMPI_COMM), (ROOT) ); \ + coll_comm->cached_in_order_bmtree_root = (ROOT); \ + } \ +} while (0) + +#define COLL_BASE_UPDATE_PIPELINE( OMPI_COMM, BASE_MODULE, ROOT ) \ +do { \ + mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \ + if( !( (coll_comm->cached_pipeline) \ + && (coll_comm->cached_pipeline_root == (ROOT)) ) ) { \ + if (coll_comm->cached_pipeline) { /* destroy previous pipeline if defined */ \ + ompi_coll_base_topo_destroy_tree( &(coll_comm->cached_pipeline) ); \ + } \ + coll_comm->cached_pipeline = 
ompi_coll_base_topo_build_chain( 1, (OMPI_COMM), (ROOT) ); \ + coll_comm->cached_pipeline_root = (ROOT); \ + } \ +} while (0) + +#define COLL_BASE_UPDATE_CHAIN( OMPI_COMM, BASE_MODULE, ROOT, FANOUT ) \ +do { \ + mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \ + if( !( (coll_comm->cached_chain) \ + && (coll_comm->cached_chain_root == (ROOT)) \ + && (coll_comm->cached_chain_fanout == (FANOUT)) ) ) { \ + if( coll_comm->cached_chain) { /* destroy previous chain if defined */ \ + ompi_coll_base_topo_destroy_tree( &(coll_comm->cached_chain) ); \ + } \ + coll_comm->cached_chain = ompi_coll_base_topo_build_chain((FANOUT), (OMPI_COMM), (ROOT)); \ + coll_comm->cached_chain_root = (ROOT); \ + coll_comm->cached_chain_fanout = (FANOUT); \ + } \ +} while (0) + +#define COLL_BASE_UPDATE_IN_ORDER_BINTREE( OMPI_COMM, BASE_MODULE ) \ +do { \ + mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \ + if( !(coll_comm->cached_in_order_bintree) ) { \ + /* In-order binary tree topology is defined by communicator size */ \ + /* Thus, there is no need to destroy anything */ \ + coll_comm->cached_in_order_bintree = \ + ompi_coll_base_topo_build_in_order_bintree((OMPI_COMM)); \ + } \ +} while (0) + +/** + * This macro gives a generic way to compute the best count of + * the segment (i.e. the number of complete datatypes that + * can fit in the specified SEGSIZE). Beware, when this macro + * is called, the SEGCOUNT should be initialized to the count as + * expected by the collective call. + */ +#define COLL_BASE_COMPUTED_SEGCOUNT(SEGSIZE, TYPELNG, SEGCOUNT) \ + if( ((SEGSIZE) >= (TYPELNG)) && \ + ((SEGSIZE) < ((TYPELNG) * (SEGCOUNT))) ) { \ + size_t residual; \ + (SEGCOUNT) = (int)((SEGSIZE) / (TYPELNG)); \ + residual = (SEGSIZE) - (SEGCOUNT) * (TYPELNG); \ + if( residual > ((TYPELNG) >> 1) ) \ + (SEGCOUNT)++; \ + } \ + +/** + * This macro gives a generic way to compute the well-distributed block counts + * when the count and number of blocks are fixed. + * Macro returns "early-block" count, "late-block" count, and "split-index" + * which is the block at which we switch from "early-block" count to + * the "late-block" count. + * count = split_index * early_block_count + + * (block_count - split_index) * late_block_count + * We do not perform ANY error checks - make sure that the input values + * make sense (e.g. count > num_blocks). + */ +#define COLL_BASE_COMPUTE_BLOCKCOUNT( COUNT, NUM_BLOCKS, SPLIT_INDEX, \ + EARLY_BLOCK_COUNT, LATE_BLOCK_COUNT ) \ + EARLY_BLOCK_COUNT = LATE_BLOCK_COUNT = COUNT / NUM_BLOCKS; \ + SPLIT_INDEX = COUNT % NUM_BLOCKS; \ + if (0 != SPLIT_INDEX) { \ + EARLY_BLOCK_COUNT = EARLY_BLOCK_COUNT + 1; \ + } \ + +/* + * Data structure for hanging data off the communicator + * i.e.
per module instance + */ +struct mca_coll_base_comm_t { + opal_object_t super; + + /* standard data for requests and PML usage */ + + /* Precreate space for requests + * Note this does not affect basic, + * but in the wrong context it can confuse a debugger; + * this is controlled by an MCA param + */ + + ompi_request_t **mcct_reqs; + int mcct_num_reqs; + + /* + * base topo information caching per communicator + * + * for each communicator we cache the topo information so we can + * reuse it without regenerating; if the root [or fanout] changes + * we regenerate and recache this information + */ + + /* general tree with n fan out */ + ompi_coll_tree_t *cached_ntree; + int cached_ntree_root; + int cached_ntree_fanout; + + /* binary tree */ + ompi_coll_tree_t *cached_bintree; + int cached_bintree_root; + + /* binomial tree */ + ompi_coll_tree_t *cached_bmtree; + int cached_bmtree_root; + + /* in-order binomial tree */ + ompi_coll_tree_t *cached_in_order_bmtree; + int cached_in_order_bmtree_root; + + /* chained tree (fanout followed by pipelines) */ + ompi_coll_tree_t *cached_chain; + int cached_chain_root; + int cached_chain_fanout; + + /* pipeline */ + ompi_coll_tree_t *cached_pipeline; + int cached_pipeline_root; + + /* in-order binary tree (root of the in-order binary tree is rank 0) */ + ompi_coll_tree_t *cached_in_order_bintree; +}; +typedef struct mca_coll_base_comm_t mca_coll_base_comm_t; +OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_base_comm_t); + +static inline void ompi_coll_base_free_reqs(ompi_request_t **reqs, int count) +{ + int i; + for (i = 0; i < count; ++i) + ompi_request_free(&reqs[i]); +} + +#endif /* MCA_COLL_BASE_EXPORT_H */ diff --git a/ompi/mca/coll/base/coll_base_gather.c b/ompi/mca/coll/base/coll_base_gather.c index d23fe78e77..fc8f9f6495 100644 --- a/ompi/mca/coll/base/coll_base_gather.c +++ b/ompi/mca/coll/base/coll_base_gather.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -30,30 +30,14 @@ #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/coll_tags.h" #include "ompi/mca/pml/pml.h" -#include "coll_tuned.h" -#include "coll_tuned_topo.h" -#include "coll_tuned_util.h" - -/* gather algorithm variables */ -static int coll_tuned_gather_algorithm_count = 3; -static int coll_tuned_gather_forced_algorithm = 0; -static int coll_tuned_gather_segment_size = 0; -static int coll_tuned_gather_tree_fanout; -static int coll_tuned_gather_chain_fanout; - -/* valid values for coll_tuned_gather_forced_algorithm */ -static mca_base_var_enum_value_t gather_algorithms[] = { - {0, "ignore"}, - {1, "basic_linear"}, - {2, "binomial"}, - {3, "linear_sync"}, - {0, NULL} -}; +#include "ompi/mca/coll/base/coll_base_functions.h" +#include "coll_base_topo.h" +#include "coll_base_util.h" /* Todo: gather_intra_generic, gather_intra_binary, gather_intra_chain, * gather_intra_pipeline, segmentation?
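Before the gather implementations, a worked example for the COLL_BASE_COMPUTE_BLOCKCOUNT macro defined in the header above (illustrative values; assumes the macro is in scope):

#include <assert.h>

/* Distributing count = 10 over num_blocks = 4 yields two "early"
 * blocks of 3 followed by two "late" blocks of 2. */
static void blockcount_example(void)
{
    int count = 10, num_blocks = 4;
    int early, late, split;
    COLL_BASE_COMPUTE_BLOCKCOUNT(count, num_blocks, split, early, late);
    assert(3 == early && 2 == late && 2 == split);
    assert(count == split * early + (num_blocks - split) * late);
}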
*/ int -ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount, +ompi_coll_base_gather_intra_binomial(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -65,19 +49,19 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount, char *ptmp = NULL, *tempbuf = NULL; ompi_coll_tree_t* bmtree; MPI_Status status; - MPI_Aint sextent, slb, strue_lb, strue_extent; + MPI_Aint sextent, slb, strue_lb, strue_extent; MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent; - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; + mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; + mca_coll_base_comm_t *data = base_module->base_data; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_gather_intra_binomial rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "ompi_coll_base_gather_intra_binomial rank %d", rank)); /* create the binomial tree */ - COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root ); + COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, base_module, root ); bmtree = data->cached_in_order_bmtree; ompi_datatype_get_extent(sdtype, &slb, &sextent); @@ -112,7 +96,7 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount, if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } } else { /* copy from rbuf to temp buffer */ - err = ompi_datatype_copy_content_same_ddt(rdtype, rcount, ptmp, + err = ompi_datatype_copy_content_same_ddt(rdtype, rcount, ptmp, (char *)rbuf + (ptrdiff_t)rank * rextent * (ptrdiff_t)rcount); if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; } } @@ -157,8 +141,8 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount, mycount = size - vkid; mycount *= rcount; - OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_gather_intra_binomial rank %d recv %d mycount = %d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "ompi_coll_base_gather_intra_binomial rank %d recv %d mycount = %d", rank, bmtree->tree_next[i], mycount)); err = MCA_PML_CALL(recv(ptmp + total_recv*rextent, (ptrdiff_t)rcount * size - total_recv, rdtype, @@ -172,8 +156,8 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount, if (rank != root) { /* all nodes except root send to parents */ - OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_gather_intra_binomial rank %d send %d count %d\n", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "ompi_coll_base_gather_intra_binomial rank %d send %d count %d\n", rank, bmtree->tree_prev, total_recv)); err = MCA_PML_CALL(send(ptmp, total_recv, sdtype, @@ -207,7 +191,7 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount, if (NULL != tempbuf) free(tempbuf); - OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); return err; } @@ -220,11 +204,11 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount, * Returns: - MPI_SUCCESS or error code */ int -ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount, +ompi_coll_base_gather_intra_linear_sync(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, - int root, + int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module, int first_segment_size) @@ -237,8 +221,8 @@ 
ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount, size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_gather_intra_linear_sync rank %d, segment %d", rank, first_segment_size)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "ompi_coll_base_gather_intra_linear_sync rank %d, segment %d", rank, first_segment_size)); if (rank != root) { /* Non-root processes: @@ -250,10 +234,10 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount, ompi_datatype_type_size(sdtype, &typelng); ompi_datatype_get_extent(sdtype, &lb, &extent); first_segment_count = scount; - COLL_TUNED_COMPUTED_SEGCOUNT( (size_t) first_segment_size, typelng, + COLL_BASE_COMPUTED_SEGCOUNT( (size_t) first_segment_size, typelng, first_segment_count ); - ret = MCA_PML_CALL(recv(sbuf, 0, MPI_BYTE, root, + ret = MCA_PML_CALL(recv(sbuf, 0, MPI_BYTE, root, MCA_COLL_BASE_TAG_GATHER, comm, MPI_STATUS_IGNORE)); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } @@ -263,15 +247,15 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount, MCA_PML_BASE_SEND_STANDARD, comm)); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - ret = MCA_PML_CALL(send((char*)sbuf + extent * first_segment_count, - (scount - first_segment_count), sdtype, + ret = MCA_PML_CALL(send((char*)sbuf + extent * first_segment_count, + (scount - first_segment_count), sdtype, root, MCA_COLL_BASE_TAG_GATHER, MCA_PML_BASE_SEND_STANDARD, comm)); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } else { - /* Root process, + /* Root process, - For every non-root node: - post irecv for the first segment of the message - send zero byte message to signal node to send the message @@ -284,20 +268,20 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount, ompi_request_t *first_segment_req; reqs = (ompi_request_t**) calloc(size, sizeof(ompi_request_t*)); if (NULL == reqs) { ret = -1; line = __LINE__; goto error_hndl; } - + ompi_datatype_type_size(rdtype, &typelng); ompi_datatype_get_extent(rdtype, &lb, &extent); first_segment_count = rcount; - COLL_TUNED_COMPUTED_SEGCOUNT( (size_t)first_segment_size, typelng, + COLL_BASE_COMPUTED_SEGCOUNT( (size_t)first_segment_size, typelng, first_segment_count ); ptmp = (char *) rbuf; for (i = 0; i < size; ++i) { - if (i == rank) { + if (i == rank) { /* skip myself */ - reqs[i] = MPI_REQUEST_NULL; - continue; - } + reqs[i] = MPI_REQUEST_NULL; + continue; + } /* irecv for the first segment from i */ ptmp = (char*)rbuf + (ptrdiff_t)i * (ptrdiff_t)rcount * extent; @@ -305,7 +289,7 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount, MCA_COLL_BASE_TAG_GATHER, comm, &first_segment_req)); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } - + /* send sync message */ ret = MCA_PML_CALL(send(rbuf, 0, MPI_BYTE, i, MCA_COLL_BASE_TAG_GATHER, @@ -314,7 +298,7 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount, /* irecv for the second segment */ ptmp = (char*)rbuf + ((ptrdiff_t)i * (ptrdiff_t)rcount + first_segment_count) * extent; - ret = MCA_PML_CALL(irecv(ptmp, (rcount - first_segment_count), + ret = MCA_PML_CALL(irecv(ptmp, (rcount - first_segment_count), rdtype, i, MCA_COLL_BASE_TAG_GATHER, comm, &reqs[i])); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } @@ -327,11 +311,11 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount, /* copy local data if necessary */ if (MPI_IN_PLACE != sbuf) { ret = ompi_datatype_sndrcv(sbuf, scount, 
sdtype, - (char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * extent, + (char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * extent, rcount, rdtype); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } } - + /* wait all second segments to complete */ ret = ompi_request_wait_all(size, reqs, MPI_STATUSES_IGNORE); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } @@ -346,8 +330,8 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount, if (NULL != reqs) { free(reqs); } - OPAL_OUTPUT (( ompi_coll_tuned_stream, - "ERROR_HNDL: node %d file %s line %d error %d\n", + OPAL_OUTPUT (( ompi_coll_base_framework.framework_output, + "ERROR_HNDL: node %d file %s line %d error %d\n", rank, __FILE__, line, ret )); return ret; } @@ -355,13 +339,13 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount, /* * Linear functions are copied from the BASIC coll module * they do not segment the message and are simple implementations - * but for some small number of nodes and/or small data sizes they - * are just as fast as tuned/tree based segmenting operations + * but for some small number of nodes and/or small data sizes they + * are just as fast as base/tree based segmenting operations * and as such may be selected by the decision functions * These are copied into this module due to the way we select modules * in V1. i.e. in V2 we will handle this differently and so will not * have to duplicate code. - * JPG following the examples from other coll_tuned implementations. Dec06. + * JPG following the examples from other coll_base implementations. Dec06. */ /* copied function (with appropriate renaming) starts here */ @@ -373,7 +357,7 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount, * Returns: - MPI_SUCCESS or error code */ int -ompi_coll_tuned_gather_intra_basic_linear(void *sbuf, int scount, +ompi_coll_base_gather_intra_basic_linear(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -389,8 +373,8 @@ ompi_coll_tuned_gather_intra_basic_linear(void *sbuf, int scount, rank = ompi_comm_rank(comm); /* Everyone but root sends data and returns. 
*/ - OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_gather_intra_basic_linear rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "ompi_coll_base_gather_intra_basic_linear rank %d", rank)); if (rank != root) { return MCA_PML_CALL(send(sbuf, scount, sdtype, root, @@ -427,164 +411,3 @@ ompi_coll_tuned_gather_intra_basic_linear(void *sbuf, int scount, /* copied function (with appropriate renaming) ends here */ - -/* The following are used by dynamic and forced rules */ - -/* publish details of each algorithm and if its forced/fixed/locked in */ -/* as you add methods/algorithms you must update this and the query/map - routines */ - -/* this routine is called by the component only */ -/* this makes sure that the mca parameters are set to their initial values - and perms */ -/* module does not call this they call the forced_getvalues routine instead */ - -int -ompi_coll_tuned_gather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices) -{ - mca_base_var_enum_t *new_enum; - - ompi_coll_tuned_forced_max_algorithms[GATHER] = coll_tuned_gather_algorithm_count; - - (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "gather_algorithm_count", - "Number of gather algorithms available", - MCA_BASE_VAR_TYPE_INT, NULL, 0, - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_CONSTANT, - &coll_tuned_gather_algorithm_count); - - /* MPI_T: This variable should eventually be bound to a communicator */ - coll_tuned_gather_forced_algorithm = 0; - (void) mca_base_var_enum_create("coll_tuned_gather_algorithms", gather_algorithms, &new_enum); - mca_param_indices->algorithm_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "gather_algorithm", - "Which gather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial, 3 linear with synchronization.", - MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_gather_forced_algorithm); - OBJ_RELEASE(new_enum); - if (mca_param_indices->algorithm_param_index < 0) { - return mca_param_indices->algorithm_param_index; - } - - coll_tuned_gather_segment_size = 0; - mca_param_indices->segsize_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "gather_algorithm_segmentsize", - "Segment size in bytes used by default for gather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_gather_segment_size); - - coll_tuned_gather_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */ - mca_param_indices->tree_fanout_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "gather_algorithm_tree_fanout", - "Fanout for n-tree used for gather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. 
Currently, available algorithms do not support n-tree topologies.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_gather_tree_fanout); - - coll_tuned_gather_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */ - mca_param_indices->chain_fanout_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "gather_algorithm_chain_fanout", - "Fanout for chains used for gather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_gather_chain_fanout); - - return (MPI_SUCCESS); -} - -int -ompi_coll_tuned_gather_intra_do_forced(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; - - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:gather_intra_do_forced selected algorithm %d", - data->user_forced[GATHER].algorithm)); - - switch (data->user_forced[GATHER].algorithm) { - case (0): - return ompi_coll_tuned_gather_intra_dec_fixed (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); - case (1): - return ompi_coll_tuned_gather_intra_basic_linear (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); - case (2): - return ompi_coll_tuned_gather_intra_binomial(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); - case (3): - return ompi_coll_tuned_gather_intra_linear_sync (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module, - data->user_forced[GATHER].segsize); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:gather_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", - data->user_forced[GATHER].algorithm, - ompi_coll_tuned_forced_max_algorithms[GATHER])); - return (MPI_ERR_ARG); - } /* switch */ -} - -int -ompi_coll_tuned_gather_intra_do_this(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module, - int algorithm, int faninout, int segsize) -{ - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:gather_intra_do_this selected algorithm %d topo faninout %d segsize %d", - algorithm, faninout, segsize)); - - switch (algorithm) { - case (0): - return ompi_coll_tuned_gather_intra_dec_fixed (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); - case (1): - return ompi_coll_tuned_gather_intra_basic_linear (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); - case (2): - return ompi_coll_tuned_gather_intra_binomial(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); - case (3): - return ompi_coll_tuned_gather_intra_linear_sync (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module, - segsize); - - default: - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:gather_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", - algorithm, - ompi_coll_tuned_forced_max_algorithms[GATHER])); - return (MPI_ERR_ARG); - } /* switch */ -} 
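The forced-algorithm plumbing removed above stays behind in the tuned component; the base component now simply exports the three gather implementations. A minimal sketch of how a caller might choose among them (hypothetical helper name and thresholds, not part of this patch):

    /* Hypothetical dispatcher over the three exported base gather routines. */
    static int gather_select(void *sbuf, int scount, struct ompi_datatype_t *sdt,
                             void *rbuf, int rcount, struct ompi_datatype_t *rdt,
                             int root, struct ompi_communicator_t *comm,
                             mca_coll_base_module_t *module, size_t msg_size)
    {
        if (msg_size < 6000)        /* small messages: binomial tree */
            return ompi_coll_base_gather_intra_binomial(sbuf, scount, sdt,
                                                        rbuf, rcount, rdt,
                                                        root, comm, module);
        if (msg_size < 131072)      /* mid-range: simple linear */
            return ompi_coll_base_gather_intra_basic_linear(sbuf, scount, sdt,
                                                            rbuf, rcount, rdt,
                                                            root, comm, module);
        /* large messages: linear with synchronization, 1 KB first segment */
        return ompi_coll_base_gather_intra_linear_sync(sbuf, scount, sdt,
                                                       rbuf, rcount, rdt,
                                                       root, comm, module, 1024);
    }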
diff --git a/ompi/mca/coll/base/coll_base_reduce.c b/ompi/mca/coll/base/coll_base_reduce.c index 4b7c2acf7d..f6752579c9 100644 --- a/ompi/mca/coll/base/coll_base_reduce.c +++ b/ompi/mca/coll/base/coll_base_reduce.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -31,28 +31,8 @@ #include "ompi/mca/coll/base/coll_tags.h" #include "ompi/mca/pml/pml.h" #include "ompi/op/op.h" -#include "coll_tuned.h" -#include "coll_tuned_topo.h" - -/* reduce algorithm variables */ -static int coll_tuned_reduce_algorithm_count = 6; -static int coll_tuned_reduce_forced_algorithm = 0; -static int coll_tuned_reduce_segment_size = 0; -static int coll_tuned_reduce_max_requests; -static int coll_tuned_reduce_tree_fanout; -static int coll_tuned_reduce_chain_fanout; - -/* valid values for coll_tuned_reduce_forced_algorithm */ -static mca_base_var_enum_value_t reduce_algorithms[] = { - {0, "ignore"}, - {1, "linear"}, - {2, "chain"}, - {3, "pipeline"}, - {4, "binary"}, - {5, "binomial"}, - {6, "in-order_binary"}, - {0, NULL} -}; +#include "ompi/mca/coll/base/coll_base_functions.h" +#include "coll_base_topo.h" /** * This is a generic implementation of the reduce protocol. It uses the tree @@ -62,10 +42,10 @@ static mca_base_var_enum_value_t reduce_algorithms[] = { * the number of datatypes to the original count (original_count) * * Note that for non-commutative operations we cannot save memory copy - * for the first block: thus we must copy sendbuf to accumbuf on intermediate + * for the first block: thus we must copy sendbuf to accumbuf on intermediate * to keep the optimized loop happy. */ -int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_count, +int ompi_coll_base_reduce_generic( void* sendbuf, void* recvbuf, int original_count, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, mca_coll_base_module_t *module, @@ -90,60 +70,60 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c num_segments = (original_count + count_by_segment - 1) / count_by_segment; segment_increment = (ptrdiff_t)count_by_segment * extent; - sendtmpbuf = (char*) sendbuf; - if( sendbuf == MPI_IN_PLACE ) { - sendtmpbuf = (char *)recvbuf; + sendtmpbuf = (char*) sendbuf; + if( sendbuf == MPI_IN_PLACE ) { + sendtmpbuf = (char *)recvbuf; } - OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_generic count %d, msg size %ld, segsize %ld, max_requests %d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:reduce_generic count %d, msg size %ld, segsize %ld, max_requests %d", original_count, (unsigned long)((ptrdiff_t)num_segments * (ptrdiff_t)segment_increment), (unsigned long)segment_increment, max_outstanding_reqs)); rank = ompi_comm_rank(comm); - /* non-leaf nodes - wait for children to send me data & forward up (if needed) */ if( tree->tree_nextsize > 0 ) { ptrdiff_t true_lower_bound, true_extent, real_segment_size; - ompi_datatype_get_true_extent( datatype, &true_lower_bound, + &true_extent ); - /* handle non-existent recv buffer (i.e. 
it's NULL) and + protect the recv buffer on non-root nodes */ accumbuf = (char*)recvbuf; if( (NULL == accumbuf) || (root != rank) ) { /* Allocate temporary accumulator buffer. */ - accumbuf_free = (char*)malloc(true_extent + + (ptrdiff_t)(original_count - 1) * extent); - if (accumbuf_free == NULL) { - line = __LINE__; ret = -1; goto error_hndl; + } accumbuf = accumbuf_free - lower_bound; - } + /* If this is a non-commutative operation we must copy sendbuf to the accumbuf, in order to simplify the loops */ if (!ompi_op_is_commute(op)) { - ompi_datatype_copy_content_same_ddt(datatype, original_count, + (char*)accumbuf, (char*)sendtmpbuf); } /* Allocate two buffers for incoming segments */ real_segment_size = true_extent + (ptrdiff_t)(count_by_segment - 1) * extent; inbuf_free[0] = (char*) malloc(real_segment_size); - if( inbuf_free[0] == NULL ) { - line = __LINE__; ret = -1; goto error_hndl; + } inbuf[0] = inbuf_free[0] - lower_bound; /* if there is chance to overlap communication - allocate second buffer */ if( (num_segments > 1) || (tree->tree_nextsize > 1) ) { inbuf_free[1] = (char*) malloc(real_segment_size); - if( inbuf_free[1] == NULL ) { + line = __LINE__; ret = -1; goto error_hndl; } inbuf[1] = inbuf_free[1] - lower_bound; - } + /* reset input buffer index and receive count */ inbi = 0; @@ -166,14 +146,14 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c if( segindex < num_segments ) { void* local_recvbuf = inbuf[inbi]; if( 0 == i ) { - /* for the first step (1st child per segment) and - * commutative operations we might be able to irecv - * directly into the accumulate buffer so that we can - * reduce(op) this with our sendbuf in one step as - * ompi_op_reduce only has two buffer pointers, + /* for the first step (1st child per segment) and + * commutative operations we might be able to irecv + * directly into the accumulate buffer so that we can + * reduce(op) this with our sendbuf in one step as + * ompi_op_reduce only has two buffer pointers, * this avoids an extra memory copy. * - * BUT if the operation is non-commutative or + * BUT if the operation is non-commutative or * we are root and are USING MPI_IN_PLACE this is wrong! */ if( (ompi_op_is_commute(op)) && @@ -183,34 +163,34 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c } ret = MCA_PML_CALL(irecv(local_recvbuf, recvcount, datatype, - tree->tree_next[i], - MCA_COLL_BASE_TAG_REDUCE, comm, + tree->tree_next[i], + MCA_COLL_BASE_TAG_REDUCE, comm, &reqs[inbi])); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl;} } /* wait for previous req to complete, if any. - if there are no requests reqs[inbi ^1] will be + if there are no requests reqs[inbi ^1] will be MPI_REQUEST_NULL. */
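The loop above overlaps communication with computation by flipping inbi between two receive buffers: while the segment in one buffer is folded into the accumulator, the next segment is already arriving in the other. Stripped down to a single child, the idiom looks like this (illustrative sketch only; the three helpers are stand-ins for the irecv/wait/ompi_op_reduce calls):

    #include <stdio.h>

    static void recv_segment(int buf, int seg)   { printf("irecv seg %d -> buf %d\n", seg, buf); }
    static void wait_segment(int buf)            { printf("wait buf %d\n", buf); }
    static void reduce_segment(int buf, int seg) { printf("reduce seg %d from buf %d\n", seg, buf); }

    int main(void)
    {
        const int num_segments = 4;
        int inbi = 0;                          /* buffer currently being filled */
        recv_segment(inbi, 0);                 /* prime the pipeline */
        for (int seg = 1; seg <= num_segments; seg++) {
            if (seg < num_segments)
                recv_segment(inbi ^ 1, seg);   /* post receive for the next segment */
            wait_segment(inbi);                /* finish the in-flight receive */
            reduce_segment(inbi, seg - 1);     /* fold it into the accumulator */
            inbi ^= 1;                         /* flip buffers */
        }
        return 0;
    }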
/* wait on data from last child for previous segment */ - ret = ompi_request_wait_all( 1, &reqs[inbi ^ 1], + MPI_STATUSES_IGNORE ); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } local_op_buffer = inbuf[inbi ^ 1]; if( i > 0 ) { - /* our first operation is to combine our own [sendbuf] data - * with the data we recvd from down stream (but only - * the operation is commutative and if we are not root and + /* our first operation is to combine our own [sendbuf] data + * with the data we received from downstream (but only + * if the operation is commutative and we are not root and * not using MPI_IN_PLACE) */ if( 1 == i ) { - if( (ompi_op_is_commute(op)) && + !((MPI_IN_PLACE == sendbuf) && (rank == tree->tree_root)) ) { local_op_buffer = sendtmpbuf + (ptrdiff_t)segindex * (ptrdiff_t)segment_increment; } } /* apply operation */ - ompi_op_reduce(op, local_op_buffer, - accumbuf + (ptrdiff_t)segindex * (ptrdiff_t)segment_increment, + ompi_op_reduce(op, local_op_buffer, + accumbuf + (ptrdiff_t)segindex * (ptrdiff_t)segment_increment, recvcount, datatype ); } else if ( segindex > 0 ) { void* accumulator = accumbuf + (ptrdiff_t)(segindex-1) * (ptrdiff_t)segment_increment; @@ -220,25 +200,25 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c local_op_buffer = sendtmpbuf + (ptrdiff_t)(segindex-1) * (ptrdiff_t)segment_increment; } } - ompi_op_reduce(op, local_op_buffer, accumulator, prevcount, + datatype ); - /* all reduced on available data this step (i) complete, + /* all reductions on the available data for this step (i) are complete, * pass to the next process unless you are the root. */ if (rank != tree->tree_root) { /* send combined/accumulated data to parent */ - ret = MCA_PML_CALL( send( accumulator, prevcount, - datatype, tree->tree_prev, + datatype, tree->tree_prev, MCA_COLL_BASE_TAG_REDUCE, - MCA_PML_BASE_SEND_STANDARD, + MCA_PML_BASE_SEND_STANDARD, comm) ); - if (ret != MPI_SUCCESS) { - line = __LINE__; goto error_hndl; + } } - /* we stop when segindex = number of segments + /* we stop when segindex = number of segments (i.e. we do num_segments+1 steps for pipelining) */ if (segindex == num_segments) break; } @@ -254,33 +234,33 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c if( accumbuf_free != NULL ) free(accumbuf_free); } - /* leaf nodes - Depending on the value of max_outstanding_reqs and + /* leaf nodes + Depending on the value of max_outstanding_reqs and the number of segments we have two options: - send all segments using blocking send to the parent, or - - avoid overflooding the parent nodes by limiting the number of + - avoid flooding the parent nodes by limiting the number of outstanding requests to max_outstanding_reqs. - TODO/POSSIBLE IMPROVEMENT: If there is a way to determine the eager size - for the current communication, synchronization should be used only + TODO/POSSIBLE IMPROVEMENT: If there is a way to determine the eager size + for the current communication, synchronization should be used only when the message/segment size is smaller than the eager size. */
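That throttled leaf-node path (the else branch just below) boils down to a sliding window over the send requests: prime max_outstanding_reqs synchronous isends, then reuse the oldest slot as each one completes. A self-contained sketch, for illustration only (post_isend/wait_slot stand in for the MCA_PML isend/wait calls):

    #include <stdio.h>

    static void post_isend(int slot, int seg) { printf("isend seg %d in slot %d\n", seg, slot); }
    static void wait_slot(int slot)           { printf("wait slot %d\n", slot); }

    int main(void)
    {
        const int max_reqs = 3, num_segments = 8;
        int creq = 0;                            /* index of the oldest slot */
        for (int seg = 0; seg < max_reqs && seg < num_segments; seg++)
            post_isend(seg, seg);                /* fill the window */
        for (int seg = max_reqs; seg < num_segments; seg++) {
            wait_slot(creq);                     /* oldest send must finish first */
            post_isend(creq, seg);               /* reuse its slot */
            creq = (creq + 1) % max_reqs;
        }
        for (int i = 0; i < max_reqs; i++)
            wait_slot(i);                        /* drain the remaining sends */
        return 0;
    }

The synchronous send mode matters here: completion of a synchronous isend implies the parent has started receiving that segment, so the window genuinely bounds how far a leaf can run ahead of its parent.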
else { /* If the number of segments is less than a maximum number of outstanding - requests or there is no limit on the maximum number of outstanding + requests, we send data to the parent using blocking send */ - if ((0 == max_outstanding_reqs) || + (num_segments <= max_outstanding_reqs)) { - + segindex = 0; while ( original_count > 0) { if (original_count < count_by_segment) { count_by_segment = original_count; } - ret = MCA_PML_CALL( send((char*)sendbuf + + (ptrdiff_t)segindex * (ptrdiff_t)segment_increment, count_by_segment, datatype, - tree->tree_prev, + MCA_COLL_BASE_TAG_REDUCE, MCA_PML_BASE_SEND_STANDARD, comm) ); @@ -310,7 +290,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c ret = MCA_PML_CALL( isend((char*)sendbuf + (ptrdiff_t)segindex * (ptrdiff_t)segment_increment, count_by_segment, datatype, - tree->tree_prev, + tree->tree_prev, MCA_COLL_BASE_TAG_REDUCE, MCA_PML_BASE_SEND_SYNCHRONOUS, comm, &sreq[segindex]) ); @@ -328,12 +308,12 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c if( original_count < count_by_segment ) { count_by_segment = original_count; } - ret = MCA_PML_CALL( isend((char*)sendbuf + - (ptrdiff_t)segindex * (ptrdiff_t)segment_increment, - count_by_segment, datatype, - tree->tree_prev, - MCA_COLL_BASE_TAG_REDUCE, - MCA_PML_BASE_SEND_SYNCHRONOUS, comm, + ret = MCA_PML_CALL( isend((char*)sendbuf + + (ptrdiff_t)segindex * (ptrdiff_t)segment_increment, + count_by_segment, datatype, + tree->tree_prev, + MCA_COLL_BASE_TAG_REDUCE, + MCA_PML_BASE_SEND_SYNCHRONOUS, comm, &sreq[creq]) ); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } creq = (creq + 1) % max_outstanding_reqs; @@ -342,7 +322,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c } /* Wait on the remaining requests to complete */ - ret = ompi_request_wait_all( max_outstanding_reqs, sreq, + MPI_STATUSES_IGNORE ); if (ret != MPI_SUCCESS) { line = __LINE__; goto error_hndl; } @@ -353,8 +333,8 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c return OMPI_SUCCESS; error_hndl: /* error handler */ - OPAL_OUTPUT (( ompi_coll_tuned_stream, - "ERROR_HNDL: node %d file %s line %d error %d\n", + OPAL_OUTPUT (( ompi_coll_base_framework.framework_output, + "ERROR_HNDL: node %d file %s line %d error %d\n", rank, __FILE__, line, ret )); if( inbuf_free[0] != NULL ) free(inbuf_free[0]); if( inbuf_free[1] != NULL ) free(inbuf_free[1]); @@ -369,9 +349,9 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c meaning that at least one datatype must fit in the segment! 
*/ -int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count, - ompi_datatype_t* datatype, - ompi_op_t* op, int root, +int ompi_coll_base_reduce_intra_chain( void *sendbuf, void *recvbuf, int count, + ompi_datatype_t* datatype, + ompi_op_t* op, int root, ompi_communicator_t* comm, mca_coll_base_module_t *module, uint32_t segsize, int fanout, @@ -379,27 +359,27 @@ int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count, { int segcount = count; size_t typelng; - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; + mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; + mca_coll_base_comm_t *data = base_module->base_data; - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_chain rank %d fo %d ss %5d", ompi_comm_rank(comm), fanout, segsize)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_chain rank %d fo %d ss %5d", ompi_comm_rank(comm), fanout, segsize)); - COLL_TUNED_UPDATE_CHAIN( comm, tuned_module, root, fanout ); + COLL_BASE_UPDATE_CHAIN( comm, base_module, root, fanout ); /** * Determine number of segments and number of elements * sent per operation */ ompi_datatype_type_size( datatype, &typelng ); - COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); + COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); - return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype, + return ompi_coll_base_reduce_generic( sendbuf, recvbuf, count, datatype, op, root, comm, module, - data->cached_chain, + data->cached_chain, segcount, max_outstanding_reqs ); } -int ompi_coll_tuned_reduce_intra_pipeline( void *sendbuf, void *recvbuf, +int ompi_coll_base_reduce_intra_pipeline( void *sendbuf, void *recvbuf, int count, ompi_datatype_t* datatype, ompi_op_t* op, int root, ompi_communicator_t* comm, @@ -409,101 +389,101 @@ int ompi_coll_tuned_reduce_intra_pipeline( void *sendbuf, void *recvbuf, { int segcount = count; size_t typelng; - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; + mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; + mca_coll_base_comm_t *data = base_module->base_data; - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_pipeline rank %d ss %5d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_pipeline rank %d ss %5d", ompi_comm_rank(comm), segsize)); - COLL_TUNED_UPDATE_PIPELINE( comm, tuned_module, root ); + COLL_BASE_UPDATE_PIPELINE( comm, base_module, root ); /** * Determine number of segments and number of elements * sent per operation */ ompi_datatype_type_size( datatype, &typelng ); - COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); + COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); - return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype, + return ompi_coll_base_reduce_generic( sendbuf, recvbuf, count, datatype, op, root, comm, module, - data->cached_pipeline, + data->cached_pipeline, segcount, max_outstanding_reqs ); } -int ompi_coll_tuned_reduce_intra_binary( void *sendbuf, void *recvbuf, +int ompi_coll_base_reduce_intra_binary( void *sendbuf, void *recvbuf, int count, ompi_datatype_t* datatype, ompi_op_t* op, int root, - ompi_communicator_t* comm, + ompi_communicator_t* comm, mca_coll_base_module_t *module, - uint32_t segsize, + uint32_t segsize, int max_outstanding_reqs ) { int 
segcount = count; size_t typelng; - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; + mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; + mca_coll_base_comm_t *data = base_module->base_data; - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_binary rank %d ss %5d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_binary rank %d ss %5d", ompi_comm_rank(comm), segsize)); - COLL_TUNED_UPDATE_BINTREE( comm, tuned_module, root ); + COLL_BASE_UPDATE_BINTREE( comm, base_module, root ); /** * Determine number of segments and number of elements * sent per operation */ ompi_datatype_type_size( datatype, &typelng ); - COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); + COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); - return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype, + return ompi_coll_base_reduce_generic( sendbuf, recvbuf, count, datatype, op, root, comm, module, - data->cached_bintree, + data->cached_bintree, segcount, max_outstanding_reqs ); } -int ompi_coll_tuned_reduce_intra_binomial( void *sendbuf, void *recvbuf, +int ompi_coll_base_reduce_intra_binomial( void *sendbuf, void *recvbuf, int count, ompi_datatype_t* datatype, ompi_op_t* op, int root, - ompi_communicator_t* comm, + ompi_communicator_t* comm, mca_coll_base_module_t *module, uint32_t segsize, int max_outstanding_reqs ) { int segcount = count; size_t typelng; - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; + mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; + mca_coll_base_comm_t *data = base_module->base_data; - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_binomial rank %d ss %5d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_binomial rank %d ss %5d", ompi_comm_rank(comm), segsize)); - COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root ); + COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, base_module, root ); /** * Determine number of segments and number of elements * sent per operation */ ompi_datatype_type_size( datatype, &typelng ); - COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); + COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); - return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype, + return ompi_coll_base_reduce_generic( sendbuf, recvbuf, count, datatype, op, root, comm, module, - data->cached_in_order_bmtree, + data->cached_in_order_bmtree, segcount, max_outstanding_reqs ); } /* - * reduce_intra_in_order_binary - * + * reduce_intra_in_order_binary + * * Function: Logarithmic reduce operation for non-commutative operations. 
* Accepts: same as MPI_Reduce() * Returns: MPI_SUCCESS or error code */ -int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf, - int count, +int ompi_coll_base_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf, + int count, ompi_datatype_t* datatype, ompi_op_t* op, int root, - ompi_communicator_t* comm, + ompi_communicator_t* comm, mca_coll_base_module_t *module, uint32_t segsize, int max_outstanding_reqs ) @@ -511,28 +491,28 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf, int ret, rank, size, io_root, segcount = count; void *use_this_sendbuf = NULL, *use_this_recvbuf = NULL; size_t typelng; - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; + mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; + mca_coll_base_comm_t *data = base_module->base_data; rank = ompi_comm_rank(comm); size = ompi_comm_size(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_in_order_binary rank %d ss %5d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_in_order_binary rank %d ss %5d", rank, segsize)); - COLL_TUNED_UPDATE_IN_ORDER_BINTREE( comm, tuned_module ); + COLL_BASE_UPDATE_IN_ORDER_BINTREE( comm, base_module ); /** * Determine number of segments and number of elements * sent per operation */ ompi_datatype_type_size( datatype, &typelng ); - COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); + COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount ); /* An in-order binary tree must use root (size-1) to preserve the order of operations. Thus, if root is not rank (size - 1), then we must handle - 1. MPI_IN_PLACE option on real root, and + 1. MPI_IN_PLACE option on real root, and 2. we must allocate temporary recvbuf on rank (size - 1). - Note that generic function must be careful not to switch order of + Note that the generic function must be careful not to switch order of operations for non-commutative ops. 
*/ io_root = size - 1; @@ -541,7 +521,7 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf, if (io_root != root) { ptrdiff_t tlb, text, lb, ext; char *tmpbuf = NULL; - + ompi_datatype_get_extent(datatype, &lb, &ext); ompi_datatype_get_true_extent(datatype, &tlb, &text); @@ -550,7 +530,7 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf, if (NULL == tmpbuf) { return MPI_ERR_INTERN; } - ompi_datatype_copy_content_same_ddt(datatype, count, + ompi_datatype_copy_content_same_ddt(datatype, count, (char*)tmpbuf, (char*)recvbuf); use_this_sendbuf = tmpbuf; @@ -564,9 +544,9 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf, } /* Use generic reduce with in-order binary tree topology and io_root */ - ret = ompi_coll_tuned_reduce_generic( use_this_sendbuf, use_this_recvbuf, count, datatype, - op, io_root, comm, module, - data->cached_in_order_bintree, + ret = ompi_coll_base_reduce_generic( use_this_sendbuf, use_this_recvbuf, count, datatype, + op, io_root, comm, module, + data->cached_in_order_bintree, segcount, max_outstanding_reqs ); if (MPI_SUCCESS != ret) { return ret; } @@ -581,11 +561,11 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf, if (MPI_IN_PLACE == sendbuf) { free(use_this_sendbuf); } - + } else if (io_root == rank) { /* Send result from use_this_recvbuf to root */ ret = MCA_PML_CALL(send(use_this_recvbuf, count, datatype, root, - MCA_COLL_BASE_TAG_REDUCE, + MCA_COLL_BASE_TAG_REDUCE, MCA_PML_BASE_SEND_STANDARD, comm)); if (MPI_SUCCESS != ret) { return ret; } free(use_this_recvbuf); @@ -598,8 +578,8 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf, /* * Linear functions are copied from the BASIC coll module * they do not segment the message and are simple implementations - * but for some small number of nodes and/or small data sizes they - * are just as fast as tuned/tree based segmenting operations + * but for some small number of nodes and/or small data sizes they + * are just as fast as base/tree based segmenting operations * and as such may be selected by the decision functions * These are copied into this module due to the way we select modules * in V1. i.e. in V2 we will handle this differently and so will not @@ -617,12 +597,12 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf, * Returns: - MPI_SUCCESS or error code */ int -ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) +ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, + struct ompi_datatype_t *dtype, + struct ompi_op_t *op, + int root, + struct ompi_communicator_t *comm, + mca_coll_base_module_t *module) { int i, rank, err, size; ptrdiff_t true_lb, true_extent, lb, extent; @@ -634,7 +614,7 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, rank = ompi_comm_rank(comm); size = ompi_comm_size(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_basic_linear rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_basic_linear rank %d", rank)); /* If not root, send data to the root. 
*/ @@ -645,7 +625,7 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, return err; } - /* see discussion in ompi_coll_basic_reduce_lin_intra about + /* see discussion in ompi_coll_basic_reduce_lin_intra about extent and true extent */ /* for reducing buffer allocation lengths.... */ @@ -673,7 +653,7 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, /* Initialize the receive buffer. */ if (rank == (size - 1)) { - err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)rbuf, + err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)rbuf, (char*)sbuf); } else { err = MCA_PML_CALL(recv(rbuf, count, dtype, size - 1, @@ -705,7 +685,7 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, } if (NULL != inplace_temp) { - err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf, + err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf, inplace_temp); } else { err = MPI_SUCCESS; @@ -724,185 +704,3 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count, } /* copied function (with appropriate renaming) ends here */ - - -/** - * The following are used by dynamic and forced rules - * - * publish details of each algorithm and if its forced/fixed/locked in - * as you add methods/algorithms you must update this and the query/map routines - * - * this routine is called by the component only - * this makes sure that the mca parameters are set to their initial values and - * perms module does not call this they call the forced_getvalues routine - * instead. - */ - -int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices) -{ - mca_base_var_enum_t*new_enum; - - ompi_coll_tuned_forced_max_algorithms[REDUCE] = coll_tuned_reduce_algorithm_count; - - (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "reduce_algorithm_count", - "Number of reduce algorithms available", - MCA_BASE_VAR_TYPE_INT, NULL, 0, - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_CONSTANT, - &coll_tuned_reduce_algorithm_count); - - /* MPI_T: This variable should eventually be bound to a communicator */ - coll_tuned_reduce_forced_algorithm = 0; - (void) mca_base_var_enum_create("coll_tuned_reduce_algorithms", reduce_algorithms, &new_enum); - mca_param_indices->algorithm_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "reduce_algorithm", - "Which reduce algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 chain, 3 pipeline, 4 binary, 5 binomial, 6 in-order binary", - MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_reduce_forced_algorithm); - OBJ_RELEASE(new_enum); - if (mca_param_indices->algorithm_param_index < 0) { - return mca_param_indices->algorithm_param_index; - } - - coll_tuned_reduce_segment_size = 0; - mca_param_indices->segsize_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "reduce_algorithm_segmentsize", - "Segment size in bytes used by default for reduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 
0 bytes means no segmentation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_reduce_segment_size); - - coll_tuned_reduce_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */ - mca_param_indices->tree_fanout_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "reduce_algorithm_tree_fanout", - "Fanout for n-tree used for reduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_reduce_tree_fanout); - - coll_tuned_reduce_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */ - mca_param_indices->chain_fanout_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "reduce_algorithm_chain_fanout", - "Fanout for chains used for reduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_reduce_chain_fanout); - - coll_tuned_reduce_max_requests = 0; /* no limit for reduce by default */ - mca_param_indices->max_requests_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "reduce_algorithm_max_requests", - "Maximum number of outstanding send requests on leaf nodes. 0 means no limit.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_reduce_max_requests); - if (mca_param_indices->max_requests_param_index < 0) { - return mca_param_indices->max_requests_param_index; - } - - if (coll_tuned_reduce_max_requests < 0) { - if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) { - opal_output( 0, "Maximum outstanding requests must be positive number or 0. 
Initializing to 0 (no limit).\n" ); - } - coll_tuned_reduce_max_requests = 0; - } - - return (MPI_SUCCESS); -} - - -int ompi_coll_tuned_reduce_intra_do_forced(void *sbuf, void* rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; - - const int segsize = data->user_forced[REDUCE].segsize; - const int chain_fanout = data->user_forced[REDUCE].chain_fanout; - const int max_requests = data->user_forced[REDUCE].max_requests; - - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced selected algorithm %d", - data->user_forced[REDUCE].algorithm)); - - - switch (data->user_forced[REDUCE].algorithm) { - case (0): return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype, - op, root, comm, module); - case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype, - op, root, comm, module); - case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, chain_fanout, max_requests); - case (3): return ompi_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); - case (4): return ompi_coll_tuned_reduce_intra_binary (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); - case (5): return ompi_coll_tuned_reduce_intra_binomial (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); - case (6): return ompi_coll_tuned_reduce_intra_in_order_binary(sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", - data->user_forced[REDUCE].algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE])); - return (MPI_ERR_ARG); - } /* switch */ -} - - -int ompi_coll_tuned_reduce_intra_do_this(void *sbuf, void* rbuf, int count, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module, - int algorithm, int faninout, - int segsize, int max_requests ) -{ - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this selected algorithm %d topo faninout %d segsize %d", - algorithm, faninout, segsize)); - - switch (algorithm) { - case (0): return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype, - op, root, comm, module); - case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype, - op, root, comm, module); - case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, faninout, max_requests); - case (3): return ompi_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); - case (4): return ompi_coll_tuned_reduce_intra_binary (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); - case (5): return ompi_coll_tuned_reduce_intra_binomial (sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); - case (6): return ompi_coll_tuned_reduce_intra_in_order_binary(sbuf, rbuf, count, dtype, - op, root, comm, module, - segsize, max_requests); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this attempt to select algorithm %d when 
only 0-%d is valid?", - algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE])); - return (MPI_ERR_ARG); - } /* switch */ -} - 
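For orientation before the next file: the non-overlapping reduce_scatter it opens with is semantically just a reduce to rank 0 followed by a scatterv of the result. A plain-MPI sketch of that contract (illustrative only; hypothetical helper name, fixed to MPI_DOUBLE, no error handling):

    #include <mpi.h>
    #include <stdlib.h>

    /* reduce_scatter expressed as reduce-to-root plus scatterv */
    static int reduce_scatter_nonoverlap(const double *sbuf, double *rbuf,
                                         const int *rcounts, MPI_Op op, MPI_Comm comm)
    {
        int rank, size, total = 0;
        MPI_Comm_rank(comm, &rank);
        MPI_Comm_size(comm, &size);

        int *displs = malloc(size * sizeof(int));
        for (int i = 0; i < size; i++) { displs[i] = total; total += rcounts[i]; }

        double *tmp = (0 == rank) ? malloc(total * sizeof(double)) : NULL;
        MPI_Reduce(sbuf, tmp, total, MPI_DOUBLE, op, 0, comm);   /* reduce to root */
        MPI_Scatterv(tmp, rcounts, displs, MPI_DOUBLE,           /* scatter blocks */
                     rbuf, rcounts[rank], MPI_DOUBLE, 0, comm);

        free(displs); free(tmp);
        return MPI_SUCCESS;
    }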
diff --git a/ompi/mca/coll/base/coll_base_reduce_scatter.c b/ompi/mca/coll/base/coll_base_reduce_scatter.c index 3fc85daa87..0c23206c0e 100644 --- a/ompi/mca/coll/base/coll_base_reduce_scatter.c +++ b/ompi/mca/coll/base/coll_base_reduce_scatter.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -32,37 +32,21 @@ #include "ompi/mca/coll/base/coll_tags.h" #include "ompi/mca/pml/pml.h" #include "ompi/op/op.h" -#include "coll_tuned.h" -#include "coll_tuned_topo.h" - -/* reduce_scatter algorithm variables */ -static int coll_tuned_reduce_scatter_algorithm_count = 2; -static int coll_tuned_reduce_scatter_forced_algorithm = 0; -static int coll_tuned_reduce_scatter_segment_size = 0; -static int coll_tuned_reduce_scatter_tree_fanout; -static int coll_tuned_reduce_scatter_chain_fanout; - -/* valid values for coll_tuned_reduce_scatter_forced_algorithm */ -static mca_base_var_enum_value_t reduce_scatter_algorithms[] = { - {0, "ignore"}, - {1, "non-overlapping"}, - {2, "recursive_halfing"}, - {3, "ring"}, - {0, NULL} -}; +#include "ompi/mca/coll/base/coll_base_functions.h" +#include "coll_base_topo.h" /******************************************************************************* - * ompi_coll_tuned_reduce_scatter_intra_nonoverlapping + * ompi_coll_base_reduce_scatter_intra_nonoverlapping * - * This function just calls a reduce to rank 0, followed by an + * This function just calls a reduce to rank 0, followed by an * appropriate scatterv call. */ -int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf, +int ompi_coll_base_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf, int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) + mca_coll_base_module_t *module) { int err, i, rank, size, total_count, *displs = NULL; const int root = 0; @@ -71,7 +55,7 @@ int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf, rank = ompi_comm_rank(comm); size = ompi_comm_size(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_nonoverlapping, rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_scatter_intra_nonoverlapping, rank %d", rank)); for (i = 0, total_count = 0; i < size; i++) { total_count += rcounts[i]; } @@ -80,7 +64,7 @@ int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf, if (MPI_IN_PLACE == sbuf) { /* rbuf on root (0) is big enough to hold whole data */ if (root == rank) { - err = comm->c_coll.coll_reduce (MPI_IN_PLACE, tmprbuf, total_count, + err = comm->c_coll.coll_reduce (MPI_IN_PLACE, tmprbuf, total_count, dtype, op, root, comm, comm->c_coll.coll_reduce_module); } else { err = comm->c_coll.coll_reduce(tmprbuf, NULL, total_count, @@ -91,13 +75,13 @@ int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf, /* We must allocate temporary receive buffer on root to ensure that rbuf is big enough */ ptrdiff_t lb, extent, tlb, textent; - + ompi_datatype_get_extent(dtype, &lb, &extent); ompi_datatype_get_true_extent(dtype, &tlb, &textent); tmprbuf_free = (char*) malloc(textent + (ptrdiff_t)(total_count - 1) * extent); tmprbuf = tmprbuf_free - lb; - } + } err = comm->c_coll.coll_reduce (sbuf, tmprbuf, total_count, dtype, op, root, comm, comm->c_coll.coll_reduce_module); } @@ -105,7 +89,7 @@ int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf, if (NULL != tmprbuf_free) free(tmprbuf_free); return err; } - + displs = (int*) malloc(size * sizeof(int)); displs[0] = 0; for (i = 1; i < size; i++) { @@ -122,7 +106,7 @@ int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf, /* * Recursive-halving function is (*mostly*) copied from the BASIC coll module. - * I have removed the part which handles "large" message sizes + * I have removed the part which handles "large" message sizes * (non-overlapping version of reduce_scatter). */ @@ -131,15 +115,15 @@ int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf, /* * reduce_scatter_intra_basic_recursivehalving * - * Function: - reduce scatter implementation using recursive-halving + * Function: - reduce scatter implementation using recursive-halving * algorithm * Accepts: - same as MPI_Reduce_scatter() * Returns: - MPI_SUCCESS or error code * Limitation: - Works only for commutative operations. 
*/ int -ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf, - void *rbuf, +ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(void *sbuf, + void *rbuf, int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, @@ -151,12 +135,12 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf, ptrdiff_t true_lb, true_extent, lb, extent, buf_size; char *recv_buf = NULL, *recv_buf_free = NULL; char *result_buf = NULL, *result_buf_free = NULL; - + /* Initialize */ rank = ompi_comm_rank(comm); size = ompi_comm_size(comm); - - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_basic_recursivehalving, rank %d", rank)); + + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_scatter_intra_basic_recursivehalving, rank %d", rank)); /* Find displacements and the like */ disps = (int*) malloc(sizeof(int) * size); @@ -191,43 +175,43 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf, err = OMPI_ERR_OUT_OF_RESOURCE; goto cleanup; } - + /* allocate temporary buffer for results */ result_buf_free = (char*) malloc(buf_size); result_buf = result_buf_free - true_lb; - + /* copy local buffer into the temporary results */ err = ompi_datatype_sndrcv(sbuf, count, dtype, result_buf, count, dtype); if (OMPI_SUCCESS != err) goto cleanup; - + /* figure out power of two mapping: grow until larger than comm size, then go back one, to get the largest power of two less than comm size */ - tmp_size = opal_next_poweroftwo (size); + tmp_size = opal_next_poweroftwo (size); tmp_size >>= 1; remain = size - tmp_size; - + /* If comm size is not a power of two, have the first "remain" procs with an even rank send to rank + 1, leaving a power of two procs to do the rest of the algorithm */ if (rank < 2 * remain) { if ((rank & 1) == 0) { - err = MCA_PML_CALL(send(result_buf, count, dtype, rank + 1, + err = MCA_PML_CALL(send(result_buf, count, dtype, rank + 1, MCA_COLL_BASE_TAG_REDUCE_SCATTER, MCA_PML_BASE_SEND_STANDARD, comm)); if (OMPI_SUCCESS != err) goto cleanup; - + /* we don't participate from here on out */ tmp_rank = -1; } else { err = MCA_PML_CALL(recv(recv_buf, count, dtype, rank - 1, MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm, MPI_STATUS_IGNORE)); - + /* integrate their results into our temp results */ ompi_op_reduce(op, recv_buf, result_buf, count, dtype); - + /* adjust rank to be the bottom "remain" ranks */ tmp_rank = rank / 2; } @@ -236,13 +220,13 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf, remain" ranks dropped out */ tmp_rank = rank - remain; } - + /* For ranks not kicked out by the above code, perform the recursive halving */ if (tmp_rank >= 0) { int *tmp_disps = NULL, *tmp_rcounts = NULL; int mask, send_index, recv_index, last_index; - + /* recalculate disps and rcounts to account for the special "remainder" processes that are no longer doing anything */ @@ -317,11 +301,11 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf, free(tmp_rcounts); free(tmp_disps); goto cleanup; - } + } } if (send_count > 0) { err = MCA_PML_CALL(send(result_buf + (ptrdiff_t)tmp_disps[send_index] * extent, - send_count, dtype, peer, + send_count, dtype, peer, MCA_COLL_BASE_TAG_REDUCE_SCATTER, MCA_PML_BASE_SEND_STANDARD, comm)); @@ -329,7 +313,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf, free(tmp_rcounts); free(tmp_disps); goto cleanup; - } + } } /* if we received something on this step, push it into @@ -340,10 +324,10 @@ 
ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf, free(tmp_rcounts); free(tmp_disps); goto cleanup; - } + } - ompi_op_reduce(op, - recv_buf + (ptrdiff_t)tmp_disps[recv_index] * extent, + ompi_op_reduce(op, + recv_buf + (ptrdiff_t)tmp_disps[recv_index] * extent, result_buf + (ptrdiff_t)tmp_disps[recv_index] * extent, recv_count, dtype); } @@ -357,13 +341,13 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf, /* copy local results from results buffer into real receive buffer */ if (0 != rcounts[rank]) { err = ompi_datatype_sndrcv(result_buf + disps[rank] * extent, - rcounts[rank], dtype, + rcounts[rank], dtype, rbuf, rcounts[rank], dtype); if (OMPI_SUCCESS != err) { free(tmp_rcounts); free(tmp_disps); goto cleanup; - } + } } free(tmp_rcounts); @@ -389,7 +373,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf, comm)); if (OMPI_SUCCESS != err) goto cleanup; } - } + } } cleanup: @@ -404,18 +388,18 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf, /* - * ompi_coll_tuned_reduce_scatter_intra_ring + * ompi_coll_base_reduce_scatter_intra_ring * * Function: Ring algorithm for reduce_scatter operation * Accepts: Same as MPI_Reduce_scatter() * Returns: MPI_SUCCESS or error code * - * Description: Implements ring algorithm for reduce_scatter: - * the block sizes defined in rcounts are exchanged and + * Description: Implements ring algorithm for reduce_scatter: + * the block sizes defined in rcounts are exchanged and 8 updated until they reach proper destination. * Algorithm requires 2 * max(rcounts) extra buffering * - * Limitations: The algorithm DOES NOT preserve order of operations so it + * Limitations: The algorithm DOES NOT preserve order of operations so it * can be used only for commutative operations. * Example on 5 nodes: * Initial state @@ -427,7 +411,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf, * [04] -> [14] [24] [34] [44] * * COMPUTATION PHASE - * Step 0: rank r sends block (r-1) to rank (r+1) and + * Step 0: rank r sends block (r-1) to rank (r+1) and * receives block (r+1) from rank (r-1) [with wraparound]. * # 0 1 2 3 4 * [00] [10] [10+20] -> [30] [40] @@ -435,12 +419,12 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf, * -> [02] [12] [22] [32] [32+42] -->.. 
* [43+03] -> [13] [23] [33] [43] * [04] [04+14] -> [24] [34] [44] - * + * * Step 1: * # 0 1 2 3 4 * [00] [10] [10+20] [10+20+30] -> [40] * -> [01] [11] [21] [21+31] [21+31+41] -> - * [32+42+02] -> [12] [22] [32] [32+42] + * [32+42+02] -> [12] [22] [32] [32+42] * [03] [43+03+13] -> [23] [33] [43] * [04] [04+14] [04+14+24] -> [34] [44] * @@ -448,7 +432,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf, * # 0 1 2 3 4 * -> [00] [10] [10+20] [10+20+30] [10+20+30+40] -> * [21+31+41+01]-> [11] [21] [21+31] [21+31+41] - * [32+42+02] [32+42+02+12]-> [22] [32] [32+42] + * [32+42+02] [32+42+02+12]-> [22] [32] [32+42] * [03] [43+03+13] [43+03+13+23]-> [33] [43] * [04] [04+14] [04+14+24] [04+14+24+34] -> [44] * @@ -456,14 +440,14 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf, * # 0 1 2 3 4 * [10+20+30+40+00] [10] [10+20] [10+20+30] [10+20+30+40] * [21+31+41+01] [21+31+41+01+11] [21] [21+31] [21+31+41] - * [32+42+02] [32+42+02+12] [32+42+02+12+22] [32] [32+42] + * [32+42+02] [32+42+02+12] [32+42+02+12+22] [32] [32+42] * [03] [43+03+13] [43+03+13+23] [43+03+13+23+33] [43] * [04] [04+14] [04+14+24] [04+14+24+34] [04+14+24+34+44] * DONE :) * */ -int -ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts, +int +ompi_coll_base_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, @@ -480,11 +464,11 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts, size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:reduce_scatter_intra_ring rank %d, size %d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:reduce_scatter_intra_ring rank %d, size %d", rank, size)); - /* Determine the maximum number of elements per node, + /* Determine the maximum number of elements per node, corresponding block size, and displacements array. 
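The diagrams above compress two pieces of index arithmetic that the code below relies on: the power-of-two fold-in used by the recursive-halving implementation, and the ring's fixed neighbors with a rotating block index. A standalone sketch (plain C, illustration only; the real code computes the first part via opal_next_poweroftwo()):

    #include <stdio.h>

    int main(void)
    {
        const int size = 5;

        /* Largest power of two <= size, then fold the excess ranks in:
         * ranks below 2*remain pair up so a power-of-two group remains. */
        int tmp_size = 1;
        while (tmp_size <= size) tmp_size <<= 1;
        tmp_size >>= 1;                        /* 4 for size == 5 */
        int remain = size - tmp_size;          /* 1 even/odd pair folds in */
        printf("pof2 group %d, %d pair(s) fold in\n", tmp_size, remain);

        /* Ring: send right, receive left; at step k rank r combines the
         * incoming data into block (r - k + size) % size, exactly as in
         * the computation loop that follows. */
        for (int rank = 0; rank < size; rank++)
            printf("rank %d: send_to %d, recv_from %d, step-2 block %d\n",
                   rank, (rank + 1) % size, (rank - 1 + size) % size,
                   (rank + size - 2) % size);
        return 0;
    }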
*/ displs = (int*) malloc(size * sizeof(int)); @@ -492,16 +476,16 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts, displs[0] = 0; total_count = rcounts[0]; max_block_count = rcounts[0]; - for (i = 1; i < size; i++) { + for (i = 1; i < size; i++) { displs[i] = total_count; total_count += rcounts[i]; if (max_block_count < rcounts[i]) max_block_count = rcounts[i]; } - + /* Special case for size == 1 */ if (1 == size) { if (MPI_IN_PLACE != sbuf) { - ret = ompi_datatype_copy_content_same_ddt(dtype, total_count, + ret = ompi_datatype_copy_content_same_ddt(dtype, total_count, (char*)rbuf, (char*)sbuf); if (ret < 0) { line = __LINE__; goto error_hndl; } } @@ -541,13 +525,13 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts, sbuf = rbuf; } - ret = ompi_datatype_copy_content_same_ddt(dtype, total_count, + ret = ompi_datatype_copy_content_same_ddt(dtype, total_count, accumbuf, (char*)sbuf); if (ret < 0) { line = __LINE__; goto error_hndl; } /* Computation loop */ - /* + /* For each of the remote nodes: - post irecv for block (r-2) from (r-1) with wrap around - send block (r-1) to (r+1) @@ -568,7 +552,7 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts, inbi = 0; /* Initialize first receive from the neighbor on the left */ ret = MCA_PML_CALL(irecv(inbuf[inbi], max_block_count, dtype, recv_from, - MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm, + MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm, &reqs[inbi])); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } tmpsend = accumbuf + (ptrdiff_t)displs[recv_from] * extent; @@ -579,25 +563,25 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts, for (k = 2; k < size; k++) { const int prevblock = (rank + size - k) % size; - + inbi = inbi ^ 0x1; /* Post irecv for the current block */ ret = MCA_PML_CALL(irecv(inbuf[inbi], max_block_count, dtype, recv_from, - MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm, + MCA_COLL_BASE_TAG_REDUCE_SCATTER, comm, &reqs[inbi])); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - + /* Wait on previous block to arrive */ ret = ompi_request_wait(&reqs[inbi ^ 0x1], MPI_STATUS_IGNORE); if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; } - + /* Apply operation on previous block: result goes to rbuf rbuf[prevblock] = inbuf[inbi ^ 0x1] (op) rbuf[prevblock] */ tmprecv = accumbuf + (ptrdiff_t)displs[prevblock] * extent; ompi_op_reduce(op, inbuf[inbi ^ 0x1], tmprecv, rcounts[prevblock], dtype); - + /* send previous block to send_to */ ret = MCA_PML_CALL(send(tmprecv, rcounts[prevblock], dtype, send_to, MCA_COLL_BASE_TAG_REDUCE_SCATTER, @@ -613,7 +597,7 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts, rbuf[rank] = inbuf[inbi] (op) rbuf[rank] */ tmprecv = accumbuf + (ptrdiff_t)displs[rank] * extent; ompi_op_reduce(op, inbuf[inbi], tmprecv, rcounts[rank], dtype); - + /* Copy result from tmprecv to rbuf */ ret = ompi_datatype_copy_content_same_ddt(dtype, rcounts[rank], (char *)rbuf, tmprecv); if (ret < 0) { line = __LINE__; goto error_hndl; } @@ -626,7 +610,7 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts, return MPI_SUCCESS; error_hndl: - OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tRank %d Error occurred %d\n", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n", __FILE__, line, rank, ret)); if (NULL != displs) free(displs); if (NULL != accumbuf_free) free(accumbuf_free); @@ -634,139 +618,3 @@ 
ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts, if (NULL != inbuf_free[1]) free(inbuf_free[1]); return ret; } - - -/** - * The following are used by dynamic and forced rules - * - * publish details of each algorithm and if its forced/fixed/locked in - * as you add methods/algorithms you must update this and the query/map routines - * - * this routine is called by the component only - * this makes sure that the mca parameters are set to their initial values and - * perms module does not call this they call the forced_getvalues routine - * instead - */ - -int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices) -{ - mca_base_var_enum_t *new_enum; - - ompi_coll_tuned_forced_max_algorithms[REDUCESCATTER] = coll_tuned_reduce_scatter_algorithm_count; - - (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "reduce_scatter_algorithm_count", - "Number of reduce_scatter algorithms available", - MCA_BASE_VAR_TYPE_INT, NULL, 0, - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_CONSTANT, - &coll_tuned_reduce_scatter_algorithm_count); - - /* MPI_T: This variable should eventually be bound to a communicator */ - coll_tuned_reduce_scatter_forced_algorithm = 0; - (void) mca_base_var_enum_create("coll_tuned_reduce_scatter_algorithms", reduce_scatter_algorithms, &new_enum); - mca_param_indices->algorithm_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "reduce_scatter_algorithm", - "Which reduce reduce_scatter algorithm is used. Can be locked down to choice of: 0 ignore, 1 non-overlapping (Reduce + Scatterv), 2 recursive halving, 3 ring", - MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_reduce_scatter_forced_algorithm); - OBJ_RELEASE(new_enum); - if (mca_param_indices->algorithm_param_index < 0) { - return mca_param_indices->algorithm_param_index; - } - - coll_tuned_reduce_scatter_segment_size = 0; - mca_param_indices->segsize_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "reduce_scatter_algorithm_segmentsize", - "Segment size in bytes used by default for reduce_scatter algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_reduce_scatter_segment_size); - - coll_tuned_reduce_scatter_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */ - mca_param_indices->tree_fanout_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "reduce_scatter_algorithm_tree_fanout", - "Fanout for n-tree used for reduce_scatter algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_reduce_scatter_tree_fanout); - - coll_tuned_reduce_scatter_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */ - mca_param_indices->chain_fanout_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "reduce_scatter_algorithm_chain_fanout", - "Fanout for chains used for reduce_scatter algorithms. 
Only has meaning if algorithm is forced and supports chain topo based operation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_reduce_scatter_chain_fanout); - - return (MPI_SUCCESS); -} - - -int ompi_coll_tuned_reduce_scatter_intra_do_forced(void *sbuf, void* rbuf, - int *rcounts, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; - - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_forced selected algorithm %d", - data->user_forced[REDUCESCATTER].algorithm)); - - switch (data->user_forced[REDUCESCATTER].algorithm) { - case (0): return ompi_coll_tuned_reduce_scatter_intra_dec_fixed (sbuf, rbuf, rcounts, - dtype, op, comm, module); - case (1): return ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(sbuf, rbuf, rcounts, - dtype, op, comm, module); - case (2): return ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts, - dtype, op, comm, module); - case (3): return ompi_coll_tuned_reduce_scatter_intra_ring (sbuf, rbuf, rcounts, - dtype, op, comm, module); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", - data->user_forced[REDUCESCATTER].algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCESCATTER])); - return (MPI_ERR_ARG); - } /* switch */ -} - - -int ompi_coll_tuned_reduce_scatter_intra_do_this(void *sbuf, void* rbuf, - int *rcounts, - struct ompi_datatype_t *dtype, - struct ompi_op_t *op, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module, - int algorithm, int faninout, int segsize) -{ - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_this selected algorithm %d topo faninout %d segsize %d", - algorithm, faninout, segsize)); - - switch (algorithm) { - case (0): return ompi_coll_tuned_reduce_scatter_intra_dec_fixed (sbuf, rbuf, rcounts, - dtype, op, comm, module); - case (1): return ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(sbuf, rbuf, rcounts, - dtype, op, comm, module); - case (2): return ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts, - dtype, op, comm, module); - case (3): return ompi_coll_tuned_reduce_scatter_intra_ring (sbuf, rbuf, rcounts, - dtype, op, comm, module); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", - algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCESCATTER])); - return (MPI_ERR_ARG); - } /* switch */ -} - diff --git a/ompi/mca/coll/base/coll_base_scatter.c b/ompi/mca/coll/base/coll_base_scatter.c index b9381e18b7..e832f4064e 100644 --- a/ompi/mca/coll/base/coll_base_scatter.c +++ b/ompi/mca/coll/base/coll_base_scatter.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
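A practical note on the forced-selection code removed above before the patch moves into the scatter file: the switch dispatchers and MCA registrations are tuned-component policy rather than base algorithms, and the registered variable names let a user pin one of these implementations at run time, for example

    mpirun --mca coll_tuned_use_dynamic_rules 1 \
           --mca coll_tuned_reduce_scatter_algorithm 3 ./app

to request the ring variant (choice 3 in the enum registered above, assuming the usual tuned-component convention that dynamic rules must be enabled before a forced algorithm takes effect).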
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -28,27 +28,12 @@ #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/coll_tags.h" #include "ompi/mca/pml/pml.h" -#include "coll_tuned.h" -#include "coll_tuned_topo.h" -#include "coll_tuned_util.h" - -/* scatter algorithm variables */ -static int coll_tuned_scatter_algorithm_count = 2; -static int coll_tuned_scatter_forced_algorithm = 0; -static int coll_tuned_scatter_segment_size = 0; -static int coll_tuned_scatter_tree_fanout; -static int coll_tuned_scatter_chain_fanout; - -/* valid values for coll_tuned_scatter_forced_algorithm */ -static mca_base_var_enum_value_t scatter_algorithms[] = { - {0, "ignore"}, - {1, "basic_linear"}, - {2, "binomial"}, - {0, NULL} -}; +#include "ompi/mca/coll/base/coll_base_functions.h" +#include "coll_base_topo.h" +#include "coll_base_util.h" int -ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount, +ompi_coll_base_scatter_intra_binomial(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -60,19 +45,19 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount, char *ptmp, *tempbuf = NULL; ompi_coll_tree_t* bmtree; MPI_Status status; - MPI_Aint sextent, slb, strue_lb, strue_extent; + MPI_Aint sextent, slb, strue_lb, strue_extent; MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent; - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; + mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module; + mca_coll_base_comm_t *data = base_module->base_data; size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); - OPAL_OUTPUT((ompi_coll_tuned_stream, - "ompi_coll_tuned_scatter_intra_binomial rank %d", rank)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "ompi_coll_base_scatter_intra_binomial rank %d", rank)); /* create the binomial tree */ - COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root ); + COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, base_module, root ); bmtree = data->cached_in_order_bmtree; ompi_datatype_get_extent(sdtype, &slb, &sextent); @@ -167,7 +152,7 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount, total_send += mycount; } - if (NULL != tempbuf) + if (NULL != tempbuf) free(tempbuf); } else { /* recv from parent on leaf nodes */ @@ -182,7 +167,7 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount, if (NULL != tempbuf) free(tempbuf); - OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err, rank)); return err; } @@ -190,13 +175,13 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount, /* * Linear functions are copied from the BASIC coll module * they do not segment the message and are simple implementations - * but for some small number of nodes and/or small data sizes they - * are just as fast as tuned/tree based segmenting operations + * but for some small number of nodes and/or small data sizes they + * are just as fast as base/tree based segmenting operations * and as such may be selected by the decision functions * These are copied into this module due to the way we select modules * in V1. i.e. in V2 we will handle this differently and so will not * have to duplicate code. - * JPG following the examples from other coll_tuned implementations. Dec06. 
+ * JPG following the examples from other coll_base implementations. Dec06. */ /* copied function (with appropriate renaming) starts here */ @@ -208,7 +193,7 @@ ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount, * Returns: - MPI_SUCCESS or error code */ int -ompi_coll_tuned_scatter_intra_basic_linear(void *sbuf, int scount, +ompi_coll_base_scatter_intra_basic_linear(void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, @@ -269,153 +254,3 @@ ompi_coll_tuned_scatter_intra_basic_linear(void *sbuf, int scount, /* copied function (with appropriate renaming) ends here */ - -/* The following are used by dynamic and forced rules */ - -/* publish details of each algorithm and if its forced/fixed/locked in */ -/* as you add methods/algorithms you must update this and the query/map - routines */ - -/* this routine is called by the component only */ -/* this makes sure that the mca parameters are set to their initial values - and perms */ -/* module does not call this they call the forced_getvalues routine instead */ - -int -ompi_coll_tuned_scatter_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices) -{ - mca_base_var_enum_t *new_enum; - - ompi_coll_tuned_forced_max_algorithms[SCATTER] = coll_tuned_scatter_algorithm_count; - - (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "scatter_algorithm_count", - "Number of scatter algorithms available", - MCA_BASE_VAR_TYPE_INT, NULL, 0, - MCA_BASE_VAR_FLAG_DEFAULT_ONLY, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_CONSTANT, - &coll_tuned_scatter_algorithm_count); - - /* MPI_T: This variable should eventually be bound to a communicator */ - coll_tuned_scatter_forced_algorithm = 0; - (void) mca_base_var_enum_create("coll_tuned_scatter_algorithms", scatter_algorithms, &new_enum); - mca_param_indices->algorithm_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "scatter_algorithm", - "Which scatter algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial.", - MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_scatter_forced_algorithm); - OBJ_RELEASE(new_enum); - if (mca_param_indices->algorithm_param_index < 0) { - return mca_param_indices->algorithm_param_index; - } - - coll_tuned_scatter_segment_size = 0; - mca_param_indices->segsize_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "scatter_algorithm_segmentsize", - "Segment size in bytes used by default for scatter algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_scatter_segment_size); - - coll_tuned_scatter_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */ - mca_param_indices->tree_fanout_param_index = - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "scatter_algorithm_tree_fanout", - "Fanout for n-tree used for scatter algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. 
Currently, available algorithms do not support n-tree topologies.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_scatter_tree_fanout); - - coll_tuned_scatter_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */ - mca_param_indices->chain_fanout_param_index= - mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version, - "scatter_algorithm_chain_fanout", - "Fanout for chains used for scatter algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, - &coll_tuned_scatter_chain_fanout); - - return (MPI_SUCCESS); -} - -int -ompi_coll_tuned_scatter_intra_do_forced(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module) -{ - mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module; - mca_coll_tuned_comm_t *data = tuned_module->tuned_data; - - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:scatter_intra_do_forced selected algorithm %d", - data->user_forced[SCATTER].algorithm)); - - switch (data->user_forced[SCATTER].algorithm) { - case (0): - return ompi_coll_tuned_scatter_intra_dec_fixed (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); - case (1): - return ompi_coll_tuned_scatter_intra_basic_linear (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); - case (2): - return ompi_coll_tuned_scatter_intra_binomial(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:scatter_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?", - data->user_forced[SCATTER].algorithm, - ompi_coll_tuned_forced_max_algorithms[SCATTER])); - return (MPI_ERR_ARG); - } /* switch */ -} - -int -ompi_coll_tuned_scatter_intra_do_this(void *sbuf, int scount, - struct ompi_datatype_t *sdtype, - void* rbuf, int rcount, - struct ompi_datatype_t *rdtype, - int root, - struct ompi_communicator_t *comm, - mca_coll_base_module_t *module, - int algorithm, int faninout, int segsize) -{ - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:scatter_intra_do_this selected algorithm %d topo faninout %d segsize %d", - algorithm, faninout, segsize)); - - switch (algorithm) { - case (0): - return ompi_coll_tuned_scatter_intra_dec_fixed (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); - case (1): - return ompi_coll_tuned_scatter_intra_basic_linear (sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); - case (2): - return ompi_coll_tuned_scatter_intra_binomial(sbuf, scount, sdtype, - rbuf, rcount, rdtype, - root, comm, module); - default: - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:scatter_intra_do_this attempt to select algorithm %d when only 0-%d is valid?", - algorithm, - ompi_coll_tuned_forced_max_algorithms[SCATTER])); - return (MPI_ERR_ARG); - } /* switch */ -} diff --git a/ompi/mca/coll/base/coll_base_topo.c b/ompi/mca/coll/base/coll_base_topo.c index 0df6599dd6..0736faaf91 100644 --- a/ompi/mca/coll/base/coll_base_topo.c +++ b/ompi/mca/coll/base/coll_base_topo.c @@ -5,16 +5,16 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
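Since the hunk above elides the body of the basic linear scatter, the behaviour it describes is worth making concrete: the root sends the i-th block to rank i while every other process posts a single receive. A minimal MPI-level sketch of that pattern (hypothetical illustration, not the elided code itself):

    #include <mpi.h>

    /* Basic linear scatter: no segmentation, no tree; as the comment
     * above notes, competitive for small communicators or small data. */
    static int scatter_basic_linear(const int *sbuf, int scount,
                                    int *rbuf, int root, MPI_Comm comm)
    {
        int rank, size;
        MPI_Comm_rank(comm, &rank);
        MPI_Comm_size(comm, &size);

        if (rank != root) {                 /* non-root: one receive */
            return MPI_Recv(rbuf, scount, MPI_INT, root, 0, comm,
                            MPI_STATUS_IGNORE);
        }
        for (int i = 0; i < size; i++) {    /* root: one send per peer */
            if (i == root) {
                for (int j = 0; j < scount; j++)
                    rbuf[j] = sbuf[i * scount + j];   /* local copy */
            } else {
                MPI_Send(sbuf + i * scount, scount, MPI_INT, i, 0, comm);
            }
        }
        return MPI_SUCCESS;
    }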
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -25,8 +25,8 @@ #include "ompi/constants.h" #include "ompi/communicator/communicator.h" #include "ompi/mca/coll/base/coll_tags.h" -#include "coll_tuned.h" -#include "coll_tuned_topo.h" +#include "ompi/mca/coll/base/coll_base_functions.h" +#include "coll_base_topo.h" /* * Some static helpers. @@ -75,36 +75,36 @@ static int calculate_num_nodes_up_to_level( int fanout, int level ) */ ompi_coll_tree_t* -ompi_coll_tuned_topo_build_tree( int fanout, +ompi_coll_base_topo_build_tree( int fanout, struct ompi_communicator_t* comm, int root ) { int rank, size, schild, sparent, shiftedrank, i; int level; /* location of my rank in the tree structure of size */ int delta; /* number of nodes on my level */ - int slimit; /* total number of nodes on levels above me */ + int slimit; /* total number of nodes on levels above me */ ompi_coll_tree_t* tree; - OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:topo_build_tree Building fo %d rt %d", fanout, root)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:topo_build_tree Building fo %d rt %d", fanout, root)); if (fanout<1) { - OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:topo_build_tree invalid fanout %d", fanout)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:topo_build_tree invalid fanout %d", fanout)); return NULL; } if (fanout>MAXTREEFANOUT) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo_build_tree invalid fanout %d bigger than max %d", fanout, MAXTREEFANOUT)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo_build_tree invalid fanout %d bigger than max %d", fanout, MAXTREEFANOUT)); return NULL; } - /* - * Get size and rank of the process in this communicator + /* + * Get size and rank of the process in this communicator */ size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); tree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t)); if (!tree) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo_build_tree PANIC::out of memory")); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo_build_tree PANIC::out of memory")); return NULL; } @@ -115,8 +115,8 @@ ompi_coll_tuned_topo_build_tree( int fanout, * Set root */ tree->tree_root = root; - - /* + + /* * Initialize tree */ tree->tree_fanout = fanout; @@ -132,11 +132,11 @@ ompi_coll_tuned_topo_build_tree( int fanout, if( size < 2 ) { return tree; } - + /* - * Shift all ranks by root, so that the algorithm can be + * Shift all ranks by root, so that the algorithm can be * designed as if root would be always 0 - * shiftedrank should be used in calculating distances + * shiftedrank should be used in calculating distances * and position in tree */ shiftedrank = rank - root; @@ -158,7 +158,7 @@ ompi_coll_tuned_topo_build_tree( int fanout, break; } } - + /* find my parent */ slimit = calculate_num_nodes_up_to_level( fanout, level ); sparent = shiftedrank; @@ -170,12 +170,12 @@ ompi_coll_tuned_topo_build_tree( int fanout, } } tree->tree_prev = (sparent+root)%size; - + return tree; } /* - * Constructs in-order binary tree which can be 
used for non-commutative reduce + * Constructs in-order binary tree which can be used for non-commutative reduce * operations. * Root of this tree is always rank (size-1) and fanout is 2. * Here are some of the examples of this tree: @@ -189,28 +189,28 @@ ompi_coll_tuned_topo_build_tree( int fanout, * 4 0 */ ompi_coll_tree_t* -ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm ) +ompi_coll_base_topo_build_in_order_bintree( struct ompi_communicator_t* comm ) { int rank, size, myrank, rightsize, delta, parent, lchild, rchild; ompi_coll_tree_t* tree; - /* - * Get size and rank of the process in this communicator + /* + * Get size and rank of the process in this communicator */ size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); tree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t)); if (!tree) { - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:topo_build_tree PANIC::out of memory")); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:topo_build_tree PANIC::out of memory")); return NULL; } tree->tree_root = MPI_UNDEFINED; tree->tree_nextsize = MPI_UNDEFINED; - /* + /* * Initialize tree */ tree->tree_fanout = 2; @@ -220,11 +220,11 @@ ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm ) tree->tree_nextsize = 0; tree->tree_next[0] = -1; tree->tree_next[1] = -1; - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:topo_build_in_order_tree Building fo %d rt %d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:topo_build_in_order_tree Building fo %d rt %d", tree->tree_fanout, tree->tree_root)); - /* + /* * Build the tree */ myrank = rank; @@ -240,18 +240,18 @@ ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm ) rchild = -1; if (size - 1 > 0) { lchild = parent - 1; - if (lchild > 0) { + if (lchild > 0) { rchild = rightsize - 1; } } - - /* The following cases are possible: myrank can be + + /* The following cases are possible: myrank can be - a parent, - belong to the left subtree, or - belong to the right subtee Each of the cases need to be handled differently. */ - + if (myrank == parent) { /* I am the parent: - compute real ranks of my children, and exit the loop. */ @@ -262,7 +262,7 @@ ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm ) if (myrank > rchild) { /* I belong to the left subtree: - If I am the left child, compute real rank of my parent - - Iterate down through tree: + - Iterate down through tree: compute new size, shift ranks down, and update delta. */ if (myrank == lchild) { @@ -276,8 +276,8 @@ ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm ) } else { /* I belong to the right subtree: - If I am the right child, compute real rank of my parent - - Iterate down through tree: - compute new size and parent, + - Iterate down through tree: + compute new size and parent, but the delta and rank do not need to change. 
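An aside on why this in-order construction exists at all: MPI requires a non-commutative reduction operator to be applied in rank order, so the tree must feed operands to the operator in exactly that order, and reordering is only legal for commutative operations (which is why the recursive-halving and ring reduce_scatter variants above are restricted to them). A small standalone illustration with a non-commutative "operator":

    #include <stdio.h>
    #include <string.h>

    /* Concatenation is non-commutative: combining rank contributions in
     * a different order yields a different "reduction" result. */
    int main(void)
    {
        char in_order[8] = "", reordered[8] = "";
        strcat(strcat(strcat(in_order, "A"), "B"), "C");   /* ranks 0,1,2 */
        strcat(strcat(strcat(reordered, "B"), "A"), "C");  /* ranks 1,0,2 */
        printf("%s vs %s\n", in_order, reordered);         /* ABC vs BAC */
        return 0;
    }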
*/ if (myrank == rchild) { @@ -287,14 +287,14 @@ ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm ) parent = rchild; } } - + if (tree->tree_next[0] >= 0) { tree->tree_nextsize = 1; } if (tree->tree_next[1] >= 0) { tree->tree_nextsize += 1; } return tree; } -int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree ) +int ompi_coll_base_topo_destroy_tree( ompi_coll_tree_t** tree ) { ompi_coll_tree_t *ptr; @@ -311,7 +311,7 @@ int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree ) } /* - * + * * Here are some of the examples of this tree: * size == 2 size = 4 size = 8 * 0 0 0 @@ -323,16 +323,16 @@ int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree ) * 7 */ ompi_coll_tree_t* -ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, +ompi_coll_base_topo_build_bmtree( struct ompi_communicator_t* comm, int root ) { int childs = 0, rank, size, mask = 1, index, remote, i; ompi_coll_tree_t *bmtree; - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_bmtree rt %d", root)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree rt %d", root)); - /* - * Get size and rank of the process in this communicator + /* + * Get size and rank of the process in this communicator */ size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -341,7 +341,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, bmtree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t)); if (!bmtree) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_bmtree PANIC out of memory")); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree PANIC out of memory")); return NULL; } @@ -372,7 +372,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, remote += root; if( remote >= size ) remote -= size; if (childs==MAXTREEFANOUT) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_bmtree max fanout incorrect %d needed %d", MAXTREEFANOUT, childs)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree max fanout incorrect %d needed %d", MAXTREEFANOUT, childs)); free(bmtree); return NULL; } @@ -388,7 +388,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, /* * Constructs in-order binomial tree which can be used for gather/scatter * operations. 
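The binomial tree built above admits a closed-form neighbor computation. The following standalone sketch derives a rank's parent and children from the shifted rank, in the spirit of ompi_coll_base_topo_build_bmtree (hypothetical illustration of the classic binomial layout; the in-order variant described next orders children differently):

    #include <stdio.h>

    /* Classic binomial-tree neighbors for a broadcast rooted at `root`,
     * using the same shift trick: vrank = (rank - root + size) % size. */
    static void bmtree_neighbors(int rank, int size, int root)
    {
        int vrank = (rank - root + size) % size;
        int h = 0;                 /* highest set bit of vrank (0 = root) */
        for (int m = 1; m <= vrank; m <<= 1)
            if (vrank & m) h = m;

        if (vrank == 0) printf("rank %d: root\n", rank);
        else printf("rank %d: parent %d\n", rank,
                    ((vrank - h) + root) % size);

        /* Children sit at vrank + m for each m above the bit we
         * received on, as long as they stay inside the communicator. */
        for (int m = (vrank == 0) ? 1 : (h << 1); vrank + m < size; m <<= 1)
            printf("rank %d: child %d\n", rank, ((vrank + m) + root) % size);
    }

    int main(void)
    {
        for (int r = 0; r < 8; r++) bmtree_neighbors(r, 8, 0);
        return 0;
    }

For size 8 and root 0 this reproduces the tree pictured above: 0 has children 1, 2, 4; 1 has 3 and 5; 2 has 6; 3 has 7.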
- * + * * Here are some of the examples of this tree: * size == 2 size = 4 size = 8 * 0 0 0 @@ -400,16 +400,16 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, * 7 */ ompi_coll_tree_t* -ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm, +ompi_coll_base_topo_build_in_order_bmtree( struct ompi_communicator_t* comm, int root ) { int childs = 0, rank, vrank, size, mask = 1, remote, i; ompi_coll_tree_t *bmtree; - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_in_order_bmtree rt %d", root)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_in_order_bmtree rt %d", root)); - /* - * Get size and rank of the process in this communicator + /* + * Get size and rank of the process in this communicator */ size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); @@ -418,7 +418,7 @@ ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm, bmtree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t)); if (!bmtree) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_bmtree PANIC out of memory")); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree PANIC out of memory")); return NULL; } @@ -442,8 +442,8 @@ ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm, bmtree->tree_next[childs] = (remote + root) % size; childs++; if (childs==MAXTREEFANOUT) { - OPAL_OUTPUT((ompi_coll_tuned_stream, - "coll:tuned:topo:build_bmtree max fanout incorrect %d needed %d", + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, + "coll:base:topo:build_bmtree max fanout incorrect %d needed %d", MAXTREEFANOUT, childs)); free (bmtree); return NULL; @@ -459,36 +459,36 @@ ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm, ompi_coll_tree_t* -ompi_coll_tuned_topo_build_chain( int fanout, +ompi_coll_base_topo_build_chain( int fanout, struct ompi_communicator_t* comm, int root ) { int i, maxchainlen, mark, head, len, rank, size, srank /* shifted rank */; ompi_coll_tree_t *chain; - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_chain fo %d rt %d", fanout, root)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain fo %d rt %d", fanout, root)); - /* - * Get size and rank of the process in this communicator + /* + * Get size and rank of the process in this communicator */ size = ompi_comm_size(comm); rank = ompi_comm_rank(comm); if( fanout < 1 ) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_chain WARNING invalid fanout of ZERO, forcing to 1 (pipeline)!")); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain WARNING invalid fanout of ZERO, forcing to 1 (pipeline)!")); fanout = 1; } if (fanout>MAXTREEFANOUT) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_chain WARNING invalid fanout %d bigger than max %d, forcing to max!", fanout, MAXTREEFANOUT)); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain WARNING invalid fanout %d bigger than max %d, forcing to max!", fanout, MAXTREEFANOUT)); fanout = MAXTREEFANOUT; } /* - * Allocate space for topology arrays if needed + * Allocate space for topology arrays if needed */ chain = (ompi_coll_tree_t*)malloc( sizeof(ompi_coll_tree_t) ); if (!chain) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_chain PANIC out of memory")); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain PANIC out of memory")); 
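The chain builder whose fanout clamping appears above (and which continues below) is easiest to picture in its degenerate fanout-1 case, a pipeline. A standalone sketch of the shift-by-root neighbor computation for that case (illustration only, not the general multi-chain layout the function produces for fanout greater than 1):

    #include <stdio.h>

    int main(void)
    {
        const int size = 6, root = 2;
        for (int rank = 0; rank < size; rank++) {
            int srank = (rank - root + size) % size;   /* shifted rank */
            int prev = (srank == 0) ? -1 : (srank - 1 + root) % size;
            int next = (srank == size - 1) ? -1 : (srank + 1 + root) % size;
            printf("rank %d: prev %d next %d\n", rank, prev, next);
        }
        return 0;
    }

With root 2 and six ranks this prints the single chain 2 -> 3 -> 4 -> 5 -> 0 -> 1, with -1 marking the ends.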
fflush(stdout); return NULL; } @@ -496,17 +496,17 @@ ompi_coll_tuned_topo_build_chain( int fanout, chain->tree_nextsize = -1; for(i=0;itree_next[i] = -1; - /* + /* * Set root & numchain */ chain->tree_root = root; - if( (size - 1) < fanout ) { + if( (size - 1) < fanout ) { chain->tree_nextsize = size-1; fanout = size-1; } else { chain->tree_nextsize = fanout; } - + /* * Shift ranks */ @@ -577,7 +577,7 @@ ompi_coll_tuned_topo_build_chain( int fanout, chain->tree_nextsize = 1; } else { chain->tree_next[0] = -1; - chain->tree_nextsize = 0; + chain->tree_nextsize = 0; } } chain->tree_prev = (chain->tree_prev+root)%size; @@ -586,7 +586,7 @@ ompi_coll_tuned_topo_build_chain( int fanout, } } else { /* - * Unshift values + * Unshift values */ chain->tree_prev = -1; chain->tree_next[0] = (root+1)%size; @@ -603,17 +603,62 @@ ompi_coll_tuned_topo_build_chain( int fanout, return chain; } -int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank) +int ompi_coll_base_topo_dump_tree (ompi_coll_tree_t* tree, int rank) { int i; - OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:topo:topo_dump_tree %1d tree root %d" + OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:topo:topo_dump_tree %1d tree root %d" " fanout %d BM %1d nextsize %d prev %d", rank, tree->tree_root, tree->tree_bmtree, tree->tree_fanout, tree->tree_nextsize, tree->tree_prev)); if( tree->tree_nextsize ) { for( i = 0; i < tree->tree_nextsize; i++ ) - OPAL_OUTPUT((ompi_coll_tuned_stream,"[%1d] %d", i, tree->tree_next[i])); + OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"[%1d] %d", i, tree->tree_next[i])); } return (0); } + +mca_coll_base_comm_t* ompi_coll_base_topo_construct( mca_coll_base_comm_t* data ) +{ + if( NULL == data ) { + data = (mca_coll_base_comm_t*)calloc(1, sizeof(mca_coll_base_comm_t)); + } + return data; +} + +void ompi_coll_base_topo_destruct( mca_coll_base_comm_t* data ) +{ + if(NULL == data) return; + +#if OPAL_ENABLE_DEBUG + /* Reset the reqs to NULL/0 -- they'll be freed as part of freeing + the generel c_coll_selected_data */ + data->mcct_reqs = NULL; + data->mcct_num_reqs = 0; +#endif + + /* free any cached information that has been allocated */ + if (data->cached_ntree) { /* destroy general tree if defined */ + ompi_coll_base_topo_destroy_tree (&data->cached_ntree); + } + if (data->cached_bintree) { /* destroy bintree if defined */ + ompi_coll_base_topo_destroy_tree (&data->cached_bintree); + } + if (data->cached_bmtree) { /* destroy bmtree if defined */ + ompi_coll_base_topo_destroy_tree (&data->cached_bmtree); + } + if (data->cached_in_order_bmtree) { /* destroy bmtree if defined */ + ompi_coll_base_topo_destroy_tree (&data->cached_in_order_bmtree); + } + if (data->cached_chain) { /* destroy general chain if defined */ + ompi_coll_base_topo_destroy_tree (&data->cached_chain); + } + if (data->cached_pipeline) { /* destroy pipeline if defined */ + ompi_coll_base_topo_destroy_tree (&data->cached_pipeline); + } + if (data->cached_in_order_bintree) { /* destroy in order bintree if defined */ + ompi_coll_base_topo_destroy_tree (&data->cached_in_order_bintree); + } + + free(data); +} diff --git a/ompi/mca/coll/base/coll_base_topo.h b/ompi/mca/coll/base/coll_base_topo.h index 717b67aed9..35159dadfb 100644 --- a/ompi/mca/coll/base/coll_base_topo.h +++ b/ompi/mca/coll/base/coll_base_topo.h @@ -5,19 +5,19 @@ * Copyright (c) 2004-2012 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
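The construct/destruct pair added above leans on ompi_coll_base_topo_destroy_tree taking a pointer-to-pointer, so each cached_* slot is reset as it is freed and the destruct code can simply test every field. A miniature of that pattern (hypothetical stand-in types, standalone):

    #include <stdlib.h>

    typedef struct tree { int root; } tree_t;

    /* Free the tree and NULL the caller's cached pointer in one call. */
    static int destroy_tree(tree_t **tree)
    {
        if (NULL == tree || NULL == *tree) return 0;
        free(*tree);
        *tree = NULL;      /* caller's cache slot is safely reusable */
        return 0;
    }

    int main(void)
    {
        tree_t *cached = malloc(sizeof(*cached));
        destroy_tree(&cached);    /* cached is NULL afterwards */
        destroy_tree(&cached);    /* second call is a harmless no-op */
        return 0;
    }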
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ -#ifndef MCA_COLL_TUNED_TOPO_H_HAS_BEEN_INCLUDED -#define MCA_COLL_TUNED_TOPO_H_HAS_BEEN_INCLUDED +#ifndef MCA_COLL_BASE_TOPO_H_HAS_BEEN_INCLUDED +#define MCA_COLL_BASE_TOPO_H_HAS_BEEN_INCLUDED #include "ompi_config.h" @@ -35,29 +35,28 @@ typedef struct ompi_coll_tree_t { } ompi_coll_tree_t; ompi_coll_tree_t* -ompi_coll_tuned_topo_build_tree( int fanout, +ompi_coll_base_topo_build_tree( int fanout, struct ompi_communicator_t* com, int root ); ompi_coll_tree_t* -ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm ); +ompi_coll_base_topo_build_in_order_bintree( struct ompi_communicator_t* comm ); ompi_coll_tree_t* -ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm, +ompi_coll_base_topo_build_bmtree( struct ompi_communicator_t* comm, int root ); ompi_coll_tree_t* -ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm, +ompi_coll_base_topo_build_in_order_bmtree( struct ompi_communicator_t* comm, int root ); ompi_coll_tree_t* -ompi_coll_tuned_topo_build_chain( int fanout, +ompi_coll_base_topo_build_chain( int fanout, struct ompi_communicator_t* com, int root ); -int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree ); +int ompi_coll_base_topo_destroy_tree( ompi_coll_tree_t** tree ); /* debugging stuff, will be removed later */ -int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank); +int ompi_coll_base_topo_dump_tree (ompi_coll_tree_t* tree, int rank); END_C_DECLS -#endif /* MCA_COLL_TUNED_TOPO_H_HAS_BEEN_INCLUDED */ - +#endif /* MCA_COLL_BASE_TOPO_H_HAS_BEEN_INCLUDED */ diff --git a/ompi/mca/coll/base/coll_base_util.c b/ompi/mca/coll/base/coll_base_util.c index 8fe57ce01b..34607067e8 100644 --- a/ompi/mca/coll/base/coll_base_util.c +++ b/ompi/mca/coll/base/coll_base_util.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2014 The University of Tennessee and The University + * Copyright (c) 2004-2015 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -19,17 +19,17 @@ */ #include "ompi_config.h" -#include "coll_tuned.h" #include "mpi.h" #include "ompi/constants.h" #include "ompi/datatype/ompi_datatype.h" #include "ompi/communicator/communicator.h" #include "ompi/mca/coll/base/coll_tags.h" +#include "ompi/mca/coll/base/coll_base_functions.h" #include "ompi/mca/pml/pml.h" -#include "coll_tuned_util.h" +#include "coll_base_util.h" -int ompi_coll_tuned_sendrecv_nonzero_actual( void* sendbuf, size_t scount, +int ompi_coll_base_sendrecv_nonzero_actual( void* sendbuf, size_t scount, ompi_datatype_t* sdatatype, int dest, int stag, void* recvbuf, size_t rcount, @@ -91,14 +91,14 @@ int ompi_coll_tuned_sendrecv_nonzero_actual( void* sendbuf, size_t scount, *status = statuses[err_index]; } err = statuses[err_index].MPI_ERROR; - OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred in the %s" - " stage of ompi_coll_tuned_sendrecv_zero\n", + OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred in the %s" + " stage of ompi_coll_base_sendrecv_zero\n", __FILE__, line, err, (0 == err_index ? "receive" : "send"))); } else { /* Error discovered during the posting of the irecv or isend, * and no status is available. */ - OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n", + OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred\n", __FILE__, line, err)); if (MPI_STATUS_IGNORE != status) { status->MPI_ERROR = err; diff --git a/ompi/mca/coll/base/coll_base_util.h b/ompi/mca/coll/base/coll_base_util.h index e46e7f4020..c49d6e37bd 100644 --- a/ompi/mca/coll/base/coll_base_util.h +++ b/ompi/mca/coll/base/coll_base_util.h @@ -18,8 +18,8 @@ * $HEADER$ */ -#ifndef MCA_COLL_TUNED_UTIL_EXPORT_H -#define MCA_COLL_TUNED_UTIL_EXPORT_H +#ifndef MCA_COLL_BASE_UTIL_EXPORT_H +#define MCA_COLL_BASE_UTIL_EXPORT_H #include "ompi_config.h" @@ -36,7 +36,7 @@ BEGIN_C_DECLS * If one of the communications results in a zero-byte message the * communication is ignored, and no message will cross to the peer. */ -int ompi_coll_tuned_sendrecv_nonzero_actual( void* sendbuf, size_t scount, +int ompi_coll_base_sendrecv_nonzero_actual( void* sendbuf, size_t scount, ompi_datatype_t* sdatatype, int dest, int stag, void* recvbuf, size_t rcount, @@ -53,7 +53,7 @@ int ompi_coll_tuned_sendrecv_nonzero_actual( void* sendbuf, size_t scount, * communications. 
*/ static inline int -ompi_coll_tuned_sendrecv( void* sendbuf, size_t scount, ompi_datatype_t* sdatatype, +ompi_coll_base_sendrecv( void* sendbuf, size_t scount, ompi_datatype_t* sdatatype, int dest, int stag, void* recvbuf, size_t rcount, ompi_datatype_t* rdatatype, int source, int rtag, @@ -64,13 +64,13 @@ ompi_coll_tuned_sendrecv( void* sendbuf, size_t scount, ompi_datatype_t* sdataty return (int) ompi_datatype_sndrcv(sendbuf, (int32_t) scount, sdatatype, recvbuf, (int32_t) rcount, rdatatype); } - return ompi_coll_tuned_sendrecv_nonzero_actual (sendbuf, scount, sdatatype, + return ompi_coll_base_sendrecv_nonzero_actual (sendbuf, scount, sdatatype, dest, stag, recvbuf, rcount, rdatatype, source, rtag, comm, status); } END_C_DECLS -#endif /* MCA_COLL_TUNED_UTIL_EXPORT_H */ +#endif /* MCA_COLL_BASE_UTIL_EXPORT_H */ diff --git a/ompi/mca/coll/coll.h b/ompi/mca/coll/coll.h index 34bed4a14e..82f62ff69c 100644 --- a/ompi/mca/coll/coll.h +++ b/ompi/mca/coll/coll.h @@ -470,6 +470,9 @@ struct mca_coll_base_module_2_1_0_t { be used for the given communicator */ mca_coll_base_module_disable_1_1_0_fn_t coll_module_disable; + /** Data storage for all the algorithms defined in the base. Should + not be used by other modules */ + struct mca_coll_base_comm_t* base_data; }; typedef struct mca_coll_base_module_2_1_0_t mca_coll_base_module_2_1_0_t;
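Finally, the util helper renamed above exists to pair a send and a receive without deadlocking while ignoring zero-byte messages entirely. A minimal MPI-level sketch of the same pattern (hypothetical standalone helper; the real code also recovers per-request statuses on error):

    #include <mpi.h>

    /* Pairwise exchange that skips zero-count sides, in the spirit of
     * ompi_coll_base_sendrecv_nonzero_actual: post both nonblocking
     * operations, then wait on whatever was actually posted. */
    static int sendrecv_nonzero(void *sbuf, int scount, MPI_Datatype sdt,
                                int dest, int stag,
                                void *rbuf, int rcount, MPI_Datatype rdt,
                                int source, int rtag, MPI_Comm comm)
    {
        MPI_Request reqs[2];
        int nreqs = 0;

        if (rcount > 0)   /* post the receive first */
            MPI_Irecv(rbuf, rcount, rdt, source, rtag, comm, &reqs[nreqs++]);
        if (scount > 0)
            MPI_Isend(sbuf, scount, sdt, dest, stag, comm, &reqs[nreqs++]);

        return MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE);
    }

The inline ompi_coll_base_sendrecv wrapper in the header above additionally short-circuits the self-exchange case through ompi_datatype_sndrcv before falling back to this nonzero-actual path.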