
Merge pull request #423 from ICLDisco/tuned

Dismantle the Tuned collective
Howard Pritchard 2015-02-26 16:19:43 -07:00
parent 5215dc0db3 ced44e12da
commit cf56c6a9f2
51 changed files with 4420 additions and 5126 deletions

View file

@@ -2,7 +2,7 @@
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# Copyright (c) 2004-2015 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -20,10 +20,26 @@ dist_ompidata_DATA = base/help-mca-coll-base.txt
headers += \
base/base.h \
base/coll_tags.h
base/coll_tags.h \
base/coll_base_topo.h \
base/coll_base_util.h \
base/coll_base_functions.h
libmca_coll_la_SOURCES += \
base/coll_base_comm_select.c \
base/coll_base_comm_unselect.c \
base/coll_base_find_available.c \
base/coll_base_frame.c
base/coll_base_frame.c \
base/coll_base_bcast.c \
base/coll_base_scatter.c \
base/coll_base_topo.c \
base/coll_base_allgather.c \
base/coll_base_allgatherv.c \
base/coll_base_util.c \
base/coll_base_allreduce.c \
base/coll_base_alltoall.c \
base/coll_base_gather.c \
base/coll_base_alltoallv.c \
base/coll_base_reduce.c \
base/coll_base_barrier.c \
base/coll_base_reduce_scatter.c

View file

@@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -30,31 +30,12 @@
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/* allgather algorithm variables */
static int coll_tuned_allgather_algorithm_count = 6;
static int coll_tuned_allgather_forced_algorithm = 0;
static int coll_tuned_allgather_segment_size = 0;
static int coll_tuned_allgather_tree_fanout;
static int coll_tuned_allgather_chain_fanout;
/* valid values for coll_tuned_allgather_forced_algorithm */
static mca_base_var_enum_value_t allgather_algorithms[] = {
{0, "ignore"},
{1, "linear"},
{2, "bruck"},
{3, "recursive_doubling"},
{4, "ring"},
{5, "neighbor"},
{6, "two_proc"},
{0, NULL}
};
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"
/*
* ompi_coll_tuned_allgather_intra_bruck
* ompi_coll_base_allgather_intra_bruck
*
* Function: allgather using O(log(N)) steps.
* Accepts: Same arguments as MPI_Allgather
@@ -101,7 +82,7 @@ static mca_base_var_enum_value_t allgather_algorithms[] = {
* [4] [4] [4] [4] [4] [4]
* [5] [5] [5] [5] [5] [5]
*/
int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
int ompi_coll_base_allgather_intra_bruck(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@@ -115,8 +96,8 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_bruck rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgather_intra_bruck rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &slb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@@ -167,7 +148,7 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
}
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(tmpsend, blockcount * rcount, rdtype,
err = ompi_coll_base_sendrecv(tmpsend, blockcount * rcount, rdtype,
sendto, MCA_COLL_BASE_TAG_ALLGATHER,
tmprecv, blockcount * rcount, rdtype,
recvfrom, MCA_COLL_BASE_TAG_ALLGATHER,
@@ -223,13 +204,13 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
return OMPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
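
A minimal standalone sketch of the Bruck schedule implemented above, with printf standing in for the actual ompi_coll_base_sendrecv exchange (hypothetical helper, illustration only):

#include <stdio.h>

/* In round k the distance doubles; each process sends min(distance,
 * size - distance) blocks to (rank - distance) and receives as many
 * from (rank + distance), finishing in ceil(log2(size)) rounds. */
static void bruck_allgather_schedule(int rank, int size)
{
    for (int distance = 1; distance < size; distance <<= 1) {
        int sendto     = (rank - distance + size) % size;
        int recvfrom   = (rank + distance) % size;
        int blockcount = (distance <= size - distance) ? distance
                                                       : size - distance;
        printf("rank %d: send %d block(s) to %d, recv %d block(s) from %d\n",
               rank, blockcount, sendto, blockcount, recvfrom);
    }
}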
/*
* ompi_coll_tuned_allgather_intra_recursivedoubling
* ompi_coll_base_allgather_intra_recursivedoubling
*
* Function: allgather using O(log(N)) steps.
* Accepts: Same arguments as MPI_Allgather
@@ -274,7 +255,7 @@ int ompi_coll_tuned_allgather_intra_bruck(void *sbuf, int scount,
* step, and send them appropriate messages.
*/
int
ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
ompi_coll_base_allgather_intra_recursivedoubling(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@@ -297,17 +278,17 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
print warning and call bruck allgather algorithm with same parameters.
*/
if (pow2size != size) {
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_recursivedoubling WARNING: non-pow-2 size %d, switching to bruck algorithm",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgather_intra_recursivedoubling WARNING: non-pow-2 size %d, switching to bruck algorithm",
size));
return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype,
return ompi_coll_base_allgather_intra_bruck(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
}
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_recursivedoubling rank %d, size %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgather_intra_recursivedoubling rank %d, size %d",
rank, size));
err = ompi_datatype_get_extent (sdtype, &slb, &sext);
@@ -347,7 +328,7 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
}
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(tmpsend, (ptrdiff_t)distance * (ptrdiff_t)rcount, rdtype,
err = ompi_coll_base_sendrecv(tmpsend, (ptrdiff_t)distance * (ptrdiff_t)rcount, rdtype,
remote, MCA_COLL_BASE_TAG_ALLGATHER,
tmprecv, (ptrdiff_t)distance * (ptrdiff_t)rcount, rdtype,
remote, MCA_COLL_BASE_TAG_ALLGATHER,
@@ -359,7 +340,7 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
return OMPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
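
The peer computation in recursive doubling is a single XOR per round, which is why the routine above falls back to Bruck for non-power-of-two sizes. A sketch under the power-of-two assumption (hypothetical helper):

#include <stdio.h>

/* Round k: partner = rank XOR 2^k; the contiguous run of blocks each
 * process holds doubles every round, so log2(size) exchanges suffice. */
static void recursive_doubling_schedule(int rank, int size)
{
    for (int distance = 1; distance < size; distance <<= 1) {
        int remote = rank ^ distance;  /* peer differs in exactly one bit */
        printf("rank %d: exchange %d block(s) with rank %d\n",
               rank, distance, remote);
    }
}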
@@ -367,7 +348,7 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
/*
* ompi_coll_tuned_allgather_intra_ring
* ompi_coll_base_allgather_intra_ring
*
* Function: allgather using O(N) steps.
* Accepts: Same arguments as MPI_Allgather
@@ -381,7 +362,7 @@ ompi_coll_tuned_allgather_intra_recursivedoubling(void *sbuf, int scount,
* No additional memory requirements.
*
*/
int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount,
int ompi_coll_base_allgather_intra_ring(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@@ -395,8 +376,8 @@ int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_ring rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgather_intra_ring rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &slb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@@ -434,7 +415,7 @@ int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount,
tmpsend = (char*)rbuf + (ptrdiff_t)senddatafrom * (ptrdiff_t)rcount * rext;
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(tmpsend, rcount, rdtype, sendto,
err = ompi_coll_base_sendrecv(tmpsend, rcount, rdtype, sendto,
MCA_COLL_BASE_TAG_ALLGATHER,
tmprecv, rcount, rdtype, recvfrom,
MCA_COLL_BASE_TAG_ALLGATHER,
@@ -446,13 +427,13 @@ int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount,
return OMPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
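
The ring variant trades latency for strictly neighbor-only traffic. A sketch of its indexing, matching the senddatafrom/recvdatafrom arithmetic visible above (hypothetical helper):

#include <stdio.h>

/* size - 1 rounds; every round each process forwards the block it most
 * recently received, so rank r's block travels the whole ring. */
static void ring_allgather_schedule(int rank, int size)
{
    int sendto   = (rank + 1) % size;
    int recvfrom = (rank - 1 + size) % size;
    for (int i = 0; i < size - 1; i++) {
        int senddatafrom = (rank - i + size) % size;
        int recvdatafrom = (rank - i - 1 + size) % size;
        printf("rank %d round %d: block %d -> rank %d, block %d <- rank %d\n",
               rank, i, senddatafrom, sendto, recvdatafrom, recvfrom);
    }
}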
/*
* ompi_coll_tuned_allgather_intra_neighborexchange
* ompi_coll_base_allgather_intra_neighborexchange
*
* Function: allgather using N/2 steps (O(N))
* Accepts: Same arguments as MPI_Allgather
@@ -509,7 +490,7 @@ int ompi_coll_tuned_allgather_intra_ring(void *sbuf, int scount,
* [5] [5] [5] [5] [5] [5]
*/
int
ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount,
ompi_coll_base_allgather_intra_neighborexchange(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@@ -525,16 +506,16 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount,
rank = ompi_comm_rank(comm);
if (size % 2) {
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgather_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm",
size));
return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype,
return ompi_coll_base_allgather_intra_ring(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
}
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_neighborexchange rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgather_intra_neighborexchange rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &slb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@@ -581,7 +562,7 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount,
tmprecv = (char*)rbuf + (ptrdiff_t)neighbor[0] * (ptrdiff_t)rcount * rext;
tmpsend = (char*)rbuf + (ptrdiff_t)rank * (ptrdiff_t)rcount * rext;
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(tmpsend, rcount, rdtype, neighbor[0],
err = ompi_coll_base_sendrecv(tmpsend, rcount, rdtype, neighbor[0],
MCA_COLL_BASE_TAG_ALLGATHER,
tmprecv, rcount, rdtype, neighbor[0],
MCA_COLL_BASE_TAG_ALLGATHER,
@@ -604,7 +585,7 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount,
tmpsend = (char*)rbuf + (ptrdiff_t)send_data_from * rcount * rext;
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(tmpsend, (ptrdiff_t)2 * (ptrdiff_t)rcount, rdtype,
err = ompi_coll_base_sendrecv(tmpsend, (ptrdiff_t)2 * (ptrdiff_t)rcount, rdtype,
neighbor[i_parity],
MCA_COLL_BASE_TAG_ALLGATHER,
tmprecv, (ptrdiff_t)2 * (ptrdiff_t)rcount, rdtype,
@@ -619,13 +600,13 @@ ompi_coll_tuned_allgather_intra_neighborexchange(void *sbuf, int scount,
return OMPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount,
int ompi_coll_base_allgather_intra_two_procs(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@@ -638,8 +619,8 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount,
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_allgather_intra_two_procs rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_allgather_intra_two_procs rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &lb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@@ -661,7 +642,7 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount,
}
tmprecv = (char*)rbuf + (ptrdiff_t)remote * (ptrdiff_t)rcount * rext;
err = ompi_coll_tuned_sendrecv(tmpsend, scount, sdtype, remote,
err = ompi_coll_base_sendrecv(tmpsend, scount, sdtype, remote,
MCA_COLL_BASE_TAG_ALLGATHER,
tmprecv, rcount, rdtype, remote,
MCA_COLL_BASE_TAG_ALLGATHER,
@@ -678,7 +659,7 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount,
return MPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
@@ -688,12 +669,12 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount,
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
* but for some small number of nodes and/or small data sizes they
* are just as fast as tuned/tree based segmenting operations
* are just as fast as base/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
* have to duplicate code.
* JPG following the examples from other coll_tuned implementations. Dec06.
* JPG following the examples from other coll_base implementations. Dec06.
*/
/* copied function (with appropriate renaming) starts here */
@@ -706,7 +687,7 @@ int ompi_coll_tuned_allgather_intra_two_procs(void *sbuf, int scount,
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_tuned_allgather_intra_basic_linear(void *sbuf, int scount,
ompi_coll_base_allgather_intra_basic_linear(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf,
int rcount,
@@ -755,183 +736,3 @@ ompi_coll_tuned_allgather_intra_basic_linear(void *sbuf, int scount,
}
/* copied function (with appropriate renaming) ends here */
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if its forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map
routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values
and perms */
/* module does not call this they call the forced_getvalues routine instead */
int
ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[ALLGATHER] = coll_tuned_allgather_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgather_algorithm_count",
"Number of allgather algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_allgather_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_allgather_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_allgather_algorithms", allgather_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgather_algorithm",
"Which allallgather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 bruck, 3 recursive doubling, 4 ring, 5 neighbor exchange, 6: two proc only.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgather_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_allgather_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgather_algorithm_segmentsize",
"Segment size in bytes used by default for allgather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgather_segment_size);
coll_tuned_allgather_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgather_algorithm_tree_fanout",
"Fanout for n-tree used for allgather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgather_tree_fanout);
coll_tuned_allgather_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgather_algorithm_chain_fanout",
"Fanout for chains used for allgather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgather_chain_fanout);
return (MPI_SUCCESS);
}
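
These registrations stay with the tuned component; this commit moves only the algorithm bodies into base. For context, forcing one of the enumerated algorithms goes through the usual MCA mechanism, e.g. (process count and binary name illustrative):

mpirun --mca coll_tuned_use_dynamic_rules 1 \
       --mca coll_tuned_allgather_algorithm 2 \
       -np 16 ./my_app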
int ompi_coll_tuned_allgather_intra_do_forced(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_do_forced selected algorithm %d",
data->user_forced[ALLGATHER].algorithm));
switch (data->user_forced[ALLGATHER].algorithm) {
case (0):
return ompi_coll_tuned_allgather_intra_dec_fixed (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (1):
return ompi_coll_tuned_allgather_intra_basic_linear (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (2):
return ompi_coll_tuned_allgather_intra_bruck (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (3):
return ompi_coll_tuned_allgather_intra_recursivedoubling (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (4):
return ompi_coll_tuned_allgather_intra_ring (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (5):
return ompi_coll_tuned_allgather_intra_neighborexchange (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (6):
return ompi_coll_tuned_allgather_intra_two_procs (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[ALLGATHER].algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLGATHER]));
return (MPI_ERR_ARG);
} /* switch */
}
int ompi_coll_tuned_allgather_intra_do_this(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0):
return ompi_coll_tuned_allgather_intra_dec_fixed(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (1):
return ompi_coll_tuned_allgather_intra_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (2):
return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (3):
return ompi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (4):
return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (5):
return ompi_coll_tuned_allgather_intra_neighborexchange(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (6):
return ompi_coll_tuned_allgather_intra_two_procs (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLGATHER]));
return (MPI_ERR_ARG);
} /* switch */
}

View file

@@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -30,19 +30,12 @@
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"
/* allgatherv algorithm variables */
static int coll_tuned_allgatherv_algorithm_count = 5;
static int coll_tuned_allgatherv_forced_algorithm = 0;
static int coll_tuned_allgatherv_segment_size = 0;
static int coll_tuned_allgatherv_tree_fanout;
static int coll_tuned_allgatherv_chain_fanout;
/* valid values for coll_tuned_allgatherv_forced_algorithm */
static mca_base_var_enum_value_t allgatherv_algorithms[] = {
/* valid values for coll_base_allgatherv_forced_algorithm */
mca_base_var_enum_value_t coll_base_allgatherv_algorithms[] = {
{0, "ignore"},
{1, "default"},
{2, "bruck"},
@@ -53,7 +46,7 @@ static mca_base_var_enum_value_t allgatherv_algorithms[] = {
};
/*
* ompi_coll_tuned_allgatherv_intra_bruck
* ompi_coll_base_allgatherv_intra_bruck
*
* Function: allgatherv using O(log(N)) steps.
* Accepts: Same arguments as MPI_Allgatherv
@@ -107,7 +100,7 @@ static mca_base_var_enum_value_t allgatherv_algorithms[] = {
* [5] [5] [5] [5] [5] [5] [5]
* [6] [6] [6] [6] [6] [6] [6]
*/
int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount,
int ompi_coll_base_allgatherv_intra_bruck(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts,
int *rdispls,
@@ -124,8 +117,8 @@ int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_bruck rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgather_intra_bruck rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &slb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@@ -198,7 +191,7 @@ int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount,
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(rbuf, 1, new_sdtype, sendto,
err = ompi_coll_base_sendrecv(rbuf, 1, new_sdtype, sendto,
MCA_COLL_BASE_TAG_ALLGATHERV,
rbuf, 1, new_rdtype, recvfrom,
MCA_COLL_BASE_TAG_ALLGATHERV,
@@ -207,7 +200,6 @@ int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount,
ompi_datatype_destroy(&new_sdtype);
ompi_datatype_destroy(&new_rdtype);
}
free(new_rcounts);
@@ -217,14 +209,14 @@ int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount,
err_hndl:
if( NULL != new_rcounts ) free(new_rcounts);
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
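
Because allgatherv blocks vary in size, the Bruck variant above glues each step's blocks together with derived datatypes (new_sdtype/new_rdtype) instead of scaling a count. A minimal sketch of that idea in plain MPI (hypothetical helper, not this commit's internals):

#include <mpi.h>

/* Build one indexed type covering the listed blocks of rbuf, so a single
 * Sendrecv moves them all despite unequal counts and displacements. */
static MPI_Datatype make_block_type(int nblocks, const int *blocks,
                                    const int *rcounts, const int *rdispls,
                                    MPI_Datatype base)
{
    int lengths[nblocks], displs[nblocks];
    MPI_Datatype newtype;
    for (int i = 0; i < nblocks; i++) {
        lengths[i] = rcounts[blocks[i]];
        displs[i]  = rdispls[blocks[i]];
    }
    MPI_Type_indexed(nblocks, lengths, displs, base, &newtype);
    MPI_Type_commit(&newtype);
    return newtype;   /* caller releases it with MPI_Type_free() */
}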
/*
* ompi_coll_tuned_allgatherv_intra_ring
* ompi_coll_base_allgatherv_intra_ring
*
* Function: allgatherv using O(N) steps.
* Accepts: Same arguments as MPI_Allgatherv
@@ -238,7 +230,7 @@ int ompi_coll_tuned_allgatherv_intra_bruck(void *sbuf, int scount,
* No additional memory requirements.
*
*/
int ompi_coll_tuned_allgatherv_intra_ring(void *sbuf, int scount,
int ompi_coll_base_allgatherv_intra_ring(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int *rcounts, int *rdisps,
struct ompi_datatype_t *rdtype,
@@ -252,8 +244,8 @@ int ompi_coll_tuned_allgatherv_intra_ring(void *sbuf, int scount,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_ring rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgatherv_intra_ring rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &slb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@@ -292,25 +284,24 @@ int ompi_coll_tuned_allgatherv_intra_ring(void *sbuf, int scount,
tmpsend = (char*)rbuf + rdisps[senddatafrom] * rext;
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(tmpsend, rcounts[senddatafrom], rdtype,
err = ompi_coll_base_sendrecv(tmpsend, rcounts[senddatafrom], rdtype,
sendto, MCA_COLL_BASE_TAG_ALLGATHERV,
tmprecv, rcounts[recvdatafrom], rdtype,
recvfrom, MCA_COLL_BASE_TAG_ALLGATHERV,
comm, MPI_STATUS_IGNORE, rank);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
return OMPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
/*
* ompi_coll_tuned_allgatherv_intra_neighborexchange
* ompi_coll_base_allgatherv_intra_neighborexchange
*
* Function: allgatherv using N/2 steps (O(N))
* Accepts: Same arguments as MPI_Allgatherv
@@ -368,7 +359,7 @@ int ompi_coll_tuned_allgatherv_intra_ring(void *sbuf, int scount,
* [5] [5] [5] [5] [5] [5]
*/
int
ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount,
ompi_coll_base_allgatherv_intra_neighborexchange(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int *rcounts, int *rdispls,
struct ompi_datatype_t *rdtype,
@@ -386,17 +377,17 @@ ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount,
rank = ompi_comm_rank(comm);
if (size % 2) {
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgatherv_intra_neighborexchange WARNING: odd size %d, switching to ring algorithm",
size));
return ompi_coll_tuned_allgatherv_intra_ring(sbuf, scount, sdtype,
return ompi_coll_base_allgatherv_intra_ring(sbuf, scount, sdtype,
rbuf, rcounts,
rdispls, rdtype,
comm, module);
}
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_neighborexchange rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allgatherv_intra_neighborexchange rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &slb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@@ -445,7 +436,7 @@ ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount,
*/
tmprecv = (char*)rbuf + (ptrdiff_t)rdispls[neighbor[0]] * rext;
tmpsend = (char*)rbuf + (ptrdiff_t)rdispls[rank] * rext;
err = ompi_coll_tuned_sendrecv(tmpsend, rcounts[rank], rdtype,
err = ompi_coll_base_sendrecv(tmpsend, rcounts[rank], rdtype,
neighbor[0], MCA_COLL_BASE_TAG_ALLGATHERV,
tmprecv, rcounts[neighbor[0]], rdtype,
neighbor[0], MCA_COLL_BASE_TAG_ALLGATHERV,
@@ -493,7 +484,7 @@ ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount,
tmpsend = (char*)rbuf;
/* Sendreceive */
err = ompi_coll_tuned_sendrecv(tmpsend, 1, new_sdtype, neighbor[i_parity],
err = ompi_coll_base_sendrecv(tmpsend, 1, new_sdtype, neighbor[i_parity],
MCA_COLL_BASE_TAG_ALLGATHERV,
tmprecv, 1, new_rdtype, neighbor[i_parity],
MCA_COLL_BASE_TAG_ALLGATHERV,
@@ -509,13 +500,13 @@ ompi_coll_tuned_allgatherv_intra_neighborexchange(void *sbuf, int scount,
return OMPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount,
int ompi_coll_base_allgatherv_intra_two_procs(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int *rcounts,
int *rdispls,
@@ -529,8 +520,8 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount,
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_allgatherv_intra_two_procs rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_allgatherv_intra_two_procs rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &lb, &sext);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
@@ -552,7 +543,7 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount,
}
tmprecv = (char*)rbuf + (ptrdiff_t)rdispls[remote] * rext;
err = ompi_coll_tuned_sendrecv(tmpsend, scount, sdtype, remote,
err = ompi_coll_base_sendrecv(tmpsend, scount, sdtype, remote,
MCA_COLL_BASE_TAG_ALLGATHERV,
tmprecv, rcounts[remote], rdtype, remote,
MCA_COLL_BASE_TAG_ALLGATHERV,
@@ -570,7 +561,7 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount,
return MPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
@@ -580,12 +571,12 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount,
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
* but for some small number of nodes and/or small data sizes they
* are just as fast as tuned/tree based segmenting operations
* are just as fast as base/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
* have to duplicate code.
* JPG following the examples from other coll_tuned implementations. Dec06.
* JPG following the examples from other coll_base implementations. Dec06.
*/
/* copied function (with appropriate renaming) starts here */
@@ -599,13 +590,13 @@ int ompi_coll_tuned_allgatherv_intra_two_procs(void *sbuf, int scount,
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_tuned_allgatherv_intra_basic_default(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts,
int *disps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
ompi_coll_base_allgatherv_intra_basic_default(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts,
int *disps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i, size, rank, err;
MPI_Aint extent, lb;
@@ -619,8 +610,8 @@ ompi_coll_tuned_allgatherv_intra_basic_default(void *sbuf, int scount,
* to process with rank 0 (OMPI convention)
*/
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_allgatherv_intra_basic_default rank %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_allgatherv_intra_basic_default rank %d",
rank));
if (MPI_IN_PLACE == sbuf) {
@@ -639,7 +630,6 @@ ompi_coll_tuned_allgatherv_intra_basic_default(void *sbuf, int scount,
rcounts[rank], send_type,rbuf,
rcounts, disps, rdtype, 0,
comm, comm->c_coll.coll_gatherv_module);
if (MPI_SUCCESS != err) {
return err;
}
@@ -675,178 +665,3 @@ ompi_coll_tuned_allgatherv_intra_basic_default(void *sbuf, int scount,
}
/* copied function (with appropriate renaming) ends here */
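
The basic default above is a composition rather than a schedule: gatherv everything to rank 0 with the communicator's gatherv module, then broadcast the assembled buffer. A minimal MPI-level sketch of the same composition, assuming monotonically increasing displacements (MPI_IN_PLACE handling omitted):

#include <mpi.h>

static int allgatherv_gatherv_bcast(const void *sbuf, int scount,
                                    MPI_Datatype sdtype, void *rbuf,
                                    const int *rcounts, const int *disps,
                                    MPI_Datatype rdtype, MPI_Comm comm)
{
    int size, err;
    MPI_Comm_size(comm, &size);
    err = MPI_Gatherv(sbuf, scount, sdtype,
                      rbuf, rcounts, disps, rdtype, 0, comm);
    if (MPI_SUCCESS != err) {
        return err;
    }
    /* broadcast up to the end of the last block */
    return MPI_Bcast(rbuf, disps[size - 1] + rcounts[size - 1], rdtype,
                     0, comm);
}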
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if its forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map
routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values
and perms */
/* module does not call this they call the forced_getvalues routine instead */
int
ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[ALLGATHERV] = coll_tuned_allgatherv_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_count",
"Number of allgatherv algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_allgatherv_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_allgatherv_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_allgatherv_algorithms", allgatherv_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm",
"Which allallgatherv algorithm is used. Can be locked down to choice of: 0 ignore, 1 default (allgathervv + bcast), 2 bruck, 3 ring, 4 neighbor exchange, 5: two proc only.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgatherv_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_allgatherv_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_segmentsize",
"Segment size in bytes used by default for allgatherv algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgatherv_segment_size);
coll_tuned_allgatherv_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_tree_fanout",
"Fanout for n-tree used for allgatherv algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgatherv_tree_fanout);
coll_tuned_allgatherv_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_chain_fanout",
"Fanout for chains used for allgatherv algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgatherv_chain_fanout);
return (MPI_SUCCESS);
}
int ompi_coll_tuned_allgatherv_intra_do_forced(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts,
int *rdispls,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_do_forced selected algorithm %d",
data->user_forced[ALLGATHERV].algorithm));
switch (data->user_forced[ALLGATHERV].algorithm) {
case (0):
return ompi_coll_tuned_allgatherv_intra_dec_fixed (sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (1):
return ompi_coll_tuned_allgatherv_intra_basic_default (sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (2):
return ompi_coll_tuned_allgatherv_intra_bruck (sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (3):
return ompi_coll_tuned_allgatherv_intra_ring (sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (4):
return ompi_coll_tuned_allgatherv_intra_neighborexchange (sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (5):
return ompi_coll_tuned_allgatherv_intra_two_procs (sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[ALLGATHERV].algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLGATHERV]));
return (MPI_ERR_ARG);
} /* switch */
}
int ompi_coll_tuned_allgatherv_intra_do_this(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts,
int *rdispls,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout,
int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0):
return ompi_coll_tuned_allgatherv_intra_dec_fixed(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (1):
return ompi_coll_tuned_allgatherv_intra_basic_default(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (2):
return ompi_coll_tuned_allgatherv_intra_bruck(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (3):
return ompi_coll_tuned_allgatherv_intra_ring(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (4):
return ompi_coll_tuned_allgatherv_intra_neighborexchange(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (5):
return ompi_coll_tuned_allgatherv_intra_two_procs (sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLGATHERV]));
return (MPI_ERR_ARG);
} /* switch */
}

View file

@@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -31,33 +31,15 @@
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/op/op.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/* allreduce algorithm variables */
static int coll_tuned_allreduce_algorithm_count = 5;
static int coll_tuned_allreduce_forced_algorithm = 0;
static int coll_tuned_allreduce_segment_size = 0;
static int coll_tuned_allreduce_tree_fanout;
static int coll_tuned_allreduce_chain_fanout;
/* valid values for coll_tuned_allreduce_forced_algorithm */
static mca_base_var_enum_value_t allreduce_algorithms[] = {
{0, "ignore"},
{1, "basic_linear"},
{2, "nonoverlapping"},
{3, "recursive_doubling"},
{4, "ring"},
{5, "segmented_ring"},
{0, NULL}
};
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"
/*
* ompi_coll_tuned_allreduce_intra_nonoverlapping
* ompi_coll_base_allreduce_intra_nonoverlapping
*
* This function just calls a reduce followed by a broadcast
* both called functions are tuned but they complete sequentially,
* both called functions are base but they complete sequentially,
* i.e. no additional overlapping
* meaning if the number of segments used is greater than the topo depth
* then once the first segment of data is fully 'reduced' it is not broadcast
@@ -65,7 +47,7 @@ static mca_base_var_enum_value_t allreduce_algorithms[] = {
*
*/
int
ompi_coll_tuned_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count,
ompi_coll_base_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
@@ -75,7 +57,7 @@ ompi_coll_tuned_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_nonoverlapping rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:allreduce_intra_nonoverlapping rank %d", rank));
/* Reduce to 0 and broadcast. */
@@ -100,7 +82,7 @@ ompi_coll_tuned_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count
}
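
As the comment says: one reduce, one broadcast, no pipelining between them. A minimal MPI-level sketch of the same composition, with MPI_IN_PLACE handling analogous to the basic-linear variant later in this file:

#include <stddef.h>
#include <mpi.h>

static int allreduce_nonoverlapping(const void *sbuf, void *rbuf, int count,
                                    MPI_Datatype dtype, MPI_Op op,
                                    MPI_Comm comm)
{
    int rank, err;
    MPI_Comm_rank(comm, &rank);
    if (MPI_IN_PLACE == sbuf) {
        /* each rank's contribution is already in rbuf; only the root
           reduces in place */
        err = MPI_Reduce((0 == rank) ? MPI_IN_PLACE : rbuf,
                         (0 == rank) ? rbuf : NULL,
                         count, dtype, op, 0, comm);
    } else {
        err = MPI_Reduce(sbuf, rbuf, count, dtype, op, 0, comm);
    }
    if (MPI_SUCCESS != err) {
        return err;
    }
    return MPI_Bcast(rbuf, count, dtype, 0, comm);
}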
/*
* ompi_coll_tuned_allreduce_intra_recursivedoubling
* ompi_coll_base_allreduce_intra_recursivedoubling
*
* Function: Recursive doubling algorithm for allreduce operation
* Accepts: Same as MPI_Allreduce()
@@ -141,7 +123,7 @@ ompi_coll_tuned_allreduce_intra_nonoverlapping(void *sbuf, void *rbuf, int count
*
*/
int
ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
ompi_coll_base_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
@@ -157,8 +139,8 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allreduce_intra_recursivedoubling rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allreduce_intra_recursivedoubling rank %d", rank));
/* Special case for size == 1 */
if (1 == size) {
@@ -287,14 +269,14 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
return MPI_SUCCESS;
error_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tRank %d Error occurred %d\n",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n",
__FILE__, line, rank, ret));
if (NULL != inplacebuf) free(inplacebuf);
return ret;
}
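
A sketch of the core exchange-and-combine loop, assuming the rank count has already been folded down to a power of two and that op is commutative (the real routine orders the reduction operands by rank to stay correct for non-commutative ops):

#include <mpi.h>

static void recursive_doubling_combine(void *resbuf, void *tmpbuf, int count,
                                       MPI_Datatype dtype, MPI_Op op,
                                       int rank, int size, MPI_Comm comm)
{
    for (int distance = 1; distance < size; distance <<= 1) {
        int remote = rank ^ distance;         /* peer differs in one bit */
        MPI_Sendrecv(resbuf, count, dtype, remote, 0,
                     tmpbuf, count, dtype, remote, 0,
                     comm, MPI_STATUS_IGNORE);
        /* fold the received partial result into our running result */
        MPI_Reduce_local(tmpbuf, resbuf, count, dtype, op);
    }
}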
/*
* ompi_coll_tuned_allreduce_intra_ring
* ompi_coll_base_allreduce_intra_ring
*
* Function: Ring algorithm for allreduce operation
* Accepts: Same as MPI_Allreduce()
@@ -358,7 +340,7 @@ ompi_coll_tuned_allreduce_intra_recursivedoubling(void *sbuf, void *rbuf,
*
*/
int
ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
ompi_coll_base_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
@@ -375,8 +357,8 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allreduce_intra_ring rank %d, count %d", rank, count));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allreduce_intra_ring rank %d, count %d", rank, count));
/* Special case for size == 1 */
if (1 == size) {
@@ -389,8 +371,8 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
/* Special case for count less than size - use recursive doubling */
if (count < size) {
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:allreduce_ring rank %d/%d, count %d, switching to recursive doubling", rank, size, count));
return (ompi_coll_tuned_allreduce_intra_recursivedoubling(sbuf, rbuf,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:allreduce_ring rank %d/%d, count %d, switching to recursive doubling", rank, size, count));
return (ompi_coll_base_allreduce_intra_recursivedoubling(sbuf, rbuf,
count,
dtype, op,
comm, module));
@@ -411,7 +393,7 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
blocks (split_rank) .. (size - 1) are "late".
Early blocks are at most 1 element larger than the late ones.
*/
COLL_TUNED_COMPUTE_BLOCKCOUNT( count, size, split_rank,
COLL_BASE_COMPUTE_BLOCKCOUNT( count, size, split_rank,
early_segcount, late_segcount );
max_segcount = early_segcount;
max_real_segsize = true_extent + (max_segcount - 1) * extent;
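
COLL_BASE_COMPUTE_BLOCKCOUNT is the renamed COLL_TUNED_COMPUTE_BLOCKCOUNT; its assumed semantics, written out as a function, match the comment above ("early" blocks are at most one element larger):

/* Split count into num_blocks near-equal blocks: blocks 0 .. split-1 get
 * one extra element when count does not divide evenly. Assumed semantics,
 * for illustration. */
static void compute_blockcount(int count, int num_blocks,
                               int *split, int *early, int *late)
{
    *late  = count / num_blocks;
    *split = count % num_blocks;
    *early = *late + ((*split != 0) ? 1 : 0);
}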
@@ -531,7 +513,7 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
tmprecv = (char*)rbuf + (ptrdiff_t)recv_block_offset * extent;
tmpsend = (char*)rbuf + (ptrdiff_t)send_block_offset * extent;
ret = ompi_coll_tuned_sendrecv(tmpsend, block_count, dtype, send_to,
ret = ompi_coll_base_sendrecv(tmpsend, block_count, dtype, send_to,
MCA_COLL_BASE_TAG_ALLREDUCE,
tmprecv, max_segcount, dtype, recv_from,
MCA_COLL_BASE_TAG_ALLREDUCE,
@@ -546,7 +528,7 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
return MPI_SUCCESS;
error_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tRank %d Error occurred %d\n",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n",
__FILE__, line, rank, ret));
if (NULL != inbuf[0]) free(inbuf[0]);
if (NULL != inbuf[1]) free(inbuf[1]);
@@ -554,7 +536,7 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
}
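
The ring algorithm makes two passes over the same neighbor links: a reduce-scatter pass that leaves each rank owning one fully reduced block, then an allgather pass that circulates those blocks. A sketch of the step indexing (illustration only; block sizes follow the early/late split above):

#include <stdio.h>

static void ring_allreduce_schedule(int rank, int size)
{
    int send_to   = (rank + 1) % size;
    int recv_from = (rank - 1 + size) % size;
    /* pass 1: reduce-scatter; after size-1 steps rank r owns the fully
       reduced block (r + 1) % size */
    for (int k = 0; k < size - 1; k++)
        printf("rank %d RS step %d: send block %d to %d, combine block %d from %d\n",
               rank, k, (rank - k + size) % size, send_to,
               (rank - k - 1 + size) % size, recv_from);
    /* pass 2: allgather; forward the reduced blocks around the ring */
    for (int k = 0; k < size - 1; k++)
        printf("rank %d AG step %d: send block %d, recv block %d\n",
               rank, k, (rank - k + 1 + size) % size,
               (rank - k + size) % size);
}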
/*
* ompi_coll_tuned_allreduce_intra_ring_segmented
* ompi_coll_base_allreduce_intra_ring_segmented
*
* Function: Pipelined ring algorithm for allreduce operation
* Accepts: Same as MPI_Allreduce(), segment size
@@ -633,7 +615,7 @@ ompi_coll_tuned_allreduce_intra_ring(void *sbuf, void *rbuf, int count,
*
*/
int
ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count,
ompi_coll_base_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
@@ -652,8 +634,8 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allreduce_intra_ring_segmented rank %d, count %d", rank, count));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:allreduce_intra_ring_segmented rank %d, count %d", rank, count));
/* Special case for size == 1 */
if (1 == size) {
@@ -672,12 +654,12 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
ret = ompi_datatype_type_size( dtype, &typelng);
if (MPI_SUCCESS != ret) { line = __LINE__; goto error_hndl; }
segcount = count;
COLL_TUNED_COMPUTED_SEGCOUNT(segsize, typelng, segcount)
COLL_BASE_COMPUTED_SEGCOUNT(segsize, typelng, segcount)
/* Special case for count less than size * segcount - use regular ring */
if (count < (size * segcount)) {
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:allreduce_ring_segmented rank %d/%d, count %d, switching to regular ring", rank, size, count));
return (ompi_coll_tuned_allreduce_intra_ring(sbuf, rbuf, count, dtype, op,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:allreduce_ring_segmented rank %d/%d, count %d, switching to regular ring", rank, size, count));
return (ompi_coll_base_allreduce_intra_ring(sbuf, rbuf, count, dtype, op,
comm, module));
}
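
COLL_BASE_COMPUTED_SEGCOUNT converts the byte-valued segsize parameter into an element count; assumed semantics (the real macro may also round using the residual bytes):

#include <stddef.h>

/* If segmentation is requested and one segment is smaller than the whole
 * message, cap segcount near segsize / typelng elements. Assumed semantics,
 * for illustration. */
static void computed_segcount(int segsize, size_t typelng, int *segcount)
{
    if (segsize > 0 && (size_t)segsize >= typelng &&
        (size_t)segsize < typelng * (size_t)*segcount) {
        *segcount = (int)((size_t)segsize / typelng);
    }
}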
@@ -697,9 +679,9 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
Note, these blocks will be split into num_phases segments,
out of the largest one will have max_segcount elements.
*/
COLL_TUNED_COMPUTE_BLOCKCOUNT( count, size, split_rank,
COLL_BASE_COMPUTE_BLOCKCOUNT( count, size, split_rank,
early_blockcount, late_blockcount );
COLL_TUNED_COMPUTE_BLOCKCOUNT( early_blockcount, num_phases, inbi,
COLL_BASE_COMPUTE_BLOCKCOUNT( early_blockcount, num_phases, inbi,
max_segcount, k);
max_real_segsize = true_extent + (ptrdiff_t)(max_segcount - 1) * extent;
@@ -754,7 +736,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
((ptrdiff_t)rank * (ptrdiff_t)early_blockcount) :
((ptrdiff_t)rank * (ptrdiff_t)late_blockcount + split_rank));
block_count = ((rank < split_rank)? early_blockcount : late_blockcount);
COLL_TUNED_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
COLL_BASE_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
early_phase_segcount, late_phase_segcount)
phase_count = ((phase < split_phase)?
(early_phase_segcount) : (late_phase_segcount));
@@ -790,7 +772,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
((ptrdiff_t)prevblock * (ptrdiff_t)late_blockcount + split_rank));
block_count = ((prevblock < split_rank)?
early_blockcount : late_blockcount);
COLL_TUNED_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
COLL_BASE_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
early_phase_segcount, late_phase_segcount)
phase_count = ((phase < split_phase)?
(early_phase_segcount) : (late_phase_segcount));
@@ -819,7 +801,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
((ptrdiff_t)recv_from * (ptrdiff_t)late_blockcount + split_rank));
block_count = ((recv_from < split_rank)?
early_blockcount : late_blockcount);
COLL_TUNED_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
COLL_BASE_COMPUTE_BLOCKCOUNT(block_count, num_phases, split_phase,
early_phase_segcount, late_phase_segcount)
phase_count = ((phase < split_phase)?
(early_phase_segcount) : (late_phase_segcount));
@@ -850,7 +832,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
tmprecv = (char*)rbuf + (ptrdiff_t)recv_block_offset * extent;
tmpsend = (char*)rbuf + (ptrdiff_t)send_block_offset * extent;
ret = ompi_coll_tuned_sendrecv(tmpsend, block_count, dtype, send_to,
ret = ompi_coll_base_sendrecv(tmpsend, block_count, dtype, send_to,
MCA_COLL_BASE_TAG_ALLREDUCE,
tmprecv, early_blockcount, dtype, recv_from,
MCA_COLL_BASE_TAG_ALLREDUCE,
@@ -865,7 +847,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
return MPI_SUCCESS;
error_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tRank %d Error occurred %d\n",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n",
__FILE__, line, rank, ret));
if (NULL != inbuf[0]) free(inbuf[0]);
if (NULL != inbuf[1]) free(inbuf[1]);
@@ -876,7 +858,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
* but for some small number of nodes and/or small data sizes they
* are just as fast as tuned/tree based segmenting operations
* are just as fast as base/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
@@ -895,7 +877,7 @@ ompi_coll_tuned_allreduce_intra_ring_segmented(void *sbuf, void *rbuf, int count
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_tuned_allreduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
ompi_coll_base_allreduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
@@ -905,158 +887,28 @@ ompi_coll_tuned_allreduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_basic_linear rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:allreduce_intra_basic_linear rank %d", rank));
/* Reduce to 0 and broadcast. */
if (MPI_IN_PLACE == sbuf) {
if (0 == rank) {
err = ompi_coll_tuned_reduce_intra_basic_linear (MPI_IN_PLACE, rbuf, count, dtype,
err = ompi_coll_base_reduce_intra_basic_linear (MPI_IN_PLACE, rbuf, count, dtype,
op, 0, comm, module);
} else {
err = ompi_coll_tuned_reduce_intra_basic_linear(rbuf, NULL, count, dtype,
err = ompi_coll_base_reduce_intra_basic_linear(rbuf, NULL, count, dtype,
op, 0, comm, module);
}
} else {
err = ompi_coll_tuned_reduce_intra_basic_linear(sbuf, rbuf, count, dtype,
err = ompi_coll_base_reduce_intra_basic_linear(sbuf, rbuf, count, dtype,
op, 0, comm, module);
}
if (MPI_SUCCESS != err) {
return err;
}
return ompi_coll_tuned_bcast_intra_basic_linear(rbuf, count, dtype, 0, comm, module);
return ompi_coll_base_bcast_intra_basic_linear(rbuf, count, dtype, 0, comm, module);
}
/* copied function (with appropriate renaming) ends here */
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if its forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values and perms */
/* module does not call this they call the forced_getvalues routine instead */
int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[ALLREDUCE] = coll_tuned_allreduce_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_count",
"Number of allreduce algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_allreduce_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_allreduce_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_allreduce_algorithms", allreduce_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm",
"Which allreduce algorithm is used. Can be locked down to any of: 0 ignore, 1 basic linear, 2 nonoverlapping (tuned reduce + tuned bcast), 3 recursive doubling, 4 ring, 5 segmented ring",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allreduce_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_allreduce_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_segmentsize",
"Segment size in bytes used by default for allreduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allreduce_segment_size);
coll_tuned_allreduce_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_tree_fanout",
"Fanout for n-tree used for allreduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allreduce_tree_fanout);
coll_tuned_allreduce_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_chain_fanout",
"Fanout for chains used for allreduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allreduce_chain_fanout);
return (MPI_SUCCESS);
}
int ompi_coll_tuned_allreduce_intra_do_forced(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced selected algorithm %d, segment size %d",
data->user_forced[ALLREDUCE].algorithm,
data->user_forced[ALLREDUCE].segsize));
switch (data->user_forced[ALLREDUCE].algorithm) {
case (0): return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm, module);
case (1): return ompi_coll_tuned_allreduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, comm, module);
case (2): return ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm, module);
case (3): return ompi_coll_tuned_allreduce_intra_recursivedoubling (sbuf, rbuf, count, dtype, op, comm, module);
case (4): return ompi_coll_tuned_allreduce_intra_ring (sbuf, rbuf, count, dtype, op, comm, module);
case (5): return ompi_coll_tuned_allreduce_intra_ring_segmented (sbuf, rbuf, count, dtype, op, comm, module, data->user_forced[ALLREDUCE].segsize);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[ALLREDUCE].algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLREDUCE]));
return (MPI_ERR_ARG);
} /* switch */
}
int ompi_coll_tuned_allreduce_intra_do_this(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this algorithm %d topo fan in/out %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0): return ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf, rbuf, count, dtype, op, comm, module);
case (1): return ompi_coll_tuned_allreduce_intra_basic_linear (sbuf, rbuf, count, dtype, op, comm, module);
case (2): return ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count, dtype, op, comm, module);
case (3): return ompi_coll_tuned_allreduce_intra_recursivedoubling (sbuf, rbuf, count, dtype, op, comm, module);
case (4): return ompi_coll_tuned_allreduce_intra_ring (sbuf, rbuf, count, dtype, op, comm, module);
case (5): return ompi_coll_tuned_allreduce_intra_ring_segmented (sbuf, rbuf, count, dtype, op, comm, module, segsize);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[ALLREDUCE]));
return (MPI_ERR_ARG);
} /* switch */
}
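
A note on use: the variable registered above surfaces to users as coll_tuned_allreduce_algorithm, and the do_forced dispatch above is the code path it drives. A minimal caller-side sketch, assuming the usual OMPI_MCA_<name> environment mapping and that forced algorithms are gated by the coll_tuned_use_dynamic_rules switch (neither is shown in this diff); the values must be set before MPI_Init:

#include <stdlib.h>
#include <mpi.h>

int main(int argc, char **argv)
{
    /* Assumption: OMPI_MCA_* environment variables seed MCA parameters,
     * and forced algorithms only apply when dynamic rules are enabled. */
    setenv("OMPI_MCA_coll_tuned_use_dynamic_rules", "1", 1);
    setenv("OMPI_MCA_coll_tuned_allreduce_algorithm", "4", 1);  /* 4 = ring */

    MPI_Init(&argc, &argv);
    /* ... MPI_Allreduce calls on intra-communicators now take the
     * forced path dispatched above ... */
    MPI_Finalize();
    return 0;
}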


@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -30,37 +30,18 @@
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/* alltoall algorithm variables */
static int coll_tuned_alltoall_algorithm_count = 5;
static int coll_tuned_alltoall_forced_algorithm = 0;
static int coll_tuned_alltoall_segment_size = 0;
static int coll_tuned_alltoall_max_requests;
static int coll_tuned_alltoall_tree_fanout;
static int coll_tuned_alltoall_chain_fanout;
/* valid values for coll_tuned_alltoall_forced_algorithm */
static mca_base_var_enum_value_t alltoall_algorithms[] = {
{0, "ignore"},
{1, "linear"},
{2, "pairwise"},
{3, "modified_bruck"},
{4, "linear_sync"},
{5, "two_proc"},
{0, NULL}
};
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"
/* MPI_IN_PLACE all to all algorithm. TODO: implement a better one. */
static int
mca_coll_tuned_alltoall_intra_basic_inplace(void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
int
mca_coll_base_alltoall_intra_basic_inplace(void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
int i, j, size, rank, err=MPI_SUCCESS;
MPI_Request *preq;
char *tmp_buffer;
@ -91,7 +72,7 @@ mca_coll_tuned_alltoall_intra_basic_inplace(void *rbuf, int rcount,
for (i = 0 ; i < size ; ++i) {
for (j = i+1 ; j < size ; ++j) {
/* Initiate all send/recv to/from others. */
preq = tuned_module->tuned_data->mcct_reqs;
preq = coll_base_comm_get_reqs(base_module->base_data, size * 2);
if (i == rank) {
/* Copy the data into the temporary buffer */
@ -128,11 +109,8 @@ mca_coll_tuned_alltoall_intra_basic_inplace(void *rbuf, int rcount,
}
/* Wait for the requests to complete */
err = ompi_request_wait_all (2, tuned_module->tuned_data->mcct_reqs, MPI_STATUSES_IGNORE);
err = ompi_request_wait_all (2, base_module->base_data->mcct_reqs, MPI_STATUSES_IGNORE);
if (MPI_SUCCESS != err) { goto error_hndl; }
/* Free the requests. */
mca_coll_tuned_free_reqs(tuned_module->tuned_data->mcct_reqs, 2);
}
}
@ -145,7 +123,7 @@ mca_coll_tuned_alltoall_intra_basic_inplace(void *rbuf, int rcount,
return err;
}
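
The in-place fallback above is reached when the caller passes MPI_IN_PLACE as the send buffer; the send count and type are then ignored and the receive buffer must already hold the block destined for every peer. A minimal caller-side sketch, not part of the diff:

#include <stdlib.h>
#include <mpi.h>

/* Hedged example: exercise the in-place path with rcount ints per peer,
 * laid out in rbuf by destination rank. */
static int inplace_alltoall_example(MPI_Comm comm, int rcount)
{
    int size, rank, i, err;
    MPI_Comm_size(comm, &size);
    MPI_Comm_rank(comm, &rank);

    int *rbuf = malloc((size_t)size * rcount * sizeof(int));
    for (i = 0; i < size * rcount; ++i)
        rbuf[i] = rank;               /* block i/rcount is destined for that rank */

    err = MPI_Alltoall(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                       rbuf, rcount, MPI_INT, comm);
    free(rbuf);
    return err;
}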
int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
int ompi_coll_base_alltoall_intra_pairwise(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -157,15 +135,15 @@ int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
ptrdiff_t lb, sext, rext;
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
return mca_coll_base_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
comm, module);
}
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoall_intra_pairwise rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:alltoall_intra_pairwise rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &lb, &sext);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
@ -185,7 +163,7 @@ int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
tmprecv = (char*)rbuf + (ptrdiff_t)recvfrom * rext * (ptrdiff_t)rcount;
/* send and receive */
err = ompi_coll_tuned_sendrecv( tmpsend, scount, sdtype, sendto,
err = ompi_coll_base_sendrecv( tmpsend, scount, sdtype, sendto,
MCA_COLL_BASE_TAG_ALLTOALL,
tmprecv, rcount, rdtype, recvfrom,
MCA_COLL_BASE_TAG_ALLTOALL,
@ -196,40 +174,36 @@ int ompi_coll_tuned_alltoall_intra_pairwise(void *sbuf, int scount,
return MPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"%s:%4d\tError occurred %d, rank %2d", __FILE__, line,
err, rank));
return err;
}
int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
int ompi_coll_base_alltoall_intra_bruck(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i, k, line = -1, rank, size, err = 0, weallocated = 0;
int i, k, line = -1, rank, size, err = 0;
int sendto, recvfrom, distance, *displs = NULL, *blen = NULL;
char *tmpbuf = NULL, *tmpbuf_free = NULL;
ptrdiff_t rlb, slb, tlb, sext, rext, tsext;
struct ompi_datatype_t *new_ddt;
#ifdef blahblah
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
#endif
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
return mca_coll_base_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
comm, module);
}
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoall_intra_bruck rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:alltoall_intra_bruck rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &slb, &sext);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
@ -241,25 +215,10 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
#ifdef blahblah
/* try and SAVE memory by using the data segment hung off
the communicator if possible */
if (data->mcct_num_reqs >= size) {
/* we have enough preallocated for displacements and lengths */
displs = (int*) data->mcct_reqs;
blen = (int *) (displs + size);
weallocated = 0;
}
else { /* allocate the buffers ourself */
#endif
displs = (int *) malloc(size * sizeof(int));
if (displs == NULL) { line = __LINE__; err = -1; goto err_hndl; }
blen = (int *) malloc(size * sizeof(int));
if (blen == NULL) { line = __LINE__; err = -1; goto err_hndl; }
weallocated = 1;
#ifdef blahblah
}
#endif
displs = (int *) malloc(size * sizeof(int));
if (displs == NULL) { line = __LINE__; err = -1; goto err_hndl; }
blen = (int *) malloc(size * sizeof(int));
if (blen == NULL) { line = __LINE__; err = -1; goto err_hndl; }
/* tmp buffer allocation for message data */
tmpbuf_free = (char *) malloc(tsext + ((ptrdiff_t)scount * (ptrdiff_t)size - 1) * sext);
@ -307,7 +266,7 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
/* Sendreceive */
err = ompi_coll_tuned_sendrecv ( tmpbuf, 1, new_ddt, sendto,
err = ompi_coll_base_sendrecv ( tmpbuf, 1, new_ddt, sendto,
MCA_COLL_BASE_TAG_ALLTOALL,
rbuf, 1, new_ddt, recvfrom,
MCA_COLL_BASE_TAG_ALLTOALL,
@ -334,14 +293,12 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
/* Step 4 - clean up */
if (tmpbuf != NULL) free(tmpbuf_free);
if (weallocated) {
if (displs != NULL) free(displs);
if (blen != NULL) free(blen);
}
if (displs != NULL) free(displs);
if (blen != NULL) free(blen);
return OMPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err,
rank));
if (tmpbuf != NULL) free(tmpbuf_free);
@ -367,7 +324,7 @@ int ompi_coll_tuned_alltoall_intra_bruck(void *sbuf, int scount,
* - wait for any request to complete
* - replace that request by the new one of the same type.
*/
int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
int ompi_coll_base_alltoall_intra_linear_sync(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -382,7 +339,7 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
ompi_request_t **reqs = NULL;
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
return mca_coll_base_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
comm, module);
}
@ -391,8 +348,8 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_alltoall_intra_linear_sync rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_alltoall_intra_linear_sync rank %d", rank));
error = ompi_datatype_get_extent(sdtype, &slb, &sext);
if (OMPI_SUCCESS != error) {
@ -506,7 +463,7 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
return MPI_SUCCESS;
error_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, error,
rank));
if (NULL != reqs) free(reqs);
@ -514,7 +471,7 @@ int ompi_coll_tuned_alltoall_intra_linear_sync(void *sbuf, int scount,
}
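
The linear_sync algorithm sketched in the comment above bounds the number of requests in flight: prime a window, then each time one request completes, post the next transfer in its slot. A condensed sketch of that pattern with the public API, receive side only (hypothetical helper; assumes max_reqs <= 64 and that every peer sends rcount elements with tag 0):

#include <mpi.h>

static int windowed_recvs(char *rbuf, int rcount, MPI_Datatype rdtype,
                          int size, int rank, int max_reqs, MPI_Comm comm)
{
    MPI_Request reqs[64];              /* assumes max_reqs <= 64 */
    MPI_Aint lb, ext;
    int next = 0, inflight = 0, idx, err;

    MPI_Type_get_extent(rdtype, &lb, &ext);

    /* Prime the window, skipping ourselves. */
    while (inflight < max_reqs && next < size) {
        if (next != rank) {
            err = MPI_Irecv(rbuf + (MPI_Aint)next * rcount * ext, rcount, rdtype,
                            next, 0, comm, &reqs[inflight++]);
            if (MPI_SUCCESS != err) return err;
        }
        ++next;
    }
    /* Replace each completed request with the next one of the same type. */
    while (inflight > 0) {
        err = MPI_Waitany(inflight, reqs, &idx, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS != err) return err;
        if (next == rank) ++next;
        if (next < size) {
            err = MPI_Irecv(rbuf + (MPI_Aint)next * rcount * ext, rcount, rdtype,
                            next, 0, comm, &reqs[idx]);
            if (MPI_SUCCESS != err) return err;
            ++next;
        } else {
            reqs[idx] = reqs[--inflight];   /* shrink the window */
        }
    }
    return MPI_SUCCESS;
}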
int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
int ompi_coll_base_alltoall_intra_two_procs(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -526,14 +483,14 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
ptrdiff_t sext, rext, lb;
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
return mca_coll_base_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
comm, module);
}
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_alltoall_intra_two_procs rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_alltoall_intra_two_procs rank %d", rank));
err = ompi_datatype_get_extent (sdtype, &lb, &sext);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl; }
@ -548,7 +505,7 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
tmprecv = (char*)rbuf + (ptrdiff_t)remote * rext * (ptrdiff_t)rcount;
/* send and receive */
err = ompi_coll_tuned_sendrecv ( tmpsend, scount, sdtype, remote,
err = ompi_coll_base_sendrecv ( tmpsend, scount, sdtype, remote,
MCA_COLL_BASE_TAG_ALLTOALL,
tmprecv, rcount, rdtype, remote,
MCA_COLL_BASE_TAG_ALLTOALL,
@ -566,7 +523,7 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
return MPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"%s:%4d\tError occurred %d, rank %2d", __FILE__, line, err,
rank));
return err;
@ -578,7 +535,7 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
* Linear functions are copied from the BASIC coll module;
* they do not segment the message and are simple implementations,
* but for some small number of nodes and/or small data sizes they
* are just as fast as tuned/tree based segmenting operations
* are just as fast as base/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
@ -588,22 +545,22 @@ int ompi_coll_tuned_alltoall_intra_two_procs(void *sbuf, int scount,
/* copied function (with appropriate renaming) starts here */
int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
int ompi_coll_base_alltoall_intra_basic_linear(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i, rank, size, err, nreqs;
char *psnd, *prcv;
MPI_Aint lb, sndinc, rcvinc;
ompi_request_t **req, **sreq, **rreq;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
return mca_coll_base_alltoall_intra_basic_inplace (rbuf, rcount, rdtype,
comm, module);
}
@ -612,9 +569,8 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_alltoall_intra_basic_linear rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_alltoall_intra_basic_linear rank %d", rank));
err = ompi_datatype_get_extent(sdtype, &lb, &sndinc);
if (OMPI_SUCCESS != err) {
@ -646,8 +602,7 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
/* Initiate all send/recv to/from others. */
req = rreq = data->mcct_reqs;
sreq = rreq + size - 1;
req = rreq = coll_base_comm_get_reqs(data, (size - 1) * 2);
prcv = (char *) rbuf;
psnd = (char *) sbuf;
@ -656,12 +611,11 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
for (nreqs = 0, i = (rank + 1) % size; i != rank;
i = (i + 1) % size, ++rreq, ++nreqs) {
err =
MCA_PML_CALL(irecv_init
(prcv + (ptrdiff_t)i * rcvinc, rcount, rdtype, i,
MCA_COLL_BASE_TAG_ALLTOALL, comm, rreq));
err = MCA_PML_CALL(irecv_init
(prcv + (ptrdiff_t)i * rcvinc, rcount, rdtype, i,
MCA_COLL_BASE_TAG_ALLTOALL, comm, rreq));
if (MPI_SUCCESS != err) {
ompi_coll_tuned_free_reqs(req, rreq - req);
ompi_coll_base_free_reqs(req, nreqs);
return err;
}
}
@ -670,20 +624,19 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
- We would like to minimize the search time through message queue
when messages actually arrive in the order in which they were posted.
*/
for (nreqs = 0, i = (rank + size - 1) % size; i != rank;
sreq = rreq;
for (i = (rank + size - 1) % size; i != rank;
i = (i + size - 1) % size, ++sreq, ++nreqs) {
err =
MCA_PML_CALL(isend_init
(psnd + (ptrdiff_t)i * sndinc, scount, sdtype, i,
MCA_COLL_BASE_TAG_ALLTOALL,
MCA_PML_BASE_SEND_STANDARD, comm, sreq));
err = MCA_PML_CALL(isend_init
(psnd + (ptrdiff_t)i * sndinc, scount, sdtype, i,
MCA_COLL_BASE_TAG_ALLTOALL,
MCA_PML_BASE_SEND_STANDARD, comm, sreq));
if (MPI_SUCCESS != err) {
ompi_coll_tuned_free_reqs(req, sreq - req);
ompi_coll_base_free_reqs(req, nreqs);
return err;
}
}
nreqs = (size - 1) * 2;
/* Start your engines. This will never return an error. */
MCA_PML_CALL(start(nreqs, req));
@ -698,165 +651,10 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
err = ompi_request_wait_all(nreqs, req, MPI_STATUSES_IGNORE);
/* Free the reqs */
ompi_coll_tuned_free_reqs(req, nreqs);
ompi_coll_base_free_reqs(req, nreqs);
/* All done */
return err;
}
/* copied function (with appropriate renaming) ends here */
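
The loop above builds persistent requests (irecv_init/isend_init) that are started as a unit and can, in principle, be reused. The same pattern with the public MPI API, reduced to one send/recv pair (hedged sketch, not the code above):

#include <mpi.h>

static int persistent_exchange(char *sbuf, char *rbuf, int count,
                               int left, int right, MPI_Comm comm)
{
    MPI_Request reqs[2];
    int err;

    /* Create the requests once; nothing moves on the wire yet. */
    err = MPI_Recv_init(rbuf, count, MPI_CHAR, left, 0, comm, &reqs[0]);
    if (MPI_SUCCESS != err) return err;
    err = MPI_Send_init(sbuf, count, MPI_CHAR, right, 0, comm, &reqs[1]);
    if (MPI_SUCCESS != err) return err;

    /* Start both and wait for both; the pair could be restarted in a loop. */
    MPI_Startall(2, reqs);
    err = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);

    /* Persistent requests must be freed explicitly. */
    MPI_Request_free(&reqs[0]);
    MPI_Request_free(&reqs[1]);
    return err;
}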
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if its forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values and perms */
/* module does not call this they call the forced_getvalues routine instead */
int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t*new_enum;
ompi_coll_tuned_forced_max_algorithms[ALLTOALL] = coll_tuned_alltoall_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_count",
"Number of alltoall algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_alltoall_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_alltoall_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_alltoall_algorithms", alltoall_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm",
"Which alltoall algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 pairwise, 3: modified bruck, 4: linear with sync, 5:two proc only.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_alltoall_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_segmentsize",
"Segment size in bytes used by default for alltoall algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_segment_size);
coll_tuned_alltoall_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_tree_fanout",
"Fanout for n-tree used for alltoall algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_tree_fanout);
coll_tuned_alltoall_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_chain_fanout",
"Fanout for chains used for alltoall algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_chain_fanout);
coll_tuned_alltoall_max_requests = 0; /* no limit for alltoall by default */
mca_param_indices->max_requests_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_max_requests",
"Maximum number of outstanding send or recv requests. Only has meaning for synchronized algorithms.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_max_requests);
if (mca_param_indices->max_requests_param_index < 0) {
return mca_param_indices->max_requests_param_index;
}
if (coll_tuned_alltoall_max_requests < 0) {
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
opal_output( 0, "Maximum outstanding requests must be a positive number greater than 1. Switching to the system-level default %d\n",
ompi_coll_tuned_init_max_requests );
}
coll_tuned_alltoall_max_requests = 0;
}
return (MPI_SUCCESS);
}
int ompi_coll_tuned_alltoall_intra_do_forced(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced selected algorithm %d",
data->user_forced[ALLTOALL].algorithm));
switch (data->user_forced[ALLTOALL].algorithm) {
case (0): return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (1): return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (2): return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (3): return ompi_coll_tuned_alltoall_intra_bruck (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (4): return ompi_coll_tuned_alltoall_intra_linear_sync (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module, data->user_forced[ALLTOALL].max_requests);
case (5): return ompi_coll_tuned_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[ALLTOALL].algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL]));
return (MPI_ERR_ARG);
} /* switch */
}
int ompi_coll_tuned_alltoall_intra_do_this(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize,
int max_requests)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0): return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (1): return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (2): return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (3): return ompi_coll_tuned_alltoall_intra_bruck (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (4): return ompi_coll_tuned_alltoall_intra_linear_sync (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module, max_requests);
case (5): return ompi_coll_tuned_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL]));
return (MPI_ERR_ARG);
} /* switch */
}


@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -32,29 +32,17 @@
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"
/* alltoallv algorithm variables */
static int coll_tuned_alltoallv_algorithm_count = 2;
static int coll_tuned_alltoallv_forced_algorithm = 0;
/* valid values for coll_tuned_alltoallv_forced_algorithm */
static mca_base_var_enum_value_t alltoallv_algorithms[] = {
{0, "ignore"},
{1, "basic_linear"},
{2, "pairwise"},
{0, NULL}
};
static int
mca_coll_tuned_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, const int *rdisps,
int
mca_coll_base_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, const int *rdisps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
int i, j, size, rank, err=MPI_SUCCESS;
MPI_Request *preq;
char *tmp_buffer;
@ -90,7 +78,7 @@ mca_coll_tuned_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, con
for (i = 0 ; i < size ; ++i) {
for (j = i+1 ; j < size ; ++j) {
/* Initiate all send/recv to/from others. */
preq = tuned_module->tuned_data->mcct_reqs;
preq = coll_base_comm_get_reqs(base_module->base_data, 2);
if (i == rank && rcounts[j]) {
/* Copy the data into the temporary buffer */
@ -127,11 +115,8 @@ mca_coll_tuned_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, con
}
/* Wait for the requests to complete */
err = ompi_request_wait_all (2, tuned_module->tuned_data->mcct_reqs, MPI_STATUSES_IGNORE);
err = ompi_request_wait_all (2, base_module->base_data->mcct_reqs, MPI_STATUSES_IGNORE);
if (MPI_SUCCESS != err) { goto error_hndl; }
/* Free the requests. */
mca_coll_tuned_free_reqs(tuned_module->tuned_data->mcct_reqs, 2);
}
}
@ -145,7 +130,7 @@ mca_coll_tuned_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, con
}
int
ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps,
ompi_coll_base_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps,
struct ompi_datatype_t *sdtype,
void* rbuf, int *rcounts, int *rdisps,
struct ompi_datatype_t *rdtype,
@ -157,15 +142,15 @@ ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps,
ptrdiff_t sext, rext;
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoallv_intra_basic_inplace (rbuf, rcounts, rdisps,
return mca_coll_base_alltoallv_intra_basic_inplace (rbuf, rcounts, rdisps,
rdtype, comm, module);
}
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoallv_intra_pairwise rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:alltoallv_intra_pairwise rank %d", rank));
ompi_datatype_type_extent(sdtype, &sext);
ompi_datatype_type_extent(rdtype, &rext);
@ -182,7 +167,7 @@ ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps,
prcv = (char*)rbuf + (ptrdiff_t)rdisps[recvfrom] * rext;
/* send and receive */
err = ompi_coll_tuned_sendrecv( psnd, scounts[sendto], sdtype, sendto,
err = ompi_coll_base_sendrecv( psnd, scounts[sendto], sdtype, sendto,
MCA_COLL_BASE_TAG_ALLTOALLV,
prcv, rcounts[recvfrom], rdtype, recvfrom,
MCA_COLL_BASE_TAG_ALLTOALLV,
@ -193,23 +178,22 @@ ompi_coll_tuned_alltoallv_intra_pairwise(void *sbuf, int *scounts, int *sdisps,
return MPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"%s:%4d\tError occurred %d, rank %2d at step %d", __FILE__, line,
err, rank, step));
return err;
}
/*
/**
* Linear functions are copied from the basic coll module. For
* some small number of nodes and/or small data sizes they are just as
* fast as tuned/tree based segmenting operations and as such may be
* fast as base/tree based segmenting operations and as such may be
* selected by the decision functions. These are copied into this module
* due to the way we select modules in V1. i.e. in V2 we will handle this
* differently and so will not have to duplicate code.
* GEF Oct05 after asking Jeff.
*/
int
ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdisps,
ompi_coll_base_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdisps,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts, int *rdisps,
struct ompi_datatype_t *rdtype,
@ -220,19 +204,19 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis
char *psnd, *prcv;
ptrdiff_t sext, rext;
MPI_Request *preq;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
if (MPI_IN_PLACE == sbuf) {
return mca_coll_tuned_alltoallv_intra_basic_inplace (rbuf, rcounts, rdisps,
return mca_coll_base_alltoallv_intra_basic_inplace (rbuf, rcounts, rdisps,
rdtype, comm, module);
}
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoallv_intra_basic_linear rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:alltoallv_intra_basic_linear rank %d", rank));
ompi_datatype_type_extent(sdtype, &sext);
ompi_datatype_type_extent(rdtype, &rext);
@ -255,7 +239,7 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis
/* Now, initiate all send/recv to/from others. */
nreqs = 0;
preq = data->mcct_reqs;
preq = coll_base_comm_get_reqs(data, 2 * size);
/* Post all receives first */
for (i = 0; i < size; ++i) {
@ -269,7 +253,7 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis
preq++));
++nreqs;
if (MPI_SUCCESS != err) {
ompi_coll_tuned_free_reqs(data->mcct_reqs, nreqs);
ompi_coll_base_free_reqs(data->mcct_reqs, nreqs);
return err;
}
}
@ -287,7 +271,7 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis
preq++));
++nreqs;
if (MPI_SUCCESS != err) {
ompi_coll_tuned_free_reqs(data->mcct_reqs, nreqs);
ompi_coll_base_free_reqs(data->mcct_reqs, nreqs);
return err;
}
}
@ -305,128 +289,7 @@ ompi_coll_tuned_alltoallv_intra_basic_linear(void *sbuf, int *scounts, int *sdis
MPI_STATUSES_IGNORE);
/* Free the requests. */
ompi_coll_tuned_free_reqs(data->mcct_reqs, nreqs);
ompi_coll_base_free_reqs(data->mcct_reqs, nreqs);
return err;
}
/*
* The following are used by dynamic and forced rules. Publish
* details of each algorithm and if its forced/fixed/locked in as you add
* methods/algorithms you must update this and the query/map routines.
* This routine is called by the component only. This makes sure that
* the mca parameters are set to their initial values and perms.
* Module does not call this. They call the forced_getvalues routine
* instead.
*/
int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t
*mca_param_indices)
{
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[ALLTOALLV] = coll_tuned_alltoallv_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoallv_algorithm_count",
"Number of alltoallv algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_alltoallv_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_alltoallv_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_alltoallv_algorithms", alltoallv_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoallv_algorithm",
"Which alltoallv algorithm is used. "
"Can be locked down to choice of: 0 ignore, "
"1 basic linear, 2 pairwise.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoallv_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
return (MPI_SUCCESS);
}
int ompi_coll_tuned_alltoallv_intra_do_forced(void *sbuf, int *scounts, int *sdisps,
struct ompi_datatype_t *sdtype,
void* rbuf, int *rcounts, int *rdisps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoallv_intra_do_forced selected algorithm %d",
data->user_forced[ALLTOALLV].algorithm));
switch (data->user_forced[ALLTOALLV].algorithm) {
case (0):
return ompi_coll_tuned_alltoallv_intra_dec_fixed(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
case (1):
return ompi_coll_tuned_alltoallv_intra_basic_linear(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
case (2):
return ompi_coll_tuned_alltoallv_intra_pairwise(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoallv_intra_do_forced attempt to "
"select algorithm %d when only 0-%d is valid.",
data->user_forced[ALLTOALLV].algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLTOALLV]));
return (MPI_ERR_ARG);
}
}
/* If the user selects dynamic rules and specifies the algorithm to
* use, then this function is called. */
int ompi_coll_tuned_alltoallv_intra_do_this(void *sbuf, int *scounts, int *sdisps,
struct ompi_datatype_t *sdtype,
void* rbuf, int *rcounts, int *rdisps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoallv_intra_do_this selected algorithm %d ",
algorithm));
switch (algorithm) {
case (0):
return ompi_coll_tuned_alltoallv_intra_dec_fixed(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
case (1):
return ompi_coll_tuned_alltoallv_intra_basic_linear(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
case (2):
return ompi_coll_tuned_alltoallv_intra_pairwise(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoall_intra_do_this attempt to select "
"algorithm %d when only 0-%d is valid.",
algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALLV]));
return (MPI_ERR_ARG);
}
}


@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -31,25 +31,9 @@
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/* barrier algorithm variables */
static int coll_tuned_barrier_algorithm_count = 6;
static int coll_tuned_barrier_forced_algorithm = 0;
/* valid values for coll_tuned_barrier_forced_algorithm */
static mca_base_var_enum_value_t barrier_algorithms[] = {
{0, "ignore"},
{1, "linear"},
{2, "double_ring"},
{3, "recursive_doubling"},
{4, "bruck"},
{5, "two_proc"},
{6, "tree"},
{0, NULL}
};
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"
/**
* A quick version of MPI_Sendrecv implemented for the barrier.
@ -57,7 +41,7 @@ static mca_base_var_enum_value_t barrier_algorithms[] = {
* signal a two peer synchronization.
*/
static inline int
ompi_coll_tuned_sendrecv_zero(int dest, int stag,
ompi_coll_base_sendrecv_zero(int dest, int stag,
int source, int rtag,
MPI_Comm comm)
@ -87,8 +71,8 @@ ompi_coll_tuned_sendrecv_zero(int dest, int stag,
err_index = 1;
}
err = statuses[err_index].MPI_ERROR;
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred in the %s"
" stage of ompi_coll_tuned_sendrecv_zero\n",
OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred in the %s"
" stage of ompi_coll_base_sendrecv_zero\n",
__FILE__, line, err, (0 == err_index ? "receive" : "send")));
return err;
}
@ -100,7 +84,7 @@ ompi_coll_tuned_sendrecv_zero(int dest, int stag,
/* Error discovered during the posting of the irecv or isend,
* and no status is available.
*/
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",
OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred\n",
__FILE__, line, err));
return err;
}
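
For comparison, the same zero-byte, two-peer handshake expressed directly with the public API (hedged equivalent, not the helper above): no payload moves, and the completion of the combined send/receive is itself the synchronization signal.

#include <mpi.h>

static int sync_with_peer(int peer, int tag, MPI_Comm comm)
{
    return MPI_Sendrecv(NULL, 0, MPI_BYTE, peer, tag,
                        NULL, 0, MPI_BYTE, peer, tag,
                        comm, MPI_STATUS_IGNORE);
}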
@ -124,7 +108,7 @@ ompi_coll_tuned_sendrecv_zero(int dest, int stag,
* synchronous guarantee made by the last ring of sends being synchronous
*
*/
int ompi_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm,
int ompi_coll_base_barrier_intra_doublering(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int rank, size, err = 0, line = 0, left, right;
@ -132,7 +116,7 @@ int ompi_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm,
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_barrier_intra_doublering rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"ompi_coll_base_barrier_intra_doublering rank %d", rank));
left = ((rank-1)%size);
right = ((rank+1)%size);
@ -183,7 +167,7 @@ int ompi_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm,
return MPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
@ -193,15 +177,15 @@ int ompi_coll_tuned_barrier_intra_doublering(struct ompi_communicator_t *comm,
* To make synchronous, uses sync sends and sync sendrecvs
*/
int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *comm,
int ompi_coll_base_barrier_intra_recursivedoubling(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int rank, size, adjsize, err, line, mask, remote;
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_barrier_intra_recursivedoubling rank %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_barrier_intra_recursivedoubling rank %d",
rank));
/* compute the nearest power of 2 less than the communicator size */
@ -213,7 +197,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
if (rank >= adjsize) {
/* send message to lower ranked node */
remote = rank - adjsize;
err = ompi_coll_tuned_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER,
err = ompi_coll_base_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER,
remote, MCA_COLL_BASE_TAG_BARRIER,
comm);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;}
@ -238,7 +222,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
if (remote >= adjsize) continue;
/* post receive from the remote node */
err = ompi_coll_tuned_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER,
err = ompi_coll_base_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER,
remote, MCA_COLL_BASE_TAG_BARRIER,
comm);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;}
@ -261,7 +245,7 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
return MPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
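
The core of the recursive-doubling exchange above pairs each rank with the rank whose ID differs in exactly one bit, one bit per round. A power-of-two-only sketch (the fold-in of ranks beyond the largest power of two, handled above, is omitted):

#include <mpi.h>

static int recursive_doubling_core(int rank, int adjsize, int tag, MPI_Comm comm)
{
    int mask, remote, err;
    for (mask = 1; mask < adjsize; mask <<= 1) {
        remote = rank ^ mask;          /* partner differs in exactly one bit */
        err = MPI_Sendrecv(NULL, 0, MPI_BYTE, remote, tag,
                           NULL, 0, MPI_BYTE, remote, tag,
                           comm, MPI_STATUS_IGNORE);
        if (MPI_SUCCESS != err) return err;
    }
    return MPI_SUCCESS;                /* log2(adjsize) rounds in total */
}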
@ -271,15 +255,15 @@ int ompi_coll_tuned_barrier_intra_recursivedoubling(struct ompi_communicator_t *
* To make synchronous, uses sync sends and sync sendrecvs
*/
int ompi_coll_tuned_barrier_intra_bruck(struct ompi_communicator_t *comm,
int ompi_coll_base_barrier_intra_bruck(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int rank, size, distance, to, from, err, line = 0;
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_barrier_intra_bruck rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_barrier_intra_bruck rank %d", rank));
/* exchange data with rank-2^k and rank+2^k */
for (distance = 1; distance < size; distance <<= 1) {
@ -287,7 +271,7 @@ int ompi_coll_tuned_barrier_intra_bruck(struct ompi_communicator_t *comm,
to = (rank + distance) % size;
/* send message to lower ranked node */
err = ompi_coll_tuned_sendrecv_zero(to, MCA_COLL_BASE_TAG_BARRIER,
err = ompi_coll_base_sendrecv_zero(to, MCA_COLL_BASE_TAG_BARRIER,
from, MCA_COLL_BASE_TAG_BARRIER,
comm);
if (err != MPI_SUCCESS) { line = __LINE__; goto err_hndl;}
@ -296,7 +280,7 @@ int ompi_coll_tuned_barrier_intra_bruck(struct ompi_communicator_t *comm,
return MPI_SUCCESS;
err_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
@ -306,17 +290,17 @@ int ompi_coll_tuned_barrier_intra_bruck(struct ompi_communicator_t *comm,
* To make synchronous, uses sync sends and sync sendrecvs
*/
/* special case for two processes */
int ompi_coll_tuned_barrier_intra_two_procs(struct ompi_communicator_t *comm,
int ompi_coll_base_barrier_intra_two_procs(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int remote, err;
remote = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_barrier_intra_two_procs rank %d", remote));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_barrier_intra_two_procs rank %d", remote));
remote = (remote + 1) & 0x1;
err = ompi_coll_tuned_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER,
err = ompi_coll_base_sendrecv_zero(remote, MCA_COLL_BASE_TAG_BARRIER,
remote, MCA_COLL_BASE_TAG_BARRIER,
comm);
return (err);
@ -327,7 +311,7 @@ int ompi_coll_tuned_barrier_intra_two_procs(struct ompi_communicator_t *comm,
* Linear functions are copied from the BASIC coll module;
* they do not segment the message and are simple implementations,
* but for some small number of nodes and/or small data sizes they
* are just as fast as tuned/tree based segmenting operations
* are just as fast as base/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
@ -337,8 +321,8 @@ int ompi_coll_tuned_barrier_intra_two_procs(struct ompi_communicator_t *comm,
/* copied function (with appropriate renaming) starts here */
static int ompi_coll_tuned_barrier_intra_basic_linear(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
int ompi_coll_base_barrier_intra_basic_linear(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i, err, rank, size;
@ -379,15 +363,14 @@ static int ompi_coll_tuned_barrier_intra_basic_linear(struct ompi_communicator_t
ompi_request_wait_all( size-1, requests+1, MPI_STATUSES_IGNORE );
for (i = 1; i < size; ++i) {
err = MCA_PML_CALL(isend(NULL, 0, MPI_BYTE, i,
MCA_COLL_BASE_TAG_BARRIER,
MCA_PML_BASE_SEND_STANDARD, comm,
&(requests[i])));
err = MCA_PML_CALL(send(NULL, 0, MPI_BYTE, i,
MCA_COLL_BASE_TAG_BARRIER,
MCA_PML_BASE_SEND_STANDARD, comm));
if (MPI_SUCCESS != err) {
return err;
}
}
ompi_request_wait_all( size-1, requests+1, MPI_STATUSES_IGNORE );
free( requests );
}
@ -402,15 +385,15 @@ static int ompi_coll_tuned_barrier_intra_basic_linear(struct ompi_communicator_t
* Another recursive doubling type algorithm, but in this case
* we go up the tree and back down the tree.
*/
int ompi_coll_tuned_barrier_intra_tree(struct ompi_communicator_t *comm,
int ompi_coll_base_barrier_intra_tree(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int rank, size, depth, err, jump, partner;
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_barrier_intra_tree %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_barrier_intra_tree %d",
rank));
/* Find the nearest power of 2 of the communicator size. */
@ -457,101 +440,3 @@ int ompi_coll_tuned_barrier_intra_tree(struct ompi_communicator_t *comm,
return MPI_SUCCESS;
}
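
The depth computation this barrier opens with (the code itself is elided by the hunk above) amounts to counting doublings until the communicator size is reached; a plausible sketch, which may differ from the elided code:

static int tree_depth(int size)
{
    int depth = 0, jump;
    for (jump = 1; jump < size; jump <<= 1)
        ++depth;
    return depth;    /* e.g. size = 6 -> depth = 3, since 2^3 = 8 >= 6 */
}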
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if its forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map */
/* routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values */
/* and perms */
/* module does not call this they call the forced_getvalues routine instead */
int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[BARRIER] = coll_tuned_barrier_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"barrier_algorithm_count",
"Number of barrier algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_barrier_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_barrier_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_barrier_algorithms", barrier_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"barrier_algorithm",
"Which barrier algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 double ring, 3: recursive doubling 4: bruck, 5: two proc only, 6: tree",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_barrier_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
return (MPI_SUCCESS);
}
int ompi_coll_tuned_barrier_intra_do_forced(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:barrier_intra_do_forced selected algorithm %d",
data->user_forced[BARRIER].algorithm));
switch (data->user_forced[BARRIER].algorithm) {
case (0): return ompi_coll_tuned_barrier_intra_dec_fixed (comm, module);
case (1): return ompi_coll_tuned_barrier_intra_basic_linear (comm, module);
case (2): return ompi_coll_tuned_barrier_intra_doublering (comm, module);
case (3): return ompi_coll_tuned_barrier_intra_recursivedoubling (comm, module);
case (4): return ompi_coll_tuned_barrier_intra_bruck (comm, module);
case (5): return ompi_coll_tuned_barrier_intra_two_procs (comm, module);
case (6): return ompi_coll_tuned_barrier_intra_tree (comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[BARRIER].algorithm,
ompi_coll_tuned_forced_max_algorithms[BARRIER]));
return (MPI_ERR_ARG);
} /* switch */
}
int ompi_coll_tuned_barrier_intra_do_this (struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_this selected algorithm %d topo fanin/out%d", algorithm, faninout));
switch (algorithm) {
case (0): return ompi_coll_tuned_barrier_intra_dec_fixed (comm, module);
case (1): return ompi_coll_tuned_barrier_intra_basic_linear (comm, module);
case (2): return ompi_coll_tuned_barrier_intra_doublering (comm, module);
case (3): return ompi_coll_tuned_barrier_intra_recursivedoubling (comm, module);
case (4): return ompi_coll_tuned_barrier_intra_bruck (comm, module);
case (5): return ompi_coll_tuned_barrier_intra_two_procs (comm, module);
case (6): return ompi_coll_tuned_barrier_intra_tree (comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[BARRIER]));
return (MPI_ERR_ARG);
} /* switch */
}


@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -27,31 +27,12 @@
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/* bcast algorithm variables */
static int coll_tuned_bcast_algorithm_count = 6;
static int coll_tuned_bcast_forced_algorithm = 0;
static int coll_tuned_bcast_segment_size = 0;
static int coll_tuned_bcast_tree_fanout;
static int coll_tuned_bcast_chain_fanout;
/* valid values for coll_tuned_bcast_forced_algorithm */
static mca_base_var_enum_value_t bcast_algorithms[] = {
{0, "ignore"},
{1, "basic_linear"},
{2, "chain"},
{3, "pipeline"},
{4, "split_binary_tree"},
{5, "binary_tree"},
{6, "binomial"},
{0, NULL}
};
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"
int
ompi_coll_tuned_bcast_intra_generic( void* buffer,
ompi_coll_base_bcast_intra_generic( void* buffer,
int original_count,
struct ompi_datatype_t* datatype,
int root,
@ -67,7 +48,7 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
char *tmpbuf;
ptrdiff_t extent, lb;
ompi_request_t *recv_reqs[2] = {MPI_REQUEST_NULL, MPI_REQUEST_NULL};
#if !defined(COLL_TUNED_BCAST_USE_BLOCKING)
#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
ompi_request_t **send_reqs = NULL;
#endif
@ -83,7 +64,7 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
/* Set the buffer pointers */
tmpbuf = (char *) buffer;
#if !defined(COLL_TUNED_BCAST_USE_BLOCKING)
#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
if( tree->tree_nextsize != 0 ) {
send_reqs = (ompi_request_t**)malloc( (ptrdiff_t)tree->tree_nextsize *
sizeof(ompi_request_t*) );
@ -103,7 +84,7 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
sendcount = original_count - segindex * count_by_segment;
}
for( i = 0; i < tree->tree_nextsize; i++ ) {
#if defined(COLL_TUNED_BCAST_USE_BLOCKING)
#if defined(COLL_BASE_BCAST_USE_BLOCKING)
err = MCA_PML_CALL(send(tmpbuf, sendcount, datatype,
tree->tree_next[i],
MCA_COLL_BASE_TAG_BCAST,
@ -114,16 +95,16 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD, comm,
&send_reqs[i]));
#endif /* COLL_TUNED_BCAST_USE_BLOCKING */
#endif /* COLL_BASE_BCAST_USE_BLOCKING */
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
}
#if !defined(COLL_TUNED_BCAST_USE_BLOCKING)
#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
/* complete the sends before starting the next sends */
err = ompi_request_wait_all( tree->tree_nextsize, send_reqs,
MPI_STATUSES_IGNORE );
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
#endif /* not COLL_TUNED_BCAST_USE_BLOCKING */
#endif /* not COLL_BASE_BCAST_USE_BLOCKING */
/* update tmp buffer */
tmpbuf += realsegsize;
@ -167,7 +148,7 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
for( i = 0; i < tree->tree_nextsize; i++ ) {
#if defined(COLL_TUNED_BCAST_USE_BLOCKING)
#if defined(COLL_BASE_BCAST_USE_BLOCKING)
err = MCA_PML_CALL(send(tmpbuf, count_by_segment, datatype,
tree->tree_next[i],
MCA_COLL_BASE_TAG_BCAST,
@ -178,16 +159,16 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD, comm,
&send_reqs[i]));
#endif /* COLL_TUNED_BCAST_USE_BLOCKING */
#endif /* COLL_BASE_BCAST_USE_BLOCKING */
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
}
#if !defined(COLL_TUNED_BCAST_USE_BLOCKING)
#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
/* complete the sends before starting the next iteration */
err = ompi_request_wait_all( tree->tree_nextsize, send_reqs,
MPI_STATUSES_IGNORE );
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
#endif /* COLL_TUNED_BCAST_USE_BLOCKING */
#endif /* COLL_BASE_BCAST_USE_BLOCKING */
/* Update the receive buffer */
tmpbuf += realsegsize;
@ -199,7 +180,7 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
sendcount = original_count - (ptrdiff_t)(num_segments - 1) * count_by_segment;
for( i = 0; i < tree->tree_nextsize; i++ ) {
#if defined(COLL_TUNED_BCAST_USE_BLOCKING)
#if defined(COLL_BASE_BCAST_USE_BLOCKING)
err = MCA_PML_CALL(send(tmpbuf, sendcount, datatype,
tree->tree_next[i],
MCA_COLL_BASE_TAG_BCAST,
@ -210,15 +191,15 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD, comm,
&send_reqs[i]));
#endif /* COLL_TUNED_BCAST_USE_BLOCKING */
#endif /* COLL_BASE_BCAST_USE_BLOCKING */
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
}
#if !defined(COLL_TUNED_BCAST_USE_BLOCKING)
#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
err = ompi_request_wait_all( tree->tree_nextsize, send_reqs,
MPI_STATUSES_IGNORE );
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
#endif /* COLL_TUNED_BCAST_USE_BLOCKING */
#endif /* COLL_BASE_BCAST_USE_BLOCKING */
}
/* Leaf nodes */
@ -255,23 +236,23 @@ ompi_coll_tuned_bcast_intra_generic( void* buffer,
if (err != MPI_SUCCESS) { line = __LINE__; goto error_hndl; }
}
#if !defined(COLL_TUNED_BCAST_USE_BLOCKING)
#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
if( NULL != send_reqs ) free(send_reqs);
#endif
return (MPI_SUCCESS);
error_hndl:
OPAL_OUTPUT( (ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT( (ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank) );
#if !defined(COLL_TUNED_BCAST_USE_BLOCKING)
#if !defined(COLL_BASE_BCAST_USE_BLOCKING)
if( NULL != send_reqs ) free(send_reqs);
#endif
return (err);
}
int
ompi_coll_tuned_bcast_intra_bintree ( void* buffer,
ompi_coll_base_bcast_intra_bintree ( void* buffer,
int count,
struct ompi_datatype_t* datatype,
int root,
@ -281,26 +262,25 @@ ompi_coll_tuned_bcast_intra_bintree ( void* buffer,
{
int segcount = count;
size_t typelng;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_comm_t *data = module->base_data;
COLL_TUNED_UPDATE_BINTREE( comm, tuned_module, root );
COLL_BASE_UPDATE_BINTREE( comm, module, root );
/**
* Determine number of elements sent per operation.
*/
ompi_datatype_type_size( datatype, &typelng );
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_binary rank %d ss %5d typelng %lu segcount %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_binary rank %d ss %5d typelng %lu segcount %d",
ompi_comm_rank(comm), segsize, (unsigned long)typelng, segcount));
return ompi_coll_tuned_bcast_intra_generic( buffer, count, datatype, root, comm, module,
return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module,
segcount, data->cached_bintree );
}
int
ompi_coll_tuned_bcast_intra_pipeline( void* buffer,
ompi_coll_base_bcast_intra_pipeline( void* buffer,
int count,
struct ompi_datatype_t* datatype,
int root,
@ -310,26 +290,25 @@ ompi_coll_tuned_bcast_intra_pipeline( void* buffer,
{
int segcount = count;
size_t typelng;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_comm_t *data = module->base_data;
COLL_TUNED_UPDATE_PIPELINE( comm, tuned_module, root );
COLL_BASE_UPDATE_PIPELINE( comm, module, root );
/**
* Determine number of elements sent per operation.
*/
ompi_datatype_type_size( datatype, &typelng );
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_pipeline rank %d ss %5d typelng %lu segcount %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_pipeline rank %d ss %5d typelng %lu segcount %d",
ompi_comm_rank(comm), segsize, (unsigned long)typelng, segcount));
return ompi_coll_tuned_bcast_intra_generic( buffer, count, datatype, root, comm, module,
return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module,
segcount, data->cached_pipeline );
}
int
ompi_coll_tuned_bcast_intra_chain( void* buffer,
ompi_coll_base_bcast_intra_chain( void* buffer,
int count,
struct ompi_datatype_t* datatype,
int root,
@ -339,26 +318,25 @@ ompi_coll_tuned_bcast_intra_chain( void* buffer,
{
int segcount = count;
size_t typelng;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_comm_t *data = module->base_data;
COLL_TUNED_UPDATE_CHAIN( comm, tuned_module, root, chains );
COLL_BASE_UPDATE_CHAIN( comm, module, root, chains );
/**
* Determine number of elements sent per operation.
*/
ompi_datatype_type_size( datatype, &typelng );
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_chain rank %d fo %d ss %5d typelng %lu segcount %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_chain rank %d fo %d ss %5d typelng %lu segcount %d",
ompi_comm_rank(comm), chains, segsize, (unsigned long)typelng, segcount));
return ompi_coll_tuned_bcast_intra_generic( buffer, count, datatype, root, comm, module,
return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module,
segcount, data->cached_chain );
}
int
ompi_coll_tuned_bcast_intra_binomial( void* buffer,
ompi_coll_base_bcast_intra_binomial( void* buffer,
int count,
struct ompi_datatype_t* datatype,
int root,
@ -368,26 +346,25 @@ ompi_coll_tuned_bcast_intra_binomial( void* buffer,
{
int segcount = count;
size_t typelng;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_comm_t *data = module->base_data;
COLL_TUNED_UPDATE_BMTREE( comm, tuned_module, root );
COLL_BASE_UPDATE_BMTREE( comm, module, root );
/**
* Determine number of elements sent per operation.
*/
ompi_datatype_type_size( datatype, &typelng );
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_binomial rank %d ss %5d typelng %lu segcount %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:bcast_intra_binomial rank %d ss %5d typelng %lu segcount %d",
ompi_comm_rank(comm), segsize, (unsigned long)typelng, segcount));
return ompi_coll_tuned_bcast_intra_generic( buffer, count, datatype, root, comm, module,
return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root, comm, module,
segcount, data->cached_bmtree );
}
int
ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
ompi_coll_base_bcast_intra_split_bintree ( void* buffer,
int count,
struct ompi_datatype_t* datatype,
int root,
@ -405,20 +382,19 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
ptrdiff_t type_extent, lb;
ompi_request_t *base_req, *new_req;
ompi_coll_tree_t *tree;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_comm_t *data = module->base_data;
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_bcast_intra_split_bintree rank %d root %d ss %5d", rank, root, segsize));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"ompi_coll_base_bcast_intra_split_bintree rank %d root %d ss %5d", rank, root, segsize));
if (size == 1) {
return MPI_SUCCESS;
}
/* setup the binary tree topology. */
COLL_TUNED_UPDATE_BINTREE( comm, tuned_module, root );
COLL_BASE_UPDATE_BINTREE( comm, module, root );
tree = data->cached_bintree;
err = ompi_datatype_type_size( datatype, &type_size );
@ -431,7 +407,7 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
/* Note that ompi_datatype_type_size() will never return a negative
value in typelng; it returns an int [vs. an unsigned type]
because of the MPI spec. */
if (segsize < ((uint32_t) type_size)) {
segsize = type_size; /* push segsize up to hold one type */
}
segcount[0] = segcount[1] = segsize / type_size;
@ -450,7 +426,7 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
(segsize > ((ptrdiff_t)counts[0] * type_size)) ||
(segsize > ((ptrdiff_t)counts[1] * type_size)) ) {
/* call linear version here ! */
return (ompi_coll_tuned_bcast_intra_chain ( buffer, count, datatype,
return (ompi_coll_base_bcast_intra_chain ( buffer, count, datatype,
root, comm, module,
segsize, 1 ));
}
@ -593,7 +569,7 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
if ( (size%2) != 0 && rank != root) {
err = ompi_coll_tuned_sendrecv( tmpbuf[lr], counts[lr], datatype,
err = ompi_coll_base_sendrecv( tmpbuf[lr], counts[lr], datatype,
pair, MCA_COLL_BASE_TAG_BCAST,
tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype,
pair, MCA_COLL_BASE_TAG_BCAST,
@ -617,7 +593,7 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
}
/* everyone else exchanges buffers */
else {
err = ompi_coll_tuned_sendrecv( tmpbuf[lr], counts[lr], datatype,
err = ompi_coll_base_sendrecv( tmpbuf[lr], counts[lr], datatype,
pair, MCA_COLL_BASE_TAG_BCAST,
tmpbuf[(lr+1)%2], counts[(lr+1)%2], datatype,
pair, MCA_COLL_BASE_TAG_BCAST,
@ -628,7 +604,7 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
return (MPI_SUCCESS);
error_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"%s:%4d\tError occurred %d, rank %2d", __FILE__,line,err,rank));
return (err);
}
@ -637,7 +613,7 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
* but for some small number of nodes and/or small data sizes they
* are just as fast as tuned/tree based segmenting operations
* are just as fast as base/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
@ -655,21 +631,20 @@ ompi_coll_tuned_bcast_intra_split_bintree ( void* buffer,
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_tuned_bcast_intra_basic_linear (void *buff, int count,
struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
ompi_coll_base_bcast_intra_basic_linear(void *buff, int count,
struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i, size, rank, err;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
ompi_request_t **preq, **reqs = data->mcct_reqs;
mca_coll_base_comm_t *data = module->base_data;
ompi_request_t **preq, **reqs;
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_bcast_intra_basic_linear rank %d root %d", rank, root));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"ompi_coll_base_bcast_intra_basic_linear rank %d root %d", rank, root));
/* Non-root receive the data. */
@ -680,8 +655,8 @@ ompi_coll_tuned_bcast_intra_basic_linear (void *buff, int count,
}
/* Root sends data to all others. */
for (i = 0, preq = reqs; i < size; ++i) {
preq = reqs = coll_base_comm_get_reqs(data, size-1);
for (i = 0; i < size; ++i) {
if (i == rank) {
continue;
}
@ -691,6 +666,7 @@ ompi_coll_tuned_bcast_intra_basic_linear (void *buff, int count,
MCA_PML_BASE_SEND_STANDARD,
comm, preq++));
if (MPI_SUCCESS != err) {
ompi_coll_base_free_reqs(data->mcct_reqs, i);
return err;
}
}
@ -710,148 +686,11 @@ ompi_coll_tuned_bcast_intra_basic_linear (void *buff, int count,
err = ompi_request_wait_all(i, reqs, MPI_STATUSES_IGNORE);
/* Free the reqs */
ompi_coll_tuned_free_reqs(reqs, i);
ompi_coll_base_free_reqs(reqs, i);
/* All done */
return err;
}
/* copied function (with appropriate renaming) ends here */
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if its forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values and perms */
/* module does not call this they call the forced_getvalues routine instead */
int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[BCAST] = coll_tuned_bcast_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_count",
"Number of bcast algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_bcast_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_bcast_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_bcast_algorithms", bcast_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm",
"Which bcast algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 chain, 3: pipeline, 4: split binary tree, 5: binary tree, 6: binomial tree.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_bcast_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_bcast_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_segmentsize",
"Segment size in bytes used by default for bcast algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_bcast_segment_size);
coll_tuned_bcast_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_tree_fanout",
"Fanout for n-tree used for bcast algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_bcast_tree_fanout);
coll_tuned_bcast_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_chain_fanout",
"Fanout for chains used for bcast algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_bcast_chain_fanout);
return (MPI_SUCCESS);
}
int ompi_coll_tuned_bcast_intra_do_forced(void *buf, int count,
struct ompi_datatype_t *dtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced algorithm %d",
data->user_forced[BCAST].algorithm));
switch (data->user_forced[BCAST].algorithm) {
case (0): return ompi_coll_tuned_bcast_intra_dec_fixed( buf, count, dtype, root, comm, module );
case (1): return ompi_coll_tuned_bcast_intra_basic_linear( buf, count, dtype, root, comm, module );
case (2): return ompi_coll_tuned_bcast_intra_chain( buf, count, dtype, root, comm, module,
data->user_forced[BCAST].segsize,
data->user_forced[BCAST].chain_fanout );
case (3): return ompi_coll_tuned_bcast_intra_pipeline( buf, count, dtype, root, comm, module,
data->user_forced[BCAST].segsize );
case (4): return ompi_coll_tuned_bcast_intra_split_bintree( buf, count, dtype, root, comm, module,
data->user_forced[BCAST].segsize );
case (5): return ompi_coll_tuned_bcast_intra_bintree( buf, count, dtype, root, comm, module,
data->user_forced[BCAST].segsize );
case (6): return ompi_coll_tuned_bcast_intra_binomial( buf, count, dtype, root, comm, module,
data->user_forced[BCAST].segsize );
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[BCAST].algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST]));
} /* switch */
return (MPI_ERR_ARG);
}
int ompi_coll_tuned_bcast_intra_do_this(void *buf, int count,
struct ompi_datatype_t *dtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0): return ompi_coll_tuned_bcast_intra_dec_fixed( buf, count, dtype, root, comm, module );
case (1): return ompi_coll_tuned_bcast_intra_basic_linear( buf, count, dtype, root, comm, module );
case (2): return ompi_coll_tuned_bcast_intra_chain( buf, count, dtype, root, comm, module, segsize, faninout );
case (3): return ompi_coll_tuned_bcast_intra_pipeline( buf, count, dtype, root, comm, module, segsize );
case (4): return ompi_coll_tuned_bcast_intra_split_bintree( buf, count, dtype, root, comm, module, segsize );
case (5): return ompi_coll_tuned_bcast_intra_bintree( buf, count, dtype, root, comm, module, segsize );
case (6): return ompi_coll_tuned_bcast_intra_binomial( buf, count, dtype, root, comm, module, segsize );
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST]));
} /* switch */
return (MPI_ERR_ARG);
}
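For reference, the MCA registration block deleted above is what exposed these choices to users: with the tuned component selected, one could pin the broadcast to, say, the binomial tree with 8 KB segments via

    mpirun --mca coll_tuned_bcast_algorithm 6 --mca coll_tuned_bcast_algorithm_segmentsize 8192 ./app

(an illustrative invocation; the algorithm ids 0-6 are exactly those listed in the deleted switch statements above).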
View file
@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -33,6 +33,7 @@
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
/*
* The following file was created by configure. It contains extern
@ -49,10 +50,94 @@ static void coll_base_module_construct(mca_coll_base_module_t *m)
/* zero out all functions */
memset ((char *) m + sizeof (m->super), 0, sizeof (*m) - sizeof (m->super));
m->coll_module_disable = NULL;
m->base_data = NULL;
}
static void
coll_base_module_destruct(mca_coll_base_module_t *module)
{
if (NULL != module->base_data) {
OBJ_RELEASE(module->base_data);
}
}
OBJ_CLASS_INSTANCE(mca_coll_base_module_t, opal_object_t,
coll_base_module_construct, NULL);
coll_base_module_construct, coll_base_module_destruct);
static void
coll_base_comm_construct(mca_coll_base_comm_t *data)
{
data->mcct_reqs = NULL;
data->mcct_num_reqs = 0;
data->cached_ntree = NULL;
data->cached_bintree = NULL;
data->cached_bmtree = NULL;
data->cached_in_order_bmtree = NULL;
data->cached_chain = NULL;
data->cached_pipeline = NULL;
data->cached_in_order_bintree = NULL;
}
static void
coll_base_comm_destruct(mca_coll_base_comm_t *data)
{
if( NULL != data->mcct_reqs ) {
for( int i = 0; i < data->mcct_num_reqs; ++i ) {
if( MPI_REQUEST_NULL != data->mcct_reqs[i] )
ompi_request_free(&data->mcct_reqs[i]);
}
free(data->mcct_reqs);
data->mcct_reqs = NULL;
data->mcct_num_reqs = 0;
}
assert(0 == data->mcct_num_reqs);
/* free any cached information that has been allocated */
if (data->cached_ntree) { /* destroy general tree if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_ntree);
}
if (data->cached_bintree) { /* destroy bintree if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_bintree);
}
if (data->cached_bmtree) { /* destroy bmtree if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_bmtree);
}
if (data->cached_in_order_bmtree) { /* destroy bmtree if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_in_order_bmtree);
}
if (data->cached_chain) { /* destroy general chain if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_chain);
}
if (data->cached_pipeline) { /* destroy pipeline if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_pipeline);
}
if (data->cached_in_order_bintree) { /* destroy in order bintree if defined */
ompi_coll_base_topo_destroy_tree (&data->cached_in_order_bintree);
}
}
OBJ_CLASS_INSTANCE(mca_coll_base_comm_t, opal_object_t,
coll_base_comm_construct, coll_base_comm_destruct);
ompi_request_t** coll_base_comm_get_reqs(mca_coll_base_comm_t* data, int nreqs)
{
int startfrom = data->mcct_num_reqs;
if( NULL == data->mcct_reqs ) {
assert(0 == data->mcct_num_reqs);
data->mcct_reqs = (ompi_request_t**)malloc(sizeof(ompi_request_t*) * nreqs);
} else if( data->mcct_num_reqs <= nreqs ) {
data->mcct_reqs = (ompi_request_t**)realloc(data->mcct_reqs, sizeof(ompi_request_t*) * nreqs);
}
if( NULL != data->mcct_reqs ) {
data->mcct_num_reqs = nreqs;
for( int i = startfrom; i < data->mcct_num_reqs; i++ )
data->mcct_reqs[i] = MPI_REQUEST_NULL;
} else
data->mcct_num_reqs = 0; /* nothing to return */
return data->mcct_reqs;
}
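The calling pattern this helper expects can be seen in the rewritten linear broadcast earlier in this diff; a condensed sketch (error label omitted):

    mca_coll_base_comm_t *data = module->base_data;
    ompi_request_t **reqs = coll_base_comm_get_reqs(data, size - 1);
    if (NULL == reqs) {
        return OMPI_ERR_OUT_OF_RESOURCE;  /* allocation failed, nothing was cached */
    }
    /* ... start up to size-1 non-blocking operations, one slot each ... */
    err = ompi_request_wait_all(size - 1, reqs, MPI_STATUSES_IGNORE);
    ompi_coll_base_free_reqs(reqs, size - 1);  /* release on completion or error */

Note that the array itself stays owned by the communicator data; coll_base_comm_destruct above frees whatever is still cached.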
MCA_BASE_FRAMEWORK_DECLARE(ompi, coll, "Collectives", NULL, NULL, NULL,
mca_coll_base_static_components, 0);
ompi/mca/coll/base/coll_base_functions.h (new file, 355 lines)
View file
@ -0,0 +1,355 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_COLL_BASE_EXPORT_H
#define MCA_COLL_BASE_EXPORT_H
#include "ompi_config.h"
#include "ompi/mca/coll/base/base.h"
#include "opal/mca/mca.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/request/request.h"
/* need to include our own topo prototypes so we can malloc data on the comm correctly */
#include "coll_base_topo.h"
/* some fixed value index vars to simplify certain operations */
typedef enum COLLTYPE {
ALLGATHER = 0, /* 0 */
ALLGATHERV, /* 1 */
ALLREDUCE, /* 2 */
ALLTOALL, /* 3 */
ALLTOALLV, /* 4 */
ALLTOALLW, /* 5 */
BARRIER, /* 6 */
BCAST, /* 7 */
EXSCAN, /* 8 */
GATHER, /* 9 */
GATHERV, /* 10 */
REDUCE, /* 11 */
REDUCESCATTER, /* 12 */
SCAN, /* 13 */
SCATTER, /* 14 */
SCATTERV, /* 15 */
COLLCOUNT /* 16 end counter keep it as last element */
} COLLTYPE_T;
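The trailing COLLCOUNT sentinel exists so per-collective tables can be sized from the enum instead of a magic number, the same pattern as the ompi_coll_tuned_forced_max_algorithms[BCAST] bookkeeping in the code removed earlier in this diff. A minimal sketch (the table name is hypothetical):

    static int max_algorithms[COLLCOUNT];  /* one slot per collective */

    static void init_algorithm_counts(void)
    {
        max_algorithms[BCAST]  = 6;  /* e.g. the six bcast variants below */
        max_algorithms[GATHER] = 3;  /* and the three gather variants */
    }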
/* defined arg lists to simplify auto-inclusion of user overriding decision functions */
#define ALLGATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLGATHERV_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void * rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLREDUCE_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLTOALL_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLTOALLV_ARGS void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLTOALLW_ARGS void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t **sdtypes, void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t **rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define BARRIER_ARGS struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define BCAST_ARGS void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define EXSCAN_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define GATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define GATHERV_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define REDUCE_ARGS void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define REDUCESCATTER_ARGS void *sbuf, void *rbuf, int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define SCAN_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define SCATTER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define SCATTERV_ARGS void *sbuf, int *scounts, int *disps, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
/* end defined arg lists to simplify auto-inclusion of user overriding decision functions */
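Every prototype below is declared through one of these macros, with any algorithm-specific knobs appended after it. For instance, the chain broadcast declaration

    int ompi_coll_base_bcast_intra_chain(BCAST_ARGS, uint32_t segsize, int32_t chains);

expands, per BCAST_ARGS above, to

    int ompi_coll_base_bcast_intra_chain(void *buff, int count,
                                         struct ompi_datatype_t *datatype, int root,
                                         struct ompi_communicator_t *comm,
                                         mca_coll_base_module_t *module,
                                         uint32_t segsize, int32_t chains);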
BEGIN_C_DECLS
/* All Gather */
int ompi_coll_base_allgather_intra_bruck(ALLGATHER_ARGS);
int ompi_coll_base_allgather_intra_recursivedoubling(ALLGATHER_ARGS);
int ompi_coll_base_allgather_intra_ring(ALLGATHER_ARGS);
int ompi_coll_base_allgather_intra_neighborexchange(ALLGATHER_ARGS);
int ompi_coll_base_allgather_intra_basic_linear(ALLGATHER_ARGS);
int ompi_coll_base_allgather_intra_two_procs(ALLGATHER_ARGS);
/* All GatherV */
int ompi_coll_base_allgatherv_intra_bruck(ALLGATHERV_ARGS);
int ompi_coll_base_allgatherv_intra_ring(ALLGATHERV_ARGS);
int ompi_coll_base_allgatherv_intra_neighborexchange(ALLGATHERV_ARGS);
int ompi_coll_base_allgatherv_intra_basic_default(ALLGATHERV_ARGS);
int ompi_coll_base_allgatherv_intra_two_procs(ALLGATHERV_ARGS);
/* All Reduce */
int ompi_coll_base_allreduce_intra_nonoverlapping(ALLREDUCE_ARGS);
int ompi_coll_base_allreduce_intra_recursivedoubling(ALLREDUCE_ARGS);
int ompi_coll_base_allreduce_intra_ring(ALLREDUCE_ARGS);
int ompi_coll_base_allreduce_intra_ring_segmented(ALLREDUCE_ARGS, uint32_t segsize);
int ompi_coll_base_allreduce_intra_basic_linear(ALLREDUCE_ARGS);
/* AlltoAll */
int ompi_coll_base_alltoall_intra_pairwise(ALLTOALL_ARGS);
int ompi_coll_base_alltoall_intra_bruck(ALLTOALL_ARGS);
int ompi_coll_base_alltoall_intra_basic_linear(ALLTOALL_ARGS);
int ompi_coll_base_alltoall_intra_linear_sync(ALLTOALL_ARGS, int max_requests);
int ompi_coll_base_alltoall_intra_two_procs(ALLTOALL_ARGS);
int mca_coll_base_alltoall_intra_basic_inplace(void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module); /* special version for INPLACE */
/* AlltoAllV */
int ompi_coll_base_alltoallv_intra_pairwise(ALLTOALLV_ARGS);
int ompi_coll_base_alltoallv_intra_basic_linear(ALLTOALLV_ARGS);
int mca_coll_base_alltoallv_intra_basic_inplace(void *rbuf, const int *rcounts, const int *rdisps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module); /* special version for INPLACE */
/* AlltoAllW */
/* Barrier */
int ompi_coll_base_barrier_intra_doublering(BARRIER_ARGS);
int ompi_coll_base_barrier_intra_recursivedoubling(BARRIER_ARGS);
int ompi_coll_base_barrier_intra_bruck(BARRIER_ARGS);
int ompi_coll_base_barrier_intra_two_procs(BARRIER_ARGS);
int ompi_coll_base_barrier_intra_tree(BARRIER_ARGS);
int ompi_coll_base_barrier_intra_basic_linear(BARRIER_ARGS);
/* Bcast */
int ompi_coll_base_bcast_intra_basic_linear(BCAST_ARGS);
int ompi_coll_base_bcast_intra_chain(BCAST_ARGS, uint32_t segsize, int32_t chains);
int ompi_coll_base_bcast_intra_pipeline(BCAST_ARGS, uint32_t segsize);
int ompi_coll_base_bcast_intra_binomial(BCAST_ARGS, uint32_t segsize);
int ompi_coll_base_bcast_intra_bintree(BCAST_ARGS, uint32_t segsize);
int ompi_coll_base_bcast_intra_split_bintree(BCAST_ARGS, uint32_t segsize);
/* Exscan */
/* Gather */
int ompi_coll_base_gather_intra_basic_linear(GATHER_ARGS);
int ompi_coll_base_gather_intra_binomial(GATHER_ARGS);
int ompi_coll_base_gather_intra_linear_sync(GATHER_ARGS, int first_segment_size);
/* GatherV */
/* Reduce */
int ompi_coll_base_reduce_intra_basic_linear(REDUCE_ARGS);
int ompi_coll_base_reduce_intra_chain(REDUCE_ARGS, uint32_t segsize, int fanout, int max_outstanding_reqs );
int ompi_coll_base_reduce_intra_pipeline(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs );
int ompi_coll_base_reduce_intra_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs );
int ompi_coll_base_reduce_intra_binomial(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs );
int ompi_coll_base_reduce_intra_in_order_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs );
/* Reduce_scatter */
int ompi_coll_base_reduce_scatter_intra_nonoverlapping(REDUCESCATTER_ARGS);
int ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(REDUCESCATTER_ARGS);
int ompi_coll_base_reduce_scatter_intra_ring(REDUCESCATTER_ARGS);
/* Scan */
/* Scatter */
int ompi_coll_base_scatter_intra_basic_linear(SCATTER_ARGS);
int ompi_coll_base_scatter_intra_binomial(SCATTER_ARGS);
/* ScatterV */
END_C_DECLS
#define COLL_BASE_UPDATE_BINTREE( OMPI_COMM, BASE_MODULE, ROOT ) \
do { \
mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \
if( !( (coll_comm->cached_bintree) \
&& (coll_comm->cached_bintree_root == (ROOT)) ) ) { \
if( coll_comm->cached_bintree ) { /* destroy previous binomial if defined */ \
ompi_coll_base_topo_destroy_tree( &(coll_comm->cached_bintree) ); \
} \
coll_comm->cached_bintree = ompi_coll_base_topo_build_tree(2,(OMPI_COMM),(ROOT)); \
coll_comm->cached_bintree_root = (ROOT); \
} \
} while (0)
#define COLL_BASE_UPDATE_BMTREE( OMPI_COMM, BASE_MODULE, ROOT ) \
do { \
mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \
if( !( (coll_comm->cached_bmtree) \
&& (coll_comm->cached_bmtree_root == (ROOT)) ) ) { \
if( coll_comm->cached_bmtree ) { /* destroy previous binomial if defined */ \
ompi_coll_base_topo_destroy_tree( &(coll_comm->cached_bmtree) ); \
} \
coll_comm->cached_bmtree = ompi_coll_base_topo_build_bmtree( (OMPI_COMM), (ROOT) ); \
coll_comm->cached_bmtree_root = (ROOT); \
} \
} while (0)
#define COLL_BASE_UPDATE_IN_ORDER_BMTREE( OMPI_COMM, BASE_MODULE, ROOT ) \
do { \
mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \
if( !( (coll_comm->cached_in_order_bmtree) \
&& (coll_comm->cached_in_order_bmtree_root == (ROOT)) ) ) { \
if( coll_comm->cached_in_order_bmtree ) { /* destroy previous binomial if defined */ \
ompi_coll_base_topo_destroy_tree( &(coll_comm->cached_in_order_bmtree) ); \
} \
coll_comm->cached_in_order_bmtree = ompi_coll_base_topo_build_in_order_bmtree( (OMPI_COMM), (ROOT) ); \
coll_comm->cached_in_order_bmtree_root = (ROOT); \
} \
} while (0)
#define COLL_BASE_UPDATE_PIPELINE( OMPI_COMM, BASE_MODULE, ROOT ) \
do { \
mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \
if( !( (coll_comm->cached_pipeline) \
&& (coll_comm->cached_pipeline_root == (ROOT)) ) ) { \
if (coll_comm->cached_pipeline) { /* destroy previous pipeline if defined */ \
ompi_coll_base_topo_destroy_tree( &(coll_comm->cached_pipeline) ); \
} \
coll_comm->cached_pipeline = ompi_coll_base_topo_build_chain( 1, (OMPI_COMM), (ROOT) ); \
coll_comm->cached_pipeline_root = (ROOT); \
} \
} while (0)
#define COLL_BASE_UPDATE_CHAIN( OMPI_COMM, BASE_MODULE, ROOT, FANOUT ) \
do { \
mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \
if( !( (coll_comm->cached_chain) \
&& (coll_comm->cached_chain_root == (ROOT)) \
&& (coll_comm->cached_chain_fanout == (FANOUT)) ) ) { \
if( coll_comm->cached_chain) { /* destroy previous chain if defined */ \
ompi_coll_base_topo_destroy_tree( &(coll_comm->cached_chain) ); \
} \
coll_comm->cached_chain = ompi_coll_base_topo_build_chain((FANOUT), (OMPI_COMM), (ROOT)); \
coll_comm->cached_chain_root = (ROOT); \
coll_comm->cached_chain_fanout = (FANOUT); \
} \
} while (0)
#define COLL_BASE_UPDATE_IN_ORDER_BINTREE( OMPI_COMM, BASE_MODULE ) \
do { \
mca_coll_base_comm_t* coll_comm = (BASE_MODULE)->base_data; \
if( !(coll_comm->cached_in_order_bintree) ) { \
/* In-order binary tree topology is defined by communicator size */ \
/* Thus, there is no need to destroy anything */ \
coll_comm->cached_in_order_bintree = \
ompi_coll_base_topo_build_in_order_bintree((OMPI_COMM)); \
} \
} while (0)
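Taken together, these macros give every tree-based collective in this commit the same shape: refresh the cached topology for the current root, derive the per-segment element count, then hand off to the generic engine. A condensed sketch mirroring ompi_coll_base_bcast_intra_bintree above:

    mca_coll_base_comm_t *data = module->base_data;
    size_t typelng;
    int segcount = count;

    COLL_BASE_UPDATE_BINTREE( comm, module, root );   /* rebuilds only if root changed */
    ompi_datatype_type_size( datatype, &typelng );
    COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
    return ompi_coll_base_bcast_intra_generic( buffer, count, datatype, root,
                                               comm, module, segcount,
                                               data->cached_bintree );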
/**
* This macro gives a generic way to compute the best count of
* the segment (i.e. the number of complete datatypes that
* can fit in the specified SEGSIZE). Beware, when this macro
* is called, the SEGCOUNT should be initialized to the count as
* expected by the collective call.
*/
#define COLL_BASE_COMPUTED_SEGCOUNT(SEGSIZE, TYPELNG, SEGCOUNT) \
if( ((SEGSIZE) >= (TYPELNG)) && \
((SEGSIZE) < ((TYPELNG) * (SEGCOUNT))) ) { \
size_t residual; \
(SEGCOUNT) = (int)((SEGSIZE) / (TYPELNG)); \
residual = (SEGSIZE) - (SEGCOUNT) * (TYPELNG); \
if( residual > ((TYPELNG) >> 1) ) \
(SEGCOUNT)++; \
} \
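A worked example with hypothetical numbers: for a requested 1000-byte segment over a 24-byte datatype, starting from a full count of 4096 elements,

    size_t typelng = 24;      /* bytes per element (hypothetical) */
    int segcount = 4096;      /* must start as the full element count */
    uint32_t segsize = 1000;  /* requested segment size in bytes */

    COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
    /* 1000/24 = 41 elements with a 16-byte residual; 16 > 24/2, so the
       macro rounds up: segcount == 42, i.e. 1008-byte segments */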
/**
* This macro gives a generic way to compute the well-distributed block counts
* when the count and number of blocks are fixed.
* Macro returns "early-block" count, "late-block" count, and "split-index"
* which is the block at which we switch from "early-block" count to
* the "late-block" count.
* count = split_index * early_block_count +
* (block_count - split_index) * late_block_count
* We do not perform ANY error checks - make sure that the input values
* make sense (e.g. count > num_blocks).
*/
#define COLL_BASE_COMPUTE_BLOCKCOUNT( COUNT, NUM_BLOCKS, SPLIT_INDEX, \
EARLY_BLOCK_COUNT, LATE_BLOCK_COUNT ) \
EARLY_BLOCK_COUNT = LATE_BLOCK_COUNT = COUNT / NUM_BLOCKS; \
SPLIT_INDEX = COUNT % NUM_BLOCKS; \
if (0 != SPLIT_INDEX) { \
EARLY_BLOCK_COUNT = EARLY_BLOCK_COUNT + 1; \
} \
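For example, splitting count = 10 over num_blocks = 4:

    int count = 10, num_blocks = 4;   /* hypothetical inputs */
    int split, early, late;

    COLL_BASE_COMPUTE_BLOCKCOUNT( count, num_blocks, split, early, late );
    /* early == 3, late == 2, split == 2: two blocks of 3 then two of 2,
       and 2*3 + (4-2)*2 == 10, matching the identity above */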
/*
* Data structure for hanging data off the communicator
* i.e. per module instance
*/
struct mca_coll_base_comm_t {
opal_object_t super;
/* standard data for requests and PML usage */
/* Precreate space for requests
* Note this does not affect basic,
* but in the wrong context it can confuse a debugger;
* this is controlled by an MCA param
*/
ompi_request_t **mcct_reqs;
int mcct_num_reqs;
/*
* base topo information caching per communicator
*
* for each communicator we cache the topo information so we can
* reuse without regenerating if we change the root, [or fanout]
* then regenerate and recache this information
*/
/* general tree with n fan out */
ompi_coll_tree_t *cached_ntree;
int cached_ntree_root;
int cached_ntree_fanout;
/* binary tree */
ompi_coll_tree_t *cached_bintree;
int cached_bintree_root;
/* binomial tree */
ompi_coll_tree_t *cached_bmtree;
int cached_bmtree_root;
/* binomial tree */
ompi_coll_tree_t *cached_in_order_bmtree;
int cached_in_order_bmtree_root;
/* chained tree (fanout followed by pipelines) */
ompi_coll_tree_t *cached_chain;
int cached_chain_root;
int cached_chain_fanout;
/* pipeline */
ompi_coll_tree_t *cached_pipeline;
int cached_pipeline_root;
/* in-order binary tree (root of the in-order binary tree is rank 0) */
ompi_coll_tree_t *cached_in_order_bintree;
};
typedef struct mca_coll_base_comm_t mca_coll_base_comm_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_coll_base_comm_t);
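How a component hangs this object off its module is implied by the constructor/destructor pair added to coll_base_frame.c above; a minimal sketch (the enable-hook name is illustrative):

    static int example_module_enable(mca_coll_base_module_t *module,
                                     struct ompi_communicator_t *comm)
    {
        /* coll_base_module_destruct OBJ_RELEASEs base_data, so it only
           needs to be allocated once per module */
        if (NULL == module->base_data) {
            module->base_data = OBJ_NEW(mca_coll_base_comm_t);
            if (NULL == module->base_data) {
                return OMPI_ERROR;
            }
        }
        return OMPI_SUCCESS;
    }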
static inline void ompi_coll_base_free_reqs(ompi_request_t **reqs, int count)
{
    int i;
    /* ompi_request_free releases each request and resets the array
       entry to MPI_REQUEST_NULL */
    for (i = 0; i < count; ++i)
        ompi_request_free(&reqs[i]);
}
/**
* Return the array of requests on the data. If the array was not initialized
* or if its size was too small, allocate it to fit the requested size.
*/
ompi_request_t** coll_base_comm_get_reqs(mca_coll_base_comm_t* data, int nreqs);
#endif /* MCA_COLL_BASE_EXPORT_H */
View file
@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -30,30 +30,14 @@
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/* gather algorithm variables */
static int coll_tuned_gather_algorithm_count = 3;
static int coll_tuned_gather_forced_algorithm = 0;
static int coll_tuned_gather_segment_size = 0;
static int coll_tuned_gather_tree_fanout;
static int coll_tuned_gather_chain_fanout;
/* valid values for coll_tuned_gather_forced_algorithm */
static mca_base_var_enum_value_t gather_algorithms[] = {
{0, "ignore"},
{1, "basic_linear"},
{2, "binomial"},
{3, "linear_sync"},
{0, NULL}
};
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"
/* Todo: gather_intra_generic, gather_intra_binary, gather_intra_chain,
* gather_intra_pipeline, segmentation? */
int
ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
ompi_coll_base_gather_intra_binomial(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -67,17 +51,17 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
MPI_Status status;
MPI_Aint sextent, slb, strue_lb, strue_extent;
MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_gather_intra_binomial rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_gather_intra_binomial rank %d", rank));
/* create the binomial tree */
COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root );
COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, base_module, root );
bmtree = data->cached_in_order_bmtree;
ompi_datatype_get_extent(sdtype, &slb, &sextent);
@ -157,8 +141,8 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
mycount = size - vkid;
mycount *= rcount;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_gather_intra_binomial rank %d recv %d mycount = %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_gather_intra_binomial rank %d recv %d mycount = %d",
rank, bmtree->tree_next[i], mycount));
err = MCA_PML_CALL(recv(ptmp + total_recv*rextent, (ptrdiff_t)rcount * size - total_recv, rdtype,
@ -172,8 +156,8 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
if (rank != root) {
/* all nodes except root send to parents */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_gather_intra_binomial rank %d send %d count %d\n",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_gather_intra_binomial rank %d send %d count %d\n",
rank, bmtree->tree_prev, total_recv));
err = MCA_PML_CALL(send(ptmp, total_recv, sdtype,
@ -207,7 +191,7 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
if (NULL != tempbuf)
free(tempbuf);
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
@ -220,7 +204,7 @@ ompi_coll_tuned_gather_intra_binomial(void *sbuf, int scount,
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
ompi_coll_base_gather_intra_linear_sync(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -230,15 +214,15 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
int first_segment_size)
{
int i, ret, line, rank, size, first_segment_count;
ompi_request_t **reqs = NULL;
MPI_Aint extent, lb;
size_t typelng;
ompi_request_t **reqs = NULL;
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_gather_intra_linear_sync rank %d, segment %d", rank, first_segment_size));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_gather_intra_linear_sync rank %d, segment %d", rank, first_segment_size));
if (rank != root) {
/* Non-root processes:
@ -250,7 +234,7 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
ompi_datatype_type_size(sdtype, &typelng);
ompi_datatype_get_extent(sdtype, &lb, &extent);
first_segment_count = scount;
COLL_TUNED_COMPUTED_SEGCOUNT( (size_t) first_segment_size, typelng,
COLL_BASE_COMPUTED_SEGCOUNT( (size_t) first_segment_size, typelng,
first_segment_count );
ret = MCA_PML_CALL(recv(sbuf, 0, MPI_BYTE, root,
@ -288,7 +272,7 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
ompi_datatype_type_size(rdtype, &typelng);
ompi_datatype_get_extent(rdtype, &lb, &extent);
first_segment_count = rcount;
COLL_TUNED_COMPUTED_SEGCOUNT( (size_t)first_segment_size, typelng,
COLL_BASE_COMPUTED_SEGCOUNT( (size_t)first_segment_size, typelng,
first_segment_count );
ptmp = (char *) rbuf;
@ -346,7 +330,7 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
if (NULL != reqs) {
free(reqs);
}
OPAL_OUTPUT (( ompi_coll_tuned_stream,
OPAL_OUTPUT (( ompi_coll_base_framework.framework_output,
"ERROR_HNDL: node %d file %s line %d error %d\n",
rank, __FILE__, line, ret ));
return ret;
@ -356,12 +340,12 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
* but for some small number of nodes and/or small data sizes they
* are just as fast as tuned/tree based segmenting operations
* are just as fast as base/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
* have to duplicate code.
* JPG following the examples from other coll_tuned implementations. Dec06.
* JPG following the examples from other coll_base implementations. Dec06.
*/
/* copied function (with appropriate renaming) starts here */
@ -373,7 +357,7 @@ ompi_coll_tuned_gather_intra_linear_sync(void *sbuf, int scount,
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_tuned_gather_intra_basic_linear(void *sbuf, int scount,
ompi_coll_base_gather_intra_basic_linear(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
@ -389,8 +373,8 @@ ompi_coll_tuned_gather_intra_basic_linear(void *sbuf, int scount,
rank = ompi_comm_rank(comm);
/* Everyone but root sends data and returns. */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_gather_intra_basic_linear rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_gather_intra_basic_linear rank %d", rank));
if (rank != root) {
return MCA_PML_CALL(send(sbuf, scount, sdtype, root,
@ -427,164 +411,3 @@ ompi_coll_tuned_gather_intra_basic_linear(void *sbuf, int scount,
/* copied function (with appropriate renaming) ends here */
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if its forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map
routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values
and perms */
/* module does not call this they call the forced_getvalues routine instead */
int
ompi_coll_tuned_gather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[GATHER] = coll_tuned_gather_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_count",
"Number of gather algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_gather_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_gather_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_gather_algorithms", gather_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm",
"Which gather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial, 3 linear with synchronization.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_gather_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_gather_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_segmentsize",
"Segment size in bytes used by default for gather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_gather_segment_size);
coll_tuned_gather_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_tree_fanout",
"Fanout for n-tree used for gather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_gather_tree_fanout);
coll_tuned_gather_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_chain_fanout",
"Fanout for chains used for gather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_gather_chain_fanout);
return (MPI_SUCCESS);
}
int
ompi_coll_tuned_gather_intra_do_forced(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:gather_intra_do_forced selected algorithm %d",
data->user_forced[GATHER].algorithm));
switch (data->user_forced[GATHER].algorithm) {
case (0):
return ompi_coll_tuned_gather_intra_dec_fixed (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (1):
return ompi_coll_tuned_gather_intra_basic_linear (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (2):
return ompi_coll_tuned_gather_intra_binomial(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (3):
return ompi_coll_tuned_gather_intra_linear_sync (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module,
data->user_forced[GATHER].segsize);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:gather_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[GATHER].algorithm,
ompi_coll_tuned_forced_max_algorithms[GATHER]));
return (MPI_ERR_ARG);
} /* switch */
}
int
ompi_coll_tuned_gather_intra_do_this(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:gather_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0):
return ompi_coll_tuned_gather_intra_dec_fixed (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (1):
return ompi_coll_tuned_gather_intra_basic_linear (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (2):
return ompi_coll_tuned_gather_intra_binomial(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (3):
return ompi_coll_tuned_gather_intra_linear_sync (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module,
segsize);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:gather_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm,
ompi_coll_tuned_forced_max_algorithms[GATHER]));
return (MPI_ERR_ARG);
} /* switch */
}
View file
@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -31,28 +31,8 @@
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/op/op.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
/* reduce algorithm variables */
static int coll_tuned_reduce_algorithm_count = 6;
static int coll_tuned_reduce_forced_algorithm = 0;
static int coll_tuned_reduce_segment_size = 0;
static int coll_tuned_reduce_max_requests;
static int coll_tuned_reduce_tree_fanout;
static int coll_tuned_reduce_chain_fanout;
/* valid values for coll_tuned_reduce_forced_algorithm */
static mca_base_var_enum_value_t reduce_algorithms[] = {
{0, "ignore"},
{1, "linear"},
{2, "chain"},
{3, "pipeline"},
{4, "binary"},
{5, "binomial"},
{6, "in-order_binary"},
{0, NULL}
};
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
/**
* This is a generic implementation of the reduce protocol. It uses the tree
@ -65,7 +45,7 @@ static mca_base_var_enum_value_t reduce_algorithms[] = {
* for the first block: thus we must copy sendbuf to accumbuf on intermediate
* to keep the optimized loop happy.
*/
int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_count,
int ompi_coll_base_reduce_generic( void* sendbuf, void* recvbuf, int original_count,
ompi_datatype_t* datatype, ompi_op_t* op,
int root, ompi_communicator_t* comm,
mca_coll_base_module_t *module,
@ -95,7 +75,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
sendtmpbuf = (char *)recvbuf;
}
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_generic count %d, msg size %ld, segsize %ld, max_requests %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:reduce_generic count %d, msg size %ld, segsize %ld, max_requests %d",
original_count, (unsigned long)((ptrdiff_t)num_segments * (ptrdiff_t)segment_increment),
(unsigned long)segment_increment, max_outstanding_reqs));
@ -353,7 +333,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
return OMPI_SUCCESS;
error_hndl: /* error handler */
OPAL_OUTPUT (( ompi_coll_tuned_stream,
OPAL_OUTPUT (( ompi_coll_base_framework.framework_output,
"ERROR_HNDL: node %d file %s line %d error %d\n",
rank, __FILE__, line, ret ));
if( inbuf_free[0] != NULL ) free(inbuf_free[0]);
@ -369,7 +349,7 @@ int ompi_coll_tuned_reduce_generic( void* sendbuf, void* recvbuf, int original_c
meaning that at least one datatype must fit in the segment !
*/
int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count,
int ompi_coll_base_reduce_intra_chain( void *sendbuf, void *recvbuf, int count,
ompi_datatype_t* datatype,
ompi_op_t* op, int root,
ompi_communicator_t* comm,
@ -379,27 +359,27 @@ int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count,
{
int segcount = count;
size_t typelng;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_chain rank %d fo %d ss %5d", ompi_comm_rank(comm), fanout, segsize));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_chain rank %d fo %d ss %5d", ompi_comm_rank(comm), fanout, segsize));
COLL_TUNED_UPDATE_CHAIN( comm, tuned_module, root, fanout );
COLL_BASE_UPDATE_CHAIN( comm, base_module, root, fanout );
/**
* Determine number of segments and number of elements
* sent per operation
*/
ompi_datatype_type_size( datatype, &typelng );
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype,
return ompi_coll_base_reduce_generic( sendbuf, recvbuf, count, datatype,
op, root, comm, module,
data->cached_chain,
segcount, max_outstanding_reqs );
}
int ompi_coll_tuned_reduce_intra_pipeline( void *sendbuf, void *recvbuf,
int ompi_coll_base_reduce_intra_pipeline( void *sendbuf, void *recvbuf,
int count, ompi_datatype_t* datatype,
ompi_op_t* op, int root,
ompi_communicator_t* comm,
@ -409,28 +389,28 @@ int ompi_coll_tuned_reduce_intra_pipeline( void *sendbuf, void *recvbuf,
{
int segcount = count;
size_t typelng;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_pipeline rank %d ss %5d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_pipeline rank %d ss %5d",
ompi_comm_rank(comm), segsize));
COLL_TUNED_UPDATE_PIPELINE( comm, tuned_module, root );
COLL_BASE_UPDATE_PIPELINE( comm, base_module, root );
/**
* Determine number of segments and number of elements
* sent per operation
*/
ompi_datatype_type_size( datatype, &typelng );
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype,
return ompi_coll_base_reduce_generic( sendbuf, recvbuf, count, datatype,
op, root, comm, module,
data->cached_pipeline,
segcount, max_outstanding_reqs );
}
int ompi_coll_tuned_reduce_intra_binary( void *sendbuf, void *recvbuf,
int ompi_coll_base_reduce_intra_binary( void *sendbuf, void *recvbuf,
int count, ompi_datatype_t* datatype,
ompi_op_t* op, int root,
ompi_communicator_t* comm,
@ -440,28 +420,28 @@ int ompi_coll_tuned_reduce_intra_binary( void *sendbuf, void *recvbuf,
{
int segcount = count;
size_t typelng;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_binary rank %d ss %5d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_binary rank %d ss %5d",
ompi_comm_rank(comm), segsize));
COLL_TUNED_UPDATE_BINTREE( comm, tuned_module, root );
COLL_BASE_UPDATE_BINTREE( comm, base_module, root );
/**
* Determine number of segments and number of elements
* sent per operation
*/
ompi_datatype_type_size( datatype, &typelng );
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype,
return ompi_coll_base_reduce_generic( sendbuf, recvbuf, count, datatype,
op, root, comm, module,
data->cached_bintree,
segcount, max_outstanding_reqs );
}
int ompi_coll_tuned_reduce_intra_binomial( void *sendbuf, void *recvbuf,
int ompi_coll_base_reduce_intra_binomial( void *sendbuf, void *recvbuf,
int count, ompi_datatype_t* datatype,
ompi_op_t* op, int root,
ompi_communicator_t* comm,
@ -471,22 +451,22 @@ int ompi_coll_tuned_reduce_intra_binomial( void *sendbuf, void *recvbuf,
{
int segcount = count;
size_t typelng;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_binomial rank %d ss %5d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_binomial rank %d ss %5d",
ompi_comm_rank(comm), segsize));
COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root );
COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, base_module, root );
/**
* Determine number of segments and number of elements
* sent per operation
*/
ompi_datatype_type_size( datatype, &typelng );
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype,
return ompi_coll_base_reduce_generic( sendbuf, recvbuf, count, datatype,
op, root, comm, module,
data->cached_in_order_bmtree,
segcount, max_outstanding_reqs );
@ -499,7 +479,7 @@ int ompi_coll_tuned_reduce_intra_binomial( void *sendbuf, void *recvbuf,
* Accepts: same as MPI_Reduce()
* Returns: MPI_SUCCESS or error code
*/
int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
int ompi_coll_base_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
int count,
ompi_datatype_t* datatype,
ompi_op_t* op, int root,
@ -511,22 +491,22 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
int ret, rank, size, io_root, segcount = count;
void *use_this_sendbuf = NULL, *use_this_recvbuf = NULL;
size_t typelng;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_in_order_binary rank %d ss %5d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_intra_in_order_binary rank %d ss %5d",
rank, segsize));
COLL_TUNED_UPDATE_IN_ORDER_BINTREE( comm, tuned_module );
COLL_BASE_UPDATE_IN_ORDER_BINTREE( comm, base_module );
/**
* Determine number of segments and number of elements
* sent per operation
*/
ompi_datatype_type_size( datatype, &typelng );
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
COLL_BASE_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
/* An in-order binary tree must use root (size-1) to preserve the order of
operations. Thus, if root is not rank (size - 1), then we must handle
@ -564,7 +544,7 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
}
/* Use generic reduce with in-order binary tree topology and io_root */
ret = ompi_coll_tuned_reduce_generic( use_this_sendbuf, use_this_recvbuf, count, datatype,
ret = ompi_coll_base_reduce_generic( use_this_sendbuf, use_this_recvbuf, count, datatype,
op, io_root, comm, module,
data->cached_in_order_bintree,
segcount, max_outstanding_reqs );
@ -599,7 +579,7 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
* but for some small number of nodes and/or small data sizes they
* are just as fast as tuned/tree based segmenting operations
* are just as fast as base/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
@ -617,25 +597,25 @@ int ompi_coll_tuned_reduce_intra_in_order_binary( void *sendbuf, void *recvbuf,
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
ompi_coll_base_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i, rank, err, size;
ptrdiff_t true_lb, true_extent, lb, extent;
char *free_buffer = NULL, *pml_buffer = NULL;
char *inplace_temp = NULL, *inbuf;
char *free_buffer = NULL;
char *pml_buffer = NULL;
char *inplace_temp = NULL;
char *inbuf;
/* Initialize */
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_basic_linear rank %d", rank));
/* If not root, send data to the root. */
if (rank != root) {
@ -645,16 +625,136 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
return err;
}
/* see discussion in ompi_coll_basic_reduce_lin_intra about
extent and true extent */
/* for reducing buffer allocation lengths.... */
/* Root receives and reduces messages. Allocate buffer to receive
* messages. This comment applies to all collectives in this basic
* module where we allocate a temporary buffer. For the next few
* lines of code, it's tremendously complicated how we decided that
* this was the Right Thing to do. Sit back and enjoy. And prepare
* to have your mind warped. :-)
*
* Recall some definitions (I always get these backwards, so I'm
* going to put them here):
*
* extent: the length from the lower bound to the upper bound -- may
* be considerably larger than the buffer required to hold the data
* (or smaller! But it's easiest to think about when it's larger).
*
* true extent: the exact number of bytes required to hold the data
* in the layout pattern in the datatype.
*
* For example, consider the following buffer (just talking about
* true_lb, extent, and true extent -- extrapolate for true_ub):
*
* A B C
* --------------------------------------------------------
* | | |
* --------------------------------------------------------
*
* There are multiple cases:
*
* 1. A is what we give to MPI_Send (and friends), and A is where
* the data starts, and C is where the data ends. In this case:
*
* - extent: C-A
* - true extent: C-A
* - true_lb: 0
*
* A C
* --------------------------------------------------------
* | |
* --------------------------------------------------------
* <=======================extent=========================>
* <======================true extent=====================>
*
* 2. A is what we give to MPI_Send (and friends), B is where the
* data starts, and C is where the data ends. In this case:
*
* - extent: C-A
* - true extent: C-B
* - true_lb: positive
*
* A B C
* --------------------------------------------------------
* | | User buffer |
* --------------------------------------------------------
* <=======================extent=========================>
* <===============true extent=============>
*
* 3. B is what we give to MPI_Send (and friends), A is where the
* data starts, and C is where the data ends. In this case:
*
* - extent: C-A
* - true extent: C-A
* - true_lb: negative
*
* A B C
* --------------------------------------------------------
* | | User buffer |
* --------------------------------------------------------
* <=======================extent=========================>
* <======================true extent=====================>
*
* 4. MPI_BOTTOM is what we give to MPI_Send (and friends), B is
* where the data starts, and C is where the data ends. In this
* case:
*
* - extent: C-MPI_BOTTOM
* - true extent: C-B
* - true_lb: [potentially very large] positive
*
* MPI_BOTTOM B C
* --------------------------------------------------------
* | | User buffer |
* --------------------------------------------------------
* <=======================extent=========================>
* <===============true extent=============>
*
* So in all cases, for a temporary buffer, all we need to malloc()
* is a buffer of size true_extent. We therefore need to know two
* pointer values: what value to give to MPI_Send (and friends) and
* what value to give to free(), because they might not be the same.
*
* Clearly, what we give to free() is exactly what was returned from
* malloc(). That part is easy. :-)
*
* What we give to MPI_Send (and friends) is a bit more complicated.
* Let's take the 4 cases from above:
*
* 1. If A is what we give to MPI_Send and A is where the data
* starts, then clearly we give to MPI_Send what we got back from
* malloc().
*
* 2. If B is what we get back from malloc, but we give A to
* MPI_Send, then the buffer range [A,B) represents "dead space"
* -- no data will be put there. So it's safe to give B-true_lb to
* MPI_Send. More specifically, the true_lb is positive, so B-true_lb is
* actually A.
*
* 3. If A is what we get back from malloc, and B is what we give to
* MPI_Send, then the true_lb is negative, so A-true_lb will actually equal
* B.
*
* 4. Although this seems like the weirdest case, it's actually
* quite similar to case #2 -- the pointer we give to MPI_Send is
* smaller than the pointer we got back from malloc().
*
* Hence, in all cases, we give (return_from_malloc - true_lb) to MPI_Send.
*
* This works fine and dandy if we only have (count==1), which we
* rarely do. ;-) So we really need to allocate (true_extent +
* ((count - 1) * extent)) to get enough space for the rest. This may
* be more than is necessary, but it's ok.
*
* Simple, no? :-)
*
*/
ompi_datatype_get_extent(dtype, &lb, &extent);
ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent);
if (MPI_IN_PLACE == sbuf) {
sbuf = rbuf;
inplace_temp = (char*)malloc(true_extent + (ptrdiff_t)(count - 1) * extent);
inplace_temp = (char*)malloc(true_extent + (count - 1) * extent);
if (NULL == inplace_temp) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
@ -662,10 +762,12 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
}
if (size > 1) {
free_buffer = (char*)malloc(true_extent + (ptrdiff_t)(count - 1) * extent);
free_buffer = (char*)malloc(true_extent + (count - 1) * extent);
if (NULL == free_buffer) {
err = OMPI_ERR_OUT_OF_RESOURCE;
goto exit;
if (NULL != inplace_temp) {
free(inplace_temp);
}
return OMPI_ERR_OUT_OF_RESOURCE;
}
pml_buffer = free_buffer - true_lb;
}
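The four extent cases above collapse into one allocation recipe: malloc true_extent + (count - 1) * extent bytes, give free() what malloc() returned, and hand (that pointer - true_lb) to the communication calls. A standalone restatement of that recipe using the public MPI datatype queries instead of the internal ompi_datatype_* calls (a sketch, error handling elided):

#include <mpi.h>
#include <stdlib.h>

/* Allocate a temporary buffer for 'count' elements of 'dtype'.
 * '*to_free' receives the raw malloc() result; the return value is
 * the address to pass to MPI_Send and friends (raw minus true_lb). */
static void *alloc_tmp_buffer(MPI_Datatype dtype, int count, void **to_free)
{
    MPI_Aint lb, extent, true_lb, true_extent;
    char *raw;

    MPI_Type_get_extent(dtype, &lb, &extent);
    MPI_Type_get_true_extent(dtype, &true_lb, &true_extent);

    /* true_extent covers the first element, extent each further one */
    raw = malloc(true_extent + (count - 1) * extent);
    *to_free = raw;
    return (NULL == raw) ? NULL : raw - true_lb;
}

/* usage: void *raw; double *tmp = alloc_tmp_buffer(MPI_DOUBLE, n, &raw);
 *        ... communicate via tmp ...; free(raw); */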
@ -673,15 +775,17 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
/* Initialize the receive buffer. */
if (rank == (size - 1)) {
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)rbuf,
(char*)sbuf);
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)rbuf, (char*)sbuf);
} else {
err = MCA_PML_CALL(recv(rbuf, count, dtype, size - 1,
MCA_COLL_BASE_TAG_REDUCE, comm,
MPI_STATUS_IGNORE));
}
if (MPI_SUCCESS != err) {
goto exit;
if (NULL != free_buffer) {
free(free_buffer);
}
return err;
}
/* Loop receiving and calling reduction function (C or Fortran). */
@ -694,25 +798,22 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
MCA_COLL_BASE_TAG_REDUCE, comm,
MPI_STATUS_IGNORE));
if (MPI_SUCCESS != err) {
goto exit;
if (NULL != free_buffer) {
free(free_buffer);
}
return err;
}
inbuf = pml_buffer;
}
/* Perform the reduction */
ompi_op_reduce(op, inbuf, rbuf, count, dtype);
}
if (NULL != inplace_temp) {
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf,
inplace_temp);
} else {
err = MPI_SUCCESS;
}
exit:
if (NULL != inplace_temp) {
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf, inplace_temp);
free(inplace_temp);
}
if (NULL != free_buffer) {
@ -720,189 +821,8 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
}
/* All done */
return err;
return MPI_SUCCESS;
}
/* copied function (with appropriate renaming) ends here */
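Stripped of the extent bookkeeping just discussed, the routine above is the textbook linear reduce: the root seeds its receive buffer with rank (size-1)'s contribution, then folds in ranks size-2 down to 0 so the operation order is fixed. A self-contained sketch in plain MPI for contiguous doubles (MPI_Reduce_local stands in for the internal ompi_op_reduce; illustration only):

#include <mpi.h>
#include <stdlib.h>
#include <string.h>

static int linear_reduce(const double *sbuf, double *rbuf, int count,
                         MPI_Op op, int root, MPI_Comm comm)
{
    int rank, size, i;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    if (rank != root)
        return MPI_Send(sbuf, count, MPI_DOUBLE, root, 0, comm);

    double *tmp = malloc(count * sizeof(double));
    if (NULL == tmp) return MPI_ERR_NO_MEM;

    /* seed the accumulator with rank (size-1)'s contribution */
    if (root == size - 1)
        memcpy(rbuf, sbuf, count * sizeof(double));
    else
        MPI_Recv(rbuf, count, MPI_DOUBLE, size - 1, 0, comm,
                 MPI_STATUS_IGNORE);

    for (i = size - 2; i >= 0; --i) {
        const double *inbuf;
        if (i == root) {
            inbuf = sbuf;              /* my own data, no message */
        } else {
            MPI_Recv(tmp, count, MPI_DOUBLE, i, 0, comm, MPI_STATUS_IGNORE);
            inbuf = tmp;
        }
        /* rbuf = inbuf op rbuf, applied in fixed rank order */
        MPI_Reduce_local((void *)inbuf, rbuf, count, MPI_DOUBLE, op);
    }
    free(tmp);
    return MPI_SUCCESS;
}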
/**
* The following are used by dynamic and forced rules
*
* publish details of each algorithm and whether it is forced/fixed/locked in
* as you add methods/algorithms you must update this and the query/map routines
*
* this routine is called by the component only
* this makes sure that the MCA parameters are set to their initial values
* and perms; the module does not call this, it calls the forced_getvalues
* routine instead.
*/
int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t*new_enum;
ompi_coll_tuned_forced_max_algorithms[REDUCE] = coll_tuned_reduce_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_count",
"Number of reduce algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_reduce_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_reduce_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_reduce_algorithms", reduce_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm",
"Which reduce algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 chain, 3 pipeline, 4 binary, 5 binomial, 6 in-order binary",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_reduce_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_segmentsize",
"Segment size in bytes used by default for reduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_segment_size);
coll_tuned_reduce_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_tree_fanout",
"Fanout for n-tree used for reduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_tree_fanout);
coll_tuned_reduce_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_chain_fanout",
"Fanout for chains used for reduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_chain_fanout);
coll_tuned_reduce_max_requests = 0; /* no limit for reduce by default */
mca_param_indices->max_requests_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_max_requests",
"Maximum number of outstanding send requests on leaf nodes. 0 means no limit.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_max_requests);
if (mca_param_indices->max_requests_param_index < 0) {
return mca_param_indices->max_requests_param_index;
}
if (coll_tuned_reduce_max_requests < 0) {
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
opal_output( 0, "Maximum outstanding requests must be a positive number or 0. Initializing to 0 (no limit).\n" );
}
coll_tuned_reduce_max_requests = 0;
}
return (MPI_SUCCESS);
}
int ompi_coll_tuned_reduce_intra_do_forced(void *sbuf, void* rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
const int segsize = data->user_forced[REDUCE].segsize;
const int chain_fanout = data->user_forced[REDUCE].chain_fanout;
const int max_requests = data->user_forced[REDUCE].max_requests;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced selected algorithm %d",
data->user_forced[REDUCE].algorithm));
switch (data->user_forced[REDUCE].algorithm) {
case (0): return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype,
op, root, comm, module);
case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype,
op, root, comm, module);
case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, chain_fanout, max_requests);
case (3): return ompi_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (4): return ompi_coll_tuned_reduce_intra_binary (sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (5): return ompi_coll_tuned_reduce_intra_binomial (sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (6): return ompi_coll_tuned_reduce_intra_in_order_binary(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[REDUCE].algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE]));
return (MPI_ERR_ARG);
} /* switch */
}
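For context on how these forced values get set: they are read from the user_forced cache filled by the MCA parameters registered in the check_forced_init routine above, so the selection can be pinned from the command line, e.g. mpirun --mca coll_tuned_use_dynamic_rules 1 --mca coll_tuned_reduce_algorithm 5 ./app to lock in the binomial reduce (case 5 above). The use_dynamic_rules gate is how the tuned component normally enables these overrides; treat the exact invocation as an illustrative assumption rather than something this diff establishes.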
int ompi_coll_tuned_reduce_intra_do_this(void *sbuf, void* rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout,
int segsize, int max_requests )
{
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0): return ompi_coll_tuned_reduce_intra_dec_fixed (sbuf, rbuf, count, dtype,
op, root, comm, module);
case (1): return ompi_coll_tuned_reduce_intra_basic_linear (sbuf, rbuf, count, dtype,
op, root, comm, module);
case (2): return ompi_coll_tuned_reduce_intra_chain (sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, faninout, max_requests);
case (3): return ompi_coll_tuned_reduce_intra_pipeline (sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (4): return ompi_coll_tuned_reduce_intra_binary (sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (5): return ompi_coll_tuned_reduce_intra_binomial (sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (6): return ompi_coll_tuned_reduce_intra_in_order_binary(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE]));
return (MPI_ERR_ARG);
} /* switch */
}

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -32,32 +32,16 @@
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/op/op.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
/* reduce_scatter algorithm variables */
static int coll_tuned_reduce_scatter_algorithm_count = 2;
static int coll_tuned_reduce_scatter_forced_algorithm = 0;
static int coll_tuned_reduce_scatter_segment_size = 0;
static int coll_tuned_reduce_scatter_tree_fanout;
static int coll_tuned_reduce_scatter_chain_fanout;
/* valid values for coll_tuned_reduce_scatter_forced_algorithm */
static mca_base_var_enum_value_t reduce_scatter_algorithms[] = {
{0, "ignore"},
{1, "non-overlapping"},
{2, "recursive_halfing"},
{3, "ring"},
{0, NULL}
};
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
/*******************************************************************************
* ompi_coll_tuned_reduce_scatter_intra_nonoverlapping
* ompi_coll_base_reduce_scatter_intra_nonoverlapping
*
* This function just calls a reduce to rank 0, followed by an
* appropriate scatterv call.
*/
int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf,
int ompi_coll_base_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf,
int *rcounts,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
@ -71,7 +55,7 @@ int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf,
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_nonoverlapping, rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_scatter_intra_nonoverlapping, rank %d", rank));
for (i = 0, total_count = 0; i < size; i++) { total_count += rcounts[i]; }
@ -138,7 +122,7 @@ int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(void *sbuf, void *rbuf,
* Limitation: - Works only for commutative operations.
*/
int
ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
void *rbuf,
int *rcounts,
struct ompi_datatype_t *dtype,
@ -156,7 +140,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_basic_recursivehalving, rank %d", rank));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:reduce_scatter_intra_basic_recursivehalving, rank %d", rank));
/* Find displacements and the like */
disps = (int*) malloc(sizeof(int) * size);
@ -404,7 +388,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
/*
* ompi_coll_tuned_reduce_scatter_intra_ring
* ompi_coll_base_reduce_scatter_intra_ring
*
* Function: Ring algorithm for reduce_scatter operation
* Accepts: Same as MPI_Reduce_scatter()
@ -463,7 +447,7 @@ ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(void *sbuf,
*
*/
int
ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
ompi_coll_base_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
@ -480,8 +464,8 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:reduce_scatter_intra_ring rank %d, size %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:reduce_scatter_intra_ring rank %d, size %d",
rank, size));
/* Determine the maximum number of elements per node,
@ -626,7 +610,7 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
return MPI_SUCCESS;
error_hndl:
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tRank %d Error occurred %d\n",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tRank %d Error occurred %d\n",
__FILE__, line, rank, ret));
if (NULL != displs) free(displs);
if (NULL != accumbuf_free) free(accumbuf_free);
@ -634,139 +618,3 @@ ompi_coll_tuned_reduce_scatter_intra_ring(void *sbuf, void *rbuf, int *rcounts,
if (NULL != inbuf_free[1]) free(inbuf_free[1]);
return ret;
}
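The nonoverlapping variant described at the top of this file is the simplest of the three: reduce everything to rank 0, then scatterv the pieces back out. A standalone sketch in plain MPI for contiguous doubles under MPI_SUM (the real code reuses the communicator's selected reduce/scatterv modules and also handles MPI_IN_PLACE; error handling abbreviated):

#include <mpi.h>
#include <stdlib.h>

static int reduce_scatter_nonoverlapping(const double *sbuf, double *rbuf,
                                         const int *rcounts, MPI_Comm comm)
{
    int i, rank, size, total = 0, err;
    double *full = NULL;
    int *displs = NULL;

    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    for (i = 0; i < size; i++) total += rcounts[i];

    if (0 == rank) {           /* only the root needs the full vector */
        full   = malloc(total * sizeof(double));
        displs = malloc(size * sizeof(int));
        if (NULL == full || NULL == displs) {
            free(full); free(displs);
            return MPI_ERR_NO_MEM;
        }
        displs[0] = 0;
        for (i = 1; i < size; i++) displs[i] = displs[i-1] + rcounts[i-1];
    }

    err = MPI_Reduce(sbuf, full, total, MPI_DOUBLE, MPI_SUM, 0, comm);
    if (MPI_SUCCESS == err)
        err = MPI_Scatterv(full, rcounts, displs, MPI_DOUBLE,
                           rbuf, rcounts[rank], MPI_DOUBLE, 0, comm);
    free(full); free(displs);
    return err;
}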
/**
* The following are used by dynamic and forced rules
*
* publish details of each algorithm and whether it is forced/fixed/locked in
* as you add methods/algorithms you must update this and the query/map routines
*
* this routine is called by the component only
* this makes sure that the MCA parameters are set to their initial values
* and perms; the module does not call this, it calls the forced_getvalues
* routine instead.
*/
int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[REDUCESCATTER] = coll_tuned_reduce_scatter_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_count",
"Number of reduce_scatter algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_reduce_scatter_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_reduce_scatter_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_reduce_scatter_algorithms", reduce_scatter_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm",
"Which reduce reduce_scatter algorithm is used. Can be locked down to choice of: 0 ignore, 1 non-overlapping (Reduce + Scatterv), 2 recursive halving, 3 ring",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_scatter_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_reduce_scatter_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_segmentsize",
"Segment size in bytes used by default for reduce_scatter algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_scatter_segment_size);
coll_tuned_reduce_scatter_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_tree_fanout",
"Fanout for n-tree used for reduce_scatter algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_scatter_tree_fanout);
coll_tuned_reduce_scatter_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_chain_fanout",
"Fanout for chains used for reduce_scatter algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_scatter_chain_fanout);
return (MPI_SUCCESS);
}
int ompi_coll_tuned_reduce_scatter_intra_do_forced(void *sbuf, void* rbuf,
int *rcounts,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_forced selected algorithm %d",
data->user_forced[REDUCESCATTER].algorithm));
switch (data->user_forced[REDUCESCATTER].algorithm) {
case (0): return ompi_coll_tuned_reduce_scatter_intra_dec_fixed (sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (1): return ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (2): return ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (3): return ompi_coll_tuned_reduce_scatter_intra_ring (sbuf, rbuf, rcounts,
dtype, op, comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[REDUCESCATTER].algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCESCATTER]));
return (MPI_ERR_ARG);
} /* switch */
}
int ompi_coll_tuned_reduce_scatter_intra_do_this(void *sbuf, void* rbuf,
int *rcounts,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0): return ompi_coll_tuned_reduce_scatter_intra_dec_fixed (sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (1): return ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (2): return ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (3): return ompi_coll_tuned_reduce_scatter_intra_ring (sbuf, rbuf, rcounts,
dtype, op, comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCESCATTER]));
return (MPI_ERR_ARG);
} /* switch */
}

ompi/mca/coll/base/coll_base_scatter.c (new file, 256 lines)
@ -0,0 +1,256 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
#include "coll_base_util.h"
int
ompi_coll_base_scatter_intra_binomial(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int line = -1, i, rank, vrank, size, total_send = 0, err;
char *ptmp, *tempbuf = NULL;
ompi_coll_tree_t* bmtree;
MPI_Status status;
MPI_Aint sextent, slb, strue_lb, strue_extent;
MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent;
mca_coll_base_module_t *base_module = (mca_coll_base_module_t*) module;
mca_coll_base_comm_t *data = base_module->base_data;
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"ompi_coll_base_scatter_intra_binomial rank %d", rank));
/* create the binomial tree */
COLL_BASE_UPDATE_IN_ORDER_BMTREE( comm, base_module, root );
bmtree = data->cached_in_order_bmtree;
ompi_datatype_get_extent(sdtype, &slb, &sextent);
ompi_datatype_get_true_extent(sdtype, &strue_lb, &strue_extent);
ompi_datatype_get_extent(rdtype, &rlb, &rextent);
ompi_datatype_get_true_extent(rdtype, &rtrue_lb, &rtrue_extent);
vrank = (rank - root + size) % size;
ptmp = (char *) rbuf; /* by default assume a leaf node and just use rbuf */
if (rank == root) {
if (0 == root) {
/* root on 0, just use the send buffer */
ptmp = (char *) sbuf;
if (rbuf != MPI_IN_PLACE) {
/* local copy to rbuf */
err = ompi_datatype_sndrcv(sbuf, scount, sdtype,
rbuf, rcount, rdtype);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
} else {
/* root is not on 0, allocate temp buffer for send */
tempbuf = (char *) malloc(strue_extent + ((ptrdiff_t)scount * (ptrdiff_t)size - 1) * sextent);
if (NULL == tempbuf) {
err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
}
ptmp = tempbuf - strue_lb;
/* and rotate data so it will eventually be in the right place */
err = ompi_datatype_copy_content_same_ddt(sdtype, (ptrdiff_t)scount * (ptrdiff_t)(size - root),
ptmp, (char *) sbuf + sextent * (ptrdiff_t)root * (ptrdiff_t)scount);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
err = ompi_datatype_copy_content_same_ddt(sdtype, (ptrdiff_t)scount * (ptrdiff_t)root,
ptmp + sextent * (ptrdiff_t)scount * (ptrdiff_t)(size - root), (char *)sbuf);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
if (rbuf != MPI_IN_PLACE) {
/* local copy to rbuf */
err = ompi_datatype_sndrcv(ptmp, scount, sdtype,
rbuf, rcount, rdtype);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
}
total_send = scount;
} else if (!(vrank % 2)) {
/* non-root, non-leaf nodes, allocate temp buffer for recv
* the most we need is rcount*size/2 */
tempbuf = (char *) malloc(rtrue_extent + ((ptrdiff_t)rcount * (ptrdiff_t)size - 1) * rextent);
if (NULL == tempbuf) {
err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
}
ptmp = tempbuf - rtrue_lb;
sdtype = rdtype;
scount = rcount;
sextent = rextent;
total_send = scount;
}
if (!(vrank % 2)) {
if (rank != root) {
/* recv from parent on non-root */
err = MCA_PML_CALL(recv(ptmp, (ptrdiff_t)rcount * (ptrdiff_t)size, rdtype, bmtree->tree_prev,
MCA_COLL_BASE_TAG_SCATTER, comm, &status));
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
/* local copy to rbuf */
err = ompi_datatype_sndrcv(ptmp, scount, sdtype,
rbuf, rcount, rdtype);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
/* send to children on all non-leaf */
for (i = 0; i < bmtree->tree_nextsize; i++) {
size_t mycount = 0;
int vkid;
/* figure out how much data I have to send to this child */
vkid = (bmtree->tree_next[i] - root + size) % size;
mycount = vkid - vrank;
if( (int)mycount > (size - vkid) )
mycount = size - vkid;
mycount *= scount;
err = MCA_PML_CALL(send(ptmp + (ptrdiff_t)total_send * sextent, mycount, sdtype,
bmtree->tree_next[i],
MCA_COLL_BASE_TAG_SCATTER,
MCA_PML_BASE_SEND_STANDARD, comm));
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
total_send += mycount;
}
if (NULL != tempbuf)
free(tempbuf);
} else {
/* recv from parent on leaf nodes */
err = MCA_PML_CALL(recv(ptmp, rcount, rdtype, bmtree->tree_prev,
MCA_COLL_BASE_TAG_SCATTER, comm, &status));
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
return MPI_SUCCESS;
err_hndl:
if (NULL != tempbuf)
free(tempbuf);
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
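The mycount computation above is the heart of the binomial scatter: in the rotated virtual-rank space, the child at vkid = vrank + 2^k roots a subtree of 2^k ranks, so it must be forwarded min(vkid - vrank, size - vkid) ranks' worth of blocks. A tiny standalone check of that arithmetic (classic binomial numbering assumed; illustration only):

#include <stdio.h>

/* For each child of virtual rank 0 in a binomial tree of 'size' ranks,
 * print how many ranks' blocks it must be forwarded. Children of vrank
 * v sit at v + 1, v + 2, v + 4, ... (mask doubling); the child at
 * v + m roots a subtree of m ranks, clipped at 'size'. */
int main(void)
{
    int size = 6, vrank = 0, mask;
    for (mask = 1; vrank + mask < size; mask <<= 1) {
        int vkid = vrank + mask;
        int span = vkid - vrank;             /* subtree size = mask */
        if (span > size - vkid) span = size - vkid;
        printf("child %d gets %d block(s)\n", vkid, span);
    }
    return 0;   /* for size 6: child 1 -> 1, child 2 -> 2, child 4 -> 2 */
}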
/*
* Linear functions are copied from the BASIC coll module
* they do not segment the message and are simple implementations
* but for some small number of nodes and/or small data sizes they
* are just as fast as base/tree based segmenting operations
* and as such may be selected by the decision functions
* These are copied into this module due to the way we select modules
* in V1. i.e. in V2 we will handle this differently and so will not
* have to duplicate code.
* JPG following the examples from other coll_base implementations. Dec06.
*/
/* copied function (with appropriate renaming) starts here */
/*
* scatter_intra
*
* Function: - basic scatter operation
* Accepts: - same arguments as MPI_Scatter()
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_base_scatter_intra_basic_linear(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i, rank, size, err;
ptrdiff_t lb, incr;
char *ptmp;
/* Initialize */
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
/* If not root, receive data. */
if (rank != root) {
err = MCA_PML_CALL(recv(rbuf, rcount, rdtype, root,
MCA_COLL_BASE_TAG_SCATTER,
comm, MPI_STATUS_IGNORE));
return err;
}
/* I am the root, loop sending data. */
err = ompi_datatype_get_extent(sdtype, &lb, &incr);
if (OMPI_SUCCESS != err) {
return OMPI_ERROR;
}
incr *= scount;
for (i = 0, ptmp = (char *) sbuf; i < size; ++i, ptmp += incr) {
/* simple optimization */
if (i == rank) {
if (MPI_IN_PLACE != rbuf) {
err =
ompi_datatype_sndrcv(ptmp, scount, sdtype, rbuf, rcount,
rdtype);
}
} else {
err = MCA_PML_CALL(send(ptmp, scount, sdtype, i,
MCA_COLL_BASE_TAG_SCATTER,
MCA_PML_BASE_SEND_STANDARD, comm));
}
if (MPI_SUCCESS != err) {
return err;
}
}
/* All done */
return MPI_SUCCESS;
}
/* copied function (with appropriate renaming) ends here */

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -25,8 +25,8 @@
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_base_topo.h"
/*
* Some static helpers.
@ -75,7 +75,7 @@ static int calculate_num_nodes_up_to_level( int fanout, int level )
*/
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_tree( int fanout,
ompi_coll_base_topo_build_tree( int fanout,
struct ompi_communicator_t* comm,
int root )
{
@ -85,14 +85,14 @@ ompi_coll_tuned_topo_build_tree( int fanout,
int slimit; /* total number of nodes on levels above me */
ompi_coll_tree_t* tree;
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:topo_build_tree Building fo %d rt %d", fanout, root));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:topo_build_tree Building fo %d rt %d", fanout, root));
if (fanout<1) {
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:topo_build_tree invalid fanout %d", fanout));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:topo_build_tree invalid fanout %d", fanout));
return NULL;
}
if (fanout>MAXTREEFANOUT) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo_build_tree invalid fanout %d bigger than max %d", fanout, MAXTREEFANOUT));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo_build_tree invalid fanout %d bigger than max %d", fanout, MAXTREEFANOUT));
return NULL;
}
@ -104,7 +104,7 @@ ompi_coll_tuned_topo_build_tree( int fanout,
tree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
if (!tree) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo_build_tree PANIC::out of memory"));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo_build_tree PANIC::out of memory"));
return NULL;
}
@ -189,7 +189,7 @@ ompi_coll_tuned_topo_build_tree( int fanout,
* 4 0
*/
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm )
ompi_coll_base_topo_build_in_order_bintree( struct ompi_communicator_t* comm )
{
int rank, size, myrank, rightsize, delta, parent, lchild, rchild;
ompi_coll_tree_t* tree;
@ -202,8 +202,8 @@ ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm )
tree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
if (!tree) {
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:topo_build_tree PANIC::out of memory"));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:topo_build_tree PANIC::out of memory"));
return NULL;
}
@ -220,8 +220,8 @@ ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm )
tree->tree_nextsize = 0;
tree->tree_next[0] = -1;
tree->tree_next[1] = -1;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:topo_build_in_order_tree Building fo %d rt %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:topo_build_in_order_tree Building fo %d rt %d",
tree->tree_fanout, tree->tree_root));
/*
@ -294,7 +294,7 @@ ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm )
return tree;
}
int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree )
int ompi_coll_base_topo_destroy_tree( ompi_coll_tree_t** tree )
{
ompi_coll_tree_t *ptr;
@ -323,13 +323,13 @@ int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree )
* 7
*/
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
ompi_coll_base_topo_build_bmtree( struct ompi_communicator_t* comm,
int root )
{
int childs = 0, rank, size, mask = 1, index, remote, i;
ompi_coll_tree_t *bmtree;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_bmtree rt %d", root));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree rt %d", root));
/*
* Get size and rank of the process in this communicator
@ -341,7 +341,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
bmtree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
if (!bmtree) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_bmtree PANIC out of memory"));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree PANIC out of memory"));
return NULL;
}
@ -372,7 +372,7 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
remote += root;
if( remote >= size ) remote -= size;
if (childs==MAXTREEFANOUT) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_bmtree max fanout incorrect %d needed %d", MAXTREEFANOUT, childs));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree max fanout incorrect %d needed %d", MAXTREEFANOUT, childs));
free(bmtree);
return NULL;
}
@ -400,13 +400,13 @@ ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
* 7
*/
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
ompi_coll_base_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
int root )
{
int childs = 0, rank, vrank, size, mask = 1, remote, i;
ompi_coll_tree_t *bmtree;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_in_order_bmtree rt %d", root));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_in_order_bmtree rt %d", root));
/*
* Get size and rank of the process in this communicator
@ -418,7 +418,7 @@ ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
bmtree = (ompi_coll_tree_t*)malloc(sizeof(ompi_coll_tree_t));
if (!bmtree) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_bmtree PANIC out of memory"));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_bmtree PANIC out of memory"));
return NULL;
}
@ -442,10 +442,10 @@ ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
bmtree->tree_next[childs] = (remote + root) % size;
childs++;
if (childs==MAXTREEFANOUT) {
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:topo:build_bmtree max fanout incorrect %d needed %d",
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:topo:build_bmtree max fanout incorrect %d needed %d",
MAXTREEFANOUT, childs));
free (bmtree);
free(bmtree);
return NULL;
}
}
@ -459,14 +459,14 @@ ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_chain( int fanout,
ompi_coll_base_topo_build_chain( int fanout,
struct ompi_communicator_t* comm,
int root )
{
int i, maxchainlen, mark, head, len, rank, size, srank /* shifted rank */;
ompi_coll_tree_t *chain;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_chain fo %d rt %d", fanout, root));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain fo %d rt %d", fanout, root));
/*
* Get size and rank of the process in this communicator
@ -475,11 +475,11 @@ ompi_coll_tuned_topo_build_chain( int fanout,
rank = ompi_comm_rank(comm);
if( fanout < 1 ) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_chain WARNING invalid fanout of ZERO, forcing to 1 (pipeline)!"));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain WARNING invalid fanout of ZERO, forcing to 1 (pipeline)!"));
fanout = 1;
}
if (fanout>MAXTREEFANOUT) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_chain WARNING invalid fanout %d bigger than max %d, forcing to max!", fanout, MAXTREEFANOUT));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain WARNING invalid fanout %d bigger than max %d, forcing to max!", fanout, MAXTREEFANOUT));
fanout = MAXTREEFANOUT;
}
@ -488,7 +488,7 @@ ompi_coll_tuned_topo_build_chain( int fanout,
*/
chain = (ompi_coll_tree_t*)malloc( sizeof(ompi_coll_tree_t) );
if (!chain) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:topo:build_chain PANIC out of memory"));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"coll:base:topo:build_chain PANIC out of memory"));
fflush(stdout);
return NULL;
}
@ -603,17 +603,18 @@ ompi_coll_tuned_topo_build_chain( int fanout,
return chain;
}
int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank)
int ompi_coll_base_topo_dump_tree (ompi_coll_tree_t* tree, int rank)
{
int i;
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:topo:topo_dump_tree %1d tree root %d"
OPAL_OUTPUT((ompi_coll_base_framework.framework_output, "coll:base:topo:topo_dump_tree %1d tree root %d"
" fanout %d BM %1d nextsize %d prev %d",
rank, tree->tree_root, tree->tree_bmtree, tree->tree_fanout,
tree->tree_nextsize, tree->tree_prev));
if( tree->tree_nextsize ) {
for( i = 0; i < tree->tree_nextsize; i++ )
OPAL_OUTPUT((ompi_coll_tuned_stream,"[%1d] %d", i, tree->tree_next[i]));
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,"[%1d] %d", i, tree->tree_next[i]));
}
return (0);
}
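The mask walks in the builders above all follow the same binomial pattern: scanning bit positions upward from 1, every position below your lowest set bit names a child, and the lowest set bit itself names your parent. A compact standalone enumeration for the tree rooted at rank 0 (the in-order variants in this commit additionally rotate ranks by the root before applying it; sketch only):

#include <stdio.h>

/* Print parent and children of every rank in a binomial tree of
 * 'size' ranks rooted at 0. */
int main(void)
{
    int size = 8, rank, mask;
    for (rank = 0; rank < size; rank++) {
        printf("rank %d:", rank);
        for (mask = 1; mask < size; mask <<= 1) {
            if (rank & mask) {            /* lowest set bit -> parent */
                printf(" parent %d", rank ^ mask);
                break;
            }
            if ((rank | mask) < size)     /* below parent bit -> child */
                printf(" child %d", rank | mask);
        }
        printf("\n");
    }
    return 0;   /* e.g. rank 0: children 1 2 4; rank 4: parent 0, children 5 6 */
}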

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -16,8 +16,8 @@
* $HEADER$
*/
#ifndef MCA_COLL_TUNED_TOPO_H_HAS_BEEN_INCLUDED
#define MCA_COLL_TUNED_TOPO_H_HAS_BEEN_INCLUDED
#ifndef MCA_COLL_BASE_TOPO_H_HAS_BEEN_INCLUDED
#define MCA_COLL_BASE_TOPO_H_HAS_BEEN_INCLUDED
#include "ompi_config.h"
@ -35,29 +35,28 @@ typedef struct ompi_coll_tree_t {
} ompi_coll_tree_t;
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_tree( int fanout,
ompi_coll_base_topo_build_tree( int fanout,
struct ompi_communicator_t* com,
int root );
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_in_order_bintree( struct ompi_communicator_t* comm );
ompi_coll_base_topo_build_in_order_bintree( struct ompi_communicator_t* comm );
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_bmtree( struct ompi_communicator_t* comm,
ompi_coll_base_topo_build_bmtree( struct ompi_communicator_t* comm,
int root );
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
ompi_coll_base_topo_build_in_order_bmtree( struct ompi_communicator_t* comm,
int root );
ompi_coll_tree_t*
ompi_coll_tuned_topo_build_chain( int fanout,
ompi_coll_base_topo_build_chain( int fanout,
struct ompi_communicator_t* com,
int root );
int ompi_coll_tuned_topo_destroy_tree( ompi_coll_tree_t** tree );
int ompi_coll_base_topo_destroy_tree( ompi_coll_tree_t** tree );
/* debugging stuff, will be removed later */
int ompi_coll_tuned_topo_dump_tree (ompi_coll_tree_t* tree, int rank);
int ompi_coll_base_topo_dump_tree (ompi_coll_tree_t* tree, int rank);
END_C_DECLS
#endif /* MCA_COLL_TUNED_TOPO_H_HAS_BEEN_INCLUDED */
#endif /* MCA_COLL_BASE_TOPO_H_HAS_BEEN_INCLUDED */

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -19,17 +19,17 @@
*/
#include "ompi_config.h"
#include "coll_tuned.h"
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned_util.h"
#include "coll_base_util.h"
int ompi_coll_tuned_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
int ompi_coll_base_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
ompi_datatype_t* sdatatype,
int dest, int stag,
void* recvbuf, size_t rcount,
@ -91,14 +91,14 @@ int ompi_coll_tuned_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
*status = statuses[err_index];
}
err = statuses[err_index].MPI_ERROR;
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred in the %s"
" stage of ompi_coll_tuned_sendrecv_zero\n",
OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred in the %s"
" stage of ompi_coll_base_sendrecv_zero\n",
__FILE__, line, err, (0 == err_index ? "receive" : "send")));
} else {
/* Error discovered during the posting of the irecv or isend,
* and no status is available.
*/
OPAL_OUTPUT ((ompi_coll_tuned_stream, "%s:%d: Error %d occurred\n",
OPAL_OUTPUT ((ompi_coll_base_framework.framework_output, "%s:%d: Error %d occurred\n",
__FILE__, line, err));
if (MPI_STATUS_IGNORE != status) {
status->MPI_ERROR = err;

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
@ -18,8 +18,8 @@
* $HEADER$
*/
#ifndef MCA_COLL_TUNED_UTIL_EXPORT_H
#define MCA_COLL_TUNED_UTIL_EXPORT_H
#ifndef MCA_COLL_BASE_UTIL_EXPORT_H
#define MCA_COLL_BASE_UTIL_EXPORT_H
#include "ompi_config.h"
@ -36,7 +36,7 @@ BEGIN_C_DECLS
* If one of the communications results in a zero-byte message the
* communication is ignored, and no message will cross to the peer.
*/
int ompi_coll_tuned_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
int ompi_coll_base_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
ompi_datatype_t* sdatatype,
int dest, int stag,
void* recvbuf, size_t rcount,
@ -53,7 +53,7 @@ int ompi_coll_tuned_sendrecv_nonzero_actual( void* sendbuf, size_t scount,
* communications.
*/
static inline int
ompi_coll_tuned_sendrecv( void* sendbuf, size_t scount, ompi_datatype_t* sdatatype,
ompi_coll_base_sendrecv( void* sendbuf, size_t scount, ompi_datatype_t* sdatatype,
int dest, int stag,
void* recvbuf, size_t rcount, ompi_datatype_t* rdatatype,
int source, int rtag,
@ -64,13 +64,11 @@ ompi_coll_tuned_sendrecv( void* sendbuf, size_t scount, ompi_datatype_t* sdataty
return (int) ompi_datatype_sndrcv(sendbuf, (int32_t) scount, sdatatype,
recvbuf, (int32_t) rcount, rdatatype);
}
return ompi_coll_tuned_sendrecv_nonzero_actual (sendbuf, scount, sdatatype,
return ompi_coll_base_sendrecv_nonzero_actual (sendbuf, scount, sdatatype,
dest, stag,
recvbuf, rcount, rdatatype,
source, rtag, comm, status);
}
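The inline wrapper above guards the degenerate case before posting anything: a self-exchange degrades to a local datatype copy, and the zero-size legs are filtered inside the nonzero_actual path. A sketch of the self-exchange guard on top of plain MPI_Sendrecv (hypothetical helper name; the internal version posts an irecv/isend pair and aggregates statuses instead):

#include <mpi.h>
#include <string.h>

/* Exchange doubles with 'dest'/'source', turning a self-exchange into
 * a plain memcpy so no message crosses the wire. */
static int sendrecv_guarded(const double *sbuf, int scount, int dest,
                            double *rbuf, int rcount, int source,
                            MPI_Comm comm)
{
    int rank;
    MPI_Comm_rank(comm, &rank);
    if (source == rank && dest == rank) {
        int n = (rcount < scount) ? rcount : scount;
        memcpy(rbuf, sbuf, n * sizeof(double));
        return MPI_SUCCESS;
    }
    return MPI_Sendrecv(sbuf, scount, MPI_DOUBLE, dest, 0,
                        rbuf, rcount, MPI_DOUBLE, source, 0,
                        comm, MPI_STATUS_IGNORE);
}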
END_C_DECLS
#endif /* MCA_COLL_TUNED_UTIL_EXPORT_H */
#endif /* MCA_COLL_BASE_UTIL_EXPORT_H */

@ -3,7 +3,7 @@
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -31,6 +31,7 @@
#include "ompi/mca/coll/coll.h"
#include "ompi/request/request.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
BEGIN_C_DECLS
@ -52,12 +53,6 @@ BEGIN_C_DECLS
int mca_coll_basic_module_enable(mca_coll_base_module_t *module,
struct ompi_communicator_t *comm);
int mca_coll_basic_allgather_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_basic_allgather_inter(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
@ -65,13 +60,6 @@ BEGIN_C_DECLS
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_basic_allgatherv_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts,
int *disps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_basic_allgatherv_inter(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts,
@ -91,12 +79,6 @@ BEGIN_C_DECLS
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_basic_alltoall_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_basic_alltoall_inter(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
@ -104,14 +86,6 @@ BEGIN_C_DECLS
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_basic_alltoallv_intra(void *sbuf, int *scounts,
int *sdisps,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts,
int *rdisps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_basic_alltoallv_inter(void *sbuf, int *scounts,
int *sdisps,
struct ompi_datatype_t *sdtype,
@ -138,21 +112,12 @@ BEGIN_C_DECLS
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_basic_barrier_intra_lin(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_basic_barrier_inter_lin(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_basic_barrier_intra_log(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_basic_bcast_lin_intra(void *buff, int count,
struct ompi_datatype_t *datatype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_basic_bcast_lin_inter(void *buff, int count,
struct ompi_datatype_t *datatype,
int root,
@ -183,13 +148,6 @@ BEGIN_C_DECLS
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_basic_gather_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_basic_gather_inter(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
@ -214,12 +172,6 @@ BEGIN_C_DECLS
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_basic_reduce_lin_intra(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_basic_reduce_lin_inter(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
@ -279,13 +231,6 @@ BEGIN_C_DECLS
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_basic_scatter_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_basic_scatter_inter(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -32,50 +32,6 @@
#include "coll_basic.h"
/*
* allgather_intra
*
* Function: - allgather using other MPI collections
* Accepts: - same as MPI_Allgather()
* Returns: - MPI_SUCCESS or error code
*/
int
mca_coll_basic_allgather_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype, void *rbuf,
int rcount, struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int err;
ptrdiff_t lb, extent;
/* Handle MPI_IN_PLACE (see explanation in reduce.c for how to
allocate temp buffer) -- note that rank 0 can use IN_PLACE
natively, and we can just alias the right position in rbuf
as sbuf and avoid using a temporary buffer if gather is
implemented correctly */
if (MPI_IN_PLACE == sbuf && 0 != ompi_comm_rank(comm)) {
ompi_datatype_get_extent(rdtype, &lb, &extent);
sbuf = ((char*) rbuf) + (ompi_comm_rank(comm) * extent * rcount);
sdtype = rdtype;
scount = rcount;
}
/* Gather and broadcast. */
err = comm->c_coll.coll_gather(sbuf, scount, sdtype, rbuf, rcount,
rdtype, 0, comm, comm->c_coll.coll_gather_module);
if (MPI_SUCCESS == err) {
err = comm->c_coll.coll_bcast(rbuf, rcount * ompi_comm_size(comm),
rdtype, 0, comm, comm->c_coll.coll_bcast_module);
}
/* All done */
return err;
}
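/*
 * A minimal, hypothetical caller-side sketch (not part of this changeset)
 * of the MPI_IN_PLACE case handled above: each rank pre-places its
 * contribution at its own offset in the receive buffer, so no send buffer
 * is needed. Assumes MPI_COMM_WORLD and 4 ints per rank.
 */
#include <string.h>
#include "mpi.h"
static void allgather_in_place_sketch(int *all, const int *mine)
{
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    /* This rank's data goes into its own slot of the receive buffer. */
    memcpy(all + 4 * rank, mine, 4 * sizeof(int));
    /* With MPI_IN_PLACE the send count/type arguments are ignored. */
    MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                  all, 4, MPI_INT, MPI_COMM_WORLD);
}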
/*
* allgather_inter
*

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -28,87 +28,6 @@
#include "coll_basic.h"
/*
* allgatherv_intra
*
* Function: - allgatherv using other MPI collectives
* Accepts: - same as MPI_Allgatherv()
* Returns: - MPI_SUCCESS or error code
*/
int
mca_coll_basic_allgatherv_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts, int *disps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i, size, rank;
int err;
MPI_Aint extent;
MPI_Aint lb;
char *send_buf = NULL;
struct ompi_datatype_t *newtype, *send_type;
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
/*
* We don't have a root process defined. Arbitrarily assign root
* to process with rank 0 (OMPI convention)
*/
if (MPI_IN_PLACE == sbuf) {
ompi_datatype_get_extent(rdtype, &lb, &extent);
send_type = rdtype;
send_buf = (char*)rbuf;
for (i = 0; i < rank; ++i) {
send_buf += (rcounts[i] * extent);
}
} else {
send_buf = (char*)sbuf;
send_type = sdtype;
}
err = comm->c_coll.coll_gatherv(send_buf,
rcounts[rank], send_type, rbuf,
rcounts, disps, rdtype, 0,
comm, comm->c_coll.coll_gatherv_module);
if (MPI_SUCCESS != err) {
return err;
}
/*
* we now have all the data in the root's rbuf. Need to
* broadcast the data out to the other processes
*
* Need to define a datatype that captures the different vectors
* from each process. MPI_TYPE_INDEXED with params
* size, rcounts, disps, rdtype, newtype
* should do the trick.
* Use the underlying ddt functions to create and commit the
* new datatype on each process, then broadcast and destroy the
* datatype.
*/
err = ompi_datatype_create_indexed(size, rcounts, disps, rdtype, &newtype);
if (MPI_SUCCESS != err) {
return err;
}
err = ompi_datatype_commit(&newtype);
if (MPI_SUCCESS != err) {
return err;
}
err = comm->c_coll.coll_bcast(rbuf, 1, newtype, 0, comm,
comm->c_coll.coll_bcast_module);
ompi_datatype_destroy(&newtype);
return err;
}
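/*
 * A minimal MPI-level sketch of the indexed-datatype trick used above
 * (hypothetical helper, not part of this changeset): one indexed block per
 * rank lets a single broadcast of count 1 move each rank's
 * differently-sized piece to everyone.
 */
#include "mpi.h"
static int bcast_indexed_sketch(void *rbuf, int size, const int *rcounts,
                                const int *disps, MPI_Datatype rdtype,
                                MPI_Comm comm)
{
    MPI_Datatype newtype;
    int err;
    /* Block i: rcounts[i] elements at displacement disps[i]. */
    err = MPI_Type_indexed(size, rcounts, disps, rdtype, &newtype);
    if (MPI_SUCCESS != err) return err;
    MPI_Type_commit(&newtype);
    err = MPI_Bcast(rbuf, 1, newtype, 0, comm);
    MPI_Type_free(&newtype);
    return err;
}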
/*
* allgatherv_inter
*

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -32,224 +32,6 @@
#include "ompi/mca/pml/pml.h"
static int
mca_coll_basic_alltoall_intra_inplace(void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
int i, j, size, rank, err=MPI_SUCCESS;
MPI_Request *preq;
char *tmp_buffer;
size_t max_size;
ptrdiff_t ext;
/* Initialize. */
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
/* If only one process, we're done. */
if (1 == size) {
return MPI_SUCCESS;
}
/* Find the largest receive amount */
ompi_datatype_type_extent (rdtype, &ext);
max_size = ext * rcount;
/* Allocate a temporary buffer */
tmp_buffer = calloc (max_size, 1);
if (NULL == tmp_buffer) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* in-place alltoall slow algorithm (but works) */
for (i = 0 ; i < size ; ++i) {
for (j = i+1 ; j < size ; ++j) {
/* Initiate all send/recv to/from others. */
preq = basic_module->mccb_reqs;
if (i == rank) {
/* Copy the data into the temporary buffer */
err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer,
(char *) rbuf + j * max_size);
if (MPI_SUCCESS != err) { goto error_hndl; }
/* Exchange data with the peer */
err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * j, rcount, rdtype,
j, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++));
if (MPI_SUCCESS != err) { goto error_hndl; }
err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype,
j, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
comm, preq++));
if (MPI_SUCCESS != err) { goto error_hndl; }
} else if (j == rank) {
/* Copy the data into the temporary buffer */
err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer,
(char *) rbuf + i * max_size);
if (MPI_SUCCESS != err) { goto error_hndl; }
/* Exchange data with the peer */
err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * i, rcount, rdtype,
i, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++));
if (MPI_SUCCESS != err) { goto error_hndl; }
err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype,
i, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
comm, preq++));
if (MPI_SUCCESS != err) { goto error_hndl; }
} else {
continue;
}
/* Wait for the requests to complete */
err = ompi_request_wait_all (2, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
if (MPI_SUCCESS != err) { goto error_hndl; }
/* Free the requests. */
mca_coll_basic_free_reqs(basic_module->mccb_reqs, 2);
}
}
error_hndl:
/* Free the temporary buffer */
free (tmp_buffer);
/* All done */
return err;
}
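/*
 * A standalone illustration (hypothetical, not part of this changeset) of
 * the pair schedule the in-place loop above produces: each unordered pair
 * (i, j) is visited exactly once, and only ranks i and j participate in
 * that step, using a single temporary block of storage.
 */
#include <stdio.h>
static void print_pairwise_schedule(int size)
{
    int i, j;
    for (i = 0; i < size; ++i) {
        for (j = i + 1; j < size; ++j) {
            printf("step (%d,%d): ranks %d and %d swap blocks\n", i, j, i, j);
        }
    }
}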
/*
* alltoall_intra
*
* Function: - MPI_Alltoall
* Accepts: - same as MPI_Alltoall()
* Returns: - MPI_SUCCESS or an MPI error code
*/
int
mca_coll_basic_alltoall_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i;
int rank;
int size;
int err;
int nreqs;
char *psnd;
char *prcv;
MPI_Aint lb;
MPI_Aint sndinc;
MPI_Aint rcvinc;
ompi_request_t **req;
ompi_request_t **sreq;
ompi_request_t **rreq;
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
/* Initialize. */
if (MPI_IN_PLACE == sbuf) {
return mca_coll_basic_alltoall_intra_inplace (rbuf, rcount, rdtype,
comm, module);
}
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
err = ompi_datatype_get_extent(sdtype, &lb, &sndinc);
if (OMPI_SUCCESS != err) {
return err;
}
sndinc *= scount;
err = ompi_datatype_get_extent(rdtype, &lb, &rcvinc);
if (OMPI_SUCCESS != err) {
return err;
}
rcvinc *= rcount;
/* simple optimization */
psnd = ((char *) sbuf) + (rank * sndinc);
prcv = ((char *) rbuf) + (rank * rcvinc);
err = ompi_datatype_sndrcv(psnd, scount, sdtype, prcv, rcount, rdtype);
if (MPI_SUCCESS != err) {
return err;
}
/* If only one process, we're done. */
if (1 == size) {
return MPI_SUCCESS;
}
/* Initiate all send/recv to/from others. */
req = rreq = basic_module->mccb_reqs;
sreq = rreq + size - 1;
prcv = (char *) rbuf;
psnd = (char *) sbuf;
/* Post all receives first -- a simple optimization */
for (nreqs = 0, i = (rank + 1) % size; i != rank; i = (i + 1) % size, ++rreq, ++nreqs) {
err =
MCA_PML_CALL(irecv_init
(prcv + (i * rcvinc), rcount, rdtype, i,
MCA_COLL_BASE_TAG_ALLTOALL, comm, rreq));
if (MPI_SUCCESS != err) {
mca_coll_basic_free_reqs(req, nreqs);
return err;
}
}
/* Now post all sends */
for (nreqs = 0, i = (rank + 1) % size; i != rank; i = (i + 1) % size, ++sreq, ++nreqs) {
err =
MCA_PML_CALL(isend_init
(psnd + (i * sndinc), scount, sdtype, i,
MCA_COLL_BASE_TAG_ALLTOALL,
MCA_PML_BASE_SEND_STANDARD, comm, sreq));
if (MPI_SUCCESS != err) {
mca_coll_basic_free_reqs(req, nreqs);
return err;
}
}
nreqs = (size - 1) * 2;
/* Start your engines. This will never return an error. */
MCA_PML_CALL(start(nreqs, req));
/* Wait for them all. If there's an error, note that we don't
* care what the error was -- just that there *was* an error. The
* PML will finish all requests, even if one or more of them fail.
* i.e., by the end of this call, all the requests are free-able.
* So free them anyway -- even if there was an error, and return
* the error after we free everything. */
err = ompi_request_wait_all(nreqs, req, MPI_STATUSES_IGNORE);
/* Free the reqs */
mca_coll_basic_free_reqs(req, nreqs);
/* All done */
return err;
}
/*
* alltoall_inter
*

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -33,226 +33,6 @@
#include "ompi/mca/pml/pml.h"
static int
mca_coll_basic_alltoallv_intra_inplace(void *rbuf, const int *rcounts, const int *rdisps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
int i, j, size, rank, err=MPI_SUCCESS;
MPI_Request *preq;
char *tmp_buffer;
size_t max_size;
ptrdiff_t ext;
/* Initialize. */
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
/* If only one process, we're done. */
if (1 == size) {
return MPI_SUCCESS;
}
/* Find the largest receive amount */
ompi_datatype_type_extent (rdtype, &ext);
for (i = 0, max_size = 0 ; i < size ; ++i) {
size_t cur_size = ext * rcounts[i];
max_size = cur_size > max_size ? cur_size : max_size;
}
/* Allocate a temporary buffer */
tmp_buffer = calloc (max_size, 1);
if (NULL == tmp_buffer) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* in-place alltoallv slow algorithm (but works) */
for (i = 0 ; i < size ; ++i) {
for (j = i+1 ; j < size ; ++j) {
/* Initiate all send/recv to/from others. */
preq = basic_module->mccb_reqs;
if (i == rank && rcounts[j]) {
/* Copy the data into the temporary buffer */
err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[j],
tmp_buffer, (char *) rbuf + rdisps[j] * ext);
if (MPI_SUCCESS != err) { goto error_hndl; }
/* Exchange data with the peer */
err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[j] * ext, rcounts[j], rdtype,
j, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++));
if (MPI_SUCCESS != err) { goto error_hndl; }
err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[j], rdtype,
j, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD,
comm, preq++));
if (MPI_SUCCESS != err) { goto error_hndl; }
} else if (j == rank && rcounts[i]) {
/* Copy the data into the temporary buffer */
err = ompi_datatype_copy_content_same_ddt (rdtype, rcounts[i],
tmp_buffer, (char *) rbuf + rdisps[i] * ext);
if (MPI_SUCCESS != err) { goto error_hndl; }
/* Exchange data with the peer */
err = MCA_PML_CALL(irecv ((char *) rbuf + rdisps[i] * ext, rcounts[i], rdtype,
i, MCA_COLL_BASE_TAG_ALLTOALLV, comm, preq++));
if (MPI_SUCCESS != err) { goto error_hndl; }
err = MCA_PML_CALL(isend ((void *) tmp_buffer, rcounts[i], rdtype,
i, MCA_COLL_BASE_TAG_ALLTOALLV, MCA_PML_BASE_SEND_STANDARD,
comm, preq++));
if (MPI_SUCCESS != err) { goto error_hndl; }
} else {
continue;
}
/* Wait for the requests to complete */
err = ompi_request_wait_all (2, basic_module->mccb_reqs, MPI_STATUSES_IGNORE);
if (MPI_SUCCESS != err) { goto error_hndl; }
/* Free the requests. */
mca_coll_basic_free_reqs(basic_module->mccb_reqs, 2);
}
}
error_hndl:
/* Free the temporary buffer */
free (tmp_buffer);
/* All done */
return err;
}
/*
* alltoallv_intra
*
* Function: - MPI_Alltoallv
* Accepts: - same as MPI_Alltoallv()
* Returns: - MPI_SUCCESS or an MPI error code
*/
int
mca_coll_basic_alltoallv_intra(void *sbuf, int *scounts, int *sdisps,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts, int *rdisps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i;
int size;
int rank;
int err;
char *psnd;
char *prcv;
int nreqs;
MPI_Aint sndextent;
MPI_Aint rcvextent;
MPI_Request *preq;
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
/* Initialize. */
if (MPI_IN_PLACE == sbuf) {
return mca_coll_basic_alltoallv_intra_inplace (rbuf, rcounts, rdisps,
rdtype, comm, module);
}
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
ompi_datatype_type_extent(sdtype, &sndextent);
ompi_datatype_type_extent(rdtype, &rcvextent);
/* simple optimization */
psnd = ((char *) sbuf) + (sdisps[rank] * sndextent);
prcv = ((char *) rbuf) + (rdisps[rank] * rcvextent);
if (0 != scounts[rank]) {
err = ompi_datatype_sndrcv(psnd, scounts[rank], sdtype,
prcv, rcounts[rank], rdtype);
if (MPI_SUCCESS != err) {
return err;
}
}
/* If only one process, we're done. */
if (1 == size) {
return MPI_SUCCESS;
}
/* Initiate all send/recv to/from others. */
nreqs = 0;
preq = basic_module->mccb_reqs;
/* Post all receives first -- a simple optimization */
for (i = 0; i < size; ++i) {
if (i == rank || 0 == rcounts[i]) {
continue;
}
prcv = ((char *) rbuf) + (rdisps[i] * rcvextent);
err = MCA_PML_CALL(irecv_init(prcv, rcounts[i], rdtype,
i, MCA_COLL_BASE_TAG_ALLTOALLV, comm,
preq++));
++nreqs;
if (MPI_SUCCESS != err) {
mca_coll_basic_free_reqs(basic_module->mccb_reqs, nreqs);
return err;
}
}
/* Now post all sends */
for (i = 0; i < size; ++i) {
if (i == rank || 0 == scounts[i]) {
continue;
}
psnd = ((char *) sbuf) + (sdisps[i] * sndextent);
err = MCA_PML_CALL(isend_init(psnd, scounts[i], sdtype,
i, MCA_COLL_BASE_TAG_ALLTOALLV,
MCA_PML_BASE_SEND_STANDARD, comm,
preq++));
++nreqs;
if (MPI_SUCCESS != err) {
mca_coll_basic_free_reqs(basic_module->mccb_reqs, nreqs);
return err;
}
}
/* Start your engines. This will never return an error. */
MCA_PML_CALL(start(nreqs, basic_module->mccb_reqs));
/* Wait for them all. If there's an error, note that we don't care
* what the error was -- just that there *was* an error. The PML
* will finish all requests, even if one or more of them fail.
* i.e., by the end of this call, all the requests are free-able.
* So free them anyway -- even if there was an error, and return the
* error after we free everything. */
err = ompi_request_wait_all(nreqs, basic_module->mccb_reqs,
MPI_STATUSES_IGNORE);
/* Free the requests. */
mca_coll_basic_free_reqs(basic_module->mccb_reqs, nreqs);
/* All done */
return err;
}
/*
* alltoallv_inter
*

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -30,72 +30,6 @@
#include "coll_basic.h"
/*
* barrier_intra_lin
*
* Function: - barrier using O(N) algorithm
* Accepts: - same as MPI_Barrier()
* Returns: - MPI_SUCCESS or error code
*/
int
mca_coll_basic_barrier_intra_lin(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i;
int err;
int size = ompi_comm_size(comm);
int rank = ompi_comm_rank(comm);
/* All non-root ranks send & receive a zero-length message. */
if (rank > 0) {
err =
MCA_PML_CALL(send
(NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER,
MCA_PML_BASE_SEND_STANDARD, comm));
if (MPI_SUCCESS != err) {
return err;
}
err =
MCA_PML_CALL(recv
(NULL, 0, MPI_BYTE, 0, MCA_COLL_BASE_TAG_BARRIER,
comm, MPI_STATUS_IGNORE));
if (MPI_SUCCESS != err) {
return err;
}
}
/* The root collects and broadcasts the messages. */
else {
for (i = 1; i < size; ++i) {
err = MCA_PML_CALL(recv(NULL, 0, MPI_BYTE, MPI_ANY_SOURCE,
MCA_COLL_BASE_TAG_BARRIER,
comm, MPI_STATUS_IGNORE));
if (MPI_SUCCESS != err) {
return err;
}
}
for (i = 1; i < size; ++i) {
err =
MCA_PML_CALL(send
(NULL, 0, MPI_BYTE, i,
MCA_COLL_BASE_TAG_BARRIER,
MCA_PML_BASE_SEND_STANDARD, comm));
if (MPI_SUCCESS != err) {
return err;
}
}
}
/* All done */
return MPI_SUCCESS;
}
/*
* barrier_intra_log
*

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -30,78 +30,6 @@
#include "opal/util/bit_ops.h"
/*
* bcast_lin_intra
*
* Function: - broadcast using O(N) algorithm
* Accepts: - same arguments as MPI_Bcast()
* Returns: - MPI_SUCCESS or error code
*/
int
mca_coll_basic_bcast_lin_intra(void *buff, int count,
struct ompi_datatype_t *datatype, int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i;
int size;
int rank;
int err;
ompi_request_t **preq;
mca_coll_basic_module_t *basic_module = (mca_coll_basic_module_t*) module;
ompi_request_t **reqs = basic_module->mccb_reqs;
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
/* Non-root ranks receive the data. */
if (rank != root) {
return MCA_PML_CALL(recv(buff, count, datatype, root,
MCA_COLL_BASE_TAG_BCAST, comm,
MPI_STATUS_IGNORE));
}
/* Root sends data to all others. */
for (i = 0, preq = reqs; i < size; ++i) {
if (i == rank) {
continue;
}
err = MCA_PML_CALL(isend_init(buff, count, datatype, i,
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD,
comm, preq++));
if (MPI_SUCCESS != err) {
return err;
}
}
--i; /* i is now the number of requests posted: size - 1 */
/* Start your engines. This will never return an error. */
MCA_PML_CALL(start(i, reqs));
/* Wait for them all. If there's an error, note that we don't
* care what the error was -- just that there *was* an error. The
* PML will finish all requests, even if one or more of them fail.
* i.e., by the end of this call, all the requests are free-able.
* So free them anyway -- even if there was an error, and return
* the error after we free everything. */
err = ompi_request_wait_all(i, reqs, MPI_STATUSES_IGNORE);
/* Free the reqs */
mca_coll_basic_free_reqs(reqs, i);
/* All done */
return err;
}
/*
* bcast_log_intra
*

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -27,68 +27,6 @@
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
/*
* gather_intra
*
* Function: - basic gather operation
* Accepts: - same arguments as MPI_Gather()
* Returns: - MPI_SUCCESS or error code
*/
int
mca_coll_basic_gather_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i;
int err;
int rank;
int size;
char *ptmp;
MPI_Aint incr;
MPI_Aint extent;
MPI_Aint lb;
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
/* Everyone but root sends data and returns. */
if (rank != root) {
return MCA_PML_CALL(send(sbuf, scount, sdtype, root,
MCA_COLL_BASE_TAG_GATHER,
MCA_PML_BASE_SEND_STANDARD, comm));
}
/* I am the root, loop receiving the data. */
ompi_datatype_get_extent(rdtype, &lb, &extent);
incr = extent * rcount;
for (i = 0, ptmp = (char *) rbuf; i < size; ++i, ptmp += incr) {
if (i == rank) {
if (MPI_IN_PLACE != sbuf) {
err = ompi_datatype_sndrcv(sbuf, scount, sdtype,
ptmp, rcount, rdtype);
} else {
err = MPI_SUCCESS;
}
} else {
err = MCA_PML_CALL(recv(ptmp, rcount, rdtype, i,
MCA_COLL_BASE_TAG_GATHER,
comm, MPI_STATUS_IGNORE));
}
if (MPI_SUCCESS != err) {
return err;
}
}
/* All done */
return MPI_SUCCESS;
}
/*
* gather_inter

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -129,40 +129,40 @@ mca_coll_basic_comm_query(struct ompi_communicator_t *comm,
basic_module->super.coll_scatter = mca_coll_basic_scatter_inter;
basic_module->super.coll_scatterv = mca_coll_basic_scatterv_inter;
} else if (ompi_comm_size(comm) <= mca_coll_basic_crossover) {
basic_module->super.coll_allgather = mca_coll_basic_allgather_intra;
basic_module->super.coll_allgatherv = mca_coll_basic_allgatherv_intra;
basic_module->super.coll_allgather = ompi_coll_base_allgather_intra_basic_linear;
basic_module->super.coll_allgatherv = ompi_coll_base_allgatherv_intra_basic_default;
basic_module->super.coll_allreduce = mca_coll_basic_allreduce_intra;
basic_module->super.coll_alltoall = mca_coll_basic_alltoall_intra;
basic_module->super.coll_alltoallv = mca_coll_basic_alltoallv_intra;
basic_module->super.coll_alltoall = ompi_coll_base_alltoall_intra_basic_linear;
basic_module->super.coll_alltoallv = ompi_coll_base_alltoallv_intra_basic_linear;
basic_module->super.coll_alltoallw = mca_coll_basic_alltoallw_intra;
basic_module->super.coll_barrier = mca_coll_basic_barrier_intra_lin;
basic_module->super.coll_bcast = mca_coll_basic_bcast_lin_intra;
basic_module->super.coll_barrier = ompi_coll_base_barrier_intra_basic_linear;
basic_module->super.coll_bcast = ompi_coll_base_bcast_intra_basic_linear;
basic_module->super.coll_exscan = mca_coll_basic_exscan_intra;
basic_module->super.coll_gather = mca_coll_basic_gather_intra;
basic_module->super.coll_gather = ompi_coll_base_gather_intra_basic_linear;
basic_module->super.coll_gatherv = mca_coll_basic_gatherv_intra;
basic_module->super.coll_reduce = mca_coll_basic_reduce_lin_intra;
basic_module->super.coll_reduce = ompi_coll_base_reduce_intra_basic_linear;
basic_module->super.coll_reduce_scatter_block = mca_coll_basic_reduce_scatter_block_intra;
basic_module->super.coll_reduce_scatter = mca_coll_basic_reduce_scatter_intra;
basic_module->super.coll_scan = mca_coll_basic_scan_intra;
basic_module->super.coll_scatter = mca_coll_basic_scatter_intra;
basic_module->super.coll_scatter = ompi_coll_base_scatter_intra_basic_linear;
basic_module->super.coll_scatterv = mca_coll_basic_scatterv_intra;
} else {
basic_module->super.coll_allgather = mca_coll_basic_allgather_intra;
basic_module->super.coll_allgatherv = mca_coll_basic_allgatherv_intra;
basic_module->super.coll_allgather = ompi_coll_base_allgather_intra_basic_linear;
basic_module->super.coll_allgatherv = ompi_coll_base_allgatherv_intra_basic_default;
basic_module->super.coll_allreduce = mca_coll_basic_allreduce_intra;
basic_module->super.coll_alltoall = mca_coll_basic_alltoall_intra;
basic_module->super.coll_alltoallv = mca_coll_basic_alltoallv_intra;
basic_module->super.coll_alltoall = ompi_coll_base_alltoall_intra_basic_linear;
basic_module->super.coll_alltoallv = ompi_coll_base_alltoallv_intra_basic_linear;
basic_module->super.coll_alltoallw = mca_coll_basic_alltoallw_intra;
basic_module->super.coll_barrier = mca_coll_basic_barrier_intra_log;
basic_module->super.coll_bcast = mca_coll_basic_bcast_log_intra;
basic_module->super.coll_exscan = mca_coll_basic_exscan_intra;
basic_module->super.coll_gather = mca_coll_basic_gather_intra;
basic_module->super.coll_gather = ompi_coll_base_gather_intra_basic_linear;
basic_module->super.coll_gatherv = mca_coll_basic_gatherv_intra;
basic_module->super.coll_reduce = mca_coll_basic_reduce_log_intra;
basic_module->super.coll_reduce_scatter_block = mca_coll_basic_reduce_scatter_block_intra;
basic_module->super.coll_reduce_scatter = mca_coll_basic_reduce_scatter_intra;
basic_module->super.coll_scan = mca_coll_basic_scan_intra;
basic_module->super.coll_scatter = mca_coll_basic_scatter_intra;
basic_module->super.coll_scatter = ompi_coll_base_scatter_intra_basic_linear;
basic_module->super.coll_scatterv = mca_coll_basic_scatterv_intra;
}

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -28,241 +28,6 @@
#include "ompi/mca/pml/pml.h"
#include "ompi/op/op.h"
/*
* reduce_lin_intra
*
* Function: - reduction using O(N) algorithm
* Accepts: - same as MPI_Reduce()
* Returns: - MPI_SUCCESS or error code
*/
int
mca_coll_basic_reduce_lin_intra(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int root, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i, rank, err, size;
ptrdiff_t true_lb, true_extent, lb, extent;
char *free_buffer = NULL;
char *pml_buffer = NULL;
char *inplace_temp = NULL;
char *inbuf;
/* Initialize */
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
/* If not root, send data to the root. */
if (rank != root) {
err = MCA_PML_CALL(send(sbuf, count, dtype, root,
MCA_COLL_BASE_TAG_REDUCE,
MCA_PML_BASE_SEND_STANDARD, comm));
return err;
}
/* Root receives and reduces messages. Allocate buffer to receive
* messages. This comment applies to all collectives in this basic
* module where we allocate a temporary buffer. For the next few
* lines of code, it's tremendously complicated how we decided that
* this was the Right Thing to do. Sit back and enjoy. And prepare
* to have your mind warped. :-)
*
* Recall some definitions (I always get these backwards, so I'm
* going to put them here):
*
* extent: the length from the lower bound to the upper bound -- may
* be considerably larger than the buffer required to hold the data
* (or smaller! But it's easiest to think about when it's larger).
*
* true extent: the exact number of bytes required to hold the data
* in the layout pattern in the datatype.
*
* For example, consider the following buffer (just talking about
* true_lb, extent, and true extent -- extrapolate for true_ub):
*
* A B C
* --------------------------------------------------------
* | | |
* --------------------------------------------------------
*
* There are multiple cases:
*
* 1. A is what we give to MPI_Send (and friends), and A is where
* the data starts, and C is where the data ends. In this case:
*
* - extent: C-A
* - true extent: C-A
* - true_lb: 0
*
* A C
* --------------------------------------------------------
* | |
* --------------------------------------------------------
* <=======================extent=========================>
* <======================true extent=====================>
*
* 2. A is what we give to MPI_Send (and friends), B is where the
* data starts, and C is where the data ends. In this case:
*
* - extent: C-A
* - true extent: C-B
* - true_lb: positive
*
* A B C
* --------------------------------------------------------
* | | User buffer |
* --------------------------------------------------------
* <=======================extent=========================>
* <===============true extent=============>
*
* 3. B is what we give to MPI_Send (and friends), A is where the
* data starts, and C is where the data ends. In this case:
*
* - extent: C-A
* - true extent: C-A
* - true_lb: negative
*
* A B C
* --------------------------------------------------------
* | | User buffer |
* --------------------------------------------------------
* <=======================extent=========================>
* <======================true extent=====================>
*
* 4. MPI_BOTTOM is what we give to MPI_Send (and friends), B is
* where the data starts, and C is where the data ends. In this
* case:
*
* - extent: C-MPI_BOTTOM
* - true extent: C-B
* - true_lb: [potentially very large] positive
*
* MPI_BOTTOM B C
* --------------------------------------------------------
* | | User buffer |
* --------------------------------------------------------
* <=======================extent=========================>
* <===============true extent=============>
*
* So in all cases, for a temporary buffer, all we need to malloc()
* is a buffer of size true_extent. We therefore need to know two
* pointer values: what value to give to MPI_Send (and friends) and
* what value to give to free(), because they might not be the same.
*
* Clearly, what we give to free() is exactly what was returned from
* malloc(). That part is easy. :-)
*
* What we give to MPI_Send (and friends) is a bit more complicated.
* Let's take the 4 cases from above:
*
* 1. If A is what we give to MPI_Send and A is where the data
* starts, then clearly we give to MPI_Send what we got back from
* malloc().
*
* 2. If B is what we get back from malloc, but we give A to
* MPI_Send, then the buffer range [A,B) represents "dead space"
* -- no data will be put there. So it's safe to give B-true_lb to
* MPI_Send. More specifically, the true_lb is positive, so B-true_lb is
* actually A.
*
* 3. If A is what we get back from malloc, and B is what we give to
* MPI_Send, then the true_lb is negative, so A-true_lb will actually equal
* B.
*
* 4. Although this seems like the weirdest case, it's actually
* quite similar to case #2 -- the pointer we give to MPI_Send is
* smaller than the pointer we got back from malloc().
*
* Hence, in all cases, we give (return_from_malloc - true_lb) to MPI_Send.
*
* This works fine and dandy if we only have (count==1), which we
* rarely do. ;-) So we really need to allocate (true_extent +
* ((count - 1) * extent)) to get enough space for the rest. This may
* be more than is necessary, but it's ok.
*
* Simple, no? :-)
*
*/
ompi_datatype_get_extent(dtype, &lb, &extent);
ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent);
if (MPI_IN_PLACE == sbuf) {
sbuf = rbuf;
inplace_temp = (char*)malloc(true_extent + (count - 1) * extent);
if (NULL == inplace_temp) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
rbuf = inplace_temp - true_lb;
}
if (size > 1) {
free_buffer = (char*)malloc(true_extent + (count - 1) * extent);
if (NULL == free_buffer) {
if (NULL != inplace_temp) {
free(inplace_temp);
}
return OMPI_ERR_OUT_OF_RESOURCE;
}
pml_buffer = free_buffer - true_lb;
}
/* Initialize the receive buffer. */
if (rank == (size - 1)) {
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)rbuf, (char*)sbuf);
} else {
err = MCA_PML_CALL(recv(rbuf, count, dtype, size - 1,
MCA_COLL_BASE_TAG_REDUCE, comm,
MPI_STATUS_IGNORE));
}
if (MPI_SUCCESS != err) {
if (NULL != free_buffer) {
free(free_buffer);
}
return err;
}
/* Loop receiving and calling reduction function (C or Fortran). */
for (i = size - 2; i >= 0; --i) {
if (rank == i) {
inbuf = (char*)sbuf;
} else {
err = MCA_PML_CALL(recv(pml_buffer, count, dtype, i,
MCA_COLL_BASE_TAG_REDUCE, comm,
MPI_STATUS_IGNORE));
if (MPI_SUCCESS != err) {
if (NULL != free_buffer) {
free(free_buffer);
}
return err;
}
inbuf = pml_buffer;
}
/* Perform the reduction */
ompi_op_reduce(op, inbuf, rbuf, count, dtype);
}
if (NULL != inplace_temp) {
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)sbuf, inplace_temp);
free(inplace_temp);
}
if (NULL != free_buffer) {
free(free_buffer);
}
/* All done */
return MPI_SUCCESS;
}
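/*
 * A self-contained sketch of the allocation recipe explained above, using
 * the public MPI analogues of the ompi_datatype calls (helper name and
 * usage hypothetical, not part of this changeset). Worked example: for a
 * type with extent = 16, true_extent = 4, true_lb = 4 and count = 8, we
 * malloc 4 + 7 * 16 = 116 bytes and hand (ptr - 4) to the PML.
 */
#include <stdlib.h>
#include "mpi.h"
static char *alloc_reduce_tmp_sketch(MPI_Datatype dtype, int count,
                                     char **free_me)
{
    MPI_Aint lb, extent, true_lb, true_extent;
    MPI_Type_get_extent(dtype, &lb, &extent);
    MPI_Type_get_true_extent(dtype, &true_lb, &true_extent);
    /* Room for one true extent plus count-1 full extents. */
    *free_me = (char *) malloc(true_extent + (count - 1) * extent);
    if (NULL == *free_me) return NULL;
    return *free_me - true_lb;   /* what we hand to send/recv/reduce */
}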
/*
* reduce_log_intra
@ -339,8 +104,8 @@ mca_coll_basic_reduce_log_intra(void *sbuf, void *rbuf, int count,
* operations. */
if (!ompi_op_is_commute(op)) {
return mca_coll_basic_reduce_lin_intra(sbuf, rbuf, count, dtype,
op, root, comm, module);
return ompi_coll_base_reduce_intra_basic_linear(sbuf, rbuf, count, dtype,
op, root, comm, module);
}
/* Some variables */

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -28,73 +28,6 @@
#include "coll_basic.h"
/*
* scatter_intra
*
* Function: - scatter operation
* Accepts: - same arguments as MPI_Scatter()
* Returns: - MPI_SUCCESS or error code
*/
int
mca_coll_basic_scatter_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i, rank, size, err;
char *ptmp;
ptrdiff_t lb, incr;
/* Initialize */
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
/* If not root, receive data. */
if (rank != root) {
err = MCA_PML_CALL(recv(rbuf, rcount, rdtype, root,
MCA_COLL_BASE_TAG_SCATTER,
comm, MPI_STATUS_IGNORE));
return err;
}
/* I am the root, loop sending data. */
err = ompi_datatype_get_extent(sdtype, &lb, &incr);
if (OMPI_SUCCESS != err) {
return OMPI_ERROR;
}
incr *= scount;
for (i = 0, ptmp = (char *) sbuf; i < size; ++i, ptmp += incr) {
/* simple optimization */
if (i == rank) {
if (MPI_IN_PLACE != rbuf) {
err =
ompi_datatype_sndrcv(ptmp, scount, sdtype, rbuf, rcount,
rdtype);
}
} else {
err = MCA_PML_CALL(send(ptmp, scount, sdtype, i,
MCA_COLL_BASE_TAG_SCATTER,
MCA_PML_BASE_SEND_STANDARD, comm));
}
if (MPI_SUCCESS != err) {
return err;
}
}
/* All done */
return MPI_SUCCESS;
}
/*
* scatter_inter
*

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -470,6 +470,9 @@ struct mca_coll_base_module_2_1_0_t {
be used for the given communicator */
mca_coll_base_module_disable_1_1_0_fn_t coll_module_disable;
/** Data storage for all the algorithms defined in the base. Should
not be used by other modules */
struct mca_coll_base_comm_t* base_data;
};
typedef struct mca_coll_base_module_2_1_0_t mca_coll_base_module_2_1_0_t;
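/* A hypothetical accessor (not part of this changeset) showing the intended
   ownership of the new field: only the base algorithms dereference it. */
static inline struct mca_coll_base_comm_t *
coll_base_data_sketch(mca_coll_base_module_2_1_0_t *module)
{
    return module->base_data;  /* base-owned cache; opaque to other modules */
}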

View file

@ -2,7 +2,7 @@
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2009 The University of Tennessee and The University
# Copyright (c) 2004-2015 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -19,29 +19,25 @@
sources = \
coll_tuned.h \
coll_tuned_topo.h \
coll_tuned_util.h \
coll_tuned_dynamic_file.h \
coll_tuned_dynamic_rules.h \
coll_tuned_topo.c \
coll_tuned_util.c \
coll_tuned_decision_fixed.c \
coll_tuned_decision_dynamic.c \
coll_tuned_dynamic_file.c \
coll_tuned_dynamic_rules.c \
coll_tuned_allreduce.c \
coll_tuned_alltoall.c \
coll_tuned_alltoallv.c \
coll_tuned_allgather.c \
coll_tuned_allgatherv.c \
coll_tuned_barrier.c \
coll_tuned_bcast.c \
coll_tuned_reduce.c \
coll_tuned_reduce_scatter.c \
coll_tuned_gather.c \
coll_tuned_scatter.c \
coll_tuned_component.c \
coll_tuned_module.c
coll_tuned_module.c \
coll_tuned_allgather_decision.c \
coll_tuned_allgatherv_decision.c \
coll_tuned_allreduce_decision.c \
coll_tuned_alltoall_decision.c \
coll_tuned_gather_decision.c \
coll_tuned_alltoallv_decision.c \
coll_tuned_barrier_decision.c \
coll_tuned_reduce_decision.c \
coll_tuned_bcast_decision.c \
coll_tuned_reduce_scatter_decision.c \
coll_tuned_scatter_decision.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la

View file

@ -1,19 +1,8 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -28,61 +17,17 @@
#include "mpi.h"
#include "opal/mca/mca.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/request/request.h"
/* need to include our own topo prototypes so we can malloc data on the comm correctly */
#include "coll_tuned_topo.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
/* also need the dynamic rule structures */
#include "coll_tuned_dynamic_rules.h"
/* some fixed value index vars to simplify certain operations */
typedef enum COLLTYPE {
ALLGATHER = 0, /* 0 */
ALLGATHERV, /* 1 */
ALLREDUCE, /* 2 */
ALLTOALL, /* 3 */
ALLTOALLV, /* 4 */
ALLTOALLW, /* 5 */
BARRIER, /* 6 */
BCAST, /* 7 */
EXSCAN, /* 8 */
GATHER, /* 9 */
GATHERV, /* 10 */
REDUCE, /* 11 */
REDUCESCATTER, /* 12 */
SCAN, /* 13 */
SCATTER, /* 14 */
SCATTERV, /* 15 */
COLLCOUNT /* 16 end counter keep it as last element */
} COLLTYPE_T;
/* defined arg lists to simplify auto inclusion of user overriding decision functions */
#define ALLGATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLGATHERV_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void * rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLREDUCE_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLTOALL_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLTOALLV_ARGS void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t *rdtype, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define ALLTOALLW_ARGS void *sbuf, int *scounts, int *sdisps, struct ompi_datatype_t **sdtypes, void *rbuf, int *rcounts, int *rdisps, struct ompi_datatype_t **rdtypes, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define BARRIER_ARGS struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define BCAST_ARGS void *buff, int count, struct ompi_datatype_t *datatype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define EXSCAN_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define GATHER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define GATHERV_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int *rcounts, int *disps, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define REDUCE_ARGS void *sbuf, void* rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define REDUCESCATTER_ARGS void *sbuf, void *rbuf, int *rcounts, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define SCAN_ARGS void *sbuf, void *rbuf, int count, struct ompi_datatype_t *dtype, struct ompi_op_t *op, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define SCATTER_ARGS void *sbuf, int scount, struct ompi_datatype_t *sdtype, void *rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
#define SCATTERV_ARGS void *sbuf, int *scounts, int *disps, struct ompi_datatype_t *sdtype, void* rbuf, int rcount, struct ompi_datatype_t *rdtype, int root, struct ompi_communicator_t *comm, mca_coll_base_module_t *module
/* end defined arg lists to simplify auto inclusion of user overriding decision functions */
BEGIN_C_DECLS
/* these are the same across all modules and are loaded at component query time */
extern int ompi_coll_tuned_stream;
extern int ompi_coll_tuned_priority;
extern int ompi_coll_tuned_preallocate_memory_comm_size_limit;
extern bool ompi_coll_tuned_use_dynamic_rules;
extern char* ompi_coll_tuned_dynamic_rules_filename;
extern int ompi_coll_tuned_init_tree_fanout;
@ -148,12 +93,6 @@ int ompi_coll_tuned_allgather_intra_dec_dynamic(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_intra_do_forced(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_intra_do_this(ALLGATHER_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_allgather_intra_bruck(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_intra_recursivedoubling(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_intra_ring(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_intra_neighborexchange(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_intra_basic_linear(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_intra_two_procs(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_inter_dec_fixed(ALLGATHER_ARGS);
int ompi_coll_tuned_allgather_inter_dec_dynamic(ALLGATHER_ARGS);
@ -163,11 +102,6 @@ int ompi_coll_tuned_allgatherv_intra_dec_dynamic(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_intra_do_forced(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_intra_do_this(ALLGATHERV_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_allgatherv_intra_bruck(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_intra_ring(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_intra_neighborexchange(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_intra_basic_default(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_intra_two_procs(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_inter_dec_fixed(ALLGATHERV_ARGS);
int ompi_coll_tuned_allgatherv_inter_dec_dynamic(ALLGATHERV_ARGS);
@ -177,11 +111,6 @@ int ompi_coll_tuned_allreduce_intra_dec_dynamic(ALLREDUCE_ARGS);
int ompi_coll_tuned_allreduce_intra_do_forced(ALLREDUCE_ARGS);
int ompi_coll_tuned_allreduce_intra_do_this(ALLREDUCE_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_allreduce_intra_nonoverlapping(ALLREDUCE_ARGS);
int ompi_coll_tuned_allreduce_intra_recursivedoubling(ALLREDUCE_ARGS);
int ompi_coll_tuned_allreduce_intra_ring(ALLREDUCE_ARGS);
int ompi_coll_tuned_allreduce_intra_ring_segmented(ALLREDUCE_ARGS, uint32_t segsize);
int ompi_coll_tuned_allreduce_intra_basic_linear(ALLREDUCE_ARGS);
int ompi_coll_tuned_allreduce_inter_dec_fixed(ALLREDUCE_ARGS);
int ompi_coll_tuned_allreduce_inter_dec_dynamic(ALLREDUCE_ARGS);
@ -191,11 +120,6 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(ALLTOALL_ARGS);
int ompi_coll_tuned_alltoall_intra_do_forced(ALLTOALL_ARGS);
int ompi_coll_tuned_alltoall_intra_do_this(ALLTOALL_ARGS, int algorithm, int faninout, int segsize, int max_requests);
int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_alltoall_intra_pairwise(ALLTOALL_ARGS);
int ompi_coll_tuned_alltoall_intra_bruck(ALLTOALL_ARGS);
int ompi_coll_tuned_alltoall_intra_basic_linear(ALLTOALL_ARGS);
int ompi_coll_tuned_alltoall_intra_linear_sync(ALLTOALL_ARGS, int max_requests);
int ompi_coll_tuned_alltoall_intra_two_procs(ALLTOALL_ARGS);
int ompi_coll_tuned_alltoall_inter_dec_fixed(ALLTOALL_ARGS);
int ompi_coll_tuned_alltoall_inter_dec_dynamic(ALLTOALL_ARGS);
@ -205,8 +129,6 @@ int ompi_coll_tuned_alltoallv_intra_dec_dynamic(ALLTOALLV_ARGS);
int ompi_coll_tuned_alltoallv_intra_do_forced(ALLTOALLV_ARGS);
int ompi_coll_tuned_alltoallv_intra_do_this(ALLTOALLV_ARGS, int algorithm);
int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_alltoallv_intra_pairwise(ALLTOALLV_ARGS);
int ompi_coll_tuned_alltoallv_intra_basic_linear(ALLTOALLV_ARGS);
int ompi_coll_tuned_alltoallv_inter_dec_fixed(ALLTOALLV_ARGS);
int ompi_coll_tuned_alltoallv_inter_dec_dynamic(ALLTOALLV_ARGS);
@ -224,12 +146,6 @@ int ompi_coll_tuned_barrier_intra_do_this(BARRIER_ARGS, int algorithm, int fanin
int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_barrier_inter_dec_fixed(BARRIER_ARGS);
int ompi_coll_tuned_barrier_inter_dec_dynamic(BARRIER_ARGS);
int ompi_coll_tuned_barrier_intra_doublering(BARRIER_ARGS);
int ompi_coll_tuned_barrier_intra_recursivedoubling(BARRIER_ARGS);
int ompi_coll_tuned_barrier_intra_bruck(BARRIER_ARGS);
int ompi_coll_tuned_barrier_intra_two_procs(BARRIER_ARGS);
int ompi_coll_tuned_barrier_intra_linear(BARRIER_ARGS);
int ompi_coll_tuned_barrier_intra_tree(BARRIER_ARGS);
/* Bcast */
int ompi_coll_tuned_bcast_intra_generic( BCAST_ARGS, uint32_t count_by_segment, ompi_coll_tree_t* tree );
@ -238,12 +154,6 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(BCAST_ARGS);
int ompi_coll_tuned_bcast_intra_do_forced(BCAST_ARGS);
int ompi_coll_tuned_bcast_intra_do_this(BCAST_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_bcast_intra_basic_linear(BCAST_ARGS);
int ompi_coll_tuned_bcast_intra_chain(BCAST_ARGS, uint32_t segsize, int32_t chains);
int ompi_coll_tuned_bcast_intra_pipeline(BCAST_ARGS, uint32_t segsize);
int ompi_coll_tuned_bcast_intra_binomial(BCAST_ARGS, uint32_t segsize);
int ompi_coll_tuned_bcast_intra_bintree(BCAST_ARGS, uint32_t segsize);
int ompi_coll_tuned_bcast_intra_split_bintree(BCAST_ARGS, uint32_t segsize);
int ompi_coll_tuned_bcast_inter_dec_fixed(BCAST_ARGS);
int ompi_coll_tuned_bcast_inter_dec_dynamic(BCAST_ARGS);
@ -259,9 +169,6 @@ int ompi_coll_tuned_gather_intra_dec_dynamic(GATHER_ARGS);
int ompi_coll_tuned_gather_intra_do_forced(GATHER_ARGS);
int ompi_coll_tuned_gather_intra_do_this(GATHER_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_gather_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_gather_intra_basic_linear(GATHER_ARGS);
int ompi_coll_tuned_gather_intra_binomial(GATHER_ARGS);
int ompi_coll_tuned_gather_intra_linear_sync(GATHER_ARGS, int first_segment_size);
int ompi_coll_tuned_gather_inter_dec_fixed(GATHER_ARGS);
int ompi_coll_tuned_gather_inter_dec_dynamic(GATHER_ARGS);
@ -278,12 +185,6 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic(REDUCE_ARGS);
int ompi_coll_tuned_reduce_intra_do_forced(REDUCE_ARGS);
int ompi_coll_tuned_reduce_intra_do_this(REDUCE_ARGS, int algorithm, int faninout, int segsize, int max_outstanding_reqs);
int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_reduce_intra_basic_linear(REDUCE_ARGS);
int ompi_coll_tuned_reduce_intra_chain(REDUCE_ARGS, uint32_t segsize, int fanout, int max_outstanding_reqs );
int ompi_coll_tuned_reduce_intra_pipeline(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs );
int ompi_coll_tuned_reduce_intra_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs );
int ompi_coll_tuned_reduce_intra_binomial(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs );
int ompi_coll_tuned_reduce_intra_in_order_binary(REDUCE_ARGS, uint32_t segsize, int max_outstanding_reqs );
int ompi_coll_tuned_reduce_inter_dec_fixed(REDUCE_ARGS);
int ompi_coll_tuned_reduce_inter_dec_dynamic(REDUCE_ARGS);
@ -293,10 +194,6 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(REDUCESCATTER_ARGS);
int ompi_coll_tuned_reduce_scatter_intra_do_forced(REDUCESCATTER_ARGS);
int ompi_coll_tuned_reduce_scatter_intra_do_this(REDUCESCATTER_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_reduce_scatter_intra_nonoverlapping(REDUCESCATTER_ARGS);
int ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(REDUCESCATTER_ARGS);
int ompi_coll_tuned_reduce_scatter_intra_ring(REDUCESCATTER_ARGS);
int ompi_coll_tuned_reduce_scatter_inter_dec_fixed(REDUCESCATTER_ARGS);
int ompi_coll_tuned_reduce_scatter_inter_dec_dynamic(REDUCESCATTER_ARGS);
@ -312,8 +209,6 @@ int ompi_coll_tuned_scatter_intra_dec_dynamic(SCATTER_ARGS);
int ompi_coll_tuned_scatter_intra_do_forced(SCATTER_ARGS);
int ompi_coll_tuned_scatter_intra_do_this(SCATTER_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
int ompi_coll_tuned_scatter_intra_basic_linear(SCATTER_ARGS);
int ompi_coll_tuned_scatter_intra_binomial(SCATTER_ARGS);
int ompi_coll_tuned_scatter_inter_dec_fixed(SCATTER_ARGS);
int ompi_coll_tuned_scatter_inter_dec_dynamic(SCATTER_ARGS);
@ -325,16 +220,6 @@ int ompi_coll_tuned_scatterv_inter_dec_dynamic(SCATTERV_ARGS);
int mca_coll_tuned_ft_event(int state);
/* Utility functions */
static inline void ompi_coll_tuned_free_reqs(ompi_request_t **reqs, int count)
{
int i;
for (i = 0; i < count; ++i)
ompi_request_free(&reqs[i]);
}
struct mca_coll_tuned_component_t {
/** Base coll component */
mca_coll_base_component_2_0_0_t super;
@ -359,200 +244,17 @@ typedef struct mca_coll_tuned_component_t mca_coll_tuned_component_t;
*/
OMPI_MODULE_DECLSPEC extern mca_coll_tuned_component_t mca_coll_tuned_component;
/*
* Data structure for hanging data off the communicator
* i.e. per module instance
*/
struct mca_coll_tuned_comm_t {
/* standard data for requests and PML usage */
/* Pre-create space for requests.
* Note this does not affect basic,
* but used in the wrong context it can confuse a debugger;
* this is controlled by an MCA param
*/
ompi_request_t **mcct_reqs;
int mcct_num_reqs;
/*
* tuned topo information caching per communicator
*
* for each communicator we cache the topo information so we can
* reuse it without regenerating; if we change the root [or fanout]
* we regenerate and recache this information
*/
/* general tree with n fan out */
ompi_coll_tree_t *cached_ntree;
int cached_ntree_root;
int cached_ntree_fanout;
/* binary tree */
ompi_coll_tree_t *cached_bintree;
int cached_bintree_root;
/* binomial tree */
ompi_coll_tree_t *cached_bmtree;
int cached_bmtree_root;
/* binomial tree */
ompi_coll_tree_t *cached_in_order_bmtree;
int cached_in_order_bmtree_root;
/* chained tree (fanout followed by pipelines) */
ompi_coll_tree_t *cached_chain;
int cached_chain_root;
int cached_chain_fanout;
/* pipeline */
ompi_coll_tree_t *cached_pipeline;
int cached_pipeline_root;
/* in-order binary tree (root of the in-order binary tree is rank 0) */
ompi_coll_tree_t *cached_in_order_bintree;
/* moving to the component */
ompi_coll_com_rule_t *com_rules[COLLCOUNT]; /* the communicator rules for each MPI collective for ONLY my comsize */
/* for forced algorithms we store the information on the module */
/* previously we only had one shared copy -- oops, it really is per comm/module */
coll_tuned_force_algorithm_params_t user_forced[COLLCOUNT];
};
typedef struct mca_coll_tuned_comm_t mca_coll_tuned_comm_t;
struct mca_coll_tuned_module_t {
mca_coll_base_module_t super;
mca_coll_tuned_comm_t *tuned_data;
/* for forced algorithms we store the information on the module */
/* previously we only had one shared copy -- oops, it really is per comm/module */
coll_tuned_force_algorithm_params_t user_forced[COLLCOUNT];
/* the communicator rules for each MPI collective for ONLY my comsize */
ompi_coll_com_rule_t *com_rules[COLLCOUNT];
};
typedef struct mca_coll_tuned_module_t mca_coll_tuned_module_t;
OBJ_CLASS_DECLARATION(mca_coll_tuned_module_t);
static inline void mca_coll_tuned_free_reqs(ompi_request_t ** reqs,
int count)
{
int i;
for (i = 0; i < count; ++i)
ompi_request_free(reqs + i);
}
END_C_DECLS
#define COLL_TUNED_UPDATE_BINTREE( OMPI_COMM, TUNED_MODULE, ROOT ) \
do { \
mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \
if( !( (coll_comm->cached_bintree) \
&& (coll_comm->cached_bintree_root == (ROOT)) ) ) { \
if( coll_comm->cached_bintree ) { /* destroy previous binary tree if defined */ \
ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_bintree) ); \
} \
coll_comm->cached_bintree = ompi_coll_tuned_topo_build_tree(2,(OMPI_COMM),(ROOT)); \
coll_comm->cached_bintree_root = (ROOT); \
} \
} while (0)
#define COLL_TUNED_UPDATE_BMTREE( OMPI_COMM, TUNED_MODULE, ROOT ) \
do { \
mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \
if( !( (coll_comm->cached_bmtree) \
&& (coll_comm->cached_bmtree_root == (ROOT)) ) ) { \
if( coll_comm->cached_bmtree ) { /* destroy previous binomial if defined */ \
ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_bmtree) ); \
} \
coll_comm->cached_bmtree = ompi_coll_tuned_topo_build_bmtree( (OMPI_COMM), (ROOT) ); \
coll_comm->cached_bmtree_root = (ROOT); \
} \
} while (0)
#define COLL_TUNED_UPDATE_IN_ORDER_BMTREE( OMPI_COMM, TUNED_MODULE, ROOT ) \
do { \
mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \
if( !( (coll_comm->cached_in_order_bmtree) \
&& (coll_comm->cached_in_order_bmtree_root == (ROOT)) ) ) { \
if( coll_comm->cached_in_order_bmtree ) { /* destroy previous binomial if defined */ \
ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_in_order_bmtree) ); \
} \
coll_comm->cached_in_order_bmtree = ompi_coll_tuned_topo_build_in_order_bmtree( (OMPI_COMM), (ROOT) ); \
coll_comm->cached_in_order_bmtree_root = (ROOT); \
} \
} while (0)
#define COLL_TUNED_UPDATE_PIPELINE( OMPI_COMM, TUNED_MODULE, ROOT ) \
do { \
mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \
if( !( (coll_comm->cached_pipeline) \
&& (coll_comm->cached_pipeline_root == (ROOT)) ) ) { \
if (coll_comm->cached_pipeline) { /* destroy previous pipeline if defined */ \
ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_pipeline) ); \
} \
coll_comm->cached_pipeline = ompi_coll_tuned_topo_build_chain( 1, (OMPI_COMM), (ROOT) ); \
coll_comm->cached_pipeline_root = (ROOT); \
} \
} while (0)
#define COLL_TUNED_UPDATE_CHAIN( OMPI_COMM, TUNED_MODULE, ROOT, FANOUT ) \
do { \
mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \
if( !( (coll_comm->cached_chain) \
&& (coll_comm->cached_chain_root == (ROOT)) \
&& (coll_comm->cached_chain_fanout == (FANOUT)) ) ) { \
if( coll_comm->cached_chain) { /* destroy previous chain if defined */ \
ompi_coll_tuned_topo_destroy_tree( &(coll_comm->cached_chain) ); \
} \
coll_comm->cached_chain = ompi_coll_tuned_topo_build_chain((FANOUT), (OMPI_COMM), (ROOT)); \
coll_comm->cached_chain_root = (ROOT); \
coll_comm->cached_chain_fanout = (FANOUT); \
} \
} while (0)
#define COLL_TUNED_UPDATE_IN_ORDER_BINTREE( OMPI_COMM, TUNED_MODULE ) \
do { \
mca_coll_tuned_comm_t* coll_comm = (TUNED_MODULE)->tuned_data; \
if( !(coll_comm->cached_in_order_bintree) ) { \
/* In-order binary tree topology is defined by communicator size */ \
/* Thus, there is no need to destroy anything */ \
coll_comm->cached_in_order_bintree = \
ompi_coll_tuned_topo_build_in_order_bintree((OMPI_COMM)); \
} \
} while (0)
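/* A sketch of how the caching macros above are consumed inside a tuned
   collective (function hypothetical, not part of this changeset; field
   names follow the structs above): */
static inline ompi_coll_tree_t *
get_cached_bmtree_sketch(struct ompi_communicator_t *comm,
                         mca_coll_tuned_module_t *tuned_module, int root)
{
    /* Rebuilds the binomial tree only when the cached root differs. */
    COLL_TUNED_UPDATE_BMTREE(comm, tuned_module, root);
    return tuned_module->tuned_data->cached_bmtree;
}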
/**
* This macro gives a generic way to compute the best count for
* a segment (i.e. the number of complete datatypes that
* can fit in the specified SEGSIZE). Beware: when this macro
* is called, SEGCOUNT must already be initialized to the count
* expected by the collective call.
*/
#define COLL_TUNED_COMPUTED_SEGCOUNT(SEGSIZE, TYPELNG, SEGCOUNT) \
if( ((SEGSIZE) >= (TYPELNG)) && \
((SEGSIZE) < ((TYPELNG) * (SEGCOUNT))) ) { \
size_t residual; \
(SEGCOUNT) = (int)((SEGSIZE) / (TYPELNG)); \
residual = (SEGSIZE) - (SEGCOUNT) * (TYPELNG); \
if( residual > ((TYPELNG) >> 1) ) \
(SEGCOUNT)++; \
}
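/*
 * Worked example (added for illustration, values hypothetical): with
 * SEGSIZE = 1024 bytes, TYPELNG = 8 (e.g. MPI_DOUBLE) and SEGCOUNT
 * initialized to the collective's count of 4096, the macro computes
 * SEGCOUNT = 1024 / 8 = 128 with residual 0. With TYPELNG = 12,
 * SEGCOUNT = 85 and residual = 4, which is not greater than
 * TYPELNG >> 1 = 6, so no rounding up occurs.
 */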
/**
* This macro gives a generic way to compute the well-distributed block counts
* when the count and number of blocks are fixed.
* Macro returns "early-block" count, "late-block" count, and "split-index"
* which is the block at which we switch from "early-block" count to
* the "late-block" count.
* count = split_index * early_block_count +
* (num_blocks - split_index) * late_block_count
* We do not perform ANY error checks - make sure that the input values
* make sense (e.g. count > num_blocks).
*/
#define COLL_TUNED_COMPUTE_BLOCKCOUNT( COUNT, NUM_BLOCKS, SPLIT_INDEX, \
EARLY_BLOCK_COUNT, LATE_BLOCK_COUNT ) \
EARLY_BLOCK_COUNT = LATE_BLOCK_COUNT = COUNT / NUM_BLOCKS; \
SPLIT_INDEX = COUNT % NUM_BLOCKS; \
if (0 != SPLIT_INDEX) { \
EARLY_BLOCK_COUNT = EARLY_BLOCK_COUNT + 1; \
}
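/*
 * Worked example (added for illustration): COUNT = 10 and NUM_BLOCKS = 4
 * give EARLY_BLOCK_COUNT = 3, LATE_BLOCK_COUNT = 2 and SPLIT_INDEX = 2,
 * satisfying 2 * 3 + (4 - 2) * 2 = 10.
 */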
#endif /* MCA_COLL_TUNED_EXPORT_H */


@ -0,0 +1,218 @@
/*
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mpi.h"
#include "opal/util/bit_ops.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/coll/base/coll_base_topo.h"
#include "ompi/mca/coll/base/coll_base_util.h"
#include "coll_tuned.h"
/* allgather algorithm variables */
static int coll_tuned_allgather_forced_algorithm = 0;
static int coll_tuned_allgather_segment_size = 0;
static int coll_tuned_allgather_tree_fanout;
static int coll_tuned_allgather_chain_fanout;
/* valid values for coll_tuned_allgather_forced_algorithm */
static mca_base_var_enum_value_t allgather_algorithms[] = {
{0, "ignore"},
{1, "linear"},
{2, "bruck"},
{3, "recursive_doubling"},
{4, "ring"},
{5, "neighbor"},
{6, "two_proc"},
{0, NULL}
};
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if it's forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map
routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values
and perms */
/* modules do not call this; they call the forced_getvalues routine instead */
int
ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
int cnt;
for( cnt = 0; NULL != allgather_algorithms[cnt].string; cnt++ );
ompi_coll_tuned_forced_max_algorithms[ALLGATHER] = cnt;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgather_algorithm_count",
"Number of allgather algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&cnt);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_allgather_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_allgather_algorithms", allgather_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgather_algorithm",
"Which allallgather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 bruck, 3 recursive doubling, 4 ring, 5 neighbor exchange, 6: two proc only.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgather_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_allgather_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgather_algorithm_segmentsize",
"Segment size in bytes used by default for allgather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgather_segment_size);
coll_tuned_allgather_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgather_algorithm_tree_fanout",
"Fanout for n-tree used for allgather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgather_tree_fanout);
coll_tuned_allgather_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgather_algorithm_chain_fanout",
"Fanout for chains used for allgather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgather_chain_fanout);
return (MPI_SUCCESS);
}
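/*
 * Usage sketch (assumed typical Open MPI command line, not part of this
 * diff; ./app is a placeholder): forcing the Bruck allgather (enum value
 * 2 above) requires the dynamic decision path to be enabled:
 *
 *   mpirun --mca coll_tuned_use_dynamic_rules 1 \
 *          --mca coll_tuned_allgather_algorithm 2 ./app
 */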
int ompi_coll_tuned_allgather_intra_do_forced(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_do_forced selected algorithm %d",
tuned_module->user_forced[ALLGATHER].algorithm));
switch (tuned_module->user_forced[ALLGATHER].algorithm) {
case (0):
return ompi_coll_tuned_allgather_intra_dec_fixed(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (1):
return ompi_coll_base_allgather_intra_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (2):
return ompi_coll_base_allgather_intra_bruck(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (3):
return ompi_coll_base_allgather_intra_recursivedoubling(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (4):
return ompi_coll_base_allgather_intra_ring(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (5):
return ompi_coll_base_allgather_intra_neighborexchange(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (6):
return ompi_coll_base_allgather_intra_two_procs(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[ALLGATHER].algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLGATHER]));
return (MPI_ERR_ARG);
}
int ompi_coll_tuned_allgather_intra_do_this(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0):
return ompi_coll_tuned_allgather_intra_dec_fixed(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (1):
return ompi_coll_base_allgather_intra_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (2):
return ompi_coll_base_allgather_intra_bruck(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (3):
return ompi_coll_base_allgather_intra_recursivedoubling(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (4):
return ompi_coll_base_allgather_intra_ring(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (5):
return ompi_coll_base_allgather_intra_neighborexchange(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
case (6):
return ompi_coll_base_allgather_intra_two_procs(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgather_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[ALLGATHER]));
return (MPI_ERR_ARG);
}
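/*
 * Hypothetical caller sketch: the dynamic decision layer resolves a rule
 * to (algorithm, faninout, segsize) and dispatches through this routine;
 * e.g. algorithm 4 selects the ring allgather, with faninout and segsize
 * unused by that variant:
 *
 *   ompi_coll_tuned_allgather_intra_do_this(sbuf, scount, sdtype,
 *                                           rbuf, rcount, rdtype,
 *                                           comm, module, 4, 0, 0);
 */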


@ -0,0 +1,212 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "coll_tuned.h"
#include "ompi/mca/coll/base/coll_base_topo.h"
#include "ompi/mca/coll/base/coll_base_util.h"
/* allgatherv algorithm variables */
static int coll_tuned_allgatherv_forced_algorithm = 0;
static int coll_tuned_allgatherv_segment_size = 0;
static int coll_tuned_allgatherv_tree_fanout;
static int coll_tuned_allgatherv_chain_fanout;
/* valid values for coll_tuned_allgatherv_forced_algorithm */
static mca_base_var_enum_value_t allgatherv_algorithms[] = {
{0, "ignore"},
{1, "default"},
{2, "bruck"},
{3, "ring"},
{4, "neighbor"},
{5, "two_proc"},
{0, NULL}
};
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if it's forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map
routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values
and perms */
/* modules do not call this; they call the forced_getvalues routine instead */
int
ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
int cnt;
for( cnt = 0; NULL != allgatherv_algorithms[cnt].string; cnt++ );
ompi_coll_tuned_forced_max_algorithms[ALLGATHERV] = cnt;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_count",
"Number of allgatherv algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&cnt);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_allgatherv_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_allgatherv_algorithms", allgatherv_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm",
"Which allallgatherv algorithm is used. Can be locked down to choice of: 0 ignore, 1 default (allgathervv + bcast), 2 bruck, 3 ring, 4 neighbor exchange, 5: two proc only.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgatherv_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_allgatherv_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_segmentsize",
"Segment size in bytes used by default for allgatherv algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgatherv_segment_size);
coll_tuned_allgatherv_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_tree_fanout",
"Fanout for n-tree used for allgatherv algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgatherv_tree_fanout);
coll_tuned_allgatherv_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allgatherv_algorithm_chain_fanout",
"Fanout for chains used for allgatherv algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allgatherv_chain_fanout);
return (MPI_SUCCESS);
}
int ompi_coll_tuned_allgatherv_intra_do_forced(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts,
int *rdispls,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_do_forced selected algorithm %d",
tuned_module->user_forced[ALLGATHERV].algorithm));
switch (tuned_module->user_forced[ALLGATHERV].algorithm) {
case (0):
return ompi_coll_tuned_allgatherv_intra_dec_fixed(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (1):
return ompi_coll_base_allgatherv_intra_basic_default(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (2):
return ompi_coll_base_allgatherv_intra_bruck(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (3):
return ompi_coll_base_allgatherv_intra_ring(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (4):
return ompi_coll_base_allgatherv_intra_neighborexchange(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (5):
return ompi_coll_base_allgatherv_intra_two_procs(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[ALLGATHERV].algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLGATHERV]));
return (MPI_ERR_ARG);
}
int ompi_coll_tuned_allgatherv_intra_do_this(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts,
int *rdispls,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout,
int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0):
return ompi_coll_tuned_allgatherv_intra_dec_fixed(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (1):
return ompi_coll_base_allgatherv_intra_basic_default(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (2):
return ompi_coll_base_allgatherv_intra_bruck(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (3):
return ompi_coll_base_allgatherv_intra_ring(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (4):
return ompi_coll_base_allgatherv_intra_neighborexchange(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
case (5):
return ompi_coll_base_allgatherv_intra_two_procs(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:allgatherv_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[ALLGATHERV]));
return (MPI_ERR_ARG);
}


@ -0,0 +1,182 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mpi.h"
#include "opal/util/bit_ops.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/op/op.h"
#include "coll_tuned.h"
#include "ompi/mca/coll/base/coll_base_topo.h"
#include "ompi/mca/coll/base/coll_base_util.h"
/* allreduce algorithm variables */
static int coll_tuned_allreduce_algorithm_count = 5;
static int coll_tuned_allreduce_forced_algorithm = 0;
static int coll_tuned_allreduce_segment_size = 0;
static int coll_tuned_allreduce_tree_fanout;
static int coll_tuned_allreduce_chain_fanout;
/* valid values for coll_tuned_allreduce_forced_algorithm */
static mca_base_var_enum_value_t allreduce_algorithms[] = {
{0, "ignore"},
{1, "basic_linear"},
{2, "nonoverlapping"},
{3, "recursive_doubling"},
{4, "ring"},
{5, "segmented_ring"},
{0, NULL}
};
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if it's forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values and perms */
/* modules do not call this; they call the forced_getvalues routine instead */
int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
int cnt;
for( cnt = 0; NULL != allreduce_algorithms[cnt].string; cnt++ );
ompi_coll_tuned_forced_max_algorithms[ALLREDUCE] = cnt;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_count",
"Number of allreduce algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&cnt);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_allreduce_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_allreduce_algorithms", allreduce_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm",
"Which allreduce algorithm is used. Can be locked down to any of: 0 ignore, 1 basic linear, 2 nonoverlapping (tuned reduce + tuned bcast), 3 recursive doubling, 4 ring, 5 segmented ring",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allreduce_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_allreduce_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_segmentsize",
"Segment size in bytes used by default for allreduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allreduce_segment_size);
coll_tuned_allreduce_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_tree_fanout",
"Fanout for n-tree used for allreduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allreduce_tree_fanout);
coll_tuned_allreduce_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_chain_fanout",
"Fanout for chains used for allreduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_allreduce_chain_fanout);
return (MPI_SUCCESS);
}
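/*
 * Usage sketch (assumed command line, not part of this diff): the
 * segmented ring (enum value 5) is the only allreduce above that honors
 * the segment size parameter:
 *
 *   mpirun --mca coll_tuned_use_dynamic_rules 1 \
 *          --mca coll_tuned_allreduce_algorithm 5 \
 *          --mca coll_tuned_allreduce_algorithm_segmentsize 65536 ./app
 */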
int ompi_coll_tuned_allreduce_intra_do_forced(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced selected algorithm %d, segment size %d",
tuned_module->user_forced[ALLREDUCE].algorithm,
tuned_module->user_forced[ALLREDUCE].segsize));
switch (tuned_module->user_forced[ALLREDUCE].algorithm) {
case (0):
return ompi_coll_tuned_allreduce_intra_dec_fixed(sbuf, rbuf, count, dtype, op, comm, module);
case (1):
return ompi_coll_base_allreduce_intra_basic_linear(sbuf, rbuf, count, dtype, op, comm, module);
case (2):
return ompi_coll_base_allreduce_intra_nonoverlapping(sbuf, rbuf, count, dtype, op, comm, module);
case (3):
return ompi_coll_base_allreduce_intra_recursivedoubling(sbuf, rbuf, count, dtype, op, comm, module);
case (4):
return ompi_coll_base_allreduce_intra_ring(sbuf, rbuf, count, dtype, op, comm, module);
case (5):
return ompi_coll_base_allreduce_intra_ring_segmented(sbuf, rbuf, count, dtype, op, comm, module, tuned_module->user_forced[ALLREDUCE].segsize);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[ALLREDUCE].algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLREDUCE]));
return (MPI_ERR_ARG);
}
int ompi_coll_tuned_allreduce_intra_do_this(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this algorithm %d topo fan in/out %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0):
return ompi_coll_tuned_allreduce_intra_dec_fixed(sbuf, rbuf, count, dtype, op, comm, module);
case (1):
return ompi_coll_base_allreduce_intra_basic_linear(sbuf, rbuf, count, dtype, op, comm, module);
case (2):
return ompi_coll_base_allreduce_intra_nonoverlapping(sbuf, rbuf, count, dtype, op, comm, module);
case (3):
return ompi_coll_base_allreduce_intra_recursivedoubling(sbuf, rbuf, count, dtype, op, comm, module);
case (4):
return ompi_coll_base_allreduce_intra_ring(sbuf, rbuf, count, dtype, op, comm, module);
case (5):
return ompi_coll_base_allreduce_intra_ring_segmented(sbuf, rbuf, count, dtype, op, comm, module, segsize);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:allreduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[ALLREDUCE]));
return (MPI_ERR_ARG);
}


@ -0,0 +1,204 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned.h"
#include "ompi/mca/coll/base/coll_base_topo.h"
#include "ompi/mca/coll/base/coll_base_util.h"
/* alltoall algorithm variables */
static int coll_tuned_alltoall_forced_algorithm = 0;
static int coll_tuned_alltoall_segment_size = 0;
static int coll_tuned_alltoall_max_requests;
static int coll_tuned_alltoall_tree_fanout;
static int coll_tuned_alltoall_chain_fanout;
/* valid values for coll_tuned_alltoall_forced_algorithm */
static mca_base_var_enum_value_t alltoall_algorithms[] = {
{0, "ignore"},
{1, "linear"},
{2, "pairwise"},
{3, "modified_bruck"},
{4, "linear_sync"},
{5, "two_proc"},
{0, NULL}
};
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if it's forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values and perms */
/* modules do not call this; they call the forced_getvalues routine instead */
int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
int cnt;
for( cnt = 0; NULL != alltoall_algorithms[cnt].string; cnt++ );
ompi_coll_tuned_forced_max_algorithms[ALLTOALL] = cnt;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_count",
"Number of alltoall algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&cnt);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_alltoall_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_alltoall_algorithms", alltoall_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm",
"Which alltoall algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 pairwise, 3: modified bruck, 4: linear with sync, 5:two proc only.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_alltoall_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_segmentsize",
"Segment size in bytes used by default for alltoall algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_segment_size);
coll_tuned_alltoall_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_tree_fanout",
"Fanout for n-tree used for alltoall algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_tree_fanout);
coll_tuned_alltoall_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_chain_fanout",
"Fanout for chains used for alltoall algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_chain_fanout);
coll_tuned_alltoall_max_requests = 0; /* no limit for alltoall by default */
mca_param_indices->max_requests_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_max_requests",
"Maximum number of outstanding send or recv requests. Only has meaning for synchronized algorithms.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoall_max_requests);
if (mca_param_indices->max_requests_param_index < 0) {
return mca_param_indices->max_requests_param_index;
}
if (coll_tuned_alltoall_max_requests < 0) {
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
opal_output( 0, "Maximum outstanding requests must be a positive number greater than 1. Switching to system level default %d \n",
ompi_coll_tuned_init_max_requests );
}
coll_tuned_alltoall_max_requests = 0;
}
return (MPI_SUCCESS);
}
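/*
 * Usage sketch (assumed command line): linear-with-sync (enum value 4) is
 * the only alltoall above that consumes the max_requests limit, capping
 * the number of outstanding send/recv requests:
 *
 *   mpirun --mca coll_tuned_use_dynamic_rules 1 \
 *          --mca coll_tuned_alltoall_algorithm 4 \
 *          --mca coll_tuned_alltoall_algorithm_max_requests 32 ./app
 */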
int ompi_coll_tuned_alltoall_intra_do_forced(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced selected algorithm %d",
tuned_module->user_forced[ALLTOALL].algorithm));
switch (tuned_module->user_forced[ALLTOALL].algorithm) {
case (0):
return ompi_coll_tuned_alltoall_intra_dec_fixed (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (1):
return ompi_coll_base_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (2):
return ompi_coll_base_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (3):
return ompi_coll_base_alltoall_intra_bruck (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (4):
return ompi_coll_base_alltoall_intra_linear_sync (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module,
tuned_module->user_forced[ALLTOALL].max_requests);
case (5):
return ompi_coll_base_alltoall_intra_two_procs (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[ALLTOALL].algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL]));
return (MPI_ERR_ARG);
}
int ompi_coll_tuned_alltoall_intra_do_this(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize,
int max_requests)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0):
return ompi_coll_tuned_alltoall_intra_dec_fixed(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (1):
return ompi_coll_base_alltoall_intra_basic_linear(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (2):
return ompi_coll_base_alltoall_intra_pairwise(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (3):
return ompi_coll_base_alltoall_intra_bruck(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
case (4):
return ompi_coll_base_alltoall_intra_linear_sync(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module, max_requests);
case (5):
return ompi_coll_base_alltoall_intra_two_procs(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:alltoall_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALL]));
return (MPI_ERR_ARG);
}


@ -0,0 +1,156 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned.h"
#include "ompi/mca/coll/base/coll_base_topo.h"
#include "ompi/mca/coll/base/coll_base_util.h"
/* alltoallv algorithm variables */
static int coll_tuned_alltoallv_algorithm_count = 2;
static int coll_tuned_alltoallv_forced_algorithm = 0;
/* valid values for coll_tuned_alltoallv_forced_algorithm */
static mca_base_var_enum_value_t alltoallv_algorithms[] = {
{0, "ignore"},
{1, "basic_linear"},
{2, "pairwise"},
{0, NULL}
};
/*
* The following are used by dynamic and forced rules. Publish
* details of each algorithm and whether it's forced/fixed/locked in; as you add
* methods/algorithms you must update this and the query/map routines.
* This routine is called by the component only. This makes sure that
* the mca parameters are set to their initial values and perms.
* Modules do not call this; they call the forced_getvalues routine
* instead.
*/
int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t
*mca_param_indices)
{
mca_base_var_enum_t *new_enum;
int cnt;
for( cnt = 0; NULL != alltoallv_algorithms[cnt].string; cnt++ );
ompi_coll_tuned_forced_max_algorithms[ALLTOALLV] = cnt;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoallv_algorithm_count",
"Number of alltoallv algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&cnt);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_alltoallv_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_alltoallv_algorithms", alltoallv_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"alltoallv_algorithm",
"Which alltoallv algorithm is used. "
"Can be locked down to choice of: 0 ignore, "
"1 basic linear, 2 pairwise.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_alltoallv_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
return (MPI_SUCCESS);
}
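/*
 * Note (added commentary): unlike the other collectives in this commit,
 * alltoallv registers no segment-size or fanout parameters, presumably
 * because neither the linear nor the pairwise variant segments its
 * messages; only the algorithm choice itself can be forced.
 */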
int ompi_coll_tuned_alltoallv_intra_do_forced(void *sbuf, int *scounts, int *sdisps,
struct ompi_datatype_t *sdtype,
void* rbuf, int *rcounts, int *rdisps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoallv_intra_do_forced selected algorithm %d",
tuned_module->user_forced[ALLTOALLV].algorithm));
switch (tuned_module->user_forced[ALLTOALLV].algorithm) {
case (0):
return ompi_coll_tuned_alltoallv_intra_dec_fixed(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
case (1):
return ompi_coll_base_alltoallv_intra_basic_linear(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
case (2):
return ompi_coll_base_alltoallv_intra_pairwise(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoallv_intra_do_forced attempt to "
"select algorithm %d when only 0-%d is valid.",
tuned_module->user_forced[ALLTOALLV].algorithm,
ompi_coll_tuned_forced_max_algorithms[ALLTOALLV]));
return (MPI_ERR_ARG);
}
/* If the user selects dynamic rules and specifies the algorithm to
* use, then this function is called. */
int ompi_coll_tuned_alltoallv_intra_do_this(void *sbuf, int *scounts, int *sdisps,
struct ompi_datatype_t *sdtype,
void* rbuf, int *rcounts, int *rdisps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoallv_intra_do_this selected algorithm %d ",
algorithm));
switch (algorithm) {
case (0):
return ompi_coll_tuned_alltoallv_intra_dec_fixed(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
case (1):
return ompi_coll_base_alltoallv_intra_basic_linear(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
case (2):
return ompi_coll_base_alltoallv_intra_pairwise(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:alltoall_intra_do_this attempt to select "
"algorithm %d when only 0-%d is valid.",
algorithm, ompi_coll_tuned_forced_max_algorithms[ALLTOALLV]));
return (MPI_ERR_ARG);
}


@ -0,0 +1,135 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mpi.h"
#include "opal/util/bit_ops.h"
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned.h"
#include "ompi/mca/coll/base/coll_base_topo.h"
#include "ompi/mca/coll/base/coll_base_util.h"
/* barrier algorithm variables */
static int coll_tuned_barrier_forced_algorithm = 0;
/* valid values for coll_tuned_barrier_forced_algorithm */
static mca_base_var_enum_value_t barrier_algorithms[] = {
{0, "ignore"},
{1, "linear"},
{2, "double_ring"},
{3, "recursive_doubling"},
{4, "bruck"},
{5, "two_proc"},
{6, "tree"},
{0, NULL}
};
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if it's forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map */
/* routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values */
/* and perms */
/* modules do not call this; they call the forced_getvalues routine instead */
int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
int cnt;
for( cnt = 0; NULL != barrier_algorithms[cnt].string; cnt++ );
ompi_coll_tuned_forced_max_algorithms[BARRIER] = cnt;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"barrier_algorithm_count",
"Number of barrier algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&cnt);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_barrier_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_barrier_algorithms", barrier_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"barrier_algorithm",
"Which barrier algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 double ring, 3: recursive doubling 4: bruck, 5: two proc only, 6: tree",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_barrier_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
return (MPI_SUCCESS);
}
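/*
 * Note (added commentary): barrier carries no message payload, so the
 * dynamic rule lookup later in this commit passes 0 as the message size
 * and rules are effectively matched on communicator size alone; forcing
 * an algorithm needs nothing beyond, e.g.:
 *
 *   mpirun --mca coll_tuned_use_dynamic_rules 1 \
 *          --mca coll_tuned_barrier_algorithm 3 ./app
 */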
int ompi_coll_tuned_barrier_intra_do_forced(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:barrier_intra_do_forced selected algorithm %d",
tuned_module->user_forced[BARRIER].algorithm));
switch (tuned_module->user_forced[BARRIER].algorithm) {
case (0): return ompi_coll_tuned_barrier_intra_dec_fixed(comm, module);
case (1): return ompi_coll_base_barrier_intra_basic_linear(comm, module);
case (2): return ompi_coll_base_barrier_intra_doublering(comm, module);
case (3): return ompi_coll_base_barrier_intra_recursivedoubling(comm, module);
case (4): return ompi_coll_base_barrier_intra_bruck(comm, module);
case (5): return ompi_coll_base_barrier_intra_two_procs(comm, module);
case (6): return ompi_coll_base_barrier_intra_tree(comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[BARRIER].algorithm,
ompi_coll_tuned_forced_max_algorithms[BARRIER]));
return (MPI_ERR_ARG);
}
int ompi_coll_tuned_barrier_intra_do_this (struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:barrier_intra_do_this selected algorithm %d topo fanin/out%d",
algorithm, faninout));
switch (algorithm) {
case (0): return ompi_coll_tuned_barrier_intra_dec_fixed(comm, module);
case (1): return ompi_coll_base_barrier_intra_basic_linear(comm, module);
case (2): return ompi_coll_base_barrier_intra_doublering(comm, module);
case (3): return ompi_coll_base_barrier_intra_recursivedoubling(comm, module);
case (4): return ompi_coll_base_barrier_intra_bruck(comm, module);
case (5): return ompi_coll_base_barrier_intra_two_procs(comm, module);
case (6): return ompi_coll_base_barrier_intra_tree(comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:barrier_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[BARRIER]));
return (MPI_ERR_ARG);
}


@ -0,0 +1,183 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned.h"
#include "ompi/mca/coll/base/coll_base_topo.h"
#include "ompi/mca/coll/base/coll_base_util.h"
/* bcast algorithm variables */
static int coll_tuned_bcast_algorithm_count = 6;
static int coll_tuned_bcast_forced_algorithm = 0;
static int coll_tuned_bcast_segment_size = 0;
static int coll_tuned_bcast_tree_fanout;
static int coll_tuned_bcast_chain_fanout;
/* valid values for coll_tuned_bcast_forced_algorithm */
static mca_base_var_enum_value_t bcast_algorithms[] = {
{0, "ignore"},
{1, "basic_linear"},
{2, "chain"},
{3, "pipeline"},
{4, "split_binary_tree"},
{5, "binary_tree"},
{6, "binomial"},
{0, NULL}
};
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if it's forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values and perms */
/* modules do not call this; they call the forced_getvalues routine instead */
int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
int cnt;
for( cnt = 0; NULL != bcast_algorithms[cnt].string; cnt++ );
ompi_coll_tuned_forced_max_algorithms[BCAST] = cnt;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_count",
"Number of bcast algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&cnt);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_bcast_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_bcast_algorithms", bcast_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm",
"Which bcast algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 chain, 3: pipeline, 4: split binary tree, 5: binary tree, 6: binomial tree.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_bcast_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_bcast_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_segmentsize",
"Segment size in bytes used by default for bcast algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_bcast_segment_size);
coll_tuned_bcast_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_tree_fanout",
"Fanout for n-tree used for bcast algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_bcast_tree_fanout);
coll_tuned_bcast_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_chain_fanout",
"Fanout for chains used for bcast algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_bcast_chain_fanout);
return (MPI_SUCCESS);
}
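/*
 * Usage sketch (assumed command line): chain bcast (enum value 2) takes
 * both a segment size and a chain fanout, so a 32 KB segmented chain with
 * fanout 4 would be forced with:
 *
 *   mpirun --mca coll_tuned_use_dynamic_rules 1 \
 *          --mca coll_tuned_bcast_algorithm 2 \
 *          --mca coll_tuned_bcast_algorithm_segmentsize 32768 \
 *          --mca coll_tuned_bcast_algorithm_chain_fanout 4 ./app
 */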
int ompi_coll_tuned_bcast_intra_do_forced(void *buf, int count,
struct ompi_datatype_t *dtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced algorithm %d",
tuned_module->user_forced[BCAST].algorithm));
switch (tuned_module->user_forced[BCAST].algorithm) {
case (0): return ompi_coll_tuned_bcast_intra_dec_fixed( buf, count, dtype, root, comm, module );
case (1): return ompi_coll_base_bcast_intra_basic_linear( buf, count, dtype, root, comm, module );
case (2): return ompi_coll_base_bcast_intra_chain( buf, count, dtype, root, comm, module,
tuned_module->user_forced[BCAST].segsize,
tuned_module->user_forced[BCAST].chain_fanout );
case (3): return ompi_coll_base_bcast_intra_pipeline( buf, count, dtype, root, comm, module,
tuned_module->user_forced[BCAST].segsize );
case (4): return ompi_coll_base_bcast_intra_split_bintree( buf, count, dtype, root, comm, module,
tuned_module->user_forced[BCAST].segsize );
case (5): return ompi_coll_base_bcast_intra_bintree( buf, count, dtype, root, comm, module,
tuned_module->user_forced[BCAST].segsize );
case (6): return ompi_coll_base_bcast_intra_binomial( buf, count, dtype, root, comm, module,
tuned_module->user_forced[BCAST].segsize );
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[BCAST].algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST]));
return (MPI_ERR_ARG);
}
int ompi_coll_tuned_bcast_intra_do_this(void *buf, int count,
struct ompi_datatype_t *dtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0):
return ompi_coll_tuned_bcast_intra_dec_fixed( buf, count, dtype, root, comm, module );
case (1):
return ompi_coll_base_bcast_intra_basic_linear( buf, count, dtype, root, comm, module );
case (2):
return ompi_coll_base_bcast_intra_chain( buf, count, dtype, root, comm, module, segsize, faninout );
case (3):
return ompi_coll_base_bcast_intra_pipeline( buf, count, dtype, root, comm, module, segsize );
case (4):
return ompi_coll_base_bcast_intra_split_bintree( buf, count, dtype, root, comm, module, segsize );
case (5):
return ompi_coll_base_bcast_intra_bintree( buf, count, dtype, root, comm, module, segsize );
case (6):
return ompi_coll_base_bcast_intra_binomial( buf, count, dtype, root, comm, module, segsize );
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:bcast_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[BCAST]));
return (MPI_ERR_ARG);
}


@ -2,7 +2,7 @@
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -44,7 +44,6 @@ const char *ompi_coll_tuned_component_version_string =
*/
int ompi_coll_tuned_stream = -1;
int ompi_coll_tuned_priority = 30;
int ompi_coll_tuned_preallocate_memory_comm_size_limit = (32 * 1024);
bool ompi_coll_tuned_use_dynamic_rules = false;
char* ompi_coll_tuned_dynamic_rules_filename = (char*) NULL;
int ompi_coll_tuned_init_tree_fanout = 4;
@ -121,16 +120,6 @@ static int tuned_register(void)
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_coll_tuned_priority);
/* parameter for pre-allocated memory requests etc */
ompi_coll_tuned_preallocate_memory_comm_size_limit = (32 * 1024);
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"pre_allocate_memory_comm_size_limit",
"Size of communicator were we stop pre-allocating memory for the fixed internal buffer used for message requests etc that is hung off the communicator data segment. I.e. if you have a 100'000 nodes you might not want to pre-allocate 200'000 request handle slots per communicator instance!",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_6,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_coll_tuned_preallocate_memory_comm_size_limit);
/* some initial guesses at topology parameters */
ompi_coll_tuned_init_tree_fanout = 4;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
@ -272,56 +261,13 @@ static int tuned_close(void)
static void
mca_coll_tuned_module_construct(mca_coll_tuned_module_t *module)
{
module->tuned_data = NULL;
}
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
static void
mca_coll_tuned_module_destruct(mca_coll_tuned_module_t *module)
{
mca_coll_tuned_comm_t *data;
/* Free the space in the data mpool and the data hanging off the
communicator */
data = module->tuned_data;
if (NULL != data) {
#if OPAL_ENABLE_DEBUG
/* Reset the reqs to NULL/0 -- they'll be freed as part of freeing
the general c_coll_selected_data */
data->mcct_reqs = NULL;
data->mcct_num_reqs = 0;
#endif
/* free any cached information that has been allocated */
if (data->cached_ntree) { /* destroy general tree if defined */
ompi_coll_tuned_topo_destroy_tree (&data->cached_ntree);
}
if (data->cached_bintree) { /* destroy bintree if defined */
ompi_coll_tuned_topo_destroy_tree (&data->cached_bintree);
}
if (data->cached_bmtree) { /* destroy bmtree if defined */
ompi_coll_tuned_topo_destroy_tree (&data->cached_bmtree);
}
if (data->cached_in_order_bmtree) { /* destroy bmtree if defined */
ompi_coll_tuned_topo_destroy_tree (&data->cached_in_order_bmtree);
}
if (data->cached_chain) { /* destroy general chain if defined */
ompi_coll_tuned_topo_destroy_tree (&data->cached_chain);
}
if (data->cached_pipeline) { /* destroy pipeline if defined */
ompi_coll_tuned_topo_destroy_tree (&data->cached_pipeline);
}
if (data->cached_in_order_bintree) { /* destroy in order bintree if defined */
ompi_coll_tuned_topo_destroy_tree (&data->cached_in_order_bintree);
}
free(data);
for( int i = 0; i < COLLCOUNT; i++ ) {
tuned_module->user_forced[i].algorithm = 0;
tuned_module->com_rules[i] = NULL;
}
}
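/*
 * Note (added commentary, not in the diff): the destructor passed to
 * OBJ_CLASS_INSTANCE below becomes NULL because, with tuned_data gone,
 * the module no longer owns cached topologies or request arrays; zeroing
 * user_forced and com_rules at construct time is the only per-module
 * setup left.
 */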
OBJ_CLASS_INSTANCE(mca_coll_tuned_module_t,
mca_coll_base_module_t,
mca_coll_tuned_module_construct,
mca_coll_tuned_module_destruct);
OBJ_CLASS_INSTANCE(mca_coll_tuned_module_t, mca_coll_base_module_t,
mca_coll_tuned_module_construct, NULL);


@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -28,9 +28,6 @@
#include "ompi/mca/coll/base/coll_tags.h"
#include "coll_tuned.h"
#include "coll_tuned.h"
/*
* Notes on evaluation rules and ordering
*
@ -58,12 +55,11 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_allreduce_intra_dec_dynamic"));
/* check to see if we have some filebased rules */
if (data->com_rules[ALLREDUCE]) {
if (tuned_module->com_rules[ALLREDUCE]) {
/* we do, so calc the message size or whatever we need and use this for the evaluation */
int alg, faninout, segsize, ignoreme;
size_t dsize;
@ -71,7 +67,7 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count,
ompi_datatype_type_size (dtype, &dsize);
dsize *= count;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLREDUCE],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[ALLREDUCE],
dsize, &faninout, &segsize, &ignoreme);
if (alg) {
@ -82,7 +78,7 @@ ompi_coll_tuned_allreduce_intra_dec_dynamic (void *sbuf, void *rbuf, int count,
} /* found a method */
} /*end if any com rules to check */
if (data->user_forced[ALLREDUCE].algorithm) {
if (tuned_module->user_forced[ALLREDUCE].algorithm) {
return ompi_coll_tuned_allreduce_intra_do_forced (sbuf, rbuf, count, dtype, op,
comm, module);
}
@ -106,12 +102,11 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_alltoall_intra_dec_dynamic"));
/* check to see if we have some filebased rules */
if (data->com_rules[ALLTOALL]) {
if (tuned_module->com_rules[ALLTOALL]) {
/* we do, so calc the message size or whatever we need and use this for the evaluation */
int comsize;
int alg, faninout, segsize, max_requests;
@ -121,7 +116,7 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
comsize = ompi_comm_size(comm);
dsize *= (ptrdiff_t)comsize * (ptrdiff_t)scount;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLTOALL],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[ALLTOALL],
dsize, &faninout, &segsize, &max_requests);
if (alg) {
@ -133,7 +128,7 @@ int ompi_coll_tuned_alltoall_intra_dec_dynamic(void *sbuf, int scount,
} /* found a method */
} /*end if any com rules to check */
if (data->user_forced[ALLTOALL].algorithm) {
if (tuned_module->user_forced[ALLTOALL].algorithm) {
return ompi_coll_tuned_alltoall_intra_do_forced (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
@ -157,7 +152,6 @@ int ompi_coll_tuned_alltoallv_intra_dec_dynamic(void *sbuf, int *scounts, int *s
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_alltoallv_intra_dec_dynamic"));
@ -167,10 +161,10 @@ int ompi_coll_tuned_alltoallv_intra_dec_dynamic(void *sbuf, int *scounts, int *s
* This allows the users to specify the alltoallv algorithm to be used only
* based on the communicator size.
*/
if (data->com_rules[ALLTOALLV]) {
if (tuned_module->com_rules[ALLTOALLV]) {
int alg, faninout, segsize, max_requests;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLTOALLV],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[ALLTOALLV],
0, &faninout, &segsize, &max_requests);
if (alg) {
@ -182,7 +176,7 @@ int ompi_coll_tuned_alltoallv_intra_dec_dynamic(void *sbuf, int *scounts, int *s
} /* found a method */
} /*end if any com rules to check */
if (data->user_forced[ALLTOALLV].algorithm) {
if (tuned_module->user_forced[ALLTOALLV].algorithm) {
return ompi_coll_tuned_alltoallv_intra_do_forced(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps, rdtype,
comm, module);
@ -203,16 +197,15 @@ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"ompi_coll_tuned_barrier_intra_dec_dynamic"));
/* check to see if we have some filebased rules */
if (data->com_rules[BARRIER]) {
if (tuned_module->com_rules[BARRIER]) {
/* we do, so calc the message size or whatever we need and use this for the evaluation */
int alg, faninout, segsize, ignoreme;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[BARRIER],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[BARRIER],
0, &faninout, &segsize, &ignoreme);
if (alg) {
@ -222,7 +215,7 @@ int ompi_coll_tuned_barrier_intra_dec_dynamic(struct ompi_communicator_t *comm,
} /* found a method */
} /*end if any com rules to check */
if (data->user_forced[BARRIER].algorithm) {
if (tuned_module->user_forced[BARRIER].algorithm) {
return ompi_coll_tuned_barrier_intra_do_forced (comm, module);
}
return ompi_coll_tuned_barrier_intra_dec_fixed (comm, module);
@ -241,12 +234,11 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:bcast_intra_dec_dynamic"));
/* check to see if we have some filebased rules */
if (data->com_rules[BCAST]) {
if (tuned_module->com_rules[BCAST]) {
/* we do, so calc the message size or whatever we need and use this for the evaluation */
int alg, faninout, segsize, ignoreme;
size_t dsize;
@ -254,7 +246,7 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
ompi_datatype_type_size (datatype, &dsize);
dsize *= count;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[BCAST],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[BCAST],
dsize, &faninout, &segsize, &ignoreme);
if (alg) {
@ -266,7 +258,7 @@ int ompi_coll_tuned_bcast_intra_dec_dynamic(void *buff, int count,
} /*end if any com rules to check */
if (data->user_forced[BCAST].algorithm) {
if (tuned_module->user_forced[BCAST].algorithm) {
return ompi_coll_tuned_bcast_intra_do_forced (buff, count, datatype, root,
comm, module);
}
@ -289,12 +281,11 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_intra_dec_dynamic"));
/* check to see if we have some filebased rules */
if (data->com_rules[REDUCE]) {
if (tuned_module->com_rules[REDUCE]) {
/* we do, so calc the message size or whatever we need and use this for the evaluation */
int alg, faninout, segsize, max_requests;
@ -303,7 +294,7 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf,
ompi_datatype_type_size (datatype, &dsize);
dsize *= count;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[REDUCE],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[REDUCE],
dsize, &faninout, &segsize, &max_requests);
if (alg) {
@ -317,7 +308,7 @@ int ompi_coll_tuned_reduce_intra_dec_dynamic( void *sendbuf, void *recvbuf,
} /* found a method */
} /*end if any com rules to check */
if (data->user_forced[REDUCE].algorithm) {
if (tuned_module->user_forced[REDUCE].algorithm) {
return ompi_coll_tuned_reduce_intra_do_forced (sendbuf, recvbuf, count, datatype,
op, root,
comm, module);
@ -344,12 +335,11 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(void *sbuf, void *rbuf,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_scatter_intra_dec_dynamic"));
/* check to see if we have some filebased rules */
if (data->com_rules[REDUCESCATTER]) {
if (tuned_module->com_rules[REDUCESCATTER]) {
/* we do, so calc the message size or whatever we need and use
this for the evaluation */
int alg, faninout, segsize, ignoreme, i, count, size;
@ -359,7 +349,7 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(void *sbuf, void *rbuf,
ompi_datatype_type_size (dtype, &dsize);
dsize *= count;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[REDUCESCATTER],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[REDUCESCATTER],
dsize, &faninout,
&segsize, &ignoreme);
if (alg) {
@ -372,7 +362,7 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(void *sbuf, void *rbuf,
} /* found a method */
} /*end if any com rules to check */
if (data->user_forced[REDUCESCATTER].algorithm) {
if (tuned_module->user_forced[REDUCESCATTER].algorithm) {
return ompi_coll_tuned_reduce_scatter_intra_do_forced (sbuf, rbuf, rcounts,
dtype, op,
comm, module);
@ -399,12 +389,11 @@ int ompi_coll_tuned_allgather_intra_dec_dynamic(void *sbuf, int scount,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_allgather_intra_dec_dynamic"));
if (data->com_rules[ALLGATHER]) {
if (tuned_module->com_rules[ALLGATHER]) {
/* We have file based rules:
- calculate message size and other necessary information */
int comsize;
@ -415,7 +404,7 @@ int ompi_coll_tuned_allgather_intra_dec_dynamic(void *sbuf, int scount,
comsize = ompi_comm_size(comm);
dsize *= (ptrdiff_t)comsize * (ptrdiff_t)scount;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLGATHER],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[ALLGATHER],
dsize, &faninout, &segsize, &ignoreme);
if (alg) {
/* we have found a valid choice from the file based rules for
@ -428,7 +417,7 @@ int ompi_coll_tuned_allgather_intra_dec_dynamic(void *sbuf, int scount,
}
/* We do not have file based rules */
if (data->user_forced[ALLGATHER].algorithm) {
if (tuned_module->user_forced[ALLGATHER].algorithm) {
/* User-forced algorithm */
return ompi_coll_tuned_allgather_intra_do_forced (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
@ -459,12 +448,11 @@ int ompi_coll_tuned_allgatherv_intra_dec_dynamic(void *sbuf, int scount,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_allgatherv_intra_dec_dynamic"));
if (data->com_rules[ALLGATHERV]) {
if (tuned_module->com_rules[ALLGATHERV]) {
/* We have file based rules:
- calculate message size and other necessary information */
int comsize, i;
@ -476,7 +464,7 @@ int ompi_coll_tuned_allgatherv_intra_dec_dynamic(void *sbuf, int scount,
total_size = 0;
for (i = 0; i < comsize; i++) { total_size += dsize * rcounts[i]; }
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[ALLGATHERV],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[ALLGATHERV],
total_size, &faninout, &segsize, &ignoreme);
if (alg) {
/* we have found a valid choice from the file based rules for
@ -490,7 +478,7 @@ int ompi_coll_tuned_allgatherv_intra_dec_dynamic(void *sbuf, int scount,
}
/* We do not have file based rules */
if (data->user_forced[ALLGATHERV].algorithm) {
if (tuned_module->user_forced[ALLGATHERV].algorithm) {
/* User-forced algorithm */
return ompi_coll_tuned_allgatherv_intra_do_forced (sbuf, scount, sdtype,
rbuf, rcounts,
@ -514,7 +502,6 @@ int ompi_coll_tuned_gather_intra_dec_dynamic(void *sbuf, int scount,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_gather_intra_dec_dynamic"));
@ -522,7 +509,7 @@ int ompi_coll_tuned_gather_intra_dec_dynamic(void *sbuf, int scount,
/**
* check to see if we have some filebased rules.
*/
if (data->com_rules[GATHER]) {
if (tuned_module->com_rules[GATHER]) {
int comsize, alg, faninout, segsize, max_requests;
size_t dsize;
@ -530,7 +517,7 @@ int ompi_coll_tuned_gather_intra_dec_dynamic(void *sbuf, int scount,
ompi_datatype_type_size (sdtype, &dsize);
dsize *= comsize;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[GATHER],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[GATHER],
dsize, &faninout, &segsize, &max_requests);
if (alg) {
@ -542,7 +529,7 @@ int ompi_coll_tuned_gather_intra_dec_dynamic(void *sbuf, int scount,
} /* found a method */
} /*end if any com rules to check */
if (data->user_forced[GATHER].algorithm) {
if (tuned_module->user_forced[GATHER].algorithm) {
return ompi_coll_tuned_gather_intra_do_forced (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
@ -561,7 +548,6 @@ int ompi_coll_tuned_scatter_intra_dec_dynamic(void *sbuf, int scount,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_scatter_intra_dec_dynamic"));
@ -569,7 +555,7 @@ int ompi_coll_tuned_scatter_intra_dec_dynamic(void *sbuf, int scount,
/**
* check to see if we have some filebased rules.
*/
if (data->com_rules[SCATTER]) {
if (tuned_module->com_rules[SCATTER]) {
int comsize, alg, faninout, segsize, max_requests;
size_t dsize;
@ -577,7 +563,7 @@ int ompi_coll_tuned_scatter_intra_dec_dynamic(void *sbuf, int scount,
ompi_datatype_type_size (sdtype, &dsize);
dsize *= comsize;
alg = ompi_coll_tuned_get_target_method_params (data->com_rules[SCATTER],
alg = ompi_coll_tuned_get_target_method_params (tuned_module->com_rules[SCATTER],
dsize, &faninout, &segsize, &max_requests);
if (alg) {
@ -589,7 +575,7 @@ int ompi_coll_tuned_scatter_intra_dec_dynamic(void *sbuf, int scount,
} /* found a method */
} /*end if any com rules to check */
if (data->user_forced[SCATTER].algorithm) {
if (tuned_module->user_forced[SCATTER].algorithm) {
return ompi_coll_tuned_scatter_intra_do_forced (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
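Every *_intra_dec_dynamic routine in this file follows the same three-tier dispatch: file-based rules for this communicator first, then a user-forced algorithm, then the compiled-in fixed decision. A minimal self-contained sketch of that pattern; the types and helpers here (example_module, lookup_rule) are stand-ins, not OMPI API:

#include <stddef.h>

struct rule;                               /* stands in for ompi_coll_com_rule_t */
struct example_module {
    struct rule *com_rules;                /* file-based rules, or NULL */
    int          user_forced_algorithm;    /* 0 = nothing forced */
};

/* stands in for ompi_coll_tuned_get_target_method_params(); returns 0
 * when no rule matches the message size */
static int lookup_rule(struct rule *r, size_t dsize) { (void)r; (void)dsize; return 0; }

static int example_dec_dynamic(struct example_module *m, size_t dsize)
{
    if (m->com_rules) {                    /* tier 1: file-based rules */
        int alg = lookup_rule(m->com_rules, dsize);
        if (alg) return alg;               /* real code: ..._do_this(alg, ...) */
    }
    if (m->user_forced_algorithm)          /* tier 2: MCA-forced algorithm */
        return m->user_forced_algorithm;   /* real code: ..._do_forced(...) */
    return 0;                              /* tier 3: fall back to ..._dec_fixed() */
}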

View file

@ -3,7 +3,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -31,7 +31,6 @@
#include "ompi/op/op.h"
#include "coll_tuned.h"
/*
* allreduce_intra
*
@ -40,11 +39,11 @@
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
ompi_coll_tuned_allreduce_intra_dec_fixed(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
size_t dsize, block_dsize;
int comm_size = ompi_comm_size(comm);
@ -62,26 +61,26 @@ ompi_coll_tuned_allreduce_intra_dec_fixed (void *sbuf, void *rbuf, int count,
block_dsize = dsize * (ptrdiff_t)count;
if (block_dsize < intermediate_message) {
return (ompi_coll_tuned_allreduce_intra_recursivedoubling (sbuf, rbuf,
count, dtype,
op, comm, module));
return (ompi_coll_base_allreduce_intra_recursivedoubling(sbuf, rbuf,
count, dtype,
op, comm, module));
}
if( ompi_op_is_commute(op) && (count > comm_size) ) {
const size_t segment_size = 1 << 20; /* 1 MB */
if (((size_t)comm_size * (size_t)segment_size >= block_dsize)) {
return (ompi_coll_tuned_allreduce_intra_ring (sbuf, rbuf, count, dtype,
op, comm, module));
return (ompi_coll_base_allreduce_intra_ring(sbuf, rbuf, count, dtype,
op, comm, module));
} else {
return (ompi_coll_tuned_allreduce_intra_ring_segmented (sbuf, rbuf,
count, dtype,
op, comm, module,
segment_size));
return (ompi_coll_base_allreduce_intra_ring_segmented(sbuf, rbuf,
count, dtype,
op, comm, module,
segment_size));
}
}
return (ompi_coll_tuned_allreduce_intra_nonoverlapping (sbuf, rbuf, count,
dtype, op, comm, module));
return (ompi_coll_base_allreduce_intra_nonoverlapping(sbuf, rbuf, count,
dtype, op, comm, module));
}
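Condensed, the allreduce selection above comes down to two thresholds. A self-contained sketch; the intermediate_message cutoff is defined just above the shown hunk, so it is taken as a parameter here:

#include <stdbool.h>
#include <stddef.h>

enum ar_alg { AR_RECURSIVE_DOUBLING, AR_RING, AR_SEGMENTED_RING, AR_NONOVERLAPPING };

static enum ar_alg allreduce_fixed_choice(size_t block_dsize, int comm_size,
                                          int count, bool op_is_commutative,
                                          size_t intermediate_message)
{
    const size_t segment_size = 1 << 20;           /* 1 MB, as in the code above */

    if (block_dsize < intermediate_message)
        return AR_RECURSIVE_DOUBLING;              /* small messages */
    if (op_is_commutative && count > comm_size) {
        /* plain ring while one 1 MB segment per peer still covers the data */
        if ((size_t)comm_size * segment_size >= block_dsize)
            return AR_RING;
        return AR_SEGMENTED_RING;                  /* very large messages */
    }
    return AR_NONOVERLAPPING;                      /* non-commutative fallback */
}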
/*
@ -109,9 +108,9 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
/* special case */
if (communicator_size==2) {
return ompi_coll_tuned_alltoall_intra_two_procs(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
return ompi_coll_base_alltoall_intra_two_procs(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
}
/* Decision function based on measurement on Grig cluster at
@ -123,19 +122,19 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
if ((block_dsize < (size_t) ompi_coll_tuned_alltoall_small_msg)
&& (communicator_size > 12)) {
return ompi_coll_tuned_alltoall_intra_bruck(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
return ompi_coll_base_alltoall_intra_bruck(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
} else if (block_dsize < (size_t) ompi_coll_tuned_alltoall_intermediate_msg) {
return ompi_coll_tuned_alltoall_intra_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
return ompi_coll_base_alltoall_intra_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
}
return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
return ompi_coll_base_alltoall_intra_pairwise(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
#if 0
/* previous decision */
@ -148,12 +147,12 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
ompi_comm_rank(comm), communicator_size, total_dsize));
if (communicator_size >= 12 && total_dsize <= 768) {
return ompi_coll_tuned_alltoall_intra_bruck (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
return ompi_coll_base_alltoall_intra_bruck(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
}
if (total_dsize <= 131072) {
return ompi_coll_tuned_alltoall_intra_basic_linear (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
return ompi_coll_base_alltoall_intra_basic_linear(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
}
return ompi_coll_tuned_alltoall_intra_pairwise (sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
return ompi_coll_base_alltoall_intra_pairwise(sbuf, scount, sdtype, rbuf, rcount, rdtype, comm, module);
#endif
}
@ -170,9 +169,9 @@ int ompi_coll_tuned_alltoallv_intra_dec_fixed(void *sbuf, int *scounts, int *sdi
mca_coll_base_module_t *module)
{
/* For starters, just keep the original algorithm. */
return ompi_coll_tuned_alltoallv_intra_pairwise(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps,rdtype,
comm, module);
return ompi_coll_base_alltoallv_intra_pairwise(sbuf, scounts, sdisps, sdtype,
rbuf, rcounts, rdisps,rdtype,
comm, module);
}
@ -192,7 +191,7 @@ int ompi_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm,
communicator_size));
if( 2 == communicator_size )
return ompi_coll_tuned_barrier_intra_two_procs(comm, module);
return ompi_coll_base_barrier_intra_two_procs(comm, module);
/**
* Basic optimisation. If we have a power-of-2 number of nodes
* then use the recursive doubling algorithm, otherwise
@ -203,14 +202,12 @@ int ompi_coll_tuned_barrier_intra_dec_fixed(struct ompi_communicator_t *comm,
for( ; communicator_size > 0; communicator_size >>= 1 ) {
if( communicator_size & 0x1 ) {
if( has_one )
return ompi_coll_tuned_barrier_intra_bruck(comm, module);
return ompi_coll_base_barrier_intra_bruck(comm, module);
has_one = true;
}
}
}
return ompi_coll_tuned_barrier_intra_recursivedoubling(comm, module);
/* return ompi_coll_tuned_barrier_intra_linear(comm); */
/* return ompi_coll_tuned_barrier_intra_doublering(comm); */
return ompi_coll_base_barrier_intra_recursivedoubling(comm, module);
}
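The bit-scan loop above is a hand-rolled test for "more than one bit set in communicator_size", i.e. not a power of two. An equivalent self-contained check:

#include <stdbool.h>

/* true iff v has more than one set bit; v & (v - 1) clears the lowest
 * set bit, leaving 0 exactly when v was a power of two (or zero) */
static bool more_than_one_bit(unsigned int v)
{
    return (v & (v - 1)) != 0;
}

/* mirroring the decision above:
 *   more_than_one_bit(communicator_size) -> bruck
 *   otherwise                            -> recursive doubling */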
@ -256,80 +253,80 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
if ((message_size < small_message_size) || (count <= 1)) {
/* Binomial without segmentation */
segsize = 0;
return ompi_coll_tuned_bcast_intra_binomial (buff, count, datatype,
root, comm, module,
segsize);
return ompi_coll_base_bcast_intra_binomial(buff, count, datatype,
root, comm, module,
segsize);
} else if (message_size < intermediate_message_size) {
/* SplittedBinary with 1KB segments */
segsize = 1024;
return ompi_coll_tuned_bcast_intra_split_bintree(buff, count, datatype,
root, comm, module,
segsize);
return ompi_coll_base_bcast_intra_split_bintree(buff, count, datatype,
root, comm, module,
segsize);
}
/* Handle large message sizes */
else if (communicator_size < (a_p128 * message_size + b_p128)) {
/* Pipeline with 128KB segments */
segsize = 1024 << 7;
return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype,
root, comm, module,
segsize);
return ompi_coll_base_bcast_intra_pipeline(buff, count, datatype,
root, comm, module,
segsize);
} else if (communicator_size < 13) {
/* Split Binary with 8KB segments */
segsize = 1024 << 3;
return ompi_coll_tuned_bcast_intra_split_bintree(buff, count, datatype,
root, comm, module,
segsize);
return ompi_coll_base_bcast_intra_split_bintree(buff, count, datatype,
root, comm, module,
segsize);
} else if (communicator_size < (a_p64 * message_size + b_p64)) {
/* Pipeline with 64KB segments */
segsize = 1024 << 6;
return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype,
root, comm, module,
segsize);
return ompi_coll_base_bcast_intra_pipeline(buff, count, datatype,
root, comm, module,
segsize);
} else if (communicator_size < (a_p16 * message_size + b_p16)) {
/* Pipeline with 16KB segments */
segsize = 1024 << 4;
return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype,
root, comm, module,
segsize);
return ompi_coll_base_bcast_intra_pipeline(buff, count, datatype,
root, comm, module,
segsize);
}
/* Pipeline with 8KB segments */
segsize = 1024 << 3;
return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype,
root, comm, module,
segsize);
return ompi_coll_base_bcast_intra_pipeline(buff, count, datatype,
root, comm, module,
segsize);
#if 0
/* this is based on gige measurements */
if (communicator_size < 4) {
return ompi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm, module);
return ompi_coll_base_bcast_intra_basic_linear(buff, count, datatype, root, comm, module);
}
if (communicator_size == 4) {
if (message_size < 524288) segsize = 0;
else segsize = 16384;
return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, module, segsize);
return ompi_coll_base_bcast_intra_bintree(buff, count, datatype, root, comm, module, segsize);
}
if (communicator_size <= 8 && message_size < 4096) {
return ompi_coll_tuned_bcast_intra_basic_linear (buff, count, datatype, root, comm, module);
return ompi_coll_base_bcast_intra_basic_linear(buff, count, datatype, root, comm, module);
}
if (communicator_size > 8 && message_size >= 32768 && message_size < 524288) {
segsize = 16384;
return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, module, segsize);
return ompi_coll_base_bcast_intra_bintree(buff, count, datatype, root, comm, module, segsize);
}
if (message_size >= 524288) {
segsize = 16384;
return ompi_coll_tuned_bcast_intra_pipeline (buff, count, datatype, root, comm, module, segsize);
return ompi_coll_base_bcast_intra_pipeline(buff, count, datatype, root, comm, module, segsize);
}
segsize = 0;
/* once tested can swap this back in */
/* return ompi_coll_tuned_bcast_intra_bmtree (buff, count, datatype, root, comm, segsize); */
return ompi_coll_tuned_bcast_intra_bintree (buff, count, datatype, root, comm, module, segsize);
/* return ompi_coll_base_bcast_intra_bmtree(buff, count, datatype, root, comm, segsize); */
return ompi_coll_base_bcast_intra_bintree(buff, count, datatype, root, comm, module, segsize);
#endif /* 0 */
}
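The pipeline/split-binary segment sizes above are written as shifts of 1 KiB (1024 << 7 is 128 KiB, and so on). A quick self-contained sanity check of that arithmetic:

#include <assert.h>

int main(void)
{
    assert((1024 << 7) == 128 * 1024);   /* pipeline, large messages */
    assert((1024 << 6) ==  64 * 1024);   /* pipeline, mid-size */
    assert((1024 << 4) ==  16 * 1024);   /* pipeline, smaller */
    assert((1024 << 3) ==   8 * 1024);   /* split-binary and final pipeline */
    return 0;
}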
@ -372,9 +369,9 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
*/
if( !ompi_op_is_commute(op) ) {
if ((communicator_size < 12) && (message_size < 2048)) {
return ompi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm, module);
return ompi_coll_base_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm, module);
}
return ompi_coll_tuned_reduce_intra_in_order_binary (sendbuf, recvbuf, count, datatype, op, root, comm, module,
return ompi_coll_base_reduce_intra_in_order_binary (sendbuf, recvbuf, count, datatype, op, root, comm, module,
0, max_requests);
}
@ -384,27 +381,27 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
if ((communicator_size < 8) && (message_size < 512)){
/* Linear_0K */
return ompi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm, module);
return ompi_coll_base_reduce_intra_basic_linear(sendbuf, recvbuf, count, datatype, op, root, comm, module);
} else if (((communicator_size < 8) && (message_size < 20480)) ||
(message_size < 2048) || (count <= 1)) {
/* Binomial_0K */
segsize = 0;
return ompi_coll_tuned_reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm, module,
return ompi_coll_base_reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm, module,
segsize, max_requests);
} else if (communicator_size > (a1 * message_size + b1)) {
/* Binomial_1K */
segsize = 1024;
return ompi_coll_tuned_reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm, module,
return ompi_coll_base_reduce_intra_binomial(sendbuf, recvbuf, count, datatype, op, root, comm, module,
segsize, max_requests);
} else if (communicator_size > (a2 * message_size + b2)) {
/* Pipeline_1K */
segsize = 1024;
return ompi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, module,
segsize, max_requests);
return ompi_coll_base_reduce_intra_pipeline(sendbuf, recvbuf, count, datatype, op, root, comm, module,
segsize, max_requests);
} else if (communicator_size > (a3 * message_size + b3)) {
/* Binary_32K */
segsize = 32*1024;
return ompi_coll_tuned_reduce_intra_binary( sendbuf, recvbuf, count, datatype, op, root,
return ompi_coll_base_reduce_intra_binary( sendbuf, recvbuf, count, datatype, op, root,
comm, module, segsize, max_requests);
}
if (communicator_size > (a4 * message_size + b4)) {
@ -414,8 +411,8 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
/* Pipeline_64K */
segsize = 64*1024;
}
return ompi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, module,
segsize, max_requests);
return ompi_coll_base_reduce_intra_pipeline(sendbuf, recvbuf, count, datatype, op, root, comm, module,
segsize, max_requests);
#if 0
/* for small messages use linear algorithm */
@ -424,8 +421,7 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
fanout = communicator_size - 1;
/* when linear implemented or taken from basic put here, right now using chain as a linear system */
/* it is implemented and I shouldn't be calling a chain with a fanout bigger than MAXTREEFANOUT from topo.h! */
return ompi_coll_tuned_reduce_intra_basic_linear (sendbuf, recvbuf, count, datatype, op, root, comm, module);
/* return ompi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, segsize, fanout); */
return ompi_coll_base_reduce_intra_basic_linear(sendbuf, recvbuf, count, datatype, op, root, comm, module);
}
if (message_size < 524288) {
if (message_size <= 65536 ) {
@ -437,12 +433,12 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
}
/* later swap this for a binary tree */
/* fanout = 2; */
return ompi_coll_tuned_reduce_intra_chain (sendbuf, recvbuf, count, datatype, op, root, comm, module,
segsize, fanout, max_requests);
return ompi_coll_base_reduce_intra_chain(sendbuf, recvbuf, count, datatype, op, root, comm, module,
segsize, fanout, max_requests);
}
segsize = 1024;
return ompi_coll_tuned_reduce_intra_pipeline (sendbuf, recvbuf, count, datatype, op, root, comm, module,
segsize, max_requests);
return ompi_coll_base_reduce_intra_pipeline(sendbuf, recvbuf, count, datatype, op, root, comm, module,
segsize, max_requests);
#endif /* 0 */
}
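For non-commutative operations the choice above is binary; a self-contained sketch of that branch (the commutative branch uses the a1..b4 linear fits shown in the hunk):

#include <stddef.h>

enum rd_alg { RD_BASIC_LINEAR, RD_IN_ORDER_BINARY };

static enum rd_alg reduce_fixed_noncommutative(int communicator_size,
                                               size_t message_size)
{
    /* small jobs with small payloads keep the plain linear reduce, which
     * preserves operand order trivially; everything else uses the
     * in-order binary tree (segsize 0, bounded by max_requests above) */
    if (communicator_size < 12 && message_size < 2048)
        return RD_BASIC_LINEAR;
    return RD_IN_ORDER_BINARY;
}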
@ -479,9 +475,9 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_fixed( void *sbuf, void *rbuf,
}
if( !ompi_op_is_commute(op) ) {
return ompi_coll_tuned_reduce_scatter_intra_nonoverlapping (sbuf, rbuf, rcounts,
dtype, op,
comm, module);
return ompi_coll_base_reduce_scatter_intra_nonoverlapping(sbuf, rbuf, rcounts,
dtype, op,
comm, module);
}
total_message_size *= dsize;
@ -493,11 +489,11 @@ int ompi_coll_tuned_reduce_scatter_intra_dec_fixed( void *sbuf, void *rbuf,
((total_message_size <= large_message_size) && (pow2 == comm_size)) ||
(comm_size >= a * total_message_size + b)) {
return
ompi_coll_tuned_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts,
dtype, op,
comm, module);
ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts,
dtype, op,
comm, module);
}
return ompi_coll_tuned_reduce_scatter_intra_ring(sbuf, rbuf, rcounts,
return ompi_coll_base_reduce_scatter_intra_ring(sbuf, rbuf, rcounts,
dtype, op,
comm, module);
}
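The same dispatch, condensed into one function. The leading small-message condition sits outside the shown hunk, so its exact form here is an assumption; the threshold constants and the a, b fit coefficients are taken as parameters:

#include <stdbool.h>
#include <stddef.h>

enum rs_alg { RS_NONOVERLAPPING, RS_RECURSIVE_HALVING, RS_RING };

static enum rs_alg reduce_scatter_fixed_choice(bool op_is_commutative,
                                               int comm_size, int pow2,
                                               size_t total_message_size,
                                               size_t small_message_size,  /* assumed */
                                               size_t large_message_size,
                                               double a, double b)
{
    if (!op_is_commutative)
        return RS_NONOVERLAPPING;        /* Reduce followed by Scatterv */
    if ((total_message_size <= small_message_size) ||
        ((total_message_size <= large_message_size) && (pow2 == comm_size)) ||
        (comm_size >= a * total_message_size + b))
        return RS_RECURSIVE_HALVING;
    return RS_RING;
}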
@ -525,9 +521,9 @@ int ompi_coll_tuned_allgather_intra_dec_fixed(void *sbuf, int scount,
/* Special case for 2 processes */
if (communicator_size == 2) {
return ompi_coll_tuned_allgather_intra_two_procs (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
return ompi_coll_base_allgather_intra_two_procs(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
}
/* Determine complete data size */
@ -550,23 +546,23 @@ int ompi_coll_tuned_allgather_intra_dec_fixed(void *sbuf, int scount,
*/
if (total_dsize < 50000) {
if (pow2_size == communicator_size) {
return ompi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
return ompi_coll_base_allgather_intra_recursivedoubling(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
} else {
return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
return ompi_coll_base_allgather_intra_bruck(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
}
} else {
if (communicator_size % 2) {
return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
return ompi_coll_base_allgather_intra_ring(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
} else {
return ompi_coll_tuned_allgather_intra_neighborexchange(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
return ompi_coll_base_allgather_intra_neighborexchange(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
}
}
@ -581,17 +577,17 @@ int ompi_coll_tuned_allgather_intra_dec_fixed(void *sbuf, int scount,
- for everything else use ring.
*/
if ((pow2_size == communicator_size) && (total_dsize < 524288)) {
return ompi_coll_tuned_allgather_intra_recursivedoubling(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
return ompi_coll_base_allgather_intra_recursivedoubling(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
} else if (total_dsize <= 81920) {
return ompi_coll_tuned_allgather_intra_bruck(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
return ompi_coll_base_allgather_intra_bruck(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
}
return ompi_coll_tuned_allgather_intra_ring(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
return ompi_coll_base_allgather_intra_ring(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
comm, module);
#endif /* defined(USE_MPICH2_DECISION) */
}
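Condensing the default (non-MPICH2) branch above, with the two-process special case omitted:

#include <stddef.h>

enum ag_alg { AG_RECURSIVE_DOUBLING, AG_BRUCK, AG_RING, AG_NEIGHBOR };

static enum ag_alg allgather_fixed_choice(int communicator_size, int pow2_size,
                                          size_t total_dsize)
{
    if (total_dsize < 50000) {
        /* small data: recursive doubling on power-of-two communicators,
         * bruck everywhere else */
        return (pow2_size == communicator_size) ? AG_RECURSIVE_DOUBLING : AG_BRUCK;
    }
    /* large data: ring for odd communicator sizes, neighbor exchange for even */
    return (communicator_size % 2) ? AG_RING : AG_NEIGHBOR;
}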
@ -620,9 +616,9 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(void *sbuf, int scount,
/* Special case for 2 processes */
if (communicator_size == 2) {
return ompi_coll_tuned_allgatherv_intra_two_procs (sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
return ompi_coll_base_allgatherv_intra_two_procs(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
}
/* Determine complete data size */
@ -639,18 +635,18 @@ int ompi_coll_tuned_allgatherv_intra_dec_fixed(void *sbuf, int scount,
/* Decision based on allgather decision. */
if (total_dsize < 50000) {
return ompi_coll_tuned_allgatherv_intra_bruck(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
return ompi_coll_base_allgatherv_intra_bruck(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
} else {
if (communicator_size % 2) {
return ompi_coll_tuned_allgatherv_intra_ring(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
return ompi_coll_base_allgatherv_intra_ring(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
} else {
return ompi_coll_tuned_allgatherv_intra_neighborexchange(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
return ompi_coll_base_allgatherv_intra_neighborexchange(sbuf, scount, sdtype,
rbuf, rcounts, rdispls, rdtype,
comm, module);
}
}
}
@ -701,29 +697,28 @@ int ompi_coll_tuned_gather_intra_dec_fixed(void *sbuf, int scount,
}
if (block_size > large_block_size) {
return ompi_coll_tuned_gather_intra_linear_sync (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module,
large_segment_size);
return ompi_coll_base_gather_intra_linear_sync(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module,
large_segment_size);
} else if (block_size > intermediate_block_size) {
return ompi_coll_tuned_gather_intra_linear_sync (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module,
small_segment_size);
return ompi_coll_base_gather_intra_linear_sync(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module,
small_segment_size);
} else if ((communicator_size > large_communicator_size) ||
((communicator_size > small_communicator_size) &&
(block_size < small_block_size))) {
return ompi_coll_tuned_gather_intra_binomial (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
return ompi_coll_base_gather_intra_binomial(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
}
/* Otherwise, use basic linear */
return ompi_coll_tuned_gather_intra_basic_linear (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
return ompi_coll_base_gather_intra_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
}
/*
@ -763,11 +758,11 @@ int ompi_coll_tuned_scatter_intra_dec_fixed(void *sbuf, int scount,
if ((communicator_size > small_comm_size) &&
(block_size < small_block_size)) {
return ompi_coll_tuned_scatter_intra_binomial (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
return ompi_coll_base_scatter_intra_binomial(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
}
return ompi_coll_tuned_scatter_intra_basic_linear (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
return ompi_coll_base_scatter_intra_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
}
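The gather and scatter decisions above share one shape: the binomial tree only pays off when many ranks move small per-rank blocks. A self-contained sketch of the scatter case; the threshold constants are set just above the shown hunks and appear here as parameters:

#include <stddef.h>

enum sc_alg { SC_BINOMIAL, SC_BASIC_LINEAR };

static enum sc_alg scatter_fixed_choice(int communicator_size, size_t block_size,
                                        int small_comm_size, size_t small_block_size)
{
    if (communicator_size > small_comm_size && block_size < small_block_size)
        return SC_BINOMIAL;       /* many ranks, small blocks */
    return SC_BASIC_LINEAR;       /* everything else stays linear */
}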

View file

@ -1,9 +1,8 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -28,7 +27,7 @@
#include "coll_tuned.h"
/* need to include our own topo prototypes so we can malloc data on the comm correctly */
#include "coll_tuned_topo.h"
#include "ompi/mca/coll/base/coll_base_topo.h"
/* also need the dynamic rule structures */
#include "coll_tuned_dynamic_rules.h"
@ -97,6 +96,10 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t**
/* make space and init the algorithm rules for each of the n_collectives MPI collectives */
alg_rules = ompi_coll_tuned_mk_alg_rules (n_collectives);
if (NULL == alg_rules) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"cannot cannot allocate rules for file [%s]\n", fname));
goto on_file_error;
}
if (NULL == alg_rules) {
OPAL_OUTPUT((ompi_coll_tuned_stream,"cannot cannot allocate rules for file [%s]\n", fname));
@ -127,10 +130,7 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t**
if (alg_rules[CI].alg_rule_id != CI) {
OPAL_OUTPUT((ompi_coll_tuned_stream, "Internal error in handling collective ID %d\n", CI));
fclose(fptr);
ompi_coll_tuned_free_all_rules (alg_rules, n_collectives);
*rules = (ompi_coll_alg_rule_t*) NULL;
return (-4);
goto on_file_error;
}
OPAL_OUTPUT((ompi_coll_tuned_stream, "Reading dynamic rule for collective ID %d\n", CI));
alg_p = &alg_rules[CI];
@ -291,4 +291,3 @@ static long getnext (FILE *fptr)
if ('#' == trash) skiptonewline (fptr);
} while (1);
}

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -25,7 +25,7 @@
#include "coll_tuned.h"
/* need to include our own topo prototypes so we can malloc data on the comm correctly */
#include "coll_tuned_topo.h"
#include "ompi/mca/coll/base/coll_base_topo.h"
/* also need the dynamic rule structures */
#include "coll_tuned_dynamic_rules.h"
@ -33,7 +33,7 @@
#include <stdlib.h>
#include <stdio.h>
#include "coll_tuned_util.h"
#include "ompi/mca/coll/base/coll_base_util.h"
ompi_coll_alg_rule_t* ompi_coll_tuned_mk_alg_rules (int n_alg)
@ -389,4 +389,3 @@ int ompi_coll_tuned_get_target_method_params (ompi_coll_com_rule_t* base_com_rul
/* return the algorithm/method to use */
return (best_msg_p->result_alg);
}

View file

@ -0,0 +1,198 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned.h"
#include "ompi/mca/coll/base/coll_base_topo.h"
#include "ompi/mca/coll/base/coll_base_util.h"
/* gather algorithm variables */
static int coll_tuned_gather_forced_algorithm = 0;
static int coll_tuned_gather_segment_size = 0;
static int coll_tuned_gather_tree_fanout;
static int coll_tuned_gather_chain_fanout;
/* valid values for coll_tuned_gather_forced_algorithm */
static mca_base_var_enum_value_t gather_algorithms[] = {
{0, "ignore"},
{1, "basic_linear"},
{2, "binomial"},
{3, "linear_sync"},
{0, NULL}
};
/* The following are used by dynamic and forced rules */
/* publish details of each algorithm and if it's forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map
routines */
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values
and perms */
/* modules do not call this; they call the forced_getvalues routine instead */
int
ompi_coll_tuned_gather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
int cnt;
for( cnt = 0; NULL != gather_algorithms[cnt].string; cnt++ );
ompi_coll_tuned_forced_max_algorithms[GATHER] = cnt;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_count",
"Number of gather algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&cnt);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_gather_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_gather_algorithms", gather_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm",
"Which gather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial, 3 linear with synchronization.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_gather_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_gather_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_segmentsize",
"Segment size in bytes used by default for gather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_gather_segment_size);
coll_tuned_gather_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_tree_fanout",
"Fanout for n-tree used for gather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_gather_tree_fanout);
coll_tuned_gather_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"gather_algorithm_chain_fanout",
"Fanout for chains used for gather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_gather_chain_fanout);
return (MPI_SUCCESS);
}
int
ompi_coll_tuned_gather_intra_do_forced(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:gather_intra_do_forced selected algorithm %d",
tuned_module->user_forced[GATHER].algorithm));
switch (tuned_module->user_forced[GATHER].algorithm) {
case (0):
return ompi_coll_tuned_gather_intra_dec_fixed(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (1):
return ompi_coll_base_gather_intra_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (2):
return ompi_coll_base_gather_intra_binomial(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (3):
return ompi_coll_base_gather_intra_linear_sync(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module,
tuned_module->user_forced[GATHER].segsize);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:gather_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[GATHER].algorithm,
ompi_coll_tuned_forced_max_algorithms[GATHER]));
return (MPI_ERR_ARG);
}
int
ompi_coll_tuned_gather_intra_do_this(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:gather_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0):
return ompi_coll_tuned_gather_intra_dec_fixed(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (1):
return ompi_coll_base_gather_intra_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (2):
return ompi_coll_base_gather_intra_binomial(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (3):
return ompi_coll_base_gather_intra_linear_sync(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module,
segsize);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:gather_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[GATHER]));
return (MPI_ERR_ARG);
}
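For quick reference, the algorithm numbers accepted by both dispatchers above map onto the base routines as follows; gather_alg_name is a hypothetical helper mirroring the switch bodies:

static const char *gather_alg_name(int algorithm)
{
    switch (algorithm) {
    case 0: return "ignore (fall through to the fixed decision)";
    case 1: return "ompi_coll_base_gather_intra_basic_linear";
    case 2: return "ompi_coll_base_gather_intra_binomial";
    case 3: return "ompi_coll_base_gather_intra_linear_sync (uses segsize)";
    default: return "invalid -> MPI_ERR_ARG";
    }
}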

View file

@ -2,7 +2,7 @@
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@ -26,13 +26,13 @@
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/mca/coll/base/coll_base_topo.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_dynamic_rules.h"
#include "coll_tuned_dynamic_file.h"
static int tuned_module_enable(mca_coll_base_module_t *module,
struct ompi_communicator_t *comm);
struct ompi_communicator_t *comm);
/*
* Initial query function that is invoked during MPI_INIT, allowing
* this component to disqualify itself if it doesn't support the
@ -145,20 +145,20 @@ ompi_coll_tuned_forced_getvalues( enum COLLTYPE type,
return (MPI_SUCCESS);
}
#define COLL_TUNED_EXECUTE_IF_DYNAMIC(DATA, TYPE, EXECUTE) \
#define COLL_TUNED_EXECUTE_IF_DYNAMIC(TMOD, TYPE, EXECUTE) \
{ \
int need_dynamic_decision = 0; \
ompi_coll_tuned_forced_getvalues( (TYPE), &((DATA)->user_forced[(TYPE)]) ); \
(DATA)->com_rules[(TYPE)] = NULL; \
if( 0 != (DATA)->user_forced[(TYPE)].algorithm ) { \
ompi_coll_tuned_forced_getvalues( (TYPE), &((TMOD)->user_forced[(TYPE)]) ); \
(TMOD)->com_rules[(TYPE)] = NULL; \
if( 0 != (TMOD)->user_forced[(TYPE)].algorithm ) { \
need_dynamic_decision = 1; \
EXECUTE; \
} \
if( NULL != mca_coll_tuned_component.all_base_rules ) { \
(DATA)->com_rules[(TYPE)] \
(TMOD)->com_rules[(TYPE)] \
= ompi_coll_tuned_get_com_rule_ptr( mca_coll_tuned_component.all_base_rules, \
(TYPE), size ); \
if( NULL != (DATA)->com_rules[(TYPE)] ) { \
if( NULL != (TMOD)->com_rules[(TYPE)] ) { \
need_dynamic_decision = 1; \
} \
} \
@ -178,7 +178,7 @@ tuned_module_enable( mca_coll_base_module_t *module,
{
int size;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t *) module;
mca_coll_tuned_comm_t *data = NULL;
mca_coll_base_comm_t *data = NULL;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init called."));
@ -198,25 +198,12 @@ tuned_module_enable( mca_coll_base_module_t *module,
* we do check a MCA parameter to see if we should allocate this memory
*
* The default is set very high
*
*/
/* if we are within the memory/size limit, allow preallocated data */
if( size <= ompi_coll_tuned_preallocate_memory_comm_size_limit ) {
data = (mca_coll_tuned_comm_t*)malloc(sizeof(struct mca_coll_tuned_comm_t) +
(sizeof(ompi_request_t *) * size * 2));
if (NULL == data) {
return OMPI_ERROR;
}
data->mcct_reqs = (ompi_request_t **) (data + 1);
data->mcct_num_reqs = size * 2;
} else {
data = (mca_coll_tuned_comm_t*)malloc(sizeof(struct mca_coll_tuned_comm_t));
if (NULL == data) {
return OMPI_ERROR;
}
data->mcct_reqs = (ompi_request_t **) NULL;
data->mcct_num_reqs = 0;
data = OBJ_NEW(mca_coll_base_comm_t);
if (NULL == data) {
return OMPI_ERROR;
}
if (ompi_coll_tuned_use_dynamic_rules) {
@ -230,37 +217,37 @@ tuned_module_enable( mca_coll_base_module_t *module,
* next dynamic state, recheck all forced rules as well
* warning, we should check to make sure this is really an INTRA comm here...
*/
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, ALLGATHER,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, ALLGATHER,
tuned_module->super.coll_allgather = ompi_coll_tuned_allgather_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, ALLGATHERV,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, ALLGATHERV,
tuned_module->super.coll_allgatherv = ompi_coll_tuned_allgatherv_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, ALLREDUCE,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, ALLREDUCE,
tuned_module->super.coll_allreduce = ompi_coll_tuned_allreduce_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, ALLTOALL,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, ALLTOALL,
tuned_module->super.coll_alltoall = ompi_coll_tuned_alltoall_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, ALLTOALLV,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, ALLTOALLV,
tuned_module->super.coll_alltoallv = ompi_coll_tuned_alltoallv_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, ALLTOALLW,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, ALLTOALLW,
tuned_module->super.coll_alltoallw = NULL);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, BARRIER,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, BARRIER,
tuned_module->super.coll_barrier = ompi_coll_tuned_barrier_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, BCAST,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, BCAST,
tuned_module->super.coll_bcast = ompi_coll_tuned_bcast_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, EXSCAN,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, EXSCAN,
tuned_module->super.coll_exscan = NULL);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, GATHER,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, GATHER,
tuned_module->super.coll_gather = ompi_coll_tuned_gather_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, GATHERV,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, GATHERV,
tuned_module->super.coll_gatherv = NULL);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, REDUCE,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, REDUCE,
tuned_module->super.coll_reduce = ompi_coll_tuned_reduce_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, REDUCESCATTER,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, REDUCESCATTER,
tuned_module->super.coll_reduce_scatter = ompi_coll_tuned_reduce_scatter_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, SCAN,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, SCAN,
tuned_module->super.coll_scan = NULL);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, SCATTER,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, SCATTER,
tuned_module->super.coll_scatter = ompi_coll_tuned_scatter_intra_dec_dynamic);
COLL_TUNED_EXECUTE_IF_DYNAMIC(data, SCATTERV,
COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, SCATTERV,
tuned_module->super.coll_scatterv = NULL);
if( false == ompi_coll_tuned_use_dynamic_rules ) {
@ -286,7 +273,7 @@ tuned_module_enable( mca_coll_base_module_t *module,
data->cached_in_order_bintree = NULL;
/* All done */
tuned_module->tuned_data = data;
tuned_module->super.base_data = data;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Tuned is in use"));
return OMPI_SUCCESS;
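For reference, a hand expansion of the rewritten COLL_TUNED_EXECUTE_IF_DYNAMIC for ALLGATHER, following the macro text shown in the hunk above; it assumes the surrounding OMPI declarations exactly as the macro does, and the macro's tail beyond the shown hunk is omitted:

{
    int need_dynamic_decision = 0;
    ompi_coll_tuned_forced_getvalues( ALLGATHER, &(tuned_module->user_forced[ALLGATHER]) );
    tuned_module->com_rules[ALLGATHER] = NULL;
    if( 0 != tuned_module->user_forced[ALLGATHER].algorithm ) {
        need_dynamic_decision = 1;
        /* the EXECUTE argument: install the dynamic decision function */
        tuned_module->super.coll_allgather = ompi_coll_tuned_allgather_intra_dec_dynamic;
    }
    if( NULL != mca_coll_tuned_component.all_base_rules ) {
        tuned_module->com_rules[ALLGATHER]
            = ompi_coll_tuned_get_com_rule_ptr( mca_coll_tuned_component.all_base_rules,
                                                ALLGATHER, size );
        if( NULL != tuned_module->com_rules[ALLGATHER] ) {
            need_dynamic_decision = 1;
        }
    }
    /* ... remainder of the macro lies outside the shown hunk ... */
}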

View file

@ -0,0 +1,222 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_base_topo.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/op/op.h"
#include "coll_tuned.h"
/* reduce algorithm variables */
static int coll_tuned_reduce_forced_algorithm = 0;
static int coll_tuned_reduce_segment_size = 0;
static int coll_tuned_reduce_max_requests;
static int coll_tuned_reduce_tree_fanout;
static int coll_tuned_reduce_chain_fanout;
/* valid values for coll_tuned_reduce_forced_algorithm */
static mca_base_var_enum_value_t reduce_algorithms[] = {
{0, "ignore"},
{1, "linear"},
{2, "chain"},
{3, "pipeline"},
{4, "binary"},
{5, "binomial"},
{6, "in-order_binary"},
{0, NULL}
};
/**
* The following are used by dynamic and forced rules
*
* publish details of each algorithm and if it's forced/fixed/locked in
* as you add methods/algorithms you must update this and the query/map routines
*
* this routine is called by the component only
* this makes sure that the mca parameters are set to their initial values
* and perms; modules do not call this, they call the forced_getvalues
* routine instead.
*/
int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t*new_enum;
int cnt;
for( cnt = 0; NULL != reduce_algorithms[cnt].string; cnt++ );
ompi_coll_tuned_forced_max_algorithms[REDUCE] = cnt;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_count",
"Number of reduce algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&cnt);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_reduce_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_reduce_algorithms", reduce_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm",
"Which reduce algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 chain, 3 pipeline, 4 binary, 5 binomial, 6 in-order binary",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_reduce_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_segmentsize",
"Segment size in bytes used by default for reduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_segment_size);
coll_tuned_reduce_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_tree_fanout",
"Fanout for n-tree used for reduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_tree_fanout);
coll_tuned_reduce_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_chain_fanout",
"Fanout for chains used for reduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_chain_fanout);
coll_tuned_reduce_max_requests = 0; /* no limit for reduce by default */
mca_param_indices->max_requests_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_max_requests",
"Maximum number of outstanding send requests on leaf nodes. 0 means no limit.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_max_requests);
if (mca_param_indices->max_requests_param_index < 0) {
return mca_param_indices->max_requests_param_index;
}
if (coll_tuned_reduce_max_requests < 0) {
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
opal_output( 0, "Maximum outstanding requests must be a positive number or 0. Initializing to 0 (no limit).\n" );
}
coll_tuned_reduce_max_requests = 0;
}
return (MPI_SUCCESS);
}
int ompi_coll_tuned_reduce_intra_do_forced(void *sbuf, void* rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
const int segsize = tuned_module->user_forced[REDUCE].segsize;
const int chain_fanout = tuned_module->user_forced[REDUCE].chain_fanout;
const int max_requests = tuned_module->user_forced[REDUCE].max_requests;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced selected algorithm %d",
tuned_module->user_forced[REDUCE].algorithm));
switch (tuned_module->user_forced[REDUCE].algorithm) {
case (0): return ompi_coll_tuned_reduce_intra_dec_fixed(sbuf, rbuf, count, dtype,
op, root, comm, module);
case (1): return ompi_coll_base_reduce_intra_basic_linear(sbuf, rbuf, count, dtype,
op, root, comm, module);
case (2): return ompi_coll_base_reduce_intra_chain(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, chain_fanout, max_requests);
case (3): return ompi_coll_base_reduce_intra_pipeline(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (4): return ompi_coll_base_reduce_intra_binary(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (5): return ompi_coll_base_reduce_intra_binomial(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (6): return ompi_coll_base_reduce_intra_in_order_binary(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[REDUCE].algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE]));
return (MPI_ERR_ARG);
}
int ompi_coll_tuned_reduce_intra_do_this(void *sbuf, void* rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op, int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout,
int segsize, int max_requests )
{
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0): return ompi_coll_tuned_reduce_intra_dec_fixed(sbuf, rbuf, count, dtype,
op, root, comm, module);
case (1): return ompi_coll_base_reduce_intra_basic_linear(sbuf, rbuf, count, dtype,
op, root, comm, module);
case (2): return ompi_coll_base_reduce_intra_chain(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, faninout, max_requests);
case (3): return ompi_coll_base_reduce_intra_pipeline(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (4): return ompi_coll_base_reduce_intra_binary(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (5): return ompi_coll_base_reduce_intra_binomial(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
case (6): return ompi_coll_base_reduce_intra_in_order_binary(sbuf, rbuf, count, dtype,
op, root, comm, module,
segsize, max_requests);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCE]));
return (MPI_ERR_ARG);
}
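/*
 * A minimal caller sketch for the dispatcher above.  The tuning values are
 * illustrative only; sbuf/rbuf/count/dtype/op/root/comm/module are assumed
 * to be supplied by the surrounding code, as in any coll entry point, and
 * the required headers are the ones already included in this file.
 */
static int example_forced_binomial_reduce(void *sbuf, void *rbuf, int count,
                                          struct ompi_datatype_t *dtype,
                                          struct ompi_op_t *op, int root,
                                          struct ompi_communicator_t *comm,
                                          mca_coll_base_module_t *module)
{
    const int algorithm = 5;     /* binomial, per the switch above */
    const int faninout  = 0;     /* ignored by the binomial case */
    const int segsize   = 32768; /* pipeline in 32KB segments */
    const int max_reqs  = 4;     /* bound outstanding leaf sends */
    return ompi_coll_tuned_reduce_intra_do_this(sbuf, rbuf, count, dtype,
                                                op, root, comm, module,
                                                algorithm, faninout,
                                                segsize, max_reqs);
}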


@ -0,0 +1,173 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mpi.h"
#include "opal/util/bit_ops.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_base_topo.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/op/op.h"
#include "coll_tuned.h"
/* reduce_scatter algorithm variables */
static int coll_tuned_reduce_scatter_forced_algorithm = 0;
static int coll_tuned_reduce_scatter_segment_size = 0;
static int coll_tuned_reduce_scatter_tree_fanout;
static int coll_tuned_reduce_scatter_chain_fanout;
/* valid values for coll_tuned_reduce_scatter_forced_algorithm */
static mca_base_var_enum_value_t reduce_scatter_algorithms[] = {
{0, "ignore"},
{1, "non-overlapping"},
{2, "recursive_halfing"},
{3, "ring"},
{0, NULL}
};
/**
 * The following are used by dynamic and forced rules.
 *
 * Publish details of each algorithm and whether it is forced/fixed/locked
 * in; as you add methods/algorithms you must update this and the query/map
 * routines.
 *
 * This routine is called by the component only: it makes sure the MCA
 * parameters are set to their initial values and permissions.  Modules do
 * not call this; they call the forced_getvalues routine instead.
 */
int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
int cnt;
for( cnt = 0; NULL != reduce_scatter_algorithms[cnt].string; cnt++ );
ompi_coll_tuned_forced_max_algorithms[REDUCESCATTER] = cnt;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_count",
"Number of reduce_scatter algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&cnt);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_reduce_scatter_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_reduce_scatter_algorithms", reduce_scatter_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm",
"Which reduce reduce_scatter algorithm is used. Can be locked down to choice of: 0 ignore, 1 non-overlapping (Reduce + Scatterv), 2 recursive halving, 3 ring",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_scatter_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_reduce_scatter_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_segmentsize",
"Segment size in bytes used by default for reduce_scatter algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_scatter_segment_size);
coll_tuned_reduce_scatter_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_tree_fanout",
"Fanout for n-tree used for reduce_scatter algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_scatter_tree_fanout);
coll_tuned_reduce_scatter_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_algorithm_chain_fanout",
"Fanout for chains used for reduce_scatter algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_reduce_scatter_chain_fanout);
return (MPI_SUCCESS);
}
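/*
 * The empty-body for loop above is the sentinel-count idiom used throughout
 * these init routines: the algorithm table ends with a {0, NULL} entry.  As
 * a standalone illustration (the helper name is hypothetical), the same
 * logic reads:
 */
static int count_enum_values(const mca_base_var_enum_value_t *table)
{
    int cnt;
    for (cnt = 0; NULL != table[cnt].string; cnt++)
        ;  /* empty body: the termination test does all the work */
    return cnt;
}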
int ompi_coll_tuned_reduce_scatter_intra_do_forced(void *sbuf, void* rbuf,
int *rcounts,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_forced selected algorithm %d",
tuned_module->user_forced[REDUCESCATTER].algorithm));
switch (tuned_module->user_forced[REDUCESCATTER].algorithm) {
case (0): return ompi_coll_tuned_reduce_scatter_intra_dec_fixed(sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (1): return ompi_coll_base_reduce_scatter_intra_nonoverlapping(sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (2): return ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (3): return ompi_coll_base_reduce_scatter_intra_ring(sbuf, rbuf, rcounts,
dtype, op, comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[REDUCESCATTER].algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCESCATTER]));
return (MPI_ERR_ARG);
}
int ompi_coll_tuned_reduce_scatter_intra_do_this(void *sbuf, void* rbuf,
int *rcounts,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0): return ompi_coll_tuned_reduce_scatter_intra_dec_fixed(sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (1): return ompi_coll_base_reduce_scatter_intra_nonoverlapping(sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (2): return ompi_coll_base_reduce_scatter_intra_basic_recursivehalving(sbuf, rbuf, rcounts,
dtype, op, comm, module);
case (3): return ompi_coll_base_reduce_scatter_intra_ring(sbuf, rbuf, rcounts,
dtype, op, comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_scatter_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCESCATTER]));
return (MPI_ERR_ARG);
}
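/*
 * A caller sketch for the reduce_scatter dispatcher above; illustrative
 * values only.  The distinctive argument is rcounts: one receive count per
 * rank, summing to the total reduction length.
 */
static int example_ring_reduce_scatter(void *sbuf, void *rbuf,
                                       struct ompi_datatype_t *dtype,
                                       struct ompi_op_t *op,
                                       struct ompi_communicator_t *comm,
                                       mca_coll_base_module_t *module)
{
    int rcounts[4] = { 2, 2, 2, 2 };  /* assumes a 4-process communicator */
    return ompi_coll_tuned_reduce_scatter_intra_do_this(sbuf, rbuf, rcounts,
                                                        dtype, op, comm, module,
                                                        3 /* ring */,
                                                        0 /* faninout unused */,
                                                        0 /* segsize unused */);
}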


@ -1,421 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
#include "coll_tuned_util.h"
/* scatter algorithm variables */
static int coll_tuned_scatter_algorithm_count = 2;
static int coll_tuned_scatter_forced_algorithm = 0;
static int coll_tuned_scatter_segment_size = 0;
static int coll_tuned_scatter_tree_fanout;
static int coll_tuned_scatter_chain_fanout;
/* valid values for coll_tuned_scatter_forced_algorithm */
static mca_base_var_enum_value_t scatter_algorithms[] = {
{0, "ignore"},
{1, "basic_linear"},
{2, "binomial"},
{0, NULL}
};
int
ompi_coll_tuned_scatter_intra_binomial(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int line = -1, i, rank, vrank, size, total_send = 0, err;
char *ptmp, *tempbuf = NULL;
ompi_coll_tree_t* bmtree;
MPI_Status status;
MPI_Aint sextent, slb, strue_lb, strue_extent;
MPI_Aint rextent, rlb, rtrue_lb, rtrue_extent;
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_tuned_stream,
"ompi_coll_tuned_scatter_intra_binomial rank %d", rank));
/* create the binomial tree */
COLL_TUNED_UPDATE_IN_ORDER_BMTREE( comm, tuned_module, root );
bmtree = data->cached_in_order_bmtree;
ompi_datatype_get_extent(sdtype, &slb, &sextent);
ompi_datatype_get_true_extent(sdtype, &strue_lb, &strue_extent);
ompi_datatype_get_extent(rdtype, &rlb, &rextent);
ompi_datatype_get_true_extent(rdtype, &rtrue_lb, &rtrue_extent);
vrank = (rank - root + size) % size;
ptmp = (char *) rbuf;  /* assume a leaf node by default: just use rbuf */
if (rank == root) {
if (0 == root) {
/* root on 0, just use the send buffer */
ptmp = (char *) sbuf;
if (rbuf != MPI_IN_PLACE) {
/* local copy to rbuf */
err = ompi_datatype_sndrcv(sbuf, scount, sdtype,
rbuf, rcount, rdtype);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
} else {
/* root is not on 0, allocate temp buffer for send */
tempbuf = (char *) malloc(strue_extent + ((ptrdiff_t)scount * (ptrdiff_t)size - 1) * sextent);
if (NULL == tempbuf) {
err = OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
}
ptmp = tempbuf - strue_lb;
/* and rotate data so they will eventually end up in the right place */
err = ompi_datatype_copy_content_same_ddt(sdtype, (ptrdiff_t)scount * (ptrdiff_t)(size - root),
ptmp, (char *) sbuf + sextent * (ptrdiff_t)root * (ptrdiff_t)scount);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
err = ompi_datatype_copy_content_same_ddt(sdtype, (ptrdiff_t)scount * (ptrdiff_t)root,
ptmp + sextent * (ptrdiff_t)scount * (ptrdiff_t)(size - root), (char *)sbuf);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
if (rbuf != MPI_IN_PLACE) {
/* local copy to rbuf */
err = ompi_datatype_sndrcv(ptmp, scount, sdtype,
rbuf, rcount, rdtype);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
}
total_send = scount;
} else if (!(vrank % 2)) {
/* non-root, non-leaf nodes: allocate temp buffer for recv;
* the most we need is rcount*size/2 */
tempbuf = (char *) malloc(rtrue_extent + ((ptrdiff_t)rcount * (ptrdiff_t)size - 1) * rextent);
if (NULL == tempbuf) {
err= OMPI_ERR_OUT_OF_RESOURCE; line = __LINE__; goto err_hndl;
}
ptmp = tempbuf - rtrue_lb;
sdtype = rdtype;
scount = rcount;
sextent = rextent;
total_send = scount;
}
if (!(vrank % 2)) {
if (rank != root) {
/* recv from parent on non-root */
err = MCA_PML_CALL(recv(ptmp, (ptrdiff_t)rcount * (ptrdiff_t)size, rdtype, bmtree->tree_prev,
MCA_COLL_BASE_TAG_SCATTER, comm, &status));
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
/* local copy to rbuf */
err = ompi_datatype_sndrcv(ptmp, scount, sdtype,
rbuf, rcount, rdtype);
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
/* send to children on all non-leaf */
for (i = 0; i < bmtree->tree_nextsize; i++) {
size_t mycount = 0;
int vkid;
/* figure out how much data I have to send to this child */
vkid = (bmtree->tree_next[i] - root + size) % size;
mycount = vkid - vrank;
if( (int)mycount > (size - vkid) )
mycount = size - vkid;
mycount *= scount;
err = MCA_PML_CALL(send(ptmp + (ptrdiff_t)total_send * sextent, mycount, sdtype,
bmtree->tree_next[i],
MCA_COLL_BASE_TAG_SCATTER,
MCA_PML_BASE_SEND_STANDARD, comm));
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
total_send += mycount;
}
if (NULL != tempbuf)
free(tempbuf);
} else {
/* recv from parent on leaf nodes */
err = MCA_PML_CALL(recv(ptmp, rcount, rdtype, bmtree->tree_prev,
MCA_COLL_BASE_TAG_SCATTER, comm, &status));
if (MPI_SUCCESS != err) { line = __LINE__; goto err_hndl; }
}
return MPI_SUCCESS;
err_hndl:
if (NULL != tempbuf)
free(tempbuf);
OPAL_OUTPUT((ompi_coll_tuned_stream, "%s:%4d\tError occurred %d, rank %2d",
__FILE__, line, err, rank));
return err;
}
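/*
 * A worked example of the per-child block-size computation above, as a
 * standalone program.  The child list { 4, 2, 1 } assumes the usual
 * binomial tree rooted at (virtual) rank 0; with size = 6 the clamp fires
 * for the vrank-4 child, whose subtree only covers ranks 4 and 5.
 */
#include <stdio.h>

int main(void)
{
    const int size = 6, root = 0, scount = 3;
    const int rank = root;                         /* look at the root's sends */
    const int vrank = (rank - root + size) % size; /* 0 at the root */
    const int children[] = { 4, 2, 1 };            /* assumed tree layout */

    for (int i = 0; i < 3; i++) {
        int vkid = children[i];          /* child's virtual rank */
        int mycount = vkid - vrank;      /* ranks in the child's subtree... */
        if (mycount > size - vkid)
            mycount = size - vkid;       /* ...clamped at the communicator end */
        printf("child vrank %d receives %d elements\n", vkid, mycount * scount);
    }
    return 0;  /* total sent: 2*3 + 2*3 + 1*3 = 15 = scount * (size - 1) */
}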
/*
 * Linear functions are copied from the BASIC coll module.  They do not
 * segment the message and are simple implementations, but for some small
 * number of nodes and/or small data sizes they are just as fast as the
 * tuned/tree-based segmenting operations, and as such may be selected by
 * the decision functions.  These are copied into this module due to the
 * way we select modules in V1; in V2 we will handle this differently and
 * so will not have to duplicate code.
 * JPG following the examples from other coll_tuned implementations. Dec06.
 */
/* copied function (with appropriate renaming) starts here */
/*
* scatter_intra
*
* Function: - basic scatter operation
* Accepts: - same arguments as MPI_Scatter()
* Returns: - MPI_SUCCESS or error code
*/
int
ompi_coll_tuned_scatter_intra_basic_linear(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int i, rank, size, err;
ptrdiff_t lb, incr;
char *ptmp;
/* Initialize */
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
/* If not root, receive data. */
if (rank != root) {
err = MCA_PML_CALL(recv(rbuf, rcount, rdtype, root,
MCA_COLL_BASE_TAG_SCATTER,
comm, MPI_STATUS_IGNORE));
return err;
}
/* I am the root, loop sending data. */
err = ompi_datatype_get_extent(sdtype, &lb, &incr);
if (OMPI_SUCCESS != err) {
return OMPI_ERROR;
}
incr *= scount;
for (i = 0, ptmp = (char *) sbuf; i < size; ++i, ptmp += incr) {
/* simple optimization */
if (i == rank) {
if (MPI_IN_PLACE != rbuf) {
err =
ompi_datatype_sndrcv(ptmp, scount, sdtype, rbuf, rcount,
rdtype);
}
} else {
err = MCA_PML_CALL(send(ptmp, scount, sdtype, i,
MCA_COLL_BASE_TAG_SCATTER,
MCA_PML_BASE_SEND_STANDARD, comm));
}
if (MPI_SUCCESS != err) {
return err;
}
}
/* All done */
return MPI_SUCCESS;
}
/* copied function (with appropriate renaming) ends here */
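/*
 * An illustration of the root-side stride in the linear scatter above,
 * with plain ints standing in for ompi datatypes (so the extent is just
 * sizeof(int)).  Each destination's block starts extent * scount bytes
 * past the previous one.
 */
#include <stddef.h>
#include <stdio.h>

int main(void)
{
    const int size = 4, scount = 3;
    int sbuf[4 * 3] = { 0 };                 /* size * scount contiguous ints */
    const ptrdiff_t incr = (ptrdiff_t)sizeof(int) * scount;
    const char *base = (const char *)sbuf;
    const char *ptmp = base;

    for (int i = 0; i < size; i++, ptmp += incr)
        printf("rank %d gets bytes [%td, %td)\n",
               i, ptmp - base, ptmp - base + incr);
    return 0;
}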
/* The following are used by dynamic and forced rules. */
/* Publish details of each algorithm and whether it is forced/fixed/locked in. */
/* As you add methods/algorithms you must update this and the query/map
   routines. */
/* This routine is called by the component only. */
/* It makes sure that the MCA parameters are set to their initial values
   and permissions. */
/* Modules do not call this; they call the forced_getvalues routine instead. */
int
ompi_coll_tuned_scatter_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
ompi_coll_tuned_forced_max_algorithms[SCATTER] = coll_tuned_scatter_algorithm_count;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_count",
"Number of scatter algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&coll_tuned_scatter_algorithm_count);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_scatter_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_scatter_algorithms", scatter_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm",
"Which scatter algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_scatter_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_scatter_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_segmentsize",
"Segment size in bytes used by default for scatter algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_scatter_segment_size);
coll_tuned_scatter_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_tree_fanout",
"Fanout for n-tree used for scatter algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_scatter_tree_fanout);
coll_tuned_scatter_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index=
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_chain_fanout",
"Fanout for chains used for scatter algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_scatter_chain_fanout);
return (MPI_SUCCESS);
}
int
ompi_coll_tuned_scatter_intra_do_forced(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
mca_coll_tuned_comm_t *data = tuned_module->tuned_data;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:scatter_intra_do_forced selected algorithm %d",
data->user_forced[SCATTER].algorithm));
switch (data->user_forced[SCATTER].algorithm) {
case (0):
return ompi_coll_tuned_scatter_intra_dec_fixed (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (1):
return ompi_coll_tuned_scatter_intra_basic_linear (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (2):
return ompi_coll_tuned_scatter_intra_binomial(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:scatter_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
data->user_forced[SCATTER].algorithm,
ompi_coll_tuned_forced_max_algorithms[SCATTER]));
return (MPI_ERR_ARG);
} /* switch */
}
int
ompi_coll_tuned_scatter_intra_do_this(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:scatter_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0):
return ompi_coll_tuned_scatter_intra_dec_fixed (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (1):
return ompi_coll_tuned_scatter_intra_basic_linear (sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (2):
return ompi_coll_tuned_scatter_intra_binomial(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
default:
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:scatter_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm,
ompi_coll_tuned_forced_max_algorithms[SCATTER]));
return (MPI_ERR_ARG);
} /* switch */
}


@ -0,0 +1,185 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2004-2015 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/coll/base/coll_base_topo.h"
#include "ompi/mca/coll/base/coll_base_util.h"
#include "ompi/mca/pml/pml.h"
#include "coll_tuned.h"
/* scatter algorithm variables */
static int coll_tuned_scatter_forced_algorithm = 0;
static int coll_tuned_scatter_segment_size = 0;
static int coll_tuned_scatter_tree_fanout;
static int coll_tuned_scatter_chain_fanout;
/* valid values for coll_tuned_scatter_forced_algorithm */
static mca_base_var_enum_value_t scatter_algorithms[] = {
{0, "ignore"},
{1, "basic_linear"},
{2, "binomial"},
{0, NULL}
};
/* The following are used by dynamic and forced rules. */
/* Publish details of each algorithm and whether it is forced/fixed/locked in. */
/* As you add methods/algorithms you must update this and the query/map
   routines. */
/* This routine is called by the component only. */
/* It makes sure that the MCA parameters are set to their initial values
   and permissions. */
/* Modules do not call this; they call the forced_getvalues routine instead. */
int
ompi_coll_tuned_scatter_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
mca_base_var_enum_t *new_enum;
int cnt;
for( cnt = 0; NULL != scatter_algorithms[cnt].string; cnt++ );
ompi_coll_tuned_forced_max_algorithms[SCATTER] = cnt;
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_count",
"Number of scatter algorithms available",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_CONSTANT,
&cnt);
/* MPI_T: This variable should eventually be bound to a communicator */
coll_tuned_scatter_forced_algorithm = 0;
(void) mca_base_var_enum_create("coll_tuned_scatter_algorithms", scatter_algorithms, &new_enum);
mca_param_indices->algorithm_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm",
"Which scatter algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial.",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_scatter_forced_algorithm);
OBJ_RELEASE(new_enum);
if (mca_param_indices->algorithm_param_index < 0) {
return mca_param_indices->algorithm_param_index;
}
coll_tuned_scatter_segment_size = 0;
mca_param_indices->segsize_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_segmentsize",
"Segment size in bytes used by default for scatter algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_scatter_segment_size);
coll_tuned_scatter_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
mca_param_indices->tree_fanout_param_index =
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_tree_fanout",
"Fanout for n-tree used for scatter algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_scatter_tree_fanout);
coll_tuned_scatter_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
mca_param_indices->chain_fanout_param_index=
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"scatter_algorithm_chain_fanout",
"Fanout for chains used for scatter algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&coll_tuned_scatter_chain_fanout);
return (MPI_SUCCESS);
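/*
 * As with reduce above, a hedged command-line sketch: forcing the binomial
 * scatter (enum value 2) via the parameter registered here, assuming the
 * usual dynamic-rules gate of the tuned component:
 *
 *   mpirun --mca coll_tuned_use_dynamic_rules 1 \
 *          --mca coll_tuned_scatter_algorithm 2 ./my_app
 */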
}
int
ompi_coll_tuned_scatter_intra_do_forced(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:scatter_intra_do_forced selected algorithm %d",
tuned_module->user_forced[SCATTER].algorithm));
switch (tuned_module->user_forced[SCATTER].algorithm) {
case (0):
return ompi_coll_tuned_scatter_intra_dec_fixed(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (1):
return ompi_coll_base_scatter_intra_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (2):
return ompi_coll_base_scatter_intra_binomial(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:scatter_intra_do_forced attempt to select algorithm %d when only 0-%d is valid?",
tuned_module->user_forced[SCATTER].algorithm, ompi_coll_tuned_forced_max_algorithms[SCATTER]));
return MPI_ERR_ARG;
}
int
ompi_coll_tuned_scatter_intra_do_this(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int algorithm, int faninout, int segsize)
{
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:scatter_intra_do_this selected algorithm %d topo faninout %d segsize %d",
algorithm, faninout, segsize));
switch (algorithm) {
case (0):
return ompi_coll_tuned_scatter_intra_dec_fixed(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (1):
return ompi_coll_base_scatter_intra_basic_linear(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
case (2):
return ompi_coll_base_scatter_intra_binomial(sbuf, scount, sdtype,
rbuf, rcount, rdtype,
root, comm, module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream,
"coll:tuned:scatter_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[SCATTER]));
return MPI_ERR_ARG;
}